Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
600b4f19
Verified
Commit
600b4f19
authored
Apr 03, 2020
by
Vladimír Štill
Browse files
CFL: Improve the random generator heuristics
parent
33140e20
Changes
1
Hide whitespace changes
Inline
Side-by-side
cfl.py
View file @
600b4f19
...
...
@@ -50,9 +50,15 @@ class InfinityType:
def
__lt__
(
self
,
other
)
->
bool
:
return
False
def
__le__
(
self
,
other
)
->
bool
:
return
isinstance
(
other
,
InfinityType
)
def
__gt__
(
self
,
other
)
->
bool
:
return
not
isinstance
(
other
,
InfinityType
)
def
__ge__
(
self
,
other
)
->
bool
:
return
True
def
__repr__
(
self
)
->
str
:
return
"Infinity"
...
...
@@ -299,7 +305,7 @@ class CFG:
new_rules
:
CFG
.
Rules
=
dict
()
for
src
in
self
.
nonterminals
:
for
esrc
in
simple_to
[
src
]:
for
prod
in
self
.
rules
[
esrc
]
:
for
prod
in
self
.
rules
.
get
(
esrc
,
[])
:
if
len
(
prod
)
!=
1
or
prod
[
0
]
in
self
.
terminals
:
if
src
not
in
new_rules
:
new_rules
[
src
]
=
set
()
...
...
@@ -577,8 +583,10 @@ class CFG:
return
lang_size
def
_generate_random
(
self
,
min_length
:
int
,
max_length
:
int
,
seed
:
int
=
0
)
->
Iterable
[
CFG
.
Word
]:
def
_generate_random
(
self
,
min_length
:
int
,
max_length
:
int
,
seed
:
Optional
[
int
]
=
0
,
rec_bias
:
int
=
2
,
max_fin_size
:
int
=
16
)
\
->
Iterable
[
CFG
.
Word
]:
"""
Yields a stream of random words of the grammar up to {max_length} in
length. The stream is infinite. Words can repeat.
...
...
@@ -587,10 +595,29 @@ class CFG:
normalized (so a proper grammar is OK). Otherwise it might fail to
generate some words or the generation might not terminate.
"""
assert
rec_bias
>=
1
random
.
seed
(
seed
)
recursive
=
self
.
_recursive_nonterminals
()
shortest_word
=
self
.
_nonterminal_min_length
()
longest_word
=
self
.
_nonterminal_max_length
(
recursive
)
lang_size
=
self
.
_nonterminal_lang_size
(
recursive
)
def
prod_size
(
prod
:
CFG
.
Production
)
->
Union
[
int
,
InfinityType
]:
out
:
Union
[
int
,
InfinityType
]
=
1
for
x
in
prod
:
if
isinstance
(
x
,
Nonterminal
):
out
*=
lang_size
[
x
]
return
max
(
out
,
max_fin_size
)
max_fin
=
max
(
x
for
x
in
(
prod_size
(
p
)
for
_
,
p
in
self
.
productions
())
if
isinstance
(
x
,
int
))
def
prod_weight
(
prod
:
CFG
.
Production
)
->
int
:
sz
=
prod_size
(
prod
)
if
isinstance
(
sz
,
int
):
return
sz
return
max_fin
*
rec_bias
def
sentence_length_bound
(
sentence
:
CFG
.
Sentence
,
length_map
)
->
int
:
return
sum
(
map
(
lambda
s
:
length_map
[
s
]
...
...
@@ -607,7 +634,9 @@ class CFG:
while
not
CFG
.
all_terminal
(
sentence
):
current_min_len
=
sentence_min_length
(
sentence
)
candidates
:
List
[
Tuple
[
int
,
CFG
.
Production
,
int
,
int
]]
=
[]
candidates
:
List
[
Tuple
[
int
,
CFG
.
Production
]]
=
[]
weights
:
List
[
int
]
=
[]
for
i
in
range
(
len
(
sentence
)):
sym
=
sentence
[
i
]
if
not
isinstance
(
sym
,
Nonterminal
)
\
...
...
@@ -620,13 +649,15 @@ class CFG:
for
prod
in
self
.
rules
[
sym
]:
minl
=
base_min
+
sentence_min_length
(
prod
)
maxl
=
base_max
+
sentence_max_length
(
prod
)
if
minl
<
max_length
and
maxl
>
min_length
:
candidates
.
append
((
i
,
prod
,
minl
,
maxl
))
move
=
random
.
choice
(
candidates
)
if
minl
<=
max_length
and
maxl
>=
min_length
:
candidates
.
append
((
i
,
prod
))
weights
.
append
(
prod_weight
(
prod
))
# print([(CFG._terminal_sequence_to_str(p), w) for (_,p), w in zip(candidates, weights)])
move
=
random
.
choices
(
candidates
,
weights
=
weights
)[
0
]
i
=
move
[
0
]
sentence
=
sentence
[:
i
]
+
move
[
1
]
+
sentence
[
1
+
i
:]
yield
typing
.
cast
(
CFG
.
Word
,
sentence
)
return
@
staticmethod
def
_terminal_sequence_to_str
(
seq
:
Optional
[
Iterable
[
Terminal
]])
\
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment