Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
047662ef
Verified
Commit
047662ef
authored
Mar 27, 2020
by
Vladimír Štill
Browse files
CFL: An unfinished random generation support
parent
436af282
Changes
1
Hide whitespace changes
Inline
Side-by-side
cfl.py
View file @
047662ef
...
...
@@ -7,6 +7,7 @@ from copy import deepcopy
from
collections
import
deque
from
common
import
Terminal
,
Nonterminal
from
reg_automata
import
IsEquivalentResult
# TODO: to common
import
random
T
=
TypeVar
(
"T"
)
TA
=
TypeVar
(
"TA"
)
...
...
@@ -32,6 +33,27 @@ class GeneratesResult:
return
self
.
value
class InfinityType:
    """Stand-in for positive infinity usable alongside ``int`` lengths.

    Supports only what the length computations need: addition (from either
    side) and ordering comparisons. Adding anything to infinity yields the
    same infinity object. In comparisons infinity is greater than every
    value that is not itself an ``InfinityType`` and is neither less nor
    greater than another ``InfinityType``.

    The operand type is not validated; any non-``InfinityType`` operand is
    treated as a finite value.
    """

    def __add__(self, other) -> InfinityType:
        """Return ``self``: infinity absorbs any addend."""
        return self

    # ``int + Infinity`` falls back to the reflected operation.
    __radd__ = __add__

    def __lt__(self, other) -> bool:
        """Infinity is never strictly smaller than anything."""
        return False

    def __le__(self, other) -> bool:
        """Infinity is ``<=`` only another infinity."""
        return isinstance(other, InfinityType)

    def __gt__(self, other) -> bool:
        """Infinity is strictly greater than everything except infinity."""
        return not isinstance(other, InfinityType)

    def __ge__(self, other) -> bool:
        """Infinity is ``>=`` everything, including another infinity."""
        return True

    def __repr__(self) -> str:
        return "Infinity"


# Shared module-level instance used as the "unbounded length" marker.
Infinity = InfinityType()
def
zip_fill
(
seqa
:
Iterable
[
TA
],
seqb
:
Iterable
[
TB
],
sentinel
:
bool
=
False
)
\
->
Iterable
[
Tuple
[
Optional
[
TA
],
Optional
[
TB
]]]:
def
end
():
...
...
@@ -61,6 +83,23 @@ def zip_fill(seqa: Iterable[TA], seqb: Iterable[TB], sentinel: bool = False) \
yield
from
end
()
class ChangeTracker:
    """Driver for "repeat until nothing changes" loops.

    Intended use is ``for tracker in ChangeTracker(): ...``. The loop body
    runs once unconditionally and is repeated after every pass during which
    ``tracker.changed()`` was called. The object is its own iterator and
    yields itself on each pass.
    """

    def __init__(self):
        # Start in the "changed" state so that the first pass always runs.
        self._changed = True

    def __iter__(self) -> ChangeTracker:
        return self

    def __next__(self) -> ChangeTracker:
        if self._changed:
            # Reset the flag; the upcoming pass must set it again via
            # changed() to earn another iteration.
            self._changed = False
            return self
        raise StopIteration

    def changed(self) -> None:
        """Record that the current pass modified something."""
        self._changed = True
class
CFG
:
Symbol
=
Union
[
Terminal
,
Nonterminal
]
Production
=
Tuple
[
Symbol
,
...]
...
...
@@ -172,6 +211,15 @@ class CFG:
return
self
.
restrict_symbols
(
reachable
)
def is_empty(self) -> bool:
    """Tell whether the grammar generates no word at all.

    The grammar is normalized first; it is reported as empty exactly when
    the initial nonterminal of the normalized grammar has no entry in its
    rules.
    """
    normalized = self.normalized()
    start_has_rules = normalized.init in normalized.rules
    return not start_has_rules
def is_infinite(self) -> bool:
    """Tell whether the grammar generates infinitely many words.

    The decision is made on the proper form of the grammar: the language is
    reported as infinite exactly when that form has at least one recursive
    nonterminal.
    """
    recursive = self.proper()._recursive_nonterminals()
    return bool(recursive)
def
is_epsilon_normal_form
(
self
)
->
bool
:
has_eps
=
False
has_non_start_eps
=
False
...
...
@@ -322,6 +370,16 @@ class CFG:
def
generates
(
self
,
word
:
Union
[
str
,
Iterable
[
Terminal
]])
\
->
GeneratesResult
:
"""
Check if the grammar generates the given word.
Uses the C-Y-K algorithm, so the check is in O(|CNF|^3) where CNF is
the Chomsky normal form of the grammar.
Returns an instance of GeneratesResult, which is convertible to bool.
It also contains a CNF of the original grammar and a C-Y-K table
(unless the input word was an empty word).
"""
cnf
=
self
if
self
.
is_cnf
()
else
self
.
cnf
()
if
isinstance
(
word
,
str
):
word
=
[
Terminal
(
x
)
for
x
in
word
]
...
...
@@ -362,31 +420,43 @@ class CFG:
nonterms
.
insert
(
0
,
self
.
init
)
out
=
[]
for
r
in
nonterms
:
for
r
in
self
.
rules
.
keys
()
:
to
=
sorted
(
map
(
lambda
prds
:
""
.
join
(
map
(
lambda
x
:
x
.
name
,
prds
))
if
prds
else
"ε"
,
self
.
rules
[
r
]))
out
.
append
(
f
"
{
r
.
name
}
->
{
' | '
.
join
(
to
)
}
"
)
return
"
\n
"
.
join
(
out
)
@staticmethod
def all_terminal(sentence: CFG.Sentence) -> bool:
    """Check ``sentence`` for being made of ``Terminal`` symbols only.

    The per-symbol test is delegated to the ``all_of`` helper, which
    presumably holds iff the predicate is true for every element
    (NOTE(review): confirm against its definition).
    """
    def is_terminal(symbol) -> bool:
        return isinstance(symbol, Terminal)

    return all_of(is_terminal, sentence)
def
_generate
(
self
,
max_length
:
int
)
->
Iterable
[
CFG
.
Word
]:
"""
Yields words of the grammar up to {max_length} in length. Shorter
words come first. No words are repeated.
Yields all words of the grammar up to {max_length} in length. If the
grammar is in CNF (Chomsky normal form) then shorter words come first.
No words are repeated.
Needs grammar in epsilon normal form.
Otherwise it might fail to
Needs grammar in epsilon normal form. Otherwise it might fail to
generate some words.
"""
seen
=
set
()
queue
:
Deque
[
CFG
.
Sentence
]
=
deque
([(
self
.
init
,)])
# Walk in BFS order so that the sentences are explored from shorter
# to longer for CNF.
# As we yield a word immediately on finding it among the sentences
# (i.e., when we find a sentence with no nonterminals), we also yield
# words from shorter to longer for CNF grammars (because a word of
# length N needs exactly N + (N - 1) derivations in CNF and therefore
# shorter words precede longer ones in BFS order).
while
queue
:
sentence
=
queue
.
popleft
()
if
len
(
sentence
)
>
max_length
or
sentence
in
seen
:
continue
seen
.
add
(
sentence
)
if
all_of
(
lambda
x
:
isinstance
(
x
,
T
erminal
),
sentence
):
if
CFG
.
all_t
erminal
(
sentence
):
yield
typing
.
cast
(
CFG
.
Word
,
sentence
)
else
:
for
i
in
range
(
len
(
sentence
)):
...
...
@@ -397,6 +467,113 @@ class CFG:
queue
.
append
(
new_sentence
)
break
# it suffices to perform left derivations
def _recursive_nonterminals(self) -> Set[Nonterminal]:
    """Collect the nonterminals that can be rewritten to themselves.

    For every nonterminal the set of nonterminals occurring in any
    sentence derivable from it (in one or more steps) is computed as a
    fixpoint over all productions. A nonterminal is recursive when it is a
    member of its own set.
    """
    reach: Dict[Nonterminal, Set[Nonterminal]] = {}
    for nonterminal in self.nonterminals:
        reach[nonterminal] = set()

    # Repeat full passes over the productions until no set grows.
    for tracker in ChangeTracker():
        for head, body in self.productions():
            for symbol in body:
                if not isinstance(symbol, Nonterminal):
                    continue
                head_reach = reach[head]
                # The symbol itself plus everything already known to be
                # reachable from it must be reachable from the head too.
                missing = ({symbol} | reach[symbol]) - head_reach
                if missing:
                    head_reach |= missing
                    tracker.changed()

    return {n for n in self.nonterminals if n in reach[n]}
def _shortest_words_for_nonterminals(self) -> Dict[Nonterminal, int]:
    """Compute the length of the shortest word derivable from nonterminals.

    The result maps a nonterminal to the minimal word length; nonterminals
    for which no length could be established (no production built solely
    from terminals and already-resolved nonterminals) are absent from the
    mapping. Computed as a fixpoint over all productions.
    """
    best: Dict[Nonterminal, int] = {}

    for tracker in ChangeTracker():
        for head, body in self.productions():
            total = 0
            for symbol in body:
                if not isinstance(symbol, Nonterminal):
                    total += 1  # every other symbol contributes one letter
                elif symbol in best:
                    total += best[symbol]
                else:
                    # Length of this nonterminal is not known (yet), so
                    # the production cannot be evaluated in this pass.
                    break
            else:
                if head not in best or total < best[head]:
                    best[head] = total
                    tracker.changed()

    return best
def _longest_words_for_nonterminals(self, recset: Optional[Set[Nonterminal]] = None) \
        -> Dict[Nonterminal, Union[int, InfinityType]]:
    """Compute an upper bound on word length for every nonterminal.

    ``recset`` is the set of recursive nonterminals; when omitted it is
    obtained from ``_recursive_nonterminals()``. Recursive nonterminals
    start at ``Infinity`` and all others at 0; the values are then raised
    by a fixpoint iteration over all productions, where a production's
    length is the sum of the current values of its nonterminals plus one
    for each other symbol.
    """
    recursive = self._recursive_nonterminals() if recset is None else recset

    bound: Dict[Nonterminal, Union[int, InfinityType]] = {}
    for nonterminal in self.nonterminals:
        bound[nonterminal] = Infinity if nonterminal in recursive else 0

    for tracker in ChangeTracker():
        for head, body in self.productions():
            total: Union[int, InfinityType] = 0
            for symbol in body:
                # Adding Infinity turns the whole sum into Infinity.
                total += bound[symbol] if isinstance(symbol, Nonterminal) else 1
            if total > bound[head]:
                bound[head] = total
                tracker.changed()

    return bound
def _generate_random(self, min_length: int, max_length: int, seed: int = 0) \
        -> Iterable[CFG.Word]:
    """
    Yields a stream of random words of the grammar up to {max_length} in
    length (inclusive). The stream is infinite. Words can repeat. If the
    grammar has no word of length at most {max_length}, nothing is yielded.

    The same {seed} always produces the same stream; a private random
    generator is used, so the global ``random`` state is left untouched.

    Needs grammar in epsilon normal form, without simple rules, and
    normalized (so proper grammar is OK). Otherwise it might fail to
    generate some words or the generation might not terminate.

    TODO: {min_length} is accepted but not enforced yet; shorter words are
    yielded as well.
    """
    rng = random.Random(seed)
    shortest_word = self._shortest_words_for_nonterminals()

    # No terminal word is derivable from the initial nonterminal at all.
    if self.init not in shortest_word:
        return

    def sentence_min_length(sentence: CFG.Sentence) -> int:
        # Length of the shortest word derivable from the sentence.
        return sum(shortest_word[s] if isinstance(s, Nonterminal) else 1
                   for s in sentence)

    while True:
        sentence: CFG.Sentence = (self.init,)
        while not CFG.all_terminal(sentence):
            current_min_length = sentence_min_length(sentence)
            # (position, production) pairs whose application still allows
            # finishing within max_length.
            candidates: List[Tuple[int, CFG.Production]] = []
            for i, sym in enumerate(sentence):
                if not isinstance(sym, Nonterminal) or sym not in self.rules:
                    continue
                base_length = current_min_length - shortest_word[sym]
                for prod in self.rules[sym]:
                    if base_length + sentence_min_length(prod) <= max_length:
                        candidates.append((i, prod))
            if not candidates:
                # Nothing can be rewritten within the length limit, so no
                # word can be produced; end the stream instead of failing
                # inside rng.choice.
                return
            i, prod = rng.choice(candidates)
            sentence = sentence[:i] + prod + sentence[i + 1:]
        yield typing.cast(CFG.Word, sentence)
@
staticmethod
def
_terminal_sequence_to_str
(
seq
:
Optional
[
Iterable
[
Terminal
]])
\
->
Optional
[
str
]:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment