Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
473edc7b
Verified
Commit
473edc7b
authored
Apr 04, 2020
by
Vladimír Štill
Browse files
CFL: A new, uniform random generator
parent
600b4f19
Changes
1
Hide whitespace changes
Inline
Side-by-side
cfl.py
View file @
473edc7b
...
...
@@ -727,3 +727,111 @@ class CFG:
def __str__(self) -> str:
    """Return the textual form of this object as produced by ``to_string``."""
    rendered = self.to_string()
    return rendered
class CFGRandom:
    """Random generator of fixed-length words of a context-free grammar.

    The generator first counts, for every nonterminal and every length, the
    number of (leftmost) derivations of terminal words of that length. A word
    is then built by repeatedly expanding the leftmost nonterminal, choosing
    each production with probability proportional to the number of
    derivations that remain possible after applying it.

    Because derivations (not words) are counted, the result is uniform over
    words only when every word has a single derivation, i.e. when the grammar
    is unambiguous.
    """

    CountMap = Dict[Nonterminal, int]
    ProdCountMap = Dict[CFG.Production, int]

    def __init__(self, cfg):
        """Prepare the counting tables for ``cfg``.

        The grammar is normalised with ``cfg.proper()`` first; the counting
        below relies on there being no simple (A -> B) rules afterwards.
        NOTE(review): presumably ``proper()`` also removes epsilon rules
        except possibly for the initial nonterminal -- confirm.
        """
        self.cfg = cfg.proper()

        # counts[l][N] is the number of derivations of words of length l
        # from nonterminal N. Lengths 0 and 1 are initialised directly here.
        self.counts: List[CFGRandom.CountMap] = [
            {n: 0 for n in self.cfg.nonterminals},
            {n: 0 for n in self.cfg.nonterminals},
        ]
        # init and productions of length 0
        if () in self.cfg.rules.get(self.cfg.init, []):
            self.counts[0][self.cfg.init] = 1

        # prod_counts[l][p] memoises the count for a symbol sequence p and
        # length l; it is grown lazily by _materialize_prod.
        self.prod_counts: List[CFGRandom.ProdCountMap] = [dict(), dict()]

        for src, prod in self.cfg.productions():
            if len(prod) == 1:  # No A -> B (simple) rules
                self.counts[1][src] += 1

    def derivations_count(self, length: int,
                          nterm: Optional[Nonterminal] = None) -> int:
        """Return the number of derivations of words of the given length.

        :param length: required word length (must not be negative)
        :param nterm: nonterminal to derive from; the initial nonterminal of
                      the (normalised) grammar is used when omitted
        :raises ValueError: if ``length`` is negative
        """
        if length < 0:
            # A negative value would silently index the table from its end.
            raise ValueError("length must be non-negative")
        self._materialize(length)
        if nterm is None:
            nterm = self.cfg.init
        return self.counts[length][nterm]

    def _materialize(self, length: int) -> None:
        """Fill ``self.counts`` for all lengths up to and including ``length``.

        Lengths are computed in increasing order because the count for one
        length depends on the counts for all smaller lengths.
        """
        while len(self.counts) <= length:
            current = len(self.counts)
            self.counts.append({n: 0 for n in self.cfg.nonterminals})
            for src, prod in self.cfg.productions():
                self.counts[current][src] += \
                    self._materialize_prod(prod, current)

    def _materialize_prod(self, prod: CFG.Production, length: int,
                          prefix="") -> int:
        """Count derivations of words of ``length`` from the sequence ``prod``.

        Assumes ``self.counts`` is already computed for all smaller lengths
        (and, for a single-nonterminal ``prod``, for ``length`` itself).

        :param prod: sequence of terminals and nonterminals (a production
                     body or a sentential form)
        :param length: required length of the derived word
        :param prefix: unused; kept only so existing callers that pass it
                       keep working (it used to indent debug output)
        """
        if len(prod) == 1:
            if isinstance(prod[0], Terminal):
                # a single terminal yields exactly one word, of length 1
                return int(length == 1)
            return self.counts[length][prod[0]]

        if CFG.all_terminal(prod):
            return int(len(prod) == length)

        # Grow the memo table on demand. Earlier lengths may never have
        # reached this point (all their productions returned above), so the
        # table length cannot be assumed to track the computed lengths.
        while len(self.prod_counts) <= length:
            self.prod_counts.append(dict())

        memo = self.prod_counts[length]
        if prod in memo:
            return memo[prod]

        # for N -> γ to get number of words of length l
        # consider split γ = αβ such that |α| = 1
        # and all lengths a, b, a + b = l such that words of length a are
        # derived from α and words of length b are derived from β
        alpha = prod[:1]
        beta = prod[1:]
        count = 0
        for a in range(1, length):  # end is exclusive, so b stays >= 1
            cnt_alpha = self._materialize_prod(alpha, a)
            if cnt_alpha == 0:
                continue
            count += cnt_alpha * self._materialize_prod(beta, length - a)

        memo[prod] = count
        return count

    def rnd_word(self, length: int) -> Optional[CFG.Word]:
        """Return a random word of the given length, or ``None`` if none exists.

        The word is produced by a leftmost derivation in which every step is
        chosen with probability proportional to the number of derivations
        that can still complete to a word of ``length``.

        :raises ValueError: if ``length`` is negative
        """
        if self.derivations_count(length) == 0:
            return None

        sentence: CFG.Sentence = (self.cfg.init,)
        while not CFG.all_terminal(sentence):
            # left derivations only: expand the leftmost nonterminal
            pos, sym = next((i, s) for i, s in enumerate(sentence)
                            if isinstance(s, Nonterminal))

            candidates: List[CFG.Sentence] = []
            weights: List[int] = []
            for prod in self.cfg.rules[sym]:
                cand = sentence[:pos] + prod + sentence[pos + 1:]
                w = self._materialize_prod(cand, length)
                if w > 0:
                    candidates.append(cand)
                    weights.append(w)

            # Exact integer sampling: the counts grow exponentially with the
            # length, so converting them to float weights would lose
            # precision (or overflow) and skew the distribution.
            pick = random.randrange(sum(weights))
            for cand, w in zip(candidates, weights):
                if pick < w:
                    sentence = cand
                    break
                pick -= w
        return typing.cast(CFG.Word, sentence)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment