Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
de4bc741
Verified
Commit
de4bc741
authored
Apr 05, 2020
by
Vladimír Štill
Browse files
CFL: Implement cached version of CYK for batch processing
parent
76284e76
Changes
1
Hide whitespace changes
Inline
Side-by-side
cfl.py
View file @
de4bc741
...
...
@@ -536,7 +536,7 @@ class CFG:
right_ce
=
may_pop
(
right_words
-
left_words
)
def
try_word
(
maybe_ce
:
Optional
[
CFG
.
Word
],
rng
:
CFGRandom
,
other
:
C
FG
,
length
:
int
)
->
Optional
[
CFG
.
Word
]:
other
:
C
achedCYK
,
length
:
int
)
->
Optional
[
CFG
.
Word
]:
if
maybe_ce
is
not
None
:
return
maybe_ce
...
...
@@ -555,7 +555,7 @@ class CFG:
if
max_cmp_len
is
None
:
max_cmp_len
=
min
(
max
(
pow
(
2
,
len
(
left
.
nonterminals
)
+
1
),
pow
(
2
,
len
(
right
.
nonterminals
)
+
1
)),
100
)
25
)
print
(
f
"max_cmp_len =
{
max_cmp_len
}
"
)
if
full_cmp_len
>
0
:
...
...
@@ -586,12 +586,14 @@ class CFG:
return
mkres
()
left_rnd
=
CFGRandom
(
left
)
left_cyk
=
CachedCYK
(
left
)
right_rnd
=
CFGRandom
(
right
)
right_cyk
=
CachedCYK
(
right
)
for
length
in
range
(
full_cmp_len
+
1
,
max_cmp_len
+
1
):
for
_
in
range
(
random_samples
):
left_ce
=
try_word
(
left_ce
,
left_rnd
,
right
,
length
)
right_ce
=
try_word
(
right_ce
,
right_rnd
,
left
,
length
)
left_ce
=
try_word
(
left_ce
,
left_rnd
,
right
_cyk
,
length
)
right_ce
=
try_word
(
right_ce
,
right_rnd
,
left
_cyk
,
length
)
if
left_ce
is
not
None
and
right_ce
is
not
None
:
return
mkres
()
print
(
f
"Tested for length
{
length
}
…"
)
...
...
@@ -706,3 +708,46 @@ class CFGRandom:
sentence
=
random
.
choices
(
candidates
,
weights
=
weights
)[
0
]
return
typing
.
cast
(
CFG
.
Word
,
sentence
)
class CachedCYK:
    """CYK-style membership test that keeps its results between queries.

    The grammar passed to the constructor is converted with ``cfg.cnf()``
    once.  Every word (and every sub-word reached while splitting it) that
    has been analysed is stored in ``self.cache`` together with the set of
    nonterminals found for it, so later ``generates`` calls on the same
    instance reuse that work.
    """

    def __init__(self, cfg: CFG):
        """Prepare the tester for ``cfg``.

        ``self.cache`` is seeded from the productions whose right-hand side
        has at most one symbol; the productions with exactly two symbols on
        the right-hand side are collected once so that ``_generates`` does
        not have to re-scan and re-filter all productions for every split.
        """
        self.cfg = cfg.cnf()
        self.cache: Dict[CFG.Word, Set[Nonterminal]] = dict()

        # (source, first symbol, second symbol) of every two-symbol rule.
        binary_rules = []
        for src, dst in self.cfg.productions():
            if len(dst) == 2:
                binary_rules.append((src, dst[0], dst[1]))
            elif len(dst) <= 1:
                short_word = typing.cast(CFG.Word, dst)
                self.cache.setdefault(short_word, set()).add(src)
        self._binary_rules = binary_rules

    def generates(self, word: Union[str, Iterable[Terminal]]) -> bool:
        """Return whether the initial nonterminal is found for ``word``.

        A ``str`` is converted character by character with ``Terminal``;
        any other iterable is taken as a sequence of terminals as-is.
        """
        if isinstance(word, str):
            symbols = tuple(Terminal(x) for x in word)
        else:
            symbols = tuple(word)
        return self.cfg.init in self._generates(symbols)

    def _generates(self, word: CFG.Word) -> Set[Nonterminal]:
        """Return the nonterminals recorded for ``word``, filling the cache.

        On a cache miss the word is split at every inner position; a
        two-symbol rule contributes its source when its first symbol is in
        the result for the prefix and its second symbol is in the result
        for the suffix.  Words of length 0 or 1 that were not seeded by the
        constructor have no inner split position and yield an empty set.
        """
        cached = self.cache.get(word)
        if cached is not None:
            return cached

        out: Set[Nonterminal] = set()
        for i in range(1, len(word)):
            alpha_nterms = self._generates(word[:i])
            if not alpha_nterms:
                # Nothing found for the prefix: skip analysing the suffix.
                continue
            beta_nterms = self._generates(word[i:])
            if not beta_nterms:
                continue
            for src, first, second in self._binary_rules:
                if first in alpha_nterms and second in beta_nterms:
                    out.add(src)

        # Empty results are cached too, so failed sub-words are not retried.
        self.cache[word] = out
        return out
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment