Verified Commit 473edc7b authored by Vladimír Štill's avatar Vladimír Štill
Browse files

CFL: A new, uniform random generator

parent 600b4f19
......@@ -727,3 +727,111 @@ class CFG:
def __str__(self) -> str:
    """Return the string produced by ``self.to_string()``."""
    return self.to_string()
class CFGRandom:
    """Count derivations of a grammar and sample random words of a given length.

    The generator works on ``cfg.proper()`` and keeps two memo tables that
    grow on demand:

    * ``counts[l][N]`` -- number of derivations of terminal words of length
      ``l`` from nonterminal ``N``;
    * ``prod_counts[l][gamma]`` -- the same number for a sequence of symbols
      ``gamma`` (a production body or a sentential form).

    ``rnd_word`` picks each derivation step with probability proportional to
    the number of ways the resulting sentential form can be completed, so
    every leftmost derivation of the requested length is equally likely.

    NOTE(review): the counting assumes ``proper()`` yields a grammar without
    simple rules (``A -> B``) and without epsilon rules other than possibly
    one for the initial nonterminal -- confirm against ``CFG.proper``.
    """

    CountMap = Dict[Nonterminal, int]
    ProdCountMap = Dict[CFG.Production, int]

    def __init__(self, cfg):
        """Prepare the memo tables for lengths 0 and 1.

        :param cfg: grammar to sample from; only ``cfg.proper()`` is stored.
        """
        self.cfg = cfg.proper()

        # Lengths 0 and 1 are the base cases of the recurrence.
        self.counts: List[CFGRandom.CountMap] \
            = [{n: 0 for n in self.cfg.nonterminals},
               {n: 0 for n in self.cfg.nonterminals}]

        # The only word of length 0 is the empty one, and only the initial
        # nonterminal is checked for an empty production.
        if () in self.cfg.rules.get(self.cfg.init, []):
            self.counts[0][self.cfg.init] = 1

        self.prod_counts: List[CFGRandom.ProdCountMap] = [dict(), dict()]

        for src, prod in self.cfg.productions():
            # With no simple (A -> B) rules, a body of length 1 is a single
            # terminal and contributes exactly one word of length 1.
            if len(prod) == 1:
                self.counts[1][src] += 1

    def derivations_count(self, length: int,
                          nterm: Optional[Nonterminal] = None) -> int:
        """Return the number of derivations of words of ``length`` symbols.

        :param length: required word length (non-negative).
        :param nterm: nonterminal to derive from; defaults to the initial
            nonterminal of the grammar.
        :raises ValueError: if ``length`` is negative.
        """
        if length < 0:
            # A negative index would silently read the table from its end.
            raise ValueError("length must be non-negative")
        self._materialize(length)
        if nterm is None:
            nterm = self.cfg.init
        return self.counts[length][nterm]

    def _materialize(self, length: int) -> None:
        """Fill ``self.counts`` for every length up to and including ``length``."""
        # Levels are filled in increasing order because the count for one
        # length is built from the counts of all shorter lengths.
        while len(self.counts) <= length:
            current = len(self.counts)
            self.counts.append({n: 0 for n in self.cfg.nonterminals})
            for src, prod in self.cfg.productions():
                self.counts[current][src] \
                    += self._materialize_prod(prod, current)

    def _materialize_prod(self, prod: CFG.Production, length: int,
                          prefix="") -> int:
        """Return how many words of ``length`` the symbol sequence ``prod`` derives.

        Assumes ``self.counts`` is already computed for all smaller lengths
        (and, for a single nonterminal, that ``self.counts[length]`` exists).

        :param prod: production body or sentential form.
        :param length: required length of the derived terminal word.
        :param prefix: unused; kept so that existing callers passing it
            (it was used for debug indentation) continue to work.
        """
        if len(prod) == 1:
            if isinstance(prod[0], Terminal):
                return int(length == 1)
            return self.counts[length][prod[0]]

        if CFG.all_terminal(prod):
            # A purely terminal sequence derives only itself.
            return int(len(prod) == length)

        # Grow the cache on demand: nothing guarantees that every smaller
        # length has already created its own dictionary.
        while len(self.prod_counts) <= length:
            self.prod_counts.append(dict())

        cache = self.prod_counts[length]
        if prod in cache:
            return cache[prod]

        # For gamma = alpha beta with |alpha| = 1, a word of length l splits
        # into a part of length a derived from alpha and a part of length
        # b = l - a derived from beta; sum over all such splits.
        alpha = prod[:1]
        beta = prod[1:]
        count = 0
        for a in range(1, length):  # end is exclusive, so b >= 1
            cnt_alpha = self._materialize_prod(alpha, a)
            if cnt_alpha == 0:
                continue
            count += cnt_alpha * self._materialize_prod(beta, length - a)

        cache[prod] = count
        return count

    def rnd_word(self, length: int) -> Optional[CFG.Word]:
        """Return a random word of exactly ``length`` symbols, or ``None``.

        ``None`` is returned when the grammar has no derivation of a word of
        that length. Otherwise a leftmost derivation is built step by step,
        choosing each production with weight equal to the number of words of
        the requested length derivable from the resulting sentential form.

        :raises ValueError: if ``length`` is negative.
        """
        if self.derivations_count(length) == 0:
            return None

        sentence: CFG.Sentence = (self.cfg.init,)
        while not CFG.all_terminal(sentence):
            # Leftmost derivations only: expand the first nonterminal.
            i, sym = next((idx, s) for idx, s in enumerate(sentence)
                          if isinstance(s, Nonterminal))

            candidates: List[CFG.Sentence] = []
            weights: List[int] = []
            for prod in self.cfg.rules[sym]:
                cand = sentence[:i] + prod + sentence[i + 1:]
                w = self._materialize_prod(cand, length)
                if w > 0:
                    candidates.append(cand)
                    weights.append(w)

            sentence = random.choices(candidates, weights=weights)[0]
        return typing.cast(CFG.Word, sentence)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment