Verified Commit 600b4f19 authored by Vladimír Štill
Browse files

CFL: Improve the random generator heuristics

parent 33140e20
......@@ -50,9 +50,15 @@ class InfinityType:
def __lt__(self, other) -> bool:
# Strict less-than is False for every operand, so this object never
# orders below anything (including another InfinityType).
return False
def __le__(self, other) -> bool:
# Less-or-equal holds only when the other operand is also an
# InfinityType instance; it is False for every other operand.
return isinstance(other, InfinityType)
def __gt__(self, other) -> bool:
# Strictly greater than every operand that is not an InfinityType
# instance; False when compared with another InfinityType.
return not isinstance(other, InfinityType)
def __ge__(self, other) -> bool:
# Greater-or-equal is True for every operand.
return True
def __repr__(self) -> str:
# Fixed textual representation, independent of any instance state.
return "Infinity"
......@@ -299,7 +305,7 @@ class CFG:
new_rules: CFG.Rules = dict()
for src in self.nonterminals:
for esrc in simple_to[src]:
for prod in self.rules[esrc]:
for prod in self.rules.get(esrc, []):
if len(prod) != 1 or prod[0] in self.terminals:
if src not in new_rules:
new_rules[src] = set()
......@@ -577,8 +583,10 @@ class CFG:
return lang_size
def _generate_random(self, min_length: int, max_length: int, seed: int = 0) -> Iterable[CFG.Word]:
def _generate_random(self, min_length: int, max_length: int,
seed: Optional[int] = 0, rec_bias: int = 2,
max_fin_size: int = 16) \
-> Iterable[CFG.Word]:
"""
Yields a stream of random words of the grammar up to {max_length} in
length. The stream is infinite. Words can repeat.
......@@ -587,10 +595,29 @@ class CFG:
normalized (so proper grammar is OK). Otherwise it might fail to
generate some words or the generation might not terminate.
"""
assert rec_bias >= 1
random.seed(seed)
recursive = self._recursive_nonterminals()
shortest_word = self._nonterminal_min_length()
longest_word = self._nonterminal_max_length(recursive)
lang_size = self._nonterminal_lang_size(recursive)
def prod_size(prod: CFG.Production) -> Union[int, InfinityType]:
# Size estimate for one production: the product of lang_size[x] over
# every Nonterminal symbol x in prod. Symbols that are not Nonterminal
# instances are skipped, so a production without nonterminals keeps
# the initial value 1.
out: Union[int, InfinityType] = 1
for x in prod:
if isinstance(x, Nonterminal):
out *= lang_size[x]
# max() makes max_fin_size a lower bound on the returned value.
# NOTE(review): the parameter name suggests an upper cap rather than a
# floor -- confirm that max() (not min()) is the intended operation.
return max(out, max_fin_size)
max_fin = max(x for x in (prod_size(p) for _, p in self.productions())
if isinstance(x, int))
def prod_weight(prod: CFG.Production) -> int:
# Selection weight of a production; the values are collected into
# `weights` and handed to random.choices when picking the next move.
sz = prod_size(prod)
# An int size is used as the weight directly.
if isinstance(sz, int):
return sz
# Any non-int size (presumably InfinityType) is weighted as the largest
# int production size found in the grammar, scaled by rec_bias.
return max_fin * rec_bias
def sentence_length_bound(sentence: CFG.Sentence, length_map) -> int:
return sum(map(lambda s: length_map[s]
......@@ -607,7 +634,9 @@ class CFG:
while not CFG.all_terminal(sentence):
current_min_len = sentence_min_length(sentence)
candidates: List[Tuple[int, CFG.Production, int, int]] = []
candidates: List[Tuple[int, CFG.Production]] = []
weights: List[int] = []
for i in range(len(sentence)):
sym = sentence[i]
if not isinstance(sym, Nonterminal) \
......@@ -620,13 +649,15 @@ class CFG:
for prod in self.rules[sym]:
minl = base_min + sentence_min_length(prod)
maxl = base_max + sentence_max_length(prod)
if minl < max_length and maxl > min_length:
candidates.append((i, prod, minl, maxl))
move = random.choice(candidates)
if minl <= max_length and maxl >= min_length:
candidates.append((i, prod))
weights.append(prod_weight(prod))
# print([(CFG._terminal_sequence_to_str(p), w) for (_,p), w in zip(candidates, weights)])
move = random.choices(candidates, weights=weights)[0]
i = move[0]
sentence = sentence[:i] + move[1] + sentence[1 + i:]
yield typing.cast(CFG.Word, sentence)
return
@staticmethod
def _terminal_sequence_to_str(seq: Optional[Iterable[Terminal]]) \
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment