Verified Commit 047662ef authored by Vladimír Štill's avatar Vladimír Štill
Browse files

CFL: An unfinished random generation support

parent 436af282
......@@ -7,6 +7,7 @@ from copy import deepcopy
from collections import deque
from common import Terminal, Nonterminal
from reg_automata import IsEquivalentResult # TODO: to common
import random
T = TypeVar("T")
TA = TypeVar("TA")
......@@ -32,6 +33,27 @@ class GeneratesResult:
return self.value
class InfinityType:
    """
    A value behaving like positive infinity next to ordinary integers.

    Adding anything to it (from either side) gives infinity again, and it
    compares strictly greater than every value that is not itself an
    InfinityType. All instances are considered equal to each other, so a
    copy of the ``Infinity`` singleton still behaves like the singleton.
    """

    def __add__(self, other) -> "InfinityType":
        # infinity absorbs any addend
        return self

    # int + Infinity: int.__add__ gives up and Python falls back to this
    __radd__ = __add__

    def __eq__(self, other) -> bool:
        return isinstance(other, InfinityType)

    def __hash__(self) -> int:
        # must agree with __eq__: every instance is equal, so one hash
        return hash(InfinityType)

    def __lt__(self, other) -> bool:
        # nothing is greater than infinity
        return False

    def __le__(self, other) -> bool:
        return isinstance(other, InfinityType)

    def __gt__(self, other) -> bool:
        return not isinstance(other, InfinityType)

    def __ge__(self, other) -> bool:
        return True

    def __repr__(self) -> str:
        return "Infinity"


Infinity = InfinityType()
def zip_fill(seqa: Iterable[TA], seqb: Iterable[TB], sentinel: bool = False) \
-> Iterable[Tuple[Optional[TA], Optional[TB]]]:
def end():
......@@ -61,6 +83,23 @@ def zip_fill(seqa: Iterable[TA], seqb: Iterable[TB], sentinel: bool = False) \
yield from end()
class ChangeTracker:
    """
    Loop driver for fixed-point computations.

    Iterating over a tracker runs the loop body once unconditionally and
    then once more for every round in which the body called ``changed()``
    on the yielded tracker; the loop ends after the first round that
    reported no change.
    """

    def __init__(self):
        # start "dirty" so that the first round always runs
        self._changed = True

    def __iter__(self) -> "ChangeTracker":
        return self

    def __next__(self) -> "ChangeTracker":
        if self._changed:
            # clear the flag; the loop body has to set it again to continue
            self._changed = False
            return self
        raise StopIteration

    def changed(self) -> None:
        """Record that the current round modified something."""
        self._changed = True
class CFG:
Symbol = Union[Terminal, Nonterminal]
Production = Tuple[Symbol, ...]
......@@ -172,6 +211,15 @@ class CFG:
return self.restrict_symbols(reachable)
def is_empty(self) -> bool:
    """
    Return True if, after normalization (``self.normalized()``), the
    initial nonterminal has no rules left.
    """
    normalized = self.normalized()
    start_has_rules = normalized.init in normalized.rules
    return not start_has_rules
def is_infinite(self) -> bool:
    """
    Return True if the proper form of the grammar (``self.proper()``)
    contains at least one recursive nonterminal.
    """
    recursive = self.proper()._recursive_nonterminals()
    return bool(recursive)
def is_epsilon_normal_form(self) -> bool:
has_eps = False
has_non_start_eps = False
......@@ -322,6 +370,16 @@ class CFG:
def generates(self, word: Union[str, Iterable[Terminal]])\
-> GeneratesResult:
"""
Check if the grammar generates the given word.
Uses the C-Y-K algorithm, so the check is in O(|CNF|^3) where CNF is
the Chomsky normal form of the grammar.
Returns an instance of GeneratesResult, which is convertible to bool.
It also contains a CNF of the original grammar and a C-Y-K table
(unless the input word was an empty word).
"""
cnf = self if self.is_cnf() else self.cnf()
if isinstance(word, str):
word = [Terminal(x) for x in word]
......@@ -362,31 +420,43 @@ class CFG:
nonterms.insert(0, self.init)
out = []
for r in nonterms:
for r in self.rules.keys():
to = sorted(map(lambda prds: "".join(map(lambda x: x.name, prds))
if prds else "ε", self.rules[r]))
out.append(f"{r.name} -> {' | '.join(to)}")
return "\n".join(out)
@staticmethod
def all_terminal(sentence: CFG.Sentence) -> bool:
    """
    Check, by means of the ``all_of`` helper, that every symbol of the
    sentence is an instance of Terminal.
    """
    def is_terminal(symbol) -> bool:
        return isinstance(symbol, Terminal)

    return all_of(is_terminal, sentence)
def _generate(self, max_length: int) -> Iterable[CFG.Word]:
"""
Yields words of the grammar up to {max_length} in length. Shorter
words come first. No words are repeated.
Yields all words of the grammar up to {max_length} in length. If the
grammar is in CNF (Chomsky normal form) then shorter words come first.
No words are repeated.
Needs grammar in epsilon normal form. Otherwise it might fail to
Needs grammar in epsilon normal form. Otherwise it might fail to
generate some words.
"""
seen = set()
queue: Deque[CFG.Sentence] = deque([(self.init,)])
# Walk in BFS order so that the sentences are explored from shorter
# to longer for CNF.
# As we yield a word immediately on finding it among the sentences
# (i.e., when we find a sentence with no nonterminals), we also yield
# words from shorter to longer for CNF grammars (because a word of
# length N needs exactly N + (N - 1) derivations in CNF, and therefore
# shorter words precede longer ones in BFS order).
while queue:
sentence = queue.popleft()
if len(sentence) > max_length or sentence in seen:
continue
seen.add(sentence)
if all_of(lambda x: isinstance(x, Terminal), sentence):
if CFG.all_terminal(sentence):
yield typing.cast(CFG.Word, sentence)
else:
for i in range(len(sentence)):
......@@ -397,6 +467,113 @@ class CFG:
queue.append(new_sentence)
break # it suffices to perform left derivations
def _recursive_nonterminals(self) -> Set[Nonterminal]:
    """
    Return the set of nonterminals that can be rewritten, in one or more
    steps, to a sentence containing themselves.

    For every nonterminal the set of nonterminals occurring in anything
    derivable from it is accumulated; the productions are rescanned until
    a whole pass adds nothing new.
    """
    reach: Dict[Nonterminal, Set[Nonterminal]] = {}
    for nonterminal in self.nonterminals:
        reach[nonterminal] = set()

    for tracker in ChangeTracker():
        for src, prod in self.productions():
            for sym in prod:
                if not isinstance(sym, Nonterminal):
                    continue
                known = reach[src]
                # sym itself plus everything already known to be
                # reachable from it, minus what src already has
                fresh = ({sym} | reach[sym]) - known
                if fresh:
                    known |= fresh
                    tracker.changed()

    return {n for n in self.nonterminals if n in reach[n]}
def _shortest_words_for_nonterminals(self) -> Dict[Nonterminal, int]:
    """
    Map nonterminals to the length of the shortest word derivable from
    them.

    A production contributes a candidate length only once all of its
    nonterminals have a known length (each terminal counts as 1); the
    productions are rescanned until no entry is added or lowered.
    Nonterminals for which no length was found get no entry.
    """
    best: Dict[Nonterminal, int] = {}
    for tracker in ChangeTracker():
        for src, prod in self.productions():
            if CFG.all_terminal(prod):
                candidate: Optional[int] = len(prod)
            else:
                candidate = 0
                for sym in prod:
                    if not isinstance(sym, Nonterminal):
                        candidate += 1
                    elif sym in best:
                        candidate += best[sym]
                    else:
                        # length of this nonterminal is not known (yet)
                        candidate = None
                        break
            if candidate is None:
                continue
            if src in best and best[src] <= candidate:
                continue  # not an improvement
            best[src] = candidate
            tracker.changed()
    return best
def _longest_words_for_nonterminals(self, recset: Optional[Set[Nonterminal]] = None) \
        -> Dict[Nonterminal, Union[int, InfinityType]]:
    """
    Map every nonterminal to an upper length computed from its
    productions: nonterminals in {recset} start at Infinity, all others at
    0, and each value is raised to the largest production length seen
    (terminals count as 1, nonterminals as their current value) until a
    whole pass changes nothing.

    If {recset} is not given, ``self._recursive_nonterminals()`` is used.
    """
    recursive = self._recursive_nonterminals() if recset is None else recset

    bound: Dict[Nonterminal, Union[int, InfinityType]] = {}
    for nonterminal in self.nonterminals:
        bound[nonterminal] = Infinity if nonterminal in recursive else 0

    for tracker in ChangeTracker():
        for src, prod in self.productions():
            if CFG.all_terminal(prod):
                candidate: Union[int, InfinityType] = len(prod)
            else:
                candidate = 0
                for sym in prod:
                    step = bound[sym] if isinstance(sym, Nonterminal) else 1
                    candidate = candidate + step
            if candidate > bound[src]:
                bound[src] = candidate
                tracker.changed()
    return bound
def _generate_random(self, min_length: int, max_length: int, seed: int = 0) -> Iterable[CFG.Word]:
    """
    Yields a stream of random words of the grammar up to {max_length}
    (inclusive) in length. The stream is infinite, unless no word can be
    derived within the limit, in which case it is empty. Words can repeat.

    {min_length} is accepted but not enforced yet: words shorter than
    {min_length} may be yielded.

    The choices are driven by a private random generator seeded with
    {seed}, so the stream is reproducible and the state of the
    module-level ``random`` functions is left untouched.

    Needs grammar in epsilon normal form, without simple rules, and
    normalized (so proper grammar is OK). Otherwise it might fail to
    generate some words or the generation might not terminate.
    """
    rng = random.Random(seed)
    shortest_word = self._shortest_words_for_nonterminals()

    def sentence_min_length(sentence: CFG.Sentence) -> int:
        # length of the shortest word derivable from the sentence
        return sum(shortest_word[s] if isinstance(s, Nonterminal) else 1
                   for s in sentence)

    if self.init not in shortest_word:
        # no word is derivable from the initial nonterminal at all
        return

    while True:
        sentence: CFG.Sentence = (self.init,)
        while not CFG.all_terminal(sentence):
            current_min_length = sentence_min_length(sentence)
            # (position, production) pairs that keep the sentence
            # completable within max_length
            candidates: List[Tuple[int, CFG.Production]] = []
            for i, sym in enumerate(sentence):
                if not isinstance(sym, Nonterminal) \
                        or sym not in self.rules:
                    continue
                base_length = current_min_length - shortest_word[sym]
                for prod in self.rules[sym]:
                    length = base_length + sentence_min_length(prod)
                    # inclusive bound: a word of exactly max_length is OK
                    if length <= max_length:
                        candidates.append((i, prod))
            if not candidates:
                # Nothing can be rewritten without exceeding max_length
                # (for a normalized grammar this can only happen at the
                # initial sentence), so there is nothing to generate.
                return
            i, prod = rng.choice(candidates)
            sentence = sentence[:i] + prod + sentence[i + 1:]
        yield typing.cast(CFG.Word, sentence)
@staticmethod
def _terminal_sequence_to_str(seq: Optional[Iterable[Terminal]]) \
-> Optional[str]:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment