Loading grammars_cfg.py +80 −82 Original line number Original line Diff line number Diff line Loading @@ -520,9 +520,6 @@ class CFG: left_gen = WordGenerator(left) left_gen = WordGenerator(left) right_gen = WordGenerator(right) right_gen = WordGenerator(right) left_rnd = CFGRandom(left) right_rnd = CFGRandom(right) left_ce: Optional[CFG.Word] = None left_ce: Optional[CFG.Word] = None right_ce: Optional[CFG.Word] = None right_ce: Optional[CFG.Word] = None Loading @@ -544,7 +541,7 @@ class CFG: if right_ce is None: if right_ce is None: right_ce = may_pop(right_words - left_words) right_ce = may_pop(right_words - left_words) def try_word(maybe_ce: Optional[CFG.Word], rng: CFGRandom, def try_word(maybe_ce: Optional[CFG.Word], rng: WordGenerator, other: CachedCYK, length: int) -> Optional[CFG.Word]: other: CachedCYK, length: int) -> Optional[CFG.Word]: if maybe_ce is not None: if maybe_ce is not None: return maybe_ce return maybe_ce Loading Loading @@ -573,8 +570,8 @@ class CFG: return sum(len(x) for k, x in words.items() return sum(len(x) for k, x in words.items() if k <= last_min_size) if k <= last_min_size) def nxderivations(words: Dict[int, Set[CFG.Word]], rng: CFGRandom, def nxderivations(words: Dict[int, Set[CFG.Word]], new_size: int) -> int: rng: WordGenerator, new_size: int) -> int: return total(words) + \ return total(words) + \ sum(rng.derivations_count(l) for l in sum(rng.derivations_count(l) for l in range(last_min_size + 1, new_size + 1)) range(last_min_size + 1, new_size + 1)) Loading @@ -596,8 +593,8 @@ class CFG: fill_ces(left_words.get(sz, set()), fill_ces(left_words.get(sz, set()), right_words.get(sz, set())) right_words.get(sz, set())) last_checked_len = sz last_checked_len = sz nxl = nxderivations(left_words, left_rnd, min_size) nxl = nxderivations(left_words, left_gen, min_size) nxr = nxderivations(right_words, right_rnd, min_size) nxr = nxderivations(right_words, right_gen, min_size) last_min_size = min_size last_min_size = min_size Loading @@ -615,8 +612,8 @@ class CFG: for length in range(last_checked_len + 1, max_cmp_len + 1): for length in range(last_checked_len + 1, max_cmp_len + 1): for _ in range(random_samples): for _ in range(random_samples): left_ce = try_word(left_ce, left_rnd, right_cyk, length) left_ce = try_word(left_ce, left_gen, right_cyk, length) right_ce = try_word(right_ce, right_rnd, left_cyk, length) right_ce = try_word(right_ce, right_gen, left_cyk, length) if left_ce is not None and right_ce is not None: if left_ce is not None and right_ce is not None: return mkres() return mkres() Loading @@ -626,25 +623,90 @@ class CFG: return self.to_string() return self.to_string() class CFGRandom: # TODO: split into word counter, generator and random generator class WordGenerator: CountMap = Dict[Nonterminal, int] CountMap = Dict[Nonterminal, int] ProdCountMap = Dict[CFG.Production, int] ProdCountMap = Dict[CFG.Production, int] def __init__(self, cfg): def __init__(self, cfg: CFG): self.cfg = cfg.proper() self.cfg = cfg.proper() self._seen: Set[CFG.Symbols] = set() self._stack: List[CFG.Symbols] = [(self.cfg.init,)] self._cur_lenght = 1 self._next_length: Optional[int] = 2 self.last: Optional[CFG.Word] = None \ if Eps() not in self.cfg.rules.get(self.cfg.init, []) \ else () # init and productions of lenght 0 # init and productions of lenght 0 self.counts: List[CFGRandom.CountMap] \ self.counts: List[WordGenerator.CountMap] \ = [{n: 0 for n in self.cfg.nonterminals}, = [{n: 0 for n in self.cfg.nonterminals}, {n: 0 for n in self.cfg.nonterminals}] {n: 0 for n in self.cfg.nonterminals}] if Eps() in self.cfg.rules.get(self.cfg.init, []): if Eps() in self.cfg.rules.get(self.cfg.init, []): self.counts[0][self.cfg.init] = 1 self.counts[0][self.cfg.init] = 1 self.prod_counts: List[CFGRandom.ProdCountMap] = [dict(), dict()] self.prod_counts: List[WordGenerator.ProdCountMap] = [dict(), dict()] for src, prod in self.cfg.productions(): for src, prod in self.cfg.productions(): if len(prod) == 1: # No A -> B (siple) rules if len(prod) == 1: # No A -> B (siple) rules self.counts[1][src] += 1 self.counts[1][src] += 1 def get_and_shift(self) -> Optional[CFG.Word]: """ Generates all words of the grammar in ascending order, or random words of given length. No words are repeated. """ word = self.last if self.last is not None else self._next() self.last = None return word def get(self) -> Optional[CFG.Word]: if self.last is None: return self._next() return self.last def _next(self) -> Optional[CFG.Word]: # Iterative-Deepening pseudo DFS (as a generator) while self._stack or self._next_length is not None: while self._stack: sentence = self._stack.pop() if CFG.all_terminal(sentence): if len(sentence) == self._cur_lenght: self.last = typing.cast(CFG.Word, sentence) return self.last continue for i in range(len(sentence)): if isinstance(sentence[i], Nonterminal): for p in self.cfg.rules.get( typing.cast(Nonterminal, sentence[i]), []): if isinstance(p, Eps): continue new_sentence = sentence[:i] + p + sentence[i + 1:] nlen = len(new_sentence) # prune seen and too long if new_sentence not in self._seen \ and nlen <= self._cur_lenght: self._seen.add(new_sentence) self._stack.append(new_sentence) # recursion elif nlen > self._cur_lenght \ and (self._next_length is None or nlen < self._next_length): self._next_length = nlen break # it suffices to perform left derivations if self._next_length is not None \ and self._next_length > self._cur_lenght: self._cur_lenght = self._next_length self._next_length = None self._stack = [(self.cfg.init,)] self._seen = set() return None def derivations_count(self, length: int, def derivations_count(self, length: int, nterm: Optional[Nonterminal] = None) -> int: nterm: Optional[Nonterminal] = None) -> int: self._materialize(length) self._materialize(length) Loading Loading @@ -706,6 +768,10 @@ class CFGRandom: return count return count def rnd_word(self, length: int) -> Optional[CFG.Word]: def rnd_word(self, length: int) -> Optional[CFG.Word]: """ Returns a random word uniformly selected from all the derivations of given length or None if shuch a word does not exist. """ if self.derivations_count(length) == 0: if self.derivations_count(length) == 0: return None return None Loading Loading @@ -779,71 +845,3 @@ class CachedCYK: self.cache[word] = out self.cache[word] = out return out return out class WordGenerator: """ Generates all words of the grammar in ascending order. No words are repeated. """ def __init__(self, cfg: CFG): self.cfg = cfg.proper() self._seen: Set[CFG.Symbols] = set() self._stack: List[CFG.Symbols] = [(self.cfg.init,)] self._cur_lenght = 1 self._next_length: Optional[int] = 2 self.last: Optional[CFG.Word] = None \ if Eps() not in self.cfg.rules.get(self.cfg.init, []) \ else () def get_and_shift(self) -> Optional[CFG.Word]: word = self.last if self.last is not None else self._next() self.last = None return word def get(self) -> Optional[CFG.Word]: if self.last is None: return self._next() return self.last def _next(self) -> Optional[CFG.Word]: # Iterative-Deepening pseudo DFS (as a generator) while self._stack or self._next_length is not None: while self._stack: sentence = self._stack.pop() if CFG.all_terminal(sentence): if len(sentence) == self._cur_lenght: self.last = typing.cast(CFG.Word, sentence) return self.last continue for i in range(len(sentence)): if isinstance(sentence[i], Nonterminal): for p in self.cfg.rules.get( typing.cast(Nonterminal, sentence[i]), []): if isinstance(p, Eps): continue new_sentence = sentence[:i] + p + sentence[i + 1:] nlen = len(new_sentence) # prune seen and too long if new_sentence not in self._seen \ and nlen <= self._cur_lenght: self._seen.add(new_sentence) self._stack.append(new_sentence) # recursion elif nlen > self._cur_lenght \ and (self._next_length is None or nlen < self._next_length): self._next_length = nlen break # it suffices to perform left derivations if self._next_length is not None \ and self._next_length > self._cur_lenght: self._cur_lenght = self._next_length self._next_length = None self._stack = [(self.cfg.init,)] self._seen = set() return None Loading
grammars_cfg.py +80 −82 Original line number Original line Diff line number Diff line Loading @@ -520,9 +520,6 @@ class CFG: left_gen = WordGenerator(left) left_gen = WordGenerator(left) right_gen = WordGenerator(right) right_gen = WordGenerator(right) left_rnd = CFGRandom(left) right_rnd = CFGRandom(right) left_ce: Optional[CFG.Word] = None left_ce: Optional[CFG.Word] = None right_ce: Optional[CFG.Word] = None right_ce: Optional[CFG.Word] = None Loading @@ -544,7 +541,7 @@ class CFG: if right_ce is None: if right_ce is None: right_ce = may_pop(right_words - left_words) right_ce = may_pop(right_words - left_words) def try_word(maybe_ce: Optional[CFG.Word], rng: CFGRandom, def try_word(maybe_ce: Optional[CFG.Word], rng: WordGenerator, other: CachedCYK, length: int) -> Optional[CFG.Word]: other: CachedCYK, length: int) -> Optional[CFG.Word]: if maybe_ce is not None: if maybe_ce is not None: return maybe_ce return maybe_ce Loading Loading @@ -573,8 +570,8 @@ class CFG: return sum(len(x) for k, x in words.items() return sum(len(x) for k, x in words.items() if k <= last_min_size) if k <= last_min_size) def nxderivations(words: Dict[int, Set[CFG.Word]], rng: CFGRandom, def nxderivations(words: Dict[int, Set[CFG.Word]], new_size: int) -> int: rng: WordGenerator, new_size: int) -> int: return total(words) + \ return total(words) + \ sum(rng.derivations_count(l) for l in sum(rng.derivations_count(l) for l in range(last_min_size + 1, new_size + 1)) range(last_min_size + 1, new_size + 1)) Loading @@ -596,8 +593,8 @@ class CFG: fill_ces(left_words.get(sz, set()), fill_ces(left_words.get(sz, set()), right_words.get(sz, set())) right_words.get(sz, set())) last_checked_len = sz last_checked_len = sz nxl = nxderivations(left_words, left_rnd, min_size) nxl = nxderivations(left_words, left_gen, min_size) nxr = nxderivations(right_words, right_rnd, min_size) nxr = nxderivations(right_words, right_gen, min_size) last_min_size = min_size last_min_size = min_size Loading @@ -615,8 +612,8 @@ class CFG: for length in range(last_checked_len + 1, max_cmp_len + 1): for length in range(last_checked_len + 1, max_cmp_len + 1): for _ in range(random_samples): for _ in range(random_samples): left_ce = try_word(left_ce, left_rnd, right_cyk, length) left_ce = try_word(left_ce, left_gen, right_cyk, length) right_ce = try_word(right_ce, right_rnd, left_cyk, length) right_ce = try_word(right_ce, right_gen, left_cyk, length) if left_ce is not None and right_ce is not None: if left_ce is not None and right_ce is not None: return mkres() return mkres() Loading @@ -626,25 +623,90 @@ class CFG: return self.to_string() return self.to_string() class CFGRandom: # TODO: split into word counter, generator and random generator class WordGenerator: CountMap = Dict[Nonterminal, int] CountMap = Dict[Nonterminal, int] ProdCountMap = Dict[CFG.Production, int] ProdCountMap = Dict[CFG.Production, int] def __init__(self, cfg): def __init__(self, cfg: CFG): self.cfg = cfg.proper() self.cfg = cfg.proper() self._seen: Set[CFG.Symbols] = set() self._stack: List[CFG.Symbols] = [(self.cfg.init,)] self._cur_lenght = 1 self._next_length: Optional[int] = 2 self.last: Optional[CFG.Word] = None \ if Eps() not in self.cfg.rules.get(self.cfg.init, []) \ else () # init and productions of lenght 0 # init and productions of lenght 0 self.counts: List[CFGRandom.CountMap] \ self.counts: List[WordGenerator.CountMap] \ = [{n: 0 for n in self.cfg.nonterminals}, = [{n: 0 for n in self.cfg.nonterminals}, {n: 0 for n in self.cfg.nonterminals}] {n: 0 for n in self.cfg.nonterminals}] if Eps() in self.cfg.rules.get(self.cfg.init, []): if Eps() in self.cfg.rules.get(self.cfg.init, []): self.counts[0][self.cfg.init] = 1 self.counts[0][self.cfg.init] = 1 self.prod_counts: List[CFGRandom.ProdCountMap] = [dict(), dict()] self.prod_counts: List[WordGenerator.ProdCountMap] = [dict(), dict()] for src, prod in self.cfg.productions(): for src, prod in self.cfg.productions(): if len(prod) == 1: # No A -> B (siple) rules if len(prod) == 1: # No A -> B (siple) rules self.counts[1][src] += 1 self.counts[1][src] += 1 def get_and_shift(self) -> Optional[CFG.Word]: """ Generates all words of the grammar in ascending order, or random words of given length. No words are repeated. """ word = self.last if self.last is not None else self._next() self.last = None return word def get(self) -> Optional[CFG.Word]: if self.last is None: return self._next() return self.last def _next(self) -> Optional[CFG.Word]: # Iterative-Deepening pseudo DFS (as a generator) while self._stack or self._next_length is not None: while self._stack: sentence = self._stack.pop() if CFG.all_terminal(sentence): if len(sentence) == self._cur_lenght: self.last = typing.cast(CFG.Word, sentence) return self.last continue for i in range(len(sentence)): if isinstance(sentence[i], Nonterminal): for p in self.cfg.rules.get( typing.cast(Nonterminal, sentence[i]), []): if isinstance(p, Eps): continue new_sentence = sentence[:i] + p + sentence[i + 1:] nlen = len(new_sentence) # prune seen and too long if new_sentence not in self._seen \ and nlen <= self._cur_lenght: self._seen.add(new_sentence) self._stack.append(new_sentence) # recursion elif nlen > self._cur_lenght \ and (self._next_length is None or nlen < self._next_length): self._next_length = nlen break # it suffices to perform left derivations if self._next_length is not None \ and self._next_length > self._cur_lenght: self._cur_lenght = self._next_length self._next_length = None self._stack = [(self.cfg.init,)] self._seen = set() return None def derivations_count(self, length: int, def derivations_count(self, length: int, nterm: Optional[Nonterminal] = None) -> int: nterm: Optional[Nonterminal] = None) -> int: self._materialize(length) self._materialize(length) Loading Loading @@ -706,6 +768,10 @@ class CFGRandom: return count return count def rnd_word(self, length: int) -> Optional[CFG.Word]: def rnd_word(self, length: int) -> Optional[CFG.Word]: """ Returns a random word uniformly selected from all the derivations of given length or None if shuch a word does not exist. """ if self.derivations_count(length) == 0: if self.derivations_count(length) == 0: return None return None Loading Loading @@ -779,71 +845,3 @@ class CachedCYK: self.cache[word] = out self.cache[word] = out return out return out class WordGenerator: """ Generates all words of the grammar in ascending order. No words are repeated. """ def __init__(self, cfg: CFG): self.cfg = cfg.proper() self._seen: Set[CFG.Symbols] = set() self._stack: List[CFG.Symbols] = [(self.cfg.init,)] self._cur_lenght = 1 self._next_length: Optional[int] = 2 self.last: Optional[CFG.Word] = None \ if Eps() not in self.cfg.rules.get(self.cfg.init, []) \ else () def get_and_shift(self) -> Optional[CFG.Word]: word = self.last if self.last is not None else self._next() self.last = None return word def get(self) -> Optional[CFG.Word]: if self.last is None: return self._next() return self.last def _next(self) -> Optional[CFG.Word]: # Iterative-Deepening pseudo DFS (as a generator) while self._stack or self._next_length is not None: while self._stack: sentence = self._stack.pop() if CFG.all_terminal(sentence): if len(sentence) == self._cur_lenght: self.last = typing.cast(CFG.Word, sentence) return self.last continue for i in range(len(sentence)): if isinstance(sentence[i], Nonterminal): for p in self.cfg.rules.get( typing.cast(Nonterminal, sentence[i]), []): if isinstance(p, Eps): continue new_sentence = sentence[:i] + p + sentence[i + 1:] nlen = len(new_sentence) # prune seen and too long if new_sentence not in self._seen \ and nlen <= self._cur_lenght: self._seen.add(new_sentence) self._stack.append(new_sentence) # recursion elif nlen > self._cur_lenght \ and (self._next_length is None or nlen < self._next_length): self._next_length = nlen break # it suffices to perform left derivations if self._next_length is not None \ and self._next_length > self._cur_lenght: self._cur_lenght = self._next_length self._next_length = None self._stack = [(self.cfg.init,)] self._seen = set() return None