Loading cfl.py +67 −13 Original line number Diff line number Diff line Loading @@ -41,6 +41,12 @@ class InfinityType: __radd__ = __add__ def __mul__(self, other) -> InfinityType: assert isinstance(other, InfinityType) or other > 0 return self __rmul__ = __mul__ def __lt__(self, other) -> bool: return False Loading Loading @@ -487,7 +493,7 @@ class CFG: return {n for n in self.nonterminals if n in rewritable_to[n]} def _shortest_words_for_nonterminals(self) -> Dict[Nonterminal, int]: def _nonterminal_min_length(self) -> Dict[Nonterminal, int]: shortest_word: Dict[Nonterminal, int] = dict() for tracker in ChangeTracker(): for src, prod in self.productions(): Loading @@ -512,7 +518,7 @@ class CFG: return shortest_word def _longest_words_for_nonterminals(self, recset: Optional[Set[Nonterminal]] = None) \ def _nonterminal_max_length(self, recset: Optional[Set[Nonterminal]] = None) \ -> Dict[Nonterminal, Union[int, InfinityType]]: if recset is None: recset = self._recursive_nonterminals() Loading @@ -534,6 +540,44 @@ class CFG: tracker.changed() return longest_word def _nonterminal_lang_size(self, recset: Optional[Set[Nonterminal]] = None) \ -> Dict[Nonterminal, Union[int, InfinityType]]: if recset is None: recset = self._recursive_nonterminals() lang_size: Dict[Nonterminal, Union[int, InfinityType]] \ = {n: Infinity for n in recset} for tracker in ChangeTracker(): for src, prods in self.rules.items(): if src in lang_size: continue src_wc: Union[None, InfinityType, int] = 0 for prod in prods: prod_wc: Union[int, InfinityType] = 1 for sym in prod: if isinstance(sym, Nonterminal): sym_wc = lang_size.get(sym) if sym_wc is None: src_wc = None break if sym_wc is Infinity: src_wc = Infinity break prod_wc *= sym_wc if src_wc is None or src_wc is Infinity: break if src_wc is not None: src_wc += prod_wc if src_wc is not None: lang_size[src] = src_wc tracker.changed() return lang_size def _generate_random(self, min_length: int, max_length: int, seed: int = 0) -> Iterable[CFG.Word]: """ Yields a stream of random words of the grammar up to {max_length} in Loading @@ -545,29 +589,39 @@ class CFG: """ random.seed(seed) recursive = self._recursive_nonterminals() shortest_word = self._shortest_words_for_nonterminals() longest_word = self._longest_words_for_nonterminals(recursive) shortest_word = self._nonterminal_min_length() longest_word = self._nonterminal_max_length(recursive) def sentence_min_length(sentence: CFG.Sentence) -> int: return sum(map(lambda s: shortest_word[s] def sentence_length_bound(sentence: CFG.Sentence, length_map) -> int: return sum(map(lambda s: length_map[s] if isinstance(s, Nonterminal) else 1, sentence)) def sentence_min_length(sentence: CFG.Sentence) -> int: return sentence_length_bound(sentence, shortest_word) def sentence_max_length(sentence: CFG.Sentence) -> int: return sentence_length_bound(sentence, longest_word) while True: sentence: CFG.Sentence = (self.init,) while not CFG.all_terminal(sentence): current_min_length = sentence_min_length(sentence) candidates: List[Tuple[int, CFG.Production, int]] = [] current_min_len = sentence_min_length(sentence) candidates: List[Tuple[int, CFG.Production, int, int]] = [] for i in range(len(sentence)): sym = sentence[i] if not isinstance(sym, Nonterminal) \ or sym not in self.rules: continue base_length = current_min_length - shortest_word[sym] base_min = current_min_len - shortest_word[sym] # cannot use the same trick for max due to inifinity base_max = sentence_max_length(sentence[:i] + sentence[1 + i:]) for prod in self.rules[sym]: length = base_length + sentence_min_length(prod) if length < max_length: candidates.append((i, prod, length)) print(f"{CFG._terminal_sequence_to_str(sentence)} -> {candidates}") minl = base_min + sentence_min_length(prod) maxl = base_max + sentence_max_length(prod) if minl < max_length and maxl > min_length: candidates.append((i, prod, minl, maxl)) move = random.choice(candidates) i = move[0] sentence = sentence[:i] + move[1] + sentence[1 + i:] Loading Loading
cfl.py +67 −13 Original line number Diff line number Diff line Loading @@ -41,6 +41,12 @@ class InfinityType: __radd__ = __add__ def __mul__(self, other) -> InfinityType: assert isinstance(other, InfinityType) or other > 0 return self __rmul__ = __mul__ def __lt__(self, other) -> bool: return False Loading Loading @@ -487,7 +493,7 @@ class CFG: return {n for n in self.nonterminals if n in rewritable_to[n]} def _shortest_words_for_nonterminals(self) -> Dict[Nonterminal, int]: def _nonterminal_min_length(self) -> Dict[Nonterminal, int]: shortest_word: Dict[Nonterminal, int] = dict() for tracker in ChangeTracker(): for src, prod in self.productions(): Loading @@ -512,7 +518,7 @@ class CFG: return shortest_word def _longest_words_for_nonterminals(self, recset: Optional[Set[Nonterminal]] = None) \ def _nonterminal_max_length(self, recset: Optional[Set[Nonterminal]] = None) \ -> Dict[Nonterminal, Union[int, InfinityType]]: if recset is None: recset = self._recursive_nonterminals() Loading @@ -534,6 +540,44 @@ class CFG: tracker.changed() return longest_word def _nonterminal_lang_size(self, recset: Optional[Set[Nonterminal]] = None) \ -> Dict[Nonterminal, Union[int, InfinityType]]: if recset is None: recset = self._recursive_nonterminals() lang_size: Dict[Nonterminal, Union[int, InfinityType]] \ = {n: Infinity for n in recset} for tracker in ChangeTracker(): for src, prods in self.rules.items(): if src in lang_size: continue src_wc: Union[None, InfinityType, int] = 0 for prod in prods: prod_wc: Union[int, InfinityType] = 1 for sym in prod: if isinstance(sym, Nonterminal): sym_wc = lang_size.get(sym) if sym_wc is None: src_wc = None break if sym_wc is Infinity: src_wc = Infinity break prod_wc *= sym_wc if src_wc is None or src_wc is Infinity: break if src_wc is not None: src_wc += prod_wc if src_wc is not None: lang_size[src] = src_wc tracker.changed() return lang_size def _generate_random(self, min_length: int, max_length: int, seed: int = 0) -> Iterable[CFG.Word]: """ Yields a stream of random words of the grammar up to {max_length} in Loading @@ -545,29 +589,39 @@ class CFG: """ random.seed(seed) recursive = self._recursive_nonterminals() shortest_word = self._shortest_words_for_nonterminals() longest_word = self._longest_words_for_nonterminals(recursive) shortest_word = self._nonterminal_min_length() longest_word = self._nonterminal_max_length(recursive) def sentence_min_length(sentence: CFG.Sentence) -> int: return sum(map(lambda s: shortest_word[s] def sentence_length_bound(sentence: CFG.Sentence, length_map) -> int: return sum(map(lambda s: length_map[s] if isinstance(s, Nonterminal) else 1, sentence)) def sentence_min_length(sentence: CFG.Sentence) -> int: return sentence_length_bound(sentence, shortest_word) def sentence_max_length(sentence: CFG.Sentence) -> int: return sentence_length_bound(sentence, longest_word) while True: sentence: CFG.Sentence = (self.init,) while not CFG.all_terminal(sentence): current_min_length = sentence_min_length(sentence) candidates: List[Tuple[int, CFG.Production, int]] = [] current_min_len = sentence_min_length(sentence) candidates: List[Tuple[int, CFG.Production, int, int]] = [] for i in range(len(sentence)): sym = sentence[i] if not isinstance(sym, Nonterminal) \ or sym not in self.rules: continue base_length = current_min_length - shortest_word[sym] base_min = current_min_len - shortest_word[sym] # cannot use the same trick for max due to inifinity base_max = sentence_max_length(sentence[:i] + sentence[1 + i:]) for prod in self.rules[sym]: length = base_length + sentence_min_length(prod) if length < max_length: candidates.append((i, prod, length)) print(f"{CFG._terminal_sequence_to_str(sentence)} -> {candidates}") minl = base_min + sentence_min_length(prod) maxl = base_max + sentence_max_length(prod) if minl < max_length and maxl > min_length: candidates.append((i, prod, minl, maxl)) move = random.choice(candidates) i = move[0] sentence = sentence[:i] + move[1] + sentence[1 + i:] Loading