Loading grammars_cfg.py +7 −4 Original line number Original line Diff line number Diff line Loading @@ -704,6 +704,7 @@ class WordGenerator: self._next_length = None self._next_length = None self._stack = [(self.cfg.init,)] self._stack = [(self.cfg.init,)] self._seen = set() self._seen = set() self._materialize(self._cur_lenght) return None return None Loading @@ -726,7 +727,8 @@ class WordGenerator: count = self._materialize_prod_count(prod, length) count = self._materialize_prod_count(prod, length) self.counts[length][src] += count self.counts[length][src] += count def _materialize_prod(self, prod: CFG.Production, length: int, prefix=""): def _materialize_prod_count(self, prod: CFG.Production, length: int, prefix="") -> int: """Assumes smaller length are already computed""" """Assumes smaller length are already computed""" if isinstance(prod, Eps): if isinstance(prod, Eps): Loading Loading @@ -758,10 +760,10 @@ class WordGenerator: beta = prod[1:] beta = prod[1:] for a in range(1, length): # end is exclusive for a in range(1, length): # end is exclusive b = length - a b = length - a cnt_alpha = self._materialize_prod(alpha, a, f"{prefix} ") cnt_alpha = self._materialize_prod_count(alpha, a, f"{prefix} ") if cnt_alpha == 0: if cnt_alpha == 0: continue continue cnt_beta = self._materialize_prod(beta, b, f"{prefix} ") cnt_beta = self._materialize_prod_count(beta, b, f"{prefix} ") count += cnt_alpha * cnt_beta count += cnt_alpha * cnt_beta self.prod_counts[length][prod] = count self.prod_counts[length][prod] = count Loading Loading @@ -791,12 +793,13 @@ class WordGenerator: candidates: List[CFG.Symbols] = [] candidates: List[CFG.Symbols] = [] weights: List[int] = [] weights: List[int] = [] assert isinstance(sym, Nonterminal) for prod in self.cfg.rules[sym]: for prod in self.cfg.rules[sym]: suff = tuple(prod) + sentence[i + 1:] suff = tuple(prod) + sentence[i + 1:] # note: no nonterminals before i ~> the weight of the whole # note: no nonterminals before i ~> the weight of the whole # candidate is the same as the weight of its suffix starting at # candidate is the same as the weight of its suffix starting at # position i # position i w = self._materialize_prod(suff, length - i) w = self._materialize_prod_count(suff, length - i) if w > 0: if w > 0: cand = sentence[:i] + suff cand = sentence[:i] + suff candidates.append(cand) candidates.append(cand) Loading Loading
grammars_cfg.py +7 −4 Original line number Original line Diff line number Diff line Loading @@ -704,6 +704,7 @@ class WordGenerator: self._next_length = None self._next_length = None self._stack = [(self.cfg.init,)] self._stack = [(self.cfg.init,)] self._seen = set() self._seen = set() self._materialize(self._cur_lenght) return None return None Loading @@ -726,7 +727,8 @@ class WordGenerator: count = self._materialize_prod_count(prod, length) count = self._materialize_prod_count(prod, length) self.counts[length][src] += count self.counts[length][src] += count def _materialize_prod(self, prod: CFG.Production, length: int, prefix=""): def _materialize_prod_count(self, prod: CFG.Production, length: int, prefix="") -> int: """Assumes smaller length are already computed""" """Assumes smaller length are already computed""" if isinstance(prod, Eps): if isinstance(prod, Eps): Loading Loading @@ -758,10 +760,10 @@ class WordGenerator: beta = prod[1:] beta = prod[1:] for a in range(1, length): # end is exclusive for a in range(1, length): # end is exclusive b = length - a b = length - a cnt_alpha = self._materialize_prod(alpha, a, f"{prefix} ") cnt_alpha = self._materialize_prod_count(alpha, a, f"{prefix} ") if cnt_alpha == 0: if cnt_alpha == 0: continue continue cnt_beta = self._materialize_prod(beta, b, f"{prefix} ") cnt_beta = self._materialize_prod_count(beta, b, f"{prefix} ") count += cnt_alpha * cnt_beta count += cnt_alpha * cnt_beta self.prod_counts[length][prod] = count self.prod_counts[length][prod] = count Loading Loading @@ -791,12 +793,13 @@ class WordGenerator: candidates: List[CFG.Symbols] = [] candidates: List[CFG.Symbols] = [] weights: List[int] = [] weights: List[int] = [] assert isinstance(sym, Nonterminal) for prod in self.cfg.rules[sym]: for prod in self.cfg.rules[sym]: suff = tuple(prod) + sentence[i + 1:] suff = tuple(prod) + sentence[i + 1:] # note: no nonterminals before i ~> the weight of the whole # note: no nonterminals before i ~> the weight of the whole # candidate is the same as the weight of its suffix starting at # candidate is the same as the weight of its suffix starting at # position i # position i w = self._materialize_prod(suff, length - i) w = self._materialize_prod_count(suff, length - i) if w > 0: if w > 0: cand = sentence[:i] + suff cand = sentence[:i] + suff candidates.append(cand) candidates.append(cand) Loading