Verified Commit 33140e20 authored by Vladimír Štill's avatar Vladimír Štill
Browse files

CFL: Basic version of the random generator

parent 047662ef
Loading
Loading
Loading
Loading
+67 −13
Original line number Diff line number Diff line
@@ -41,6 +41,12 @@ class InfinityType:

    __radd__ = __add__

    def __mul__(self, other) -> InfinityType:
        """
        Multiply infinity by {other}; the result is this very object.

        {other} has to be either an InfinityType or a value greater than
        zero. This is only checked by an assertion.
        """
        if not isinstance(other, InfinityType):
            assert other > 0
        return self

    # the reflected operation (other * infinity) reuses the same method
    __rmul__ = __mul__

    def __lt__(self, other) -> bool:
        """
        Strict less-than comparison; always False, whatever {other} is.
        """
        return False

@@ -487,7 +493,7 @@ class CFG:

        return {n for n in self.nonterminals if n in rewritable_to[n]}

    def _shortest_words_for_nonterminals(self) -> Dict[Nonterminal, int]:
    def _nonterminal_min_length(self) -> Dict[Nonterminal, int]:
        shortest_word: Dict[Nonterminal, int] = dict()
        for tracker in ChangeTracker():
            for src, prod in self.productions():
@@ -512,7 +518,7 @@ class CFG:

        return shortest_word

    def _longest_words_for_nonterminals(self, recset: Optional[Set[Nonterminal]] = None) \
    def _nonterminal_max_length(self, recset: Optional[Set[Nonterminal]] = None) \
            -> Dict[Nonterminal, Union[int, InfinityType]]:
        if recset is None:
            recset = self._recursive_nonterminals()
@@ -534,6 +540,44 @@ class CFG:
                    tracker.changed()
        return longest_word

    def _nonterminal_lang_size(self, recset: Optional[Set[Nonterminal]] = None) \
            -> Dict[Nonterminal, Union[int, InfinityType]]:
        """
        Estimate how many words each nonterminal can derive.

        Every nonterminal in {recset} is taken to derive infinitely many
        words. For every other nonterminal that has rules, the size is the sum
        over its productions of the product of the sizes of the nonterminals
        in the production (terminals contribute a factor of 1). This counts
        derivations, so for an ambiguous grammar it is an upper bound on the
        number of distinct words.

        A nonterminal that is neither in {recset} nor a key of self.rules has
        no productions, so it is counted as deriving 0 words.

        {recset} is the set of recursive nonterminals; it is computed by
        _recursive_nonterminals() when not given.

        Returns a mapping from nonterminals ({recset} and the resolvable keys
        of self.rules) to an int or Infinity. The result does not depend on
        the order of productions or of symbols within a production.
        """
        if recset is None:
            recset = self._recursive_nonterminals()
        lang_size: Dict[Nonterminal, Union[int, InfinityType]] \
            = {n: Infinity for n in recset}

        def production_size(prod) -> Union[None, int, InfinityType]:
            # Size of a single production, or None if it cannot be decided
            # yet because some nonterminal in it is still unresolved.
            size = 1
            unresolved = False
            infinite = False
            for sym in prod:
                if not isinstance(sym, Nonterminal):
                    continue
                if sym not in lang_size:
                    if sym not in self.rules:
                        # no productions at all: nothing can be derived
                        return 0
                    unresolved = True
                    continue
                sym_wc = lang_size[sym]
                if sym_wc is Infinity:
                    infinite = True
                elif sym_wc == 0:
                    # a zero factor decides the product regardless of the rest
                    return 0
                else:
                    size *= sym_wc
            if unresolved:
                # an unresolved factor may still turn out to be 0, so even an
                # infinite factor does not decide the product yet
                return None
            return Infinity if infinite else size

        for tracker in ChangeTracker():
            for src, prods in self.rules.items():
                if src in lang_size:
                    continue

                total = 0
                unresolved = False
                infinite = False
                for prod in prods:
                    prod_wc = production_size(prod)
                    if prod_wc is None:
                        unresolved = True
                    elif prod_wc is Infinity:
                        # one infinite production makes the whole sum
                        # infinite, whatever the other productions yield
                        infinite = True
                        break
                    else:
                        total += prod_wc

                if infinite:
                    lang_size[src] = Infinity
                elif unresolved:
                    # retry in the next round, once more sizes are known
                    continue
                else:
                    lang_size[src] = total
                tracker.changed()

        return lang_size


    def _generate_random(self, min_length: int, max_length: int, seed: int = 0) -> Iterable[CFG.Word]:
        """
        Yields a stream of random words of the grammar up to {max_length} in
@@ -545,29 +589,39 @@ class CFG:
        """
        random.seed(seed)
        recursive = self._recursive_nonterminals()
        shortest_word = self._shortest_words_for_nonterminals()
        longest_word = self._longest_words_for_nonterminals(recursive)
        shortest_word = self._nonterminal_min_length()
        longest_word = self._nonterminal_max_length(recursive)

        def sentence_min_length(sentence: CFG.Sentence) -> int:
            return sum(map(lambda s: shortest_word[s]
        def sentence_length_bound(sentence: CFG.Sentence, length_map) -> int:
            return sum(map(lambda s: length_map[s]
                           if isinstance(s, Nonterminal) else 1, sentence))

        def sentence_min_length(sentence: CFG.Sentence) -> int:
            return sentence_length_bound(sentence, shortest_word)

        def sentence_max_length(sentence: CFG.Sentence) -> int:
            return sentence_length_bound(sentence, longest_word)

        while True:
            sentence: CFG.Sentence = (self.init,)
            while not CFG.all_terminal(sentence):
                current_min_length = sentence_min_length(sentence)
                candidates: List[Tuple[int, CFG.Production, int]] = []
                current_min_len = sentence_min_length(sentence)

                candidates: List[Tuple[int, CFG.Production, int, int]] = []
                for i in range(len(sentence)):
                    sym = sentence[i]
                    if not isinstance(sym, Nonterminal) \
                            or sym not in self.rules:
                        continue
                    base_length = current_min_length - shortest_word[sym]
                    base_min = current_min_len - shortest_word[sym]
                    # cannot use the same trick for max due to infinity
                    base_max = sentence_max_length(sentence[:i]
                                                   + sentence[1 + i:])
                    for prod in self.rules[sym]:
                        length = base_length + sentence_min_length(prod)
                        if length < max_length:
                            candidates.append((i, prod, length))
                print(f"{CFG._terminal_sequence_to_str(sentence)} -> {candidates}")
                        minl = base_min + sentence_min_length(prod)
                        maxl = base_max + sentence_max_length(prod)
                        if minl < max_length and maxl > min_length:
                            candidates.append((i, prod, minl, maxl))
                move = random.choice(candidates)
                i = move[0]
                sentence = sentence[:i] + move[1] + sentence[1 + i:]