Verified Commit 9d4203ee authored by Vladimír Štill's avatar Vladimír Štill
Browse files

CFG: Merge the random and nonrandom generator for now

parent 275174a7
Loading
Loading
Loading
Loading
+80 −82
Original line number Original line Diff line number Diff line
@@ -520,9 +520,6 @@ class CFG:
        left_gen = WordGenerator(left)
        left_gen = WordGenerator(left)
        right_gen = WordGenerator(right)
        right_gen = WordGenerator(right)


        left_rnd = CFGRandom(left)
        right_rnd = CFGRandom(right)

        left_ce: Optional[CFG.Word] = None
        left_ce: Optional[CFG.Word] = None
        right_ce: Optional[CFG.Word] = None
        right_ce: Optional[CFG.Word] = None


@@ -544,7 +541,7 @@ class CFG:
            if right_ce is None:
            if right_ce is None:
                right_ce = may_pop(right_words - left_words)
                right_ce = may_pop(right_words - left_words)


        def try_word(maybe_ce: Optional[CFG.Word], rng: CFGRandom,
        def try_word(maybe_ce: Optional[CFG.Word], rng: WordGenerator,
                     other: CachedCYK, length: int) -> Optional[CFG.Word]:
                     other: CachedCYK, length: int) -> Optional[CFG.Word]:
            if maybe_ce is not None:
            if maybe_ce is not None:
                return maybe_ce
                return maybe_ce
@@ -573,8 +570,8 @@ class CFG:
                return sum(len(x) for k, x in words.items()
                return sum(len(x) for k, x in words.items()
                           if k <= last_min_size)
                           if k <= last_min_size)


            def nxderivations(words: Dict[int, Set[CFG.Word]], rng: CFGRandom,
            def nxderivations(words: Dict[int, Set[CFG.Word]],
                              new_size: int) -> int:
                              rng: WordGenerator, new_size: int) -> int:
                return total(words) + \
                return total(words) + \
                    sum(rng.derivations_count(l) for l in
                    sum(rng.derivations_count(l) for l in
                        range(last_min_size + 1, new_size + 1))
                        range(last_min_size + 1, new_size + 1))
@@ -596,8 +593,8 @@ class CFG:
                        fill_ces(left_words.get(sz, set()),
                        fill_ces(left_words.get(sz, set()),
                                 right_words.get(sz, set()))
                                 right_words.get(sz, set()))
                        last_checked_len = sz
                        last_checked_len = sz
                    nxl = nxderivations(left_words, left_rnd, min_size)
                    nxl = nxderivations(left_words, left_gen, min_size)
                    nxr = nxderivations(right_words, right_rnd, min_size)
                    nxr = nxderivations(right_words, right_gen, min_size)


                    last_min_size = min_size
                    last_min_size = min_size


@@ -615,8 +612,8 @@ class CFG:


        for length in range(last_checked_len + 1, max_cmp_len + 1):
        for length in range(last_checked_len + 1, max_cmp_len + 1):
            for _ in range(random_samples):
            for _ in range(random_samples):
                left_ce = try_word(left_ce, left_rnd, right_cyk, length)
                left_ce = try_word(left_ce, left_gen, right_cyk, length)
                right_ce = try_word(right_ce, right_rnd, left_cyk, length)
                right_ce = try_word(right_ce, right_gen, left_cyk, length)
                if left_ce is not None and right_ce is not None:
                if left_ce is not None and right_ce is not None:
                    return mkres()
                    return mkres()


@@ -626,25 +623,90 @@ class CFG:
        return self.to_string()
        return self.to_string()




class CFGRandom:
# TODO: split into word counter, generator and random generator
class WordGenerator:

    CountMap = Dict[Nonterminal, int]
    CountMap = Dict[Nonterminal, int]
    ProdCountMap = Dict[CFG.Production, int]
    ProdCountMap = Dict[CFG.Production, int]


    def __init__(self, cfg):
    def __init__(self, cfg: CFG):
        self.cfg = cfg.proper()
        self.cfg = cfg.proper()
        self._seen: Set[CFG.Symbols] = set()
        self._stack: List[CFG.Symbols] = [(self.cfg.init,)]
        self._cur_lenght = 1
        self._next_length: Optional[int] = 2
        self.last: Optional[CFG.Word] = None \
            if Eps() not in self.cfg.rules.get(self.cfg.init, []) \
            else ()


        # init and productions of length 0
        # init and productions of length 0
        self.counts: List[CFGRandom.CountMap] \
        self.counts: List[WordGenerator.CountMap] \
            = [{n: 0 for n in self.cfg.nonterminals},
            = [{n: 0 for n in self.cfg.nonterminals},
               {n: 0 for n in self.cfg.nonterminals}]
               {n: 0 for n in self.cfg.nonterminals}]
        if Eps() in self.cfg.rules.get(self.cfg.init, []):
        if Eps() in self.cfg.rules.get(self.cfg.init, []):
            self.counts[0][self.cfg.init] = 1
            self.counts[0][self.cfg.init] = 1
        self.prod_counts: List[CFGRandom.ProdCountMap] = [dict(), dict()]
        self.prod_counts: List[WordGenerator.ProdCountMap] = [dict(), dict()]


        for src, prod in self.cfg.productions():
        for src, prod in self.cfg.productions():
            if len(prod) == 1:  # No A -> B (siple) rules
            if len(prod) == 1:  # No A -> B (siple) rules
                self.counts[1][src] += 1
                self.counts[1][src] += 1


    def get_and_shift(self) -> Optional[CFG.Word]:
        """
        Return the current word and move the enumeration past it.

        If a word is already pending in ``self.last`` it is returned,
        otherwise a fresh one is obtained from ``self._next()``. In both
        cases ``self.last`` is cleared afterwards, so the following call
        yields the next word. The result is None when ``self._next()`` has
        nothing more to produce.
        """
        current = self.last
        if current is None:
            current = self._next()
        # Consume the word: nothing stays pending after this call.
        self.last = None
        return current

    def get(self) -> Optional[CFG.Word]:
        """
        Return the current word without consuming it.

        A word pending in ``self.last`` is returned as is; when nothing is
        pending, the result of ``self._next()`` is returned instead (which
        may be None).
        """
        if self.last is not None:
            return self.last
        return self._next()

    def _next(self) -> Optional[CFG.Word]:
        """
        Resume the search and return the next word, or None when the search
        is exhausted.

        The search is an iterative-deepening DFS over sentential forms that
        keeps its state in ``self._stack`` / ``self._seen``, so each call
        continues where the previous one stopped. A pass only yields words
        whose length equals ``self._cur_lenght``; the smallest longer length
        met during the pass becomes the target of the following pass. The
        returned word is also stored in ``self.last``.
        """
        while self._stack or self._next_length is not None:
            while self._stack:
                form = self._stack.pop()

                if CFG.all_terminal(form):
                    # Shorter words were already reported by earlier passes.
                    if len(form) != self._cur_lenght:
                        continue
                    self.last = typing.cast(CFG.Word, form)
                    return self.last

                # Only the leftmost nonterminal is rewritten; leftmost
                # derivations are enough to reach every word.
                pos = next((idx for idx, sym in enumerate(form)
                            if isinstance(sym, Nonterminal)), None)
                if pos is None:
                    continue

                prefix, suffix = form[:pos], form[pos + 1:]
                for rhs in self.cfg.rules.get(
                        typing.cast(Nonterminal, form[pos]), []):
                    if isinstance(rhs, Eps):
                        continue
                    candidate = prefix + rhs + suffix
                    size = len(candidate)

                    if size > self._cur_lenght:
                        # Too long for this pass: remember the smallest
                        # such length as the next target.
                        if self._next_length is None \
                                or size < self._next_length:
                            self._next_length = size
                    elif candidate not in self._seen:
                        self._seen.add(candidate)
                        self._stack.append(candidate)  # recursion

            # Current pass is done; restart from the initial nonterminal
            # with the next target length, if there is one.
            if self._next_length is not None \
                    and self._next_length > self._cur_lenght:
                self._cur_lenght = self._next_length
                self._next_length = None
                self._stack = [(self.cfg.init,)]
                self._seen = set()

        return None

    def derivations_count(self, length: int,
    def derivations_count(self, length: int,
                          nterm: Optional[Nonterminal] = None) -> int:
                          nterm: Optional[Nonterminal] = None) -> int:
        self._materialize(length)
        self._materialize(length)
@@ -706,6 +768,10 @@ class CFGRandom:
        return count
        return count


    def rnd_word(self, length: int) -> Optional[CFG.Word]:
    def rnd_word(self, length: int) -> Optional[CFG.Word]:
        """
        Returns a random word uniformly selected from all the derivations of
        given length or None if such a word does not exist.
        """
        if self.derivations_count(length) == 0:
        if self.derivations_count(length) == 0:
            return None
            return None


@@ -779,71 +845,3 @@ class CachedCYK:


        self.cache[word] = out
        self.cache[word] = out
        return out
        return out


class WordGenerator:
    """
    Enumerates the words of a grammar by increasing length.
    Every word is produced at most once.

    The enumeration works on the result of ``cfg.proper()`` and is driven by
    repeated depth-first searches over leftmost derivations, one search per
    target word length.
    """

    def __init__(self, cfg: CFG):
        grammar = cfg.proper()
        self.cfg = grammar
        # State of the current search pass: forms already pushed and forms
        # still waiting to be expanded.
        self._seen: Set[CFG.Symbols] = set()
        self._stack: List[CFG.Symbols] = [(grammar.init,)]
        # Word length targeted by the current pass and by the following one.
        self._cur_lenght = 1
        self._next_length: Optional[int] = 2
        # The search skips epsilon rules, so the empty word is pre-loaded as
        # the pending word when the initial nonterminal can derive it.
        has_eps = Eps() in grammar.rules.get(grammar.init, [])
        self.last: Optional[CFG.Word] = () if has_eps else None

    def get_and_shift(self) -> Optional[CFG.Word]:
        """
        Return the current word and move the enumeration past it.
        Returns None when ``_next()`` has nothing more to produce.
        """
        current = self.last
        if current is None:
            current = self._next()
        self.last = None
        return current

    def get(self) -> Optional[CFG.Word]:
        """
        Return the current word without consuming it; when no word is
        pending, return the result of ``_next()`` (which may be None).
        """
        if self.last is not None:
            return self.last
        return self._next()

    def _next(self) -> Optional[CFG.Word]:
        """
        Resume the iterative-deepening DFS and return the next word, or None
        when the search is exhausted. The returned word is also stored in
        ``self.last``.
        """
        while self._stack or self._next_length is not None:
            while self._stack:
                form = self._stack.pop()

                if CFG.all_terminal(form):
                    # Shorter words were already reported by earlier passes.
                    if len(form) != self._cur_lenght:
                        continue
                    self.last = typing.cast(CFG.Word, form)
                    return self.last

                # Only the leftmost nonterminal is rewritten; leftmost
                # derivations are enough to reach every word.
                pos = next((idx for idx, sym in enumerate(form)
                            if isinstance(sym, Nonterminal)), None)
                if pos is None:
                    continue

                prefix, suffix = form[:pos], form[pos + 1:]
                for rhs in self.cfg.rules.get(
                        typing.cast(Nonterminal, form[pos]), []):
                    if isinstance(rhs, Eps):
                        continue
                    candidate = prefix + rhs + suffix
                    size = len(candidate)

                    if size > self._cur_lenght:
                        # Too long for this pass: remember the smallest
                        # such length as the next target.
                        if self._next_length is None \
                                or size < self._next_length:
                            self._next_length = size
                    elif candidate not in self._seen:
                        self._seen.add(candidate)
                        self._stack.append(candidate)  # recursion

            # Current pass is done; restart from the initial nonterminal
            # with the next target length, if there is one.
            if self._next_length is not None \
                    and self._next_length > self._cur_lenght:
                self._cur_lenght = self._next_length
                self._next_length = None
                self._stack = [(self.cfg.init,)]
                self._seen = set()

        return None