Commit 1feddd15 authored by Vladimír Štill's avatar Vladimír Štill
Browse files

lib: Drop .parser for the parser imports

parent fa0c432b
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
from typing import Tuple, Union
from typing import Tuple, Union
from lib import reg
from lib import reg
from lib.parsing.parser import Parser, ParsingError
from lib.parsing import Parser, ParsingError


# support functions common for both fja_checker and web_checker
# support functions common for both fja_checker and web_checker


+496 −0
Original line number Original line Diff line number Diff line
from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from copy import deepcopy
from lib.common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from lib.reg import DFA, NFA, RegGrammar
from lib.grammars_cfg import CFG
from lib.regex import RegEx, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4 # type: ignore
from antlr4.error.ErrorListener import ErrorListener
from lib.parsing.DFALexer import DFALexer
from lib.parsing.DFAParser import DFAParser
from lib.parsing.DFAListener import DFAListener
from lib.parsing.NFALexer import NFALexer
from lib.parsing.NFAParser import NFAParser
from lib.parsing.NFAListener import NFAListener
from lib.parsing.RegExLexer import RegExLexer
from lib.parsing.RegExParser import RegExParser
from lib.parsing.RegExVisitor import RegExVisitor
from lib.parsing.CFGLexer import CFGLexer
from lib.parsing.CFGParser import CFGParser
from lib.parsing.CFGListener import CFGListener

class ParsingError(Exception):
    """Raised when the textual description of a formalism cannot be parsed.

    ``args`` is either a single message string or an already-built sequence of
    arguments, typically the ``args`` tuple of the exception being wrapped.
    """

    def __init__(self, args):
        # Assigning a bare string to ``BaseException.args`` turns it into a
        # tuple of single characters, so a lone message is wrapped first.
        if isinstance(args, str):
            args = (args,)
        self.args = args


# ANTLR recovers from errors and parses whatever it can, even when the input
# does not match the requested formalism. This listener makes it abort on any parsing problem instead.
class ErrorShouter(ErrorListener):
    """Error listener that raises on every reported syntax error."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        # Turn the report into an exception so that parsing stops right here.
        details = (line, column, msg)
        report = "ERROR: when parsing line %d column %d: %s\n" % details
        raise Exception(report)


def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """List the upper-case, non-dunder callable members of ``parser.AnyvalueContext``.

    The names come back in the order produced by ``dir()``.
    """
    context = parser.AnyvalueContext
    names = []
    for candidate in dir(context):
        member = getattr(context, candidate)
        if not callable(member):
            continue
        if candidate.startswith("__") or not candidate.isupper():
            continue
        names.append(candidate)
    return names


class Parser:
    """Converts formalisms between their object form and their textual form.

    The ``*_to_str`` methods render an object as text; names and rule variants
    are emitted in sorted order so the same object always gives the same text.
    The ``str_to_*`` methods parse text with the generated lexers/parsers and
    raise ``ParsingError`` when anything goes wrong.
    """

    def __init__(self):
        pass

    def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
        """Return the distinct ``name`` values of ``collection`` as ``{a,b,...}``."""
        # Sorted: plain set iteration order differs between interpreter runs.
        return "{" + ','.join(sorted({item.name for item in collection})) + "}"

    def _grammar_to_str(self, grammar, full: bool) -> str:
        """Render a grammar's rules, preceded by a header line when ``full``."""
        # The initial nonterminal comes first, the others follow by name.
        others = sorted(grammar.nonterminals.difference({grammar.init}),
                        key=lambda nonterminal: nonterminal.name)
        rules = self.rules_to_str(grammar.rules, [grammar.init] + others)
        if not full:
            return rules

        # full - verbose description of the grammar - only for development, dismiss later
        nonterminals_names = self.names_to_str(grammar.nonterminals)
        terminals = self.names_to_str(grammar.terminals)
        return f"Grammar: ({nonterminals_names}, {terminals}, P, {grammar.init.name})\n{rules}"

    def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
        """Render a regular grammar; ``full`` adds the ``Grammar: (...)`` header."""
        return self._grammar_to_str(reg, full)

    def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
        """Render a context-free grammar; ``full`` adds the ``Grammar: (...)`` header."""
        return self._grammar_to_str(gra, full)

    def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
        """Render one ``N -> a | b`` line per nonterminal that has rules.

        Lines follow the order of ``nonterminals``; nonterminals that are not a
        key of ``rules`` are skipped. Returns an empty string when no line is
        produced.
        """
        lines = []
        for nonterminal in nonterminals:
            if nonterminal not in rules:
                continue
            variants = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
            rewritten = ' | '.join(variants)
            lines.append(f"{nonterminal.name} -> {rewritten}")
        return "\n".join(lines)

    def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
        """Render one right-hand side: concatenated names for a tuple, else the single name."""
        if isinstance(variant, tuple):
            return ''.join(symbol.name for symbol in variant)
        return variant.name

    def dfa_to_str(self, dfa: DFA, full : bool = False) -> str:
        """Render a DFA as ``init=... (state,char)=state ... final={...}``.

        With ``full`` the result starts with a ``DFA = (...)`` header line.
        """
        # Every transition keeps its trailing space, as in the original format.
        transition = "".join(
            f"({state.name},{character.name})={dest_state.name} "
            for (state, character), dest_state in dfa.transition.items()
        )

        init = f"init={dfa.init.name}"
        final = f"final={self.names_to_str(dfa.final)}"

        # full - verbose description of DFA - only for development, dismiss later
        if full:
            return f"DFA = ({self.names_to_str(dfa.states)}, {self.names_to_str(dfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"
        return f"{init} {transition} {final}"

    def nfa_to_str(self, nfa: NFA, full : bool = False) -> str:
        """Render an NFA as ``init=... (state,char)={states} ... final={...}``.

        With ``full`` the result starts with an ``NFA = (...)`` header line.
        """
        transition = "".join(
            f"({state.name},{character.name})={self.names_to_str(dest_states)} "
            for (state, character), dest_states in nfa.transition.items()
        )

        init = f"init={nfa.init.name}"
        final = f"final={self.names_to_str(nfa.final)}"

        if full:
            return f"NFA = ({self.names_to_str(nfa.states)}, {self.names_to_str(nfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"
        return f"{init} {transition} {final}"

    def regex_to_str(self, reg: RegEx) -> str:
        """Render a regular expression through its AST's ``astprint``."""
        return reg.expression.astprint()

    def _parse_tree(self, string: str, given_lexer, given_parser):
        """Lex and parse ``string`` with the given classes and return the ``start`` tree.

        An ``ErrorShouter`` is attached to both the lexer and the parser.
        """
        error_listener = ErrorShouter()
        chars = antlr4.InputStream(string)
        lexer = given_lexer(chars)
        lexer.addErrorListener(error_listener)
        tokens = antlr4.CommonTokenStream(lexer)
        parser = given_parser(tokens)
        parser.addErrorListener(error_listener)
        return parser.start()

    def _resolve_init(self, builder):
        """Return the builder's initial state, defaulting to its first seen state.

        Raises ``ParsingError`` when the builder saw no state at all.
        """
        if builder.init is None:
            builder.init = builder.first_state
            if builder.init is None:
                # Passed as a 1-tuple so the message stays one string in ``args``.
                raise ParsingError(("Automat musí obsahovat alespoň jeden stav.",))
        return builder.init

    def common_parse(self, string: str, given_lexer, given_parser, given_builder):
        """Parse ``string`` and walk the tree with a fresh ``given_builder`` listener.

        Returns the builder after the walk.
        """
        tree = self._parse_tree(string, given_lexer, given_parser)
        builder = given_builder()
        walker = antlr4.ParseTreeWalker()
        walker.walk(builder, tree)
        return builder

    def str_to_cfg(self, string: str) -> CFG:
        """Parse a context-free grammar; raises ``ParsingError`` on any failure."""
        try:
            builder = self.common_parse(string, CFGLexer, CFGParser, CFGBuilder)
            return CFG(builder.nonterminals, builder.terminals, builder.rules, builder.init)

        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_reggrammar(self, string: str) -> RegGrammar:
        """Parse a regular grammar; raises ``ParsingError`` on any failure."""
        try:
            cfg = self.str_to_cfg(string)
            return RegGrammar.from_cfg(cfg)

        except ParsingError:
            # Already in its final form, no need to wrap it again.
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_dfa(self, string: str) -> DFA:
        """Parse a DFA; raises ``ParsingError`` on any failure.

        Without an explicit initial state, the source state of the first
        transition is used.
        """
        try:
            builder = self.common_parse(string, DFALexer, DFAParser, DFABuilder)
            init = self._resolve_init(builder)
            return DFA(builder.states, builder.characters, builder.transition, init, builder.final)

        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_nfa(self, string: str) -> NFA:
        """Parse an NFA; raises ``ParsingError`` on any failure.

        Without an explicit initial state, the source state of the first
        transition is used.
        """
        try:
            builder = self.common_parse(string, NFALexer, NFAParser, NFABuilder)
            init = self._resolve_init(builder)
            return NFA(builder.states, builder.characters, builder.transition, init, builder.final)

        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_regex(self, string: str) -> RegEx:
        """Parse a regular expression; raises ``ParsingError`` on any failure."""
        try:
            tree = self._parse_tree(string, RegExLexer, RegExParser)
            # The regex builder is a visitor, so it is driven directly
            # instead of through a tree walker.
            ast = RegExBuilder()
            ast.visitStart(tree)

            return RegEx(ast.characters, ast.expression)

        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e


class DFABuilder(DFAListener):
    """Parse-tree listener that gathers the parts of a DFA.

    After a walk the instance holds the states, characters, transition
    mapping, the explicit initial state (if any), the source state of the
    first transition and the final states.
    """

    # names of the accessors to try on every ``anyvalue`` context
    anyvalue_attributes = anyvalue_attributes(DFAParser)

    def __init__(self):
        self.init = None
        self.first_state = None
        self.states = set()
        self.characters = set()
        self.final = set()
        self.transition = {}

    def exitInit(self, ctx):
        """Record the explicitly given initial state, when there is one."""
        name_ctx = ctx.statename()
        if name_ctx is None:
            return
        initial = State(self.visitStatename(name_ctx))
        self.init = initial
        self.states.add(initial)

    def visitStatename(self, ctx) -> str:
        """Return the text of a state name, either plain or quoted."""
        if ctx.STATE():
            return str(ctx.STATE())
        if not ctx.QUOTE():
            return None

        # Quoted name: glue together every value found in the anyvalue children.
        pieces = []
        index = 0
        part = ctx.anyvalue(index)
        while part is not None:
            for attribute in self.anyvalue_attributes:
                token = getattr(part, attribute)()
                if token is not None:
                    pieces.append(str(token))
            index += 1
            part = ctx.anyvalue(index)
        return "".join(pieces)

    def exitProduction(self, ctx):
        """Store one transition ``(state, character) -> state``."""
        source = State(self.visitStatename(ctx.statename(0)))
        symbol = Character(self.visitStatename(ctx.statename(1)))
        target = State(self.visitStatename(ctx.statename(2)))
        self.states.update((source, target))
        self.characters.add(symbol)

        key = (source, symbol)
        if key in self.transition:
            # A repeated pair only triggers a warning; the later target wins.
            print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, {symbol.name}).")
        self.transition[key] = target

        if self.first_state is None:
            self.first_state = source

    def exitFinal(self, ctx):
        """Register every listed state as a final state."""
        index = 0
        name_ctx = ctx.statename(index)
        while name_ctx is not None:
            accepting = State(self.visitStatename(name_ctx))
            self.states.add(accepting)
            self.final.add(accepting)
            index += 1
            name_ctx = ctx.statename(index)

    # for future support of comments
    def exitComment(self, ctx):
        return None


class NFABuilder(NFAListener):
    """Parse-tree listener that gathers the parts of an NFA.

    Besides the automaton parts it sets ``efa`` to True once an epsilon
    transition has been seen.
    """

    # names of the accessors to try on every ``anyvalue`` context
    anyvalue_attributes = anyvalue_attributes(NFAParser)

    def __init__(self):
        self.init = None
        self.first_state = None
        self.efa = False
        self.states = set()
        self.characters = set()
        self.final = set()
        self.transition = {}

    def exitInit(self, ctx):
        """Record the explicitly given initial state, when there is one."""
        name_ctx = ctx.statename()
        if name_ctx is None:
            return
        initial = State(self.visitStatename(name_ctx))
        self.init = initial
        self.states.add(initial)

    def visitStatename(self, ctx) -> str:
        """Return the text of a state name, either plain or quoted."""
        if ctx.STATE():
            return str(ctx.STATE())
        if not ctx.QUOTE():
            return None

        # Quoted name: glue together every value found in the anyvalue children.
        pieces = []
        index = 0
        part = ctx.anyvalue(index)
        while part is not None:
            for attribute in self.anyvalue_attributes:
                token = getattr(part, attribute)()
                if token is not None:
                    pieces.append(str(token))
            index += 1
            part = ctx.anyvalue(index)
        return "".join(pieces)

    def exitProduction(self, ctx):
        """Store one transition ``(state, character or epsilon) -> set of states``."""
        source = State(self.visitStatename(ctx.statename(0)))
        self.states.add(source)

        targets = set()
        index = 0
        target_ctx = ctx.stateset().statename(index)
        while target_ctx is not None:
            target = State(self.visitStatename(target_ctx))
            self.states.add(target)
            targets.add(target)
            index += 1
            target_ctx = ctx.stateset().statename(index)

        if not ctx.EPSILON():
            symbol = Character(self.visitStatename(ctx.statename(1)))
            self.characters.add(symbol)
            if (source, symbol) in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, {symbol.name}).")
            self.transition[source, symbol] = targets
        else:
            if (source, Eps()) in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, ε).")
            self.transition[source, Eps()] = targets
            self.efa = True

        if self.first_state is None:
            self.first_state = source

    def exitFinal(self, ctx) -> None:
        """Register every state of the listed state set as final."""
        index = 0
        name_ctx = ctx.stateset().statename(index)
        while name_ctx is not None:
            accepting = State(self.visitStatename(name_ctx))
            self.states.add(accepting)
            self.final.add(accepting)
            index += 1
            name_ctx = ctx.stateset().statename(index)

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None


class RegExBuilder(RegExVisitor):
    """Visitor that turns a regular-expression parse tree into an AST.

    ``characters`` collects every alphabet character met on the way and
    ``expression`` holds the resulting AST once ``visitStart`` has run.
    """

    # names of the accessors to try on every ``anyvalue`` context
    anyvalue_attributes = anyvalue_attributes(RegExParser)

    def __init__(self):
        self.characters: Set[Character] = set()
        self.expression: AST

    def visitStart(self, ctx):
        """Build the AST of the whole expression and keep it in ``expression``."""
        self.expression = self.visitExpr(ctx.expr())

    def visitExpr(self, ctx):
        """Build the AST node of one expression."""
        union = ctx.UNION()
        # Binary operation: union or explicit concatenation
        if union or ctx.CONCAT():
            operator = Bin.Union if union is not None else Bin.Concat
            left = self.visitExpr(ctx.expr(0))
            right = self.visitExpr(ctx.expr(1))
            return BinOp(left, operator, right)

        # Implicit concatenation of (iterated) symbols or expressions in parentheses
        parts = [self.visitConcatenable(item) for item in ctx.concatenated()]
        return self.implicit_concat(parts)

    def visitConcatenable(self, ctx):
        """Build the node of one operand of an implicit concatenation."""
        if ctx.symbol():
            return self.visitSymbol(ctx.symbol())

        if ctx.iterable():
            if ctx.ITER():
                return self.visitIterable(ctx.iterable())
            if ctx.POS_ITER():
                return self.visitIterable(ctx.iterable(), True)
            return None

        if ctx.parentheses():
            return self.visitParentheses(ctx.parentheses())
        return None

    def visitSymbol(self, ctx):
        """Build a leaf node: a character, epsilon, the empty set or a quoted character."""
        if ctx.ALPHABET():
            letter = str(ctx.ALPHABET())
            self.characters.add(Character(letter))
            return CharNode(Character(letter))

        if ctx.EPSILON():
            return CharNode(Eps())

        if ctx.EMPTYSET():
            return CharNode(Emptyset())

        if not ctx.QUOTE():
            return None

        # Quoted character: glue together every value found in the anyvalue children.
        pieces = []
        index = 0
        part = ctx.anyvalue(index)
        while part is not None:
            for attribute in self.anyvalue_attributes:
                token = getattr(part, attribute)()
                if token is not None:
                    pieces.append(str(token))
            index += 1
            part = ctx.anyvalue(index)
        quoted = "".join(pieces)

        self.characters.add(Character(quoted))
        return CharNode(Character(quoted))

    def visitParentheses(self, ctx):
        """Build the node of the expression enclosed in parentheses."""
        return self.visitExpr(ctx.expr())

    def visitIterable(self, ctx, positive=False):
        """Wrap a symbol or a parenthesized expression in an iteration node.

        ``positive`` selects ``Iter.Positive`` instead of ``Iter.Iteration``.
        """
        if ctx.symbol():
            inner = self.visitSymbol(ctx.symbol())

        elif ctx.parentheses():
            inner = self.visitParentheses(ctx.parentheses())

        if positive:
            return IterOp(inner, Iter.Positive)
        return IterOp(inner, Iter.Iteration)

    def implicit_concat(self, to_concat):
        """Chain the given nodes with ``Bin.Concat``, nesting to the left."""
        chained = to_concat[0]
        for following in to_concat[1:]:
            chained = BinOp(chained, Bin.Concat, following)
        return chained


    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None

class CFGBuilder(CFGListener):
    """Parse-tree listener that gathers the parts of a context-free grammar.

    After a walk the instance holds the terminals, the nonterminals, the rules
    (nonterminal -> set of right-hand sides) and the initial nonterminal, which
    is the left-hand side of the first rule that was exited.
    """

    # names of the accessors to try on every ``anyvalue`` context
    anyvalue_attributes = anyvalue_attributes(CFGParser)

    def __init__(self):
        self.terminals = set()
        self.nonterminals = set()
        self.rules = dict()
        self.init = None

    def visitSymbol(self, ctx):
        """Return the text of one symbol; anything but TERMINAL/CAPS becomes ``'_'``."""
        if ctx.TERMINAL():
            return str(ctx.TERMINAL())
        elif ctx.CAPS():
            return str(ctx.CAPS())
        else:
            return '_'

    def visitNonterminal(self, ctx):
        """Build the nonterminal described by ``ctx`` and register it.

        Raises ``ValueError`` when none of the known forms matches.
        """
        if ctx.CAPS():
            name = str(ctx.CAPS())
        elif ctx.LEFT_ANGLE():
            name = '<' + ''.join(self.visitSymbol(symbol) for symbol in ctx.symbol()) + '>'
            if ctx.APOSTROPHE():
                name += len(ctx.APOSTROPHE()) * "'"
        elif ctx.APOSTROPHE():
            name = self.visitSymbol(ctx.symbol(0)) + len(ctx.APOSTROPHE()) * "'"
        else:
            # Previously this fell through to an unbound ``name``.
            raise ValueError("ERROR: nonterminal has no recognizable name")

        nonterminal = Nonterminal(name)
        self.nonterminals.add(nonterminal)
        return nonterminal

    def _quoted_text(self, ctx) -> str:
        """Concatenate every value found in the ``anyvalue`` children of ``ctx``."""
        pieces = []
        index = 0
        part = ctx.anyvalue(index)
        while part is not None:
            for attribute in self.anyvalue_attributes:
                value = getattr(part, attribute)()
                if value is not None:
                    pieces.append(str(value))
            index += 1
            part = ctx.anyvalue(index)
        return "".join(pieces)

    def _visit_terminal(self, term_ctx):
        """Build the terminal described by ``term_ctx`` and register it.

        Raises ``ValueError`` when the terminal is neither plain nor quoted.
        """
        if term_ctx.TERMINAL():
            name = str(term_ctx.TERMINAL())
        elif term_ctx.QUOTE():
            name = self._quoted_text(term_ctx)
        else:
            # Previously the name of the preceding terminal was silently reused.
            raise ValueError("ERROR: terminal is neither a plain symbol nor a quoted string")

        terminal = Terminal(name)
        self.terminals.add(terminal)
        return terminal

    def visitRewrite(self, ctx):
        """Return the right-hand side of a rule as a list of terminals and nonterminals."""
        sequence = []
        index = 0
        item = ctx.term_or_nonterm(index)
        while item is not None:
            term_ctx = item.terminal()
            if term_ctx:
                sequence.append(self._visit_terminal(term_ctx))
            else:
                sequence.append(self.visitNonterminal(item.nonterminal()))
            index += 1
            item = ctx.term_or_nonterm(index)

        return sequence

    def exitOnerule(self, ctx):
        """Store every right-hand side of one rule line under its nonterminal."""
        # visitNonterminal also registers the nonterminal in ``self.nonterminals``.
        nonterminal = self.visitNonterminal(ctx.nonterminal())
        if self.init is None:
            self.init = nonterminal

        # multiple lines for one nonterminal are possible this way
        variants = self.rules.setdefault(nonterminal, set())

        index = 0
        rewrite = ctx.rewrite(index)
        while rewrite is not None:
            if rewrite.EPSILON():
                variants.add(Eps())
            if rewrite.term_or_nonterm():
                variants.add(tuple(self.visitRewrite(rewrite)))
            index += 1
            rewrite = ctx.rewrite(index)

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None

lib/parsing/parser.py

deleted100644 → 0
+0 −496

File deleted.

Preview size limit exceeded, changes collapsed.