# Merge-request diff view (text extracted from the web UI).
#
# lib/checker.py  +1 −1
#     from typing import Tuple, Union
#     from lib import reg
#   - from lib.parsing.parser import Parser, ParsingError
#   + from lib.parsing import Parser, ParsingError
#     # support functions common for both fja_checker and web_checker
#
# lib/parsing/__init__.py  +496 −0

from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from copy import deepcopy
from lib.common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from lib.reg import DFA, NFA, RegGrammar
from lib.grammars_cfg import CFG
from lib.regex import RegEx, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4  # type: ignore
from antlr4.error.ErrorListener import ErrorListener
from lib.parsing.DFALexer import DFALexer
from lib.parsing.DFAParser import DFAParser
from lib.parsing.DFAListener import DFAListener
from lib.parsing.NFALexer import NFALexer
from lib.parsing.NFAParser import NFAParser
from lib.parsing.NFAListener import NFAListener
from lib.parsing.RegExLexer import RegExLexer
from lib.parsing.RegExParser import RegExParser
from lib.parsing.RegExVisitor import RegExVisitor
from lib.parsing.CFGLexer import CFGLexer
from lib.parsing.CFGParser import CFGParser
from lib.parsing.CFGListener import CFGListener


class ParsingError(Exception):
    """Error raised when a textual formalism cannot be parsed.

    ``args`` is either a single message or an iterable of messages (typically
    the ``args`` tuple of the exception being wrapped).
    """

    def __init__(self, args):
        # Assigning a bare string to ``self.args`` would split it into single
        # characters, because the ``args`` setter converts its value with
        # ``tuple()``.  Wrap a lone message so it stays in one piece.
        if isinstance(args, (str, bytes)):
            args = (args,)
        else:
            try:
                args = tuple(args)
            except TypeError:
                # Not iterable: treat the value as the single message.
                args = (args,)
        super().__init__(*args)


# This is needed because antlr is too smart and parses at least something possible
# even when the input formalism and the given type don't match. This way it aborts
# on any parsing problem.
class ErrorShouter(ErrorListener):
    """ANTLR error listener that raises on the first reported syntax error."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Abort parsing by raising an exception carrying the error position."""
        raise Exception("ERROR: when parsing line %d column %d: %s\n" % (line, column, msg))


def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """Return the names of the upper-case callables of ``parser.AnyvalueContext``."""
    context = parser.AnyvalueContext
    return [name for name in dir(context)
            if callable(getattr(context, name))
            and not name.startswith("__")
            and name.isupper()]


def _quoted_name(ctx, attributes) -> str:
    """Concatenate the text of every ``anyvalue`` child of ``ctx``.

    For each child, every accessor named in ``attributes`` is called and each
    non-``None`` result is appended in order.
    """
    parts = []
    index = 0
    value_ctx = ctx.anyvalue(index)
    while value_ctx is not None:
        for attribute in attributes:
            value = getattr(value_ctx, attribute)()
            if value is not None:
                parts.append(str(value))
        index += 1
        value_ctx = ctx.anyvalue(index)
    return "".join(parts)


class Parser:
    """Conversions between the textual notation and the formalism objects."""

    def __init__(self):
        pass

    def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
        """Render the distinct names in ``collection`` as ``{a,b,c}``.

        Names are sorted so the output does not depend on set iteration order.
        """
        return "{" + ','.join(sorted({item.name for item in collection})) + "}"

    def _grammar_to_str(self, grammar, full: bool) -> str:
        """Shared implementation of ``reggrammar_to_str`` and ``cfg_to_str``."""
        # The initial nonterminal goes first; the rest follow sorted by name
        # so the rendering is stable between runs.
        others = sorted(grammar.nonterminals.difference({grammar.init}),
                        key=lambda nonterminal: nonterminal.name)
        rules = self.rules_to_str(grammar.rules, [grammar.init] + others)
        if not full:
            return rules

        # full - verbose description of the grammar - only for development, dismiss later
        nonterminals_names = self.names_to_str(grammar.nonterminals)
        terminals = self.names_to_str(grammar.terminals)
        return f"Grammar: ({nonterminals_names}, {terminals}, P, {grammar.init.name})\n{rules}"

    def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
        """Render a regular grammar; ``full`` prepends the grammar tuple header."""
        return self._grammar_to_str(reg, full)

    def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
        """Render a context-free grammar; ``full`` prepends the grammar tuple header."""
        return self._grammar_to_str(gra, full)

    def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
        """Render one ``N -> a | b`` line per nonterminal that has rules.

        Lines follow the order of ``nonterminals``; alternatives are
        de-duplicated and sorted.
        """
        lines = []
        for nonterminal in nonterminals:
            if nonterminal not in rules:
                continue
            variants = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
            rewritten = ' | '.join(variants)
            lines.append(f"{nonterminal.name} -> {rewritten}")
        return "\n".join(lines)

    def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
        """Render one right-hand side: a tuple of symbols or a single symbol."""
        if isinstance(variant, tuple):
            return ''.join(symbol.name for symbol in variant)
        return variant.name

    def dfa_to_str(self, dfa: DFA, full : bool = False) -> str:
        """Render a DFA as ``init=... (q,a)=p ... final={...}``."""
        transition = "".join(
            f"({state.name},{character.name})={dest_state.name} "
            for (state, character), dest_state in dfa.transition.items())
        init = f"init={dfa.init.name}"
        final = f"final={self.names_to_str(dfa.final)}"

        # full - verbose description of DFA - only for development, dismiss later
        if full:
            return f"DFA = ({self.names_to_str(dfa.states)}, {self.names_to_str(dfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init} {transition} {final}"

    def nfa_to_str(self, nfa: NFA, full : bool = False) -> str:
        """Render an NFA as ``init=... (q,a)={p,r} ... final={...}``."""
        transition = "".join(
            f"({state.name},{character.name})={self.names_to_str(dest_states)} "
            for (state, character), dest_states in nfa.transition.items())
        init = f"init={nfa.init.name}"
        final = f"final={self.names_to_str(nfa.final)}"

        if full:
            return f"NFA = ({self.names_to_str(nfa.states)}, {self.names_to_str(nfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init} {transition} {final}"

    def regex_to_str(self, reg: RegEx) -> str:
        """Render a regular expression through its AST's ``astprint``."""
        return reg.expression.astprint()

    def _parse_tree(self, string: str, given_lexer, given_parser):
        """Lex and parse ``string``, returning the tree of the ``start`` rule.

        An ``ErrorShouter`` is attached to both lexer and parser, so any
        reported syntax error raises.
        """
        error_listener = ErrorShouter()
        lexer = given_lexer(antlr4.InputStream(string))
        lexer.addErrorListener(error_listener)
        parser = given_parser(antlr4.CommonTokenStream(lexer))
        parser.addErrorListener(error_listener)
        return parser.start()

    def common_parse(self, string: str, given_lexer, given_parser, given_builder):
        """Parse ``string`` and walk the tree with a fresh ``given_builder``.

        Returns the builder after the walk.
        """
        tree = self._parse_tree(string, given_lexer, given_parser)
        builder = given_builder()
        antlr4.ParseTreeWalker().walk(builder, tree)
        return builder

    def _str_to_automaton(self, string: str, given_lexer, given_parser, given_builder, automaton):
        """Shared implementation of ``str_to_dfa`` and ``str_to_nfa``."""
        try:
            builder = self.common_parse(string, given_lexer, given_parser, given_builder)
            # Without an explicit init, fall back to the source state of the
            # first transition.
            if builder.init is None:
                builder.init = builder.first_state
            if builder.init is None:
                # Passed as a one-element tuple so the message is kept whole.
                raise ParsingError(("Automat musí obsahovat alespoň jeden stav.",))
            return automaton(builder.states, builder.characters, builder.transition,
                             builder.init, builder.final)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_cfg(self, string: str) -> CFG:
        """Parse a context-free grammar. Raises ``ParsingError`` on failure."""
        try:
            builder = self.common_parse(string, CFGLexer, CFGParser, CFGBuilder)
            return CFG(builder.nonterminals, builder.terminals, builder.rules, builder.init)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_reggrammar(self, string: str) -> RegGrammar:
        """Parse a grammar and convert it with ``RegGrammar.from_cfg``.

        Raises ``ParsingError`` on failure.
        """
        try:
            cfg = self.str_to_cfg(string)
            return RegGrammar.from_cfg(cfg)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_dfa(self, string: str) -> DFA:
        """Parse a DFA. Raises ``ParsingError`` on failure or when no state is found."""
        return self._str_to_automaton(string, DFALexer, DFAParser, DFABuilder, DFA)

    def str_to_nfa(self, string: str) -> NFA:
        """Parse an NFA. Raises ``ParsingError`` on failure or when no state is found."""
        return self._str_to_automaton(string, NFALexer, NFAParser, NFABuilder, NFA)

    def str_to_regex(self, string: str) -> RegEx:
        """Parse a regular expression. Raises ``ParsingError`` on failure."""
        try:
            tree = self._parse_tree(string, RegExLexer, RegExParser)
            ast = RegExBuilder()
            ast.visitStart(tree)
            return RegEx(ast.characters, ast.expression)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e


class DFABuilder(DFAListener):
    """Parse-tree listener collecting the components of a DFA."""

    # anyvalue possibilities
    anyvalue_attributes = anyvalue_attributes(DFAParser)

    def __init__(self):
        self.states = set()
        self.characters = set()
        self.transition = {}
        self.init = None
        self.first_state = None  # source state of the first transition seen
        self.final = set()

    def exitInit(self, ctx):
        """Record the explicitly declared initial state, if there is one."""
        if ctx.statename() is not None:
            state = State(self.visitStatename(ctx.statename()))
            self.init = state
            self.states.add(state)

    def visitStatename(self, ctx) -> str:
        """Return the name as text, from a STATE token or a quoted sequence."""
        if ctx.STATE():
            return str(ctx.STATE())
        elif ctx.QUOTE():
            return _quoted_name(ctx, self.anyvalue_attributes)

    def exitProduction(self, ctx):
        """Record one transition ``(state, character) = dest_state``."""
        state = State(self.visitStatename(ctx.statename(0)))
        character = Character(self.visitStatename(ctx.statename(1)))
        dest_state = State(self.visitStatename(ctx.statename(2)))
        self.states.update((state, dest_state))
        self.characters.add(character)

        # A repeated pair only produces a warning; the later transition wins.
        if (state, character) in self.transition:
            print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, {character.name}).")
        self.transition[state, character] = dest_state

        if self.first_state is None:
            self.first_state = state

    def exitFinal(self, ctx):
        """Record every listed state as both a state and a final state."""
        i = 0
        while ctx.statename(i) is not None:
            state = State(self.visitStatename(ctx.statename(i)))
            self.states.add(state)
            self.final.add(state)
            i += 1

    # for future support of comments
    def exitComment(self, ctx):
        return None


class NFABuilder(NFAListener):
    """Parse-tree listener collecting the components of an NFA."""

    anyvalue_attributes = anyvalue_attributes(NFAParser)

    def __init__(self):
        self.states = set()
        self.characters = set()
        self.transition = {}
        self.init = None
        self.first_state = None  # source state of the first transition seen
        self.final = set()
        self.efa = False  # set once an epsilon transition has been read

    def exitInit(self, ctx):
        """Record the explicitly declared initial state, if there is one."""
        if ctx.statename() is not None:
            state = State(self.visitStatename(ctx.statename()))
            self.init = state
            self.states.add(state)

    def visitStatename(self, ctx) -> str:
        """Return the name as text, from a STATE token or a quoted sequence."""
        if ctx.STATE():
            return str(ctx.STATE())
        elif ctx.QUOTE():
            return _quoted_name(ctx, self.anyvalue_attributes)

    def exitProduction(self, ctx):
        """Record one transition from a state to a set of destination states."""
        state = State(self.visitStatename(ctx.statename(0)))
        self.states.add(state)

        dest_states = set()
        i = 0
        while ctx.stateset().statename(i) is not None:
            dest_state = State(self.visitStatename(ctx.stateset().statename(i)))
            self.states.add(dest_state)
            dest_states.add(dest_state)
            i += 1

        # A repeated pair only produces a warning; the later transition wins.
        if ctx.EPSILON():
            if (state, Eps()) in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, ε).")
            self.transition[state, Eps()] = dest_states
            self.efa = True
        else:
            character = Character(self.visitStatename(ctx.statename(1)))
            self.characters.add(character)
            if (state, character) in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, {character.name}).")
            self.transition[state, character] = dest_states

        if self.first_state is None:
            self.first_state = state

    def exitFinal(self, ctx) -> None:
        """Record every listed state as both a state and a final state."""
        i = 0
        while ctx.stateset().statename(i) is not None:
            state = State(self.visitStatename(ctx.stateset().statename(i)))
            self.states.add(state)
            self.final.add(state)
            i += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None


class RegExBuilder(RegExVisitor):
    """Parse-tree visitor building a regular-expression AST and its alphabet."""

    anyvalue_attributes = anyvalue_attributes(RegExParser)

    def __init__(self):
        self.characters: Set[Character] = set()
        self.expression: AST  # assigned by visitStart

    def visitStart(self, ctx):
        """Build the AST of the whole expression into ``self.expression``."""
        self.expression = self.visitExpr(ctx.expr())

    def visitExpr(self, ctx):
        """Build the AST node for one ``expr`` context."""
        # Binary operation: union or explicit concatenation
        if ctx.UNION() or ctx.CONCAT():
            op = Bin.Union if ctx.UNION() is not None else Bin.Concat
            return BinOp(self.visitExpr(ctx.expr(0)), op, self.visitExpr(ctx.expr(1)))

        # Implicit concatenation of (iterated) symbols or expressions in parentheses
        expressions = [self.visitConcatenable(item) for item in ctx.concatenated()]
        return self.implicit_concat(expressions)

    def visitConcatenable(self, ctx):
        """Build the node for one operand of an implicit concatenation."""
        if ctx.symbol():
            return self.visitSymbol(ctx.symbol())
        elif ctx.iterable():
            if ctx.ITER():
                return self.visitIterable(ctx.iterable())
            elif ctx.POS_ITER():
                return self.visitIterable(ctx.iterable(), True)
        elif ctx.parentheses():
            return self.visitParentheses(ctx.parentheses())

    def visitSymbol(self, ctx):
        """Build a ``CharNode``; alphabet and quoted symbols join ``self.characters``."""
        if ctx.ALPHABET():
            character = Character(str(ctx.ALPHABET()))
            self.characters.add(character)
            return CharNode(Character(str(ctx.ALPHABET())))
        elif ctx.EPSILON():
            return CharNode(Eps())
        elif ctx.EMPTYSET():
            return CharNode(Emptyset())
        elif ctx.QUOTE():
            name = _quoted_name(ctx, self.anyvalue_attributes)
            self.characters.add(Character(name))
            return CharNode(Character(name))

    def visitParentheses(self, ctx):
        """Build the node for the expression inside parentheses."""
        return self.visitExpr(ctx.expr())

    def visitIterable(self, ctx, positive=False):
        """Wrap a symbol or parenthesised expression in an iteration node."""
        if ctx.symbol():
            expression = self.visitSymbol(ctx.symbol())
        elif ctx.parentheses():
            expression = self.visitParentheses(ctx.parentheses())
        kind = Iter.Positive if positive else Iter.Iteration
        return IterOp(expression, kind)

    def implicit_concat(self, to_concat):
        """Fold a non-empty list of nodes into a left-nested concatenation."""
        ast = to_concat[0]
        for expression in to_concat[1:]:
            ast = BinOp(ast, Bin.Concat, expression)
        return ast

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None


class CFGBuilder(CFGListener):
    """Parse-tree listener collecting the components of a context-free grammar."""

    anyvalue_attributes = anyvalue_attributes(CFGParser)

    def __init__(self):
        self.terminals = set()
        self.nonterminals = set()
        self.rules = dict()
        self.init = None  # nonterminal of the first rule seen

    def visitSymbol(self, ctx):
        """Return the text of a symbol: a TERMINAL or CAPS token, else ``'_'``."""
        if ctx.TERMINAL():
            return str(ctx.TERMINAL())
        elif ctx.CAPS():
            return str(ctx.CAPS())
        else:
            return '_'

    def visitNonterminal(self, ctx):
        """Build the ``Nonterminal`` for ``ctx`` and register it."""
        if ctx.CAPS():
            name = str(ctx.CAPS())
        elif ctx.LEFT_ANGLE():
            name = '<' + ''.join(self.visitSymbol(symbol) for symbol in ctx.symbol()) + '>'
            if ctx.APOSTROPHE():
                name = name + len(ctx.APOSTROPHE())*"'"
        elif ctx.APOSTROPHE():
            name = self.visitSymbol(ctx.symbol(0)) + len(ctx.APOSTROPHE())*"'"

        nonterminal = Nonterminal(name)
        self.nonterminals.add(nonterminal)
        return nonterminal

    def visitRewrite(self, ctx):
        """Return the list of terminals and nonterminals of one right-hand side."""
        sequence = []
        i = 0
        while ctx.term_or_nonterm(i) is not None:
            item = ctx.term_or_nonterm(i)
            if item.terminal():
                term_ctx = item.terminal()
                if term_ctx.TERMINAL():
                    name = str(term_ctx.TERMINAL())
                elif term_ctx.QUOTE():
                    name = _quoted_name(term_ctx, self.anyvalue_attributes)
                terminal = Terminal(name)
                self.terminals.add(terminal)
                sequence.append(terminal)
            else:
                sequence.append(self.visitNonterminal(item.nonterminal()))
            i += 1
        return sequence

    def exitOnerule(self, ctx):
        """Record all right-hand sides of one rule line."""
        nonterminal = self.visitNonterminal(ctx.nonterminal())
        self.nonterminals.add(nonterminal)
        if self.init is None:
            self.init = nonterminal

        # multiple lines for one nonterminal are possible this way
        variants = self.rules.setdefault(nonterminal, set())

        i = 0
        while ctx.rewrite(i) is not None:
            rewrite = ctx.rewrite(i)
            if rewrite.EPSILON():
                variants.add(Eps())
            if rewrite.term_or_nonterm():
                variants.add(tuple(self.visitRewrite(rewrite)))
            i += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None


# Diff view, next file: lib/parsing/parser.py deleted (100644 → 0, +0 −496).
# File deleted. Preview size limit exceeded, changes collapsed.
lib/checker.py  +1 −1

 from typing import Tuple, Union
 from lib import reg
-from lib.parsing.parser import Parser, ParsingError
+from lib.parsing import Parser, ParsingError
 # support functions common for both fja_checker and web_checker
# Merge-request diff view (text extracted from the web UI).
#
# lib/parsing/__init__.py  +496 −0

from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from copy import deepcopy
from lib.common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from lib.reg import DFA, NFA, RegGrammar
from lib.grammars_cfg import CFG
from lib.regex import RegEx, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4  # type: ignore
from antlr4.error.ErrorListener import ErrorListener
from lib.parsing.DFALexer import DFALexer
from lib.parsing.DFAParser import DFAParser
from lib.parsing.DFAListener import DFAListener
from lib.parsing.NFALexer import NFALexer
from lib.parsing.NFAParser import NFAParser
from lib.parsing.NFAListener import NFAListener
from lib.parsing.RegExLexer import RegExLexer
from lib.parsing.RegExParser import RegExParser
from lib.parsing.RegExVisitor import RegExVisitor
from lib.parsing.CFGLexer import CFGLexer
from lib.parsing.CFGParser import CFGParser
from lib.parsing.CFGListener import CFGListener


class ParsingError(Exception):
    """Error raised when a textual formalism cannot be parsed.

    ``args`` is either a single message or an iterable of messages (typically
    the ``args`` tuple of the exception being wrapped).
    """

    def __init__(self, args):
        # Assigning a bare string to ``self.args`` would split it into single
        # characters, because the ``args`` setter converts its value with
        # ``tuple()``.  Wrap a lone message so it stays in one piece.
        if isinstance(args, (str, bytes)):
            args = (args,)
        else:
            try:
                args = tuple(args)
            except TypeError:
                # Not iterable: treat the value as the single message.
                args = (args,)
        super().__init__(*args)


# This is needed because antlr is too smart and parses at least something possible
# even when the input formalism and the given type don't match. This way it aborts
# on any parsing problem.
class ErrorShouter(ErrorListener):
    """ANTLR error listener that raises on the first reported syntax error."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Abort parsing by raising an exception carrying the error position."""
        raise Exception("ERROR: when parsing line %d column %d: %s\n" % (line, column, msg))


def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """Return the names of the upper-case callables of ``parser.AnyvalueContext``."""
    context = parser.AnyvalueContext
    return [name for name in dir(context)
            if callable(getattr(context, name))
            and not name.startswith("__")
            and name.isupper()]


def _quoted_name(ctx, attributes) -> str:
    """Concatenate the text of every ``anyvalue`` child of ``ctx``.

    For each child, every accessor named in ``attributes`` is called and each
    non-``None`` result is appended in order.
    """
    parts = []
    index = 0
    value_ctx = ctx.anyvalue(index)
    while value_ctx is not None:
        for attribute in attributes:
            value = getattr(value_ctx, attribute)()
            if value is not None:
                parts.append(str(value))
        index += 1
        value_ctx = ctx.anyvalue(index)
    return "".join(parts)


class Parser:
    """Conversions between the textual notation and the formalism objects."""

    def __init__(self):
        pass

    def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
        """Render the distinct names in ``collection`` as ``{a,b,c}``.

        Names are sorted so the output does not depend on set iteration order.
        """
        return "{" + ','.join(sorted({item.name for item in collection})) + "}"

    def _grammar_to_str(self, grammar, full: bool) -> str:
        """Shared implementation of ``reggrammar_to_str`` and ``cfg_to_str``."""
        # The initial nonterminal goes first; the rest follow sorted by name
        # so the rendering is stable between runs.
        others = sorted(grammar.nonterminals.difference({grammar.init}),
                        key=lambda nonterminal: nonterminal.name)
        rules = self.rules_to_str(grammar.rules, [grammar.init] + others)
        if not full:
            return rules

        # full - verbose description of the grammar - only for development, dismiss later
        nonterminals_names = self.names_to_str(grammar.nonterminals)
        terminals = self.names_to_str(grammar.terminals)
        return f"Grammar: ({nonterminals_names}, {terminals}, P, {grammar.init.name})\n{rules}"

    def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
        """Render a regular grammar; ``full`` prepends the grammar tuple header."""
        return self._grammar_to_str(reg, full)

    def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
        """Render a context-free grammar; ``full`` prepends the grammar tuple header."""
        return self._grammar_to_str(gra, full)

    def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
        """Render one ``N -> a | b`` line per nonterminal that has rules.

        Lines follow the order of ``nonterminals``; alternatives are
        de-duplicated and sorted.
        """
        lines = []
        for nonterminal in nonterminals:
            if nonterminal not in rules:
                continue
            variants = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
            rewritten = ' | '.join(variants)
            lines.append(f"{nonterminal.name} -> {rewritten}")
        return "\n".join(lines)

    def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
        """Render one right-hand side: a tuple of symbols or a single symbol."""
        if isinstance(variant, tuple):
            return ''.join(symbol.name for symbol in variant)
        return variant.name

    def dfa_to_str(self, dfa: DFA, full : bool = False) -> str:
        """Render a DFA as ``init=... (q,a)=p ... final={...}``."""
        transition = "".join(
            f"({state.name},{character.name})={dest_state.name} "
            for (state, character), dest_state in dfa.transition.items())
        init = f"init={dfa.init.name}"
        final = f"final={self.names_to_str(dfa.final)}"

        # full - verbose description of DFA - only for development, dismiss later
        if full:
            return f"DFA = ({self.names_to_str(dfa.states)}, {self.names_to_str(dfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init} {transition} {final}"

    def nfa_to_str(self, nfa: NFA, full : bool = False) -> str:
        """Render an NFA as ``init=... (q,a)={p,r} ... final={...}``."""
        transition = "".join(
            f"({state.name},{character.name})={self.names_to_str(dest_states)} "
            for (state, character), dest_states in nfa.transition.items())
        init = f"init={nfa.init.name}"
        final = f"final={self.names_to_str(nfa.final)}"

        if full:
            return f"NFA = ({self.names_to_str(nfa.states)}, {self.names_to_str(nfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init} {transition} {final}"

    def regex_to_str(self, reg: RegEx) -> str:
        """Render a regular expression through its AST's ``astprint``."""
        return reg.expression.astprint()

    def _parse_tree(self, string: str, given_lexer, given_parser):
        """Lex and parse ``string``, returning the tree of the ``start`` rule.

        An ``ErrorShouter`` is attached to both lexer and parser, so any
        reported syntax error raises.
        """
        error_listener = ErrorShouter()
        lexer = given_lexer(antlr4.InputStream(string))
        lexer.addErrorListener(error_listener)
        parser = given_parser(antlr4.CommonTokenStream(lexer))
        parser.addErrorListener(error_listener)
        return parser.start()

    def common_parse(self, string: str, given_lexer, given_parser, given_builder):
        """Parse ``string`` and walk the tree with a fresh ``given_builder``.

        Returns the builder after the walk.
        """
        tree = self._parse_tree(string, given_lexer, given_parser)
        builder = given_builder()
        antlr4.ParseTreeWalker().walk(builder, tree)
        return builder

    def _str_to_automaton(self, string: str, given_lexer, given_parser, given_builder, automaton):
        """Shared implementation of ``str_to_dfa`` and ``str_to_nfa``."""
        try:
            builder = self.common_parse(string, given_lexer, given_parser, given_builder)
            # Without an explicit init, fall back to the source state of the
            # first transition.
            if builder.init is None:
                builder.init = builder.first_state
            if builder.init is None:
                # Passed as a one-element tuple so the message is kept whole.
                raise ParsingError(("Automat musí obsahovat alespoň jeden stav.",))
            return automaton(builder.states, builder.characters, builder.transition,
                             builder.init, builder.final)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_cfg(self, string: str) -> CFG:
        """Parse a context-free grammar. Raises ``ParsingError`` on failure."""
        try:
            builder = self.common_parse(string, CFGLexer, CFGParser, CFGBuilder)
            return CFG(builder.nonterminals, builder.terminals, builder.rules, builder.init)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_reggrammar(self, string: str) -> RegGrammar:
        """Parse a grammar and convert it with ``RegGrammar.from_cfg``.

        Raises ``ParsingError`` on failure.
        """
        try:
            cfg = self.str_to_cfg(string)
            return RegGrammar.from_cfg(cfg)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_dfa(self, string: str) -> DFA:
        """Parse a DFA. Raises ``ParsingError`` on failure or when no state is found."""
        return self._str_to_automaton(string, DFALexer, DFAParser, DFABuilder, DFA)

    def str_to_nfa(self, string: str) -> NFA:
        """Parse an NFA. Raises ``ParsingError`` on failure or when no state is found."""
        return self._str_to_automaton(string, NFALexer, NFAParser, NFABuilder, NFA)

    def str_to_regex(self, string: str) -> RegEx:
        """Parse a regular expression. Raises ``ParsingError`` on failure."""
        try:
            tree = self._parse_tree(string, RegExLexer, RegExParser)
            ast = RegExBuilder()
            ast.visitStart(tree)
            return RegEx(ast.characters, ast.expression)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e


class DFABuilder(DFAListener):
    """Parse-tree listener collecting the components of a DFA."""

    # anyvalue possibilities
    anyvalue_attributes = anyvalue_attributes(DFAParser)

    def __init__(self):
        self.states = set()
        self.characters = set()
        self.transition = {}
        self.init = None
        self.first_state = None  # source state of the first transition seen
        self.final = set()

    def exitInit(self, ctx):
        """Record the explicitly declared initial state, if there is one."""
        if ctx.statename() is not None:
            state = State(self.visitStatename(ctx.statename()))
            self.init = state
            self.states.add(state)

    def visitStatename(self, ctx) -> str:
        """Return the name as text, from a STATE token or a quoted sequence."""
        if ctx.STATE():
            return str(ctx.STATE())
        elif ctx.QUOTE():
            return _quoted_name(ctx, self.anyvalue_attributes)

    def exitProduction(self, ctx):
        """Record one transition ``(state, character) = dest_state``."""
        state = State(self.visitStatename(ctx.statename(0)))
        character = Character(self.visitStatename(ctx.statename(1)))
        dest_state = State(self.visitStatename(ctx.statename(2)))
        self.states.update((state, dest_state))
        self.characters.add(character)

        # A repeated pair only produces a warning; the later transition wins.
        if (state, character) in self.transition:
            print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, {character.name}).")
        self.transition[state, character] = dest_state

        if self.first_state is None:
            self.first_state = state

    def exitFinal(self, ctx):
        """Record every listed state as both a state and a final state."""
        i = 0
        while ctx.statename(i) is not None:
            state = State(self.visitStatename(ctx.statename(i)))
            self.states.add(state)
            self.final.add(state)
            i += 1

    # for future support of comments
    def exitComment(self, ctx):
        return None


class NFABuilder(NFAListener):
    """Parse-tree listener collecting the components of an NFA."""

    anyvalue_attributes = anyvalue_attributes(NFAParser)

    def __init__(self):
        self.states = set()
        self.characters = set()
        self.transition = {}
        self.init = None
        self.first_state = None  # source state of the first transition seen
        self.final = set()
        self.efa = False  # set once an epsilon transition has been read

    def exitInit(self, ctx):
        """Record the explicitly declared initial state, if there is one."""
        if ctx.statename() is not None:
            state = State(self.visitStatename(ctx.statename()))
            self.init = state
            self.states.add(state)

    def visitStatename(self, ctx) -> str:
        """Return the name as text, from a STATE token or a quoted sequence."""
        if ctx.STATE():
            return str(ctx.STATE())
        elif ctx.QUOTE():
            return _quoted_name(ctx, self.anyvalue_attributes)

    def exitProduction(self, ctx):
        """Record one transition from a state to a set of destination states."""
        state = State(self.visitStatename(ctx.statename(0)))
        self.states.add(state)

        dest_states = set()
        i = 0
        while ctx.stateset().statename(i) is not None:
            dest_state = State(self.visitStatename(ctx.stateset().statename(i)))
            self.states.add(dest_state)
            dest_states.add(dest_state)
            i += 1

        # A repeated pair only produces a warning; the later transition wins.
        if ctx.EPSILON():
            if (state, Eps()) in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, ε).")
            self.transition[state, Eps()] = dest_states
            self.efa = True
        else:
            character = Character(self.visitStatename(ctx.statename(1)))
            self.characters.add(character)
            if (state, character) in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, {character.name}).")
            self.transition[state, character] = dest_states

        if self.first_state is None:
            self.first_state = state

    def exitFinal(self, ctx) -> None:
        """Record every listed state as both a state and a final state."""
        i = 0
        while ctx.stateset().statename(i) is not None:
            state = State(self.visitStatename(ctx.stateset().statename(i)))
            self.states.add(state)
            self.final.add(state)
            i += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None


class RegExBuilder(RegExVisitor):
    """Parse-tree visitor building a regular-expression AST and its alphabet."""

    anyvalue_attributes = anyvalue_attributes(RegExParser)

    def __init__(self):
        self.characters: Set[Character] = set()
        self.expression: AST  # assigned by visitStart

    def visitStart(self, ctx):
        """Build the AST of the whole expression into ``self.expression``."""
        self.expression = self.visitExpr(ctx.expr())

    def visitExpr(self, ctx):
        """Build the AST node for one ``expr`` context."""
        # Binary operation: union or explicit concatenation
        if ctx.UNION() or ctx.CONCAT():
            op = Bin.Union if ctx.UNION() is not None else Bin.Concat
            return BinOp(self.visitExpr(ctx.expr(0)), op, self.visitExpr(ctx.expr(1)))

        # Implicit concatenation of (iterated) symbols or expressions in parentheses
        expressions = [self.visitConcatenable(item) for item in ctx.concatenated()]
        return self.implicit_concat(expressions)

    def visitConcatenable(self, ctx):
        """Build the node for one operand of an implicit concatenation."""
        if ctx.symbol():
            return self.visitSymbol(ctx.symbol())
        elif ctx.iterable():
            if ctx.ITER():
                return self.visitIterable(ctx.iterable())
            elif ctx.POS_ITER():
                return self.visitIterable(ctx.iterable(), True)
        elif ctx.parentheses():
            return self.visitParentheses(ctx.parentheses())

    def visitSymbol(self, ctx):
        """Build a ``CharNode``; alphabet and quoted symbols join ``self.characters``."""
        if ctx.ALPHABET():
            character = Character(str(ctx.ALPHABET()))
            self.characters.add(character)
            return CharNode(Character(str(ctx.ALPHABET())))
        elif ctx.EPSILON():
            return CharNode(Eps())
        elif ctx.EMPTYSET():
            return CharNode(Emptyset())
        elif ctx.QUOTE():
            name = _quoted_name(ctx, self.anyvalue_attributes)
            self.characters.add(Character(name))
            return CharNode(Character(name))

    def visitParentheses(self, ctx):
        """Build the node for the expression inside parentheses."""
        return self.visitExpr(ctx.expr())

    def visitIterable(self, ctx, positive=False):
        """Wrap a symbol or parenthesised expression in an iteration node."""
        if ctx.symbol():
            expression = self.visitSymbol(ctx.symbol())
        elif ctx.parentheses():
            expression = self.visitParentheses(ctx.parentheses())
        kind = Iter.Positive if positive else Iter.Iteration
        return IterOp(expression, kind)

    def implicit_concat(self, to_concat):
        """Fold a non-empty list of nodes into a left-nested concatenation."""
        ast = to_concat[0]
        for expression in to_concat[1:]:
            ast = BinOp(ast, Bin.Concat, expression)
        return ast

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None


class CFGBuilder(CFGListener):
    """Parse-tree listener collecting the components of a context-free grammar."""

    anyvalue_attributes = anyvalue_attributes(CFGParser)

    def __init__(self):
        self.terminals = set()
        self.nonterminals = set()
        self.rules = dict()
        self.init = None  # nonterminal of the first rule seen

    def visitSymbol(self, ctx):
        """Return the text of a symbol: a TERMINAL or CAPS token, else ``'_'``."""
        if ctx.TERMINAL():
            return str(ctx.TERMINAL())
        elif ctx.CAPS():
            return str(ctx.CAPS())
        else:
            return '_'

    def visitNonterminal(self, ctx):
        """Build the ``Nonterminal`` for ``ctx`` and register it."""
        if ctx.CAPS():
            name = str(ctx.CAPS())
        elif ctx.LEFT_ANGLE():
            name = '<' + ''.join(self.visitSymbol(symbol) for symbol in ctx.symbol()) + '>'
            if ctx.APOSTROPHE():
                name = name + len(ctx.APOSTROPHE())*"'"
        elif ctx.APOSTROPHE():
            name = self.visitSymbol(ctx.symbol(0)) + len(ctx.APOSTROPHE())*"'"

        nonterminal = Nonterminal(name)
        self.nonterminals.add(nonterminal)
        return nonterminal

    def visitRewrite(self, ctx):
        """Return the list of terminals and nonterminals of one right-hand side."""
        sequence = []
        i = 0
        while ctx.term_or_nonterm(i) is not None:
            item = ctx.term_or_nonterm(i)
            if item.terminal():
                term_ctx = item.terminal()
                if term_ctx.TERMINAL():
                    name = str(term_ctx.TERMINAL())
                elif term_ctx.QUOTE():
                    name = _quoted_name(term_ctx, self.anyvalue_attributes)
                terminal = Terminal(name)
                self.terminals.add(terminal)
                sequence.append(terminal)
            else:
                sequence.append(self.visitNonterminal(item.nonterminal()))
            i += 1
        return sequence

    def exitOnerule(self, ctx):
        """Record all right-hand sides of one rule line."""
        nonterminal = self.visitNonterminal(ctx.nonterminal())
        self.nonterminals.add(nonterminal)
        if self.init is None:
            self.init = nonterminal

        # multiple lines for one nonterminal are possible this way
        variants = self.rules.setdefault(nonterminal, set())

        i = 0
        while ctx.rewrite(i) is not None:
            rewrite = ctx.rewrite(i)
            if rewrite.EPSILON():
                variants.add(Eps())
            if rewrite.term_or_nonterm():
                variants.add(tuple(self.visitRewrite(rewrite)))
            i += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
lib/parsing/parser.py deleted 100644 → 0 +0 −496 File deleted. Preview size limit exceeded, changes collapsed. Show changes