Commit 1feddd15 authored by Vladimír Štill
Browse files

lib: Drop .parser for the parser imports

parent fa0c432b
from typing import Tuple, Union
from lib import reg
from lib.parsing.parser import Parser, ParsingError
from lib.parsing import Parser, ParsingError
# support functions common for both fja_checker and web_checker
......
from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from copy import deepcopy
from lib.common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from lib.reg import DFA, NFA, RegGrammar
from lib.grammars_cfg import CFG
from lib.regex import RegEx, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4 # type: ignore
from antlr4.error.ErrorListener import ErrorListener
from lib.parsing.DFALexer import DFALexer
from lib.parsing.DFAParser import DFAParser
from lib.parsing.DFAListener import DFAListener
from lib.parsing.NFALexer import NFALexer
from lib.parsing.NFAParser import NFAParser
from lib.parsing.NFAListener import NFAListener
from lib.parsing.RegExLexer import RegExLexer
from lib.parsing.RegExParser import RegExParser
from lib.parsing.RegExVisitor import RegExVisitor
from lib.parsing.CFGLexer import CFGLexer
from lib.parsing.CFGParser import CFGParser
from lib.parsing.CFGListener import CFGListener
class ParsingError(Exception):
    """Raised when an input string cannot be turned into a formal object.

    ``args`` is either a ready-made argument sequence (typically the ``args``
    of the exception being wrapped) or a single message.
    """

    def __init__(self, args):
        # Assigning a bare string to ``Exception.args`` converts it to a tuple
        # of single characters, which garbles the message. Wrap scalars into a
        # one-element tuple; keep real sequences as they are.
        if isinstance(args, (str, bytes)):
            args = (args,)
        else:
            try:
                args = tuple(args)
            except TypeError:
                # not iterable at all - treat it as a single argument
                args = (args,)
        super().__init__(*args)
# ANTLR tries to recover and parses whatever it can, even when the input
# formalism and the requested type don't match. Raising from the listener
# makes parsing abort on any reported syntax problem.
class ErrorShouter(ErrorListener):
    """Error listener that turns every reported syntax error into an exception."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Raise ``Exception`` with the line, column and message of the error."""
        details = (line, column, msg)
        raise Exception("ERROR: when parsing line %d column %d: %s\n" % details)
def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """List the upper-case callable attribute names of ``parser.AnyvalueContext``.

    Names starting with a double underscore are left out. The order follows
    ``dir()``.
    """
    context = parser.AnyvalueContext
    selected = []
    for attr_name in dir(context):
        if not callable(getattr(context, attr_name)):
            continue
        if attr_name.startswith("__") or not attr_name.isupper():
            continue
        selected.append(attr_name)
    return selected
class Parser:
    """Converts automata, grammars and regular expressions to and from text."""

    def __init__(self):
        pass

    def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
        """Return the ``name`` of every item in ``collection`` as ``{a,b,c}``.

        Names are de-duplicated and sorted, so the result does not depend on
        set iteration order.
        """
        return "{" + ','.join(sorted({item.name for item in collection})) + "}"

    def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
        """Return the rules of ``reg`` as text, one nonterminal per line.

        With ``full`` set, a ``Grammar: (...)`` header line is put in front.
        """
        return self._grammar_to_str(reg, full)

    def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
        """Return the rules of ``gra`` as text, one nonterminal per line.

        With ``full`` set, a ``Grammar: (...)`` header line is put in front.
        """
        return self._grammar_to_str(gra, full)

    def _grammar_to_str(self, grammar: Union[CFG, RegGrammar], full: bool) -> str:
        """Shared implementation of ``reggrammar_to_str`` and ``cfg_to_str``."""
        # The initial nonterminal goes first, the others follow in name order.
        # difference() already builds a new set, so no defensive copy is needed.
        others = sorted(set(grammar.nonterminals).difference({grammar.init}),
                        key=lambda nonterminal: nonterminal.name)
        ordered = [grammar.init] + others
        rules = self.rules_to_str(grammar.rules, ordered)
        if not full:
            return rules

        # full - verbose description of the grammar - only for development, dismiss later
        nonterminals_names = self.names_to_str(grammar.nonterminals)
        terminals = self.names_to_str(grammar.terminals)
        return f"Grammar: ({nonterminals_names}, {terminals}, P, {grammar.init.name})\n{rules}"

    def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
        """Render ``rules`` as ``A -> x | y`` lines in the order of ``nonterminals``.

        Nonterminals without an entry in ``rules`` are skipped. The variants of
        one nonterminal are de-duplicated and sorted.
        """
        lines = []
        for nonterminal in nonterminals:
            if nonterminal not in rules:
                continue
            variants = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
            rewritten = ' | '.join(variants)
            lines.append(f"{nonterminal.name} -> {rewritten}")
        return "\n".join(lines)

    def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
        """Return the text of one right-hand side.

        A tuple is rendered as the concatenated names of its members, anything
        else as its own ``name``.
        """
        if isinstance(variant, tuple):
            return ''.join(symbol.name for symbol in variant)
        return variant.name

    def dfa_to_str(self, dfa: DFA, full : bool = False) -> str:
        """Render ``dfa`` as ``init=... (state,char)=dest ... final={...}``.

        With ``full`` set, a ``DFA = (...)`` header is produced instead and the
        transitions follow on a second line.
        """
        # every transition keeps its trailing space, as the text format expects
        transition = "".join(
            f"({state.name},{character.name})={dest_state.name} "
            for (state, character), dest_state in dfa.transition.items()
        )
        init = f"init={dfa.init.name}"
        final = f"final={self.names_to_str(dfa.final)}"

        # full - verbose description of DFA - only for development, dismiss later
        if full:
            return f"DFA = ({self.names_to_str(dfa.states)}, {self.names_to_str(dfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init} {transition} {final}"

    def nfa_to_str(self, nfa: NFA, full : bool = False) -> str:
        """Render ``nfa`` as ``init=... (state,char)={dest,...} ... final={...}``.

        With ``full`` set, an ``NFA = (...)`` header is produced instead and the
        transitions follow on a second line.
        """
        transition = "".join(
            f"({state.name},{character.name})={self.names_to_str(dest_states)} "
            for (state, character), dest_states in nfa.transition.items()
        )
        init = f"init={nfa.init.name}"
        final = f"final={self.names_to_str(nfa.final)}"

        if full:
            return f"NFA = ({self.names_to_str(nfa.states)}, {self.names_to_str(nfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init} {transition} {final}"

    def regex_to_str(self, reg: RegEx) -> str:
        """Return the text produced by ``reg.expression.astprint()``."""
        return reg.expression.astprint()

    def _parse_tree(self, string: str, given_lexer, given_parser):
        """Lex and parse ``string`` and return the tree of the ``start`` rule.

        An ``ErrorShouter`` is attached to both the lexer and the parser.
        """
        error_listener = ErrorShouter()
        chars = antlr4.InputStream(string)
        lexer = given_lexer(chars)
        lexer.addErrorListener(error_listener)
        tokens = antlr4.CommonTokenStream(lexer)
        parser = given_parser(tokens)
        parser.addErrorListener(error_listener)
        return parser.start()

    def common_parse(self, string: str, given_lexer, given_parser, given_builder):
        """Parse ``string`` and walk the tree with a new ``given_builder()``.

        Returns the builder after the walk.
        """
        tree = self._parse_tree(string, given_lexer, given_parser)
        builder = given_builder()
        walker = antlr4.ParseTreeWalker()
        walker.walk(builder, tree)
        return builder

    def _initial_state(self, builder):
        """Return the builder's initial state, falling back to ``first_state``.

        Raises ``ParsingError`` when neither is set.
        """
        init = builder.init if builder.init is not None else builder.first_state
        if init is None:
            # Passed as a tuple: a bare string assigned to ``Exception.args``
            # would be split into single characters.
            raise ParsingError(("Automat musí obsahovat alespoň jeden stav.",))
        return init

    def str_to_cfg(self, string: str) -> CFG:
        """Parse ``string`` into a ``CFG``; any failure becomes ``ParsingError``."""
        try:
            builder = self.common_parse(string, CFGLexer, CFGParser, CFGBuilder)
            return CFG(builder.nonterminals, builder.terminals, builder.rules, builder.init)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_reggrammar(self, string: str) -> RegGrammar:
        """Parse ``string`` as a CFG and convert it with ``RegGrammar.from_cfg``.

        Any failure becomes ``ParsingError``.
        """
        try:
            cfg = self.str_to_cfg(string)
            return RegGrammar.from_cfg(cfg)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_dfa(self, string: str) -> DFA:
        """Parse ``string`` into a ``DFA``; any failure becomes ``ParsingError``.

        When the text names no initial state, the source state of the first
        transition is used.
        """
        try:
            builder = self.common_parse(string, DFALexer, DFAParser, DFABuilder)
            init = self._initial_state(builder)
            return DFA(builder.states, builder.characters, builder.transition, init, builder.final)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_nfa(self, string: str) -> NFA:
        """Parse ``string`` into an ``NFA``; any failure becomes ``ParsingError``.

        When the text names no initial state, the source state of the first
        transition is used.
        """
        try:
            builder = self.common_parse(string, NFALexer, NFAParser, NFABuilder)
            init = self._initial_state(builder)
            return NFA(builder.states, builder.characters, builder.transition, init, builder.final)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_regex(self, string: str) -> RegEx:
        """Parse ``string`` into a ``RegEx``; any failure becomes ``ParsingError``."""
        try:
            tree = self._parse_tree(string, RegExLexer, RegExParser)
            ast = RegExBuilder()
            ast.visitStart(tree)
            return RegEx(ast.characters, ast.expression)
        except ParsingError:
            raise
        except Exception as e:
            raise ParsingError(e.args) from e
class DFABuilder(DFAListener):
    """Parse-tree listener that collects the parts of a DFA while the tree is walked."""

    # anyvalue possibilities
    anyvalue_attributes = anyvalue_attributes(DFAParser)

    def __init__(self):
        self.init = None
        self.first_state = None
        self.states = set()
        self.final = set()
        self.characters = set()
        self.transition = {}

    def exitInit(self, ctx):
        """Record the initial state when the rule carries a state name."""
        if ctx.statename() is None:
            return
        initial = State(self.visitStatename(ctx.statename()))
        self.init = initial
        self.states.add(initial)

    def visitStatename(self, ctx) -> str:
        """Return the text of a state name, plain or quoted."""
        if ctx.STATE():
            return str(ctx.STATE())
        if ctx.QUOTE():
            return self._quoted_name(ctx)
        return None

    def _quoted_name(self, ctx) -> str:
        """Concatenate the token texts of all ``anyvalue`` children of ``ctx``."""
        pieces = []
        index = 0
        while ctx.anyvalue(index) is not None:
            part = ctx.anyvalue(index)
            for accessor in self.anyvalue_attributes:
                token = getattr(part, accessor)()
                if token is not None:
                    pieces.append(str(token))
            index += 1
        return "".join(pieces)

    def exitProduction(self, ctx):
        """Record one transition ``(state, character) -> state``."""
        source = State(self.visitStatename(ctx.statename(0)))
        symbol = Character(self.visitStatename(ctx.statename(1)))
        target = State(self.visitStatename(ctx.statename(2)))

        self.states.update((source, target))
        self.characters.add(symbol)

        key = (source, symbol)
        if key in self.transition:
            # a later transition for the same pair overwrites the earlier one
            print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, {symbol.name}).")
        self.transition[key] = target

        if self.first_state is None:
            self.first_state = source

    def exitFinal(self, ctx):
        """Record every listed state as a known and accepting state."""
        index = 0
        while ctx.statename(index) is not None:
            accepting = State(self.visitStatename(ctx.statename(index)))
            self.states.add(accepting)
            self.final.add(accepting)
            index += 1

    # for future support of comments
    def exitComment(self, ctx):
        return None
class NFABuilder(NFAListener):
    """Parse-tree listener that collects the parts of an NFA while the tree is walked."""

    # anyvalue possibilities
    anyvalue_attributes = anyvalue_attributes(NFAParser)

    def __init__(self):
        self.init = None
        self.first_state = None
        self.efa = False
        self.states = set()
        self.final = set()
        self.characters = set()
        self.transition = {}

    def exitInit(self, ctx):
        """Record the initial state when the rule carries a state name."""
        if ctx.statename() is None:
            return
        initial = State(self.visitStatename(ctx.statename()))
        self.init = initial
        self.states.add(initial)

    def visitStatename(self, ctx) -> str:
        """Return the text of a state name, plain or quoted."""
        if ctx.STATE():
            return str(ctx.STATE())
        if ctx.QUOTE():
            return self._quoted_name(ctx)
        return None

    def _quoted_name(self, ctx) -> str:
        """Concatenate the token texts of all ``anyvalue`` children of ``ctx``."""
        pieces = []
        index = 0
        while ctx.anyvalue(index) is not None:
            part = ctx.anyvalue(index)
            for accessor in self.anyvalue_attributes:
                token = getattr(part, accessor)()
                if token is not None:
                    pieces.append(str(token))
            index += 1
        return "".join(pieces)

    def exitProduction(self, ctx):
        """Record one transition ``(state, character or epsilon) -> set of states``."""
        source = State(self.visitStatename(ctx.statename(0)))
        self.states.add(source)

        targets = set()
        index = 0
        while ctx.stateset().statename(index) is not None:
            target = State(self.visitStatename(ctx.stateset().statename(index)))
            self.states.add(target)
            targets.add(target)
            index += 1

        if ctx.EPSILON():
            duplicate = (source, Eps()) in self.transition
            symbol, label = Eps(), "ε"
        else:
            symbol = Character(self.visitStatename(ctx.statename(1)))
            self.characters.add(symbol)
            duplicate = (source, symbol) in self.transition
            label = symbol.name

        if duplicate:
            # a later transition for the same pair overwrites the earlier one
            print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, {label}).")
        self.transition[source, symbol] = targets
        if ctx.EPSILON():
            self.efa = True

        if self.first_state is None:
            self.first_state = source

    def exitFinal(self, ctx) -> None:
        """Record every listed state as a known and accepting state."""
        index = 0
        while ctx.stateset().statename(index) is not None:
            accepting = State(self.visitStatename(ctx.stateset().statename(index)))
            self.states.add(accepting)
            self.final.add(accepting)
            index += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
class RegExBuilder(RegExVisitor):
    """Visitor that builds the AST and the character set of a regular expression."""

    # anyvalue possibilities
    anyvalue_attributes = anyvalue_attributes(RegExParser)

    def __init__(self):
        self.characters: Set[Character] = set()
        self.expression: AST

    def visitStart(self, ctx):
        """Build the AST of the whole expression and keep it in ``self.expression``."""
        self.expression = self.visitExpr(ctx.expr())

    def visitExpr(self, ctx):
        """Build the node for a union, an explicit or an implicit concatenation."""
        if not (ctx.UNION() or ctx.CONCAT()):
            # Implicit concatenation of (iterated) symbols or expressions in parentheses
            parts = [self.visitConcatenable(item) for item in ctx.concatenated()]
            return self.implicit_concat(parts)

        # Binary operation: union or explicit concatenation
        operator = Bin.Concat if ctx.UNION() is None else Bin.Union
        left = self.visitExpr(ctx.expr(0))
        right = self.visitExpr(ctx.expr(1))
        return BinOp(left, operator, right)

    def visitConcatenable(self, ctx):
        """Build the node for one member of an implicit concatenation."""
        if ctx.symbol():
            return self.visitSymbol(ctx.symbol())
        if ctx.iterable():
            if ctx.ITER():
                return self.visitIterable(ctx.iterable())
            if ctx.POS_ITER():
                return self.visitIterable(ctx.iterable(), True)
            return None
        if ctx.parentheses():
            return self.visitParentheses(ctx.parentheses())
        return None

    def visitSymbol(self, ctx):
        """Build a ``CharNode`` for a letter, epsilon, the empty set or a quoted name."""
        if ctx.ALPHABET():
            return self._character_node(str(ctx.ALPHABET()))
        if ctx.EPSILON():
            return CharNode(Eps())
        if ctx.EMPTYSET():
            return CharNode(Emptyset())
        if ctx.QUOTE():
            pieces = []
            index = 0
            while ctx.anyvalue(index) is not None:
                part = ctx.anyvalue(index)
                for accessor in self.anyvalue_attributes:
                    token = getattr(part, accessor)()
                    if token is not None:
                        pieces.append(str(token))
                index += 1
            return self._character_node("".join(pieces))
        return None

    def _character_node(self, name):
        """Register the character ``name`` and return a ``CharNode`` for it."""
        self.characters.add(Character(name))
        return CharNode(Character(name))

    def visitParentheses(self, ctx):
        """Build the node of the expression inside the parentheses."""
        return self.visitExpr(ctx.expr())

    def visitIterable(self, ctx, positive=False):
        """Wrap a symbol or parenthesised expression in an iteration node."""
        if ctx.symbol():
            inner = self.visitSymbol(ctx.symbol())
        elif ctx.parentheses():
            inner = self.visitParentheses(ctx.parentheses())
        kind = Iter.Positive if positive else Iter.Iteration
        return IterOp(inner, kind)

    def implicit_concat(self, to_concat):
        """Fold the nodes in ``to_concat`` into left-nested concatenations."""
        result = to_concat[0]
        for following in to_concat[1:]:
            result = BinOp(result, Bin.Concat, following)
        return result

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
class CFGBuilder(CFGListener):
    """Parse-tree listener that collects the parts of a context-free grammar."""

    # anyvalue possibilities
    anyvalue_attributes = anyvalue_attributes(CFGParser)

    def __init__(self):
        self.terminals = set()
        self.nonterminals = set()
        self.rules = dict()
        self.init = None

    def visitSymbol(self, ctx):
        """Return the text of a symbol inside a nonterminal name ('_' as fallback)."""
        if ctx.TERMINAL():
            return str(ctx.TERMINAL())
        elif ctx.CAPS():
            return str(ctx.CAPS())
        else:
            return '_'

    def visitNonterminal(self, ctx):
        """Build the ``Nonterminal`` for ``ctx`` and register it in ``self.nonterminals``."""
        if ctx.CAPS():
            name = str(ctx.CAPS())
        elif ctx.LEFT_ANGLE():
            name = '<' + ''.join(self.visitSymbol(symbol) for symbol in ctx.symbol()) + '>'
            if ctx.APOSTROPHE():
                name = name + len(ctx.APOSTROPHE())*"'"
        elif ctx.APOSTROPHE():
            name = self.visitSymbol(ctx.symbol(0)) + len(ctx.APOSTROPHE())*"'"

        nonterminal = Nonterminal(name)
        self.nonterminals.add(nonterminal)
        return nonterminal

    def _quoted_name(self, ctx) -> str:
        """Concatenate the token texts of all ``anyvalue`` children of ``ctx``."""
        pieces = []
        j = 0
        while ctx.anyvalue(j) is not None:
            part = ctx.anyvalue(j)
            for attribute in self.anyvalue_attributes:
                value = getattr(part, attribute)()
                if value is not None:
                    pieces.append(str(value))
            j += 1
        return "".join(pieces)

    def visitRewrite(self, ctx):
        """Return one right-hand side as a list of terminals and nonterminals.

        Every terminal met on the way is added to ``self.terminals``.
        """
        sequence = []
        i = 0
        while ctx.term_or_nonterm(i) is not None:
            item = ctx.term_or_nonterm(i)
            if item.terminal():
                term_ctx = item.terminal()
                if term_ctx.TERMINAL():
                    name = str(term_ctx.TERMINAL())
                elif term_ctx.QUOTE():
                    name = self._quoted_name(term_ctx)
                terminal = Terminal(name)
                self.terminals.add(terminal)
                sequence.append(terminal)
            else:
                sequence.append(self.visitNonterminal(item.nonterminal()))
            i += 1
        return sequence

    def exitOnerule(self, ctx):
        """Add the variants of one rule line to ``self.rules``.

        The nonterminal of the first rule seen becomes the initial one.
        """
        # visitNonterminal also registers the nonterminal in self.nonterminals
        nonterminal = self.visitNonterminal(ctx.nonterminal())
        if self.init is None:
            self.init = nonterminal

        # multiple lines for one nonterminal are possible this way
        variants = self.rules.setdefault(nonterminal, set())

        i = 0
        while ctx.rewrite(i) is not None:
            rewrite = ctx.rewrite(i)
            if rewrite.EPSILON():
                variants.add(Eps())
            if rewrite.term_or_nonterm():
                variants.add(tuple(self.visitRewrite(rewrite)))
            i += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from copy import deepcopy
from lib.common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from lib.reg import DFA, NFA, RegGrammar
from lib.grammars_cfg import CFG
from lib.regex import RegEx, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4 # type: ignore
from antlr4.error.ErrorListener import ErrorListener
from lib.parsing.DFALexer import DFALexer
from lib.parsing.DFAParser import DFAParser
from lib.parsing.DFAListener import DFAListener
from lib.parsing.NFALexer import NFALexer
from lib.parsing.NFAParser import NFAParser
from lib.parsing.NFAListener import NFAListener
from lib.parsing.RegExLexer import RegExLexer
from lib.parsing.RegExParser import RegExParser
from lib.parsing.RegExVisitor import RegExVisitor
from lib.parsing.CFGLexer import CFGLexer
from lib.parsing.CFGParser import CFGParser
from lib.parsing.CFGListener import CFGListener
class ParsingError(Exception):
    """Raised when an input string cannot be turned into a formal object.

    ``args`` is either a ready-made argument sequence (typically the ``args``
    of the exception being wrapped) or a single message.
    """

    def __init__(self, args):
        # Assigning a bare string to ``Exception.args`` converts it to a tuple
        # of single characters, which garbles the message. Wrap scalars into a
        # one-element tuple; keep real sequences as they are.
        if isinstance(args, (str, bytes)):
            args = (args,)
        else:
            try:
                args = tuple(args)
            except TypeError:
                # not iterable at all - treat it as a single argument
                args = (args,)
        super().__init__(*args)
# ANTLR tries to recover and parses whatever it can, even when the input
# formalism and the requested type don't match. Raising from the listener
# makes parsing abort on any reported syntax problem.
class ErrorShouter(ErrorListener):
    """Error listener that turns every reported syntax error into an exception."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Raise ``Exception`` with the line, column and message of the error."""
        details = (line, column, msg)
        raise Exception("ERROR: when parsing line %d column %d: %s\n" % details)
def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """List the upper-case callable attribute names of ``parser.AnyvalueContext``.

    Names starting with a double underscore are left out. The order follows
    ``dir()``.
    """
    context = parser.AnyvalueContext
    selected = []
    for attr_name in dir(context):
        if not callable(getattr(context, attr_name)):
            continue
        if attr_name.startswith("__") or not attr_name.isupper():
            continue
        selected.append(attr_name)
    return selected
class Parser:
def __init__(self):
    """Create a parser; there is no state to set up."""
def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
    """Return the ``name`` of every item in ``collection`` as ``{a,b,c}``.

    Names are de-duplicated and sorted, so the result does not depend on
    set iteration order.
    """
    return "{" + ','.join(sorted({item.name for item in collection})) + "}"
def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
    """Return the rules of ``reg`` as text, one nonterminal per line.

    The initial nonterminal comes first, the others follow in name order.
    With ``full`` set, a ``Grammar: (...)`` header line is put in front.
    """
    # difference() already builds a new set, so no defensive copy is needed
    others = sorted(set(reg.nonterminals).difference({reg.init}),
                    key=lambda nonterminal: nonterminal.name)
    nonterminals = [reg.init] + others
    rules = self.rules_to_str(reg.rules, nonterminals)
    if not full:
        return rules

    # full - verbose description of the grammar - only for development, dismiss later
    nonterminals_names = self.names_to_str(reg.nonterminals)
    terminals = self.names_to_str(reg.terminals)
    return f"Grammar: ({nonterminals_names}, {terminals}, P, {reg.init.name})\n{rules}"
def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
    """Return the rules of ``gra`` as text, one nonterminal per line.

    The initial nonterminal comes first, the others follow in name order.
    With ``full`` set, a ``Grammar: (...)`` header line is put in front.
    """
    # difference() already builds a new set, so no defensive copy is needed
    others = sorted(set(gra.nonterminals).difference({gra.init}),
                    key=lambda nonterminal: nonterminal.name)
    nonterminals = [gra.init] + others
    rules = self.rules_to_str(gra.rules, nonterminals)
    if not full:
        return rules

    # full - verbose description of the grammar - only for development, dismiss later
    nonterminals_names = self.names_to_str(gra.nonterminals)
    terminals = self.names_to_str(gra.terminals)
    return f"Grammar: ({nonterminals_names}, {terminals}, P, {gra.init.name})\n{rules}"
def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
    """Render ``rules`` as ``A -> x | y`` lines in the order of ``nonterminals``.

    Nonterminals without an entry in ``rules`` are skipped. The variants of
    one nonterminal are de-duplicated and sorted.
    """
    lines = []
    for nonterminal in nonterminals:
        if nonterminal not in rules:
            continue
        variants = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
        rewritten = ' | '.join(variants)
        lines.append(f"{nonterminal.name} -> {rewritten}")
    return "\n".join(lines)
def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
    """Return the text of one right-hand side.

    A tuple is rendered as the concatenated names of its members, anything
    else as its own ``name``.
    """
    # check against the builtin; ``typing.Tuple`` is meant for annotations only
    if isinstance(variant, tuple):
        return ''.join(symbol.name for symbol in variant)
    return variant.name
def dfa_to_str(self, dfa: DFA, full : bool = False) -> str: