Commit 61decabf authored by Vladimír Štill
Browse files

lib: Polishing parser

parent 44bd0954
Pipeline #95125 passed with stage
in 1 minute and 25 seconds
from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from typing import List, Tuple, Union, Set
from copy import deepcopy
from lib.common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from lib.reg import DFA, NFA, RegGrammar
from lib.grammars_cfg import CFG
from lib.regex import RegEx, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4 # type: ignore
import antlr4 # type: ignore
from antlr4.error.ErrorListener import ErrorListener
from lib.parser.DFALexer import DFALexer
from lib.parser.DFAParser import DFAParser
......@@ -19,20 +19,23 @@ from lib.parser.CFGLexer import CFGLexer
from lib.parser.CFGParser import CFGParser
from lib.parser.CFGListener import CFGListener
class ParsingError(Exception):
    """Error raised by this module when parsing an input string fails.

    ``args`` may be either a single message string or an iterable of
    exception arguments (for example another exception's ``args`` tuple).
    """

    def __init__(self, args):
        # Assigning a bare string to ``Exception.args`` converts it with
        # ``tuple()``, which would split the message into single characters.
        # Wrap a lone message so it survives as one argument.
        if isinstance(args, str):
            args = (args,)
        super().__init__(*args)
# This is needed because ANTLR is too lenient: it parses whatever it can even
# when the input formalism and the given type don't match. This way it aborts on any parsing problem.
# This is needed because ANTLR is too lenient: it parses whatever it can
# even when the input formalism and the given type don't match. This way it
# aborts on any parsing problem.
class ErrorShouter(ErrorListener):
    """Error listener that raises whenever a syntax error is reported."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Raise an ``Exception`` carrying the error position and message."""
        message = f"ERROR: when parsing line {line} column {column}: {msg}"
        raise Exception(message)
def _anyvalue_attributes(
        parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """List the upper-case callable attribute names of ``AnyvalueContext``.

    Names starting with a double underscore are excluded.
    """
    context = parser.AnyvalueContext
    names = []
    for name in dir(context):
        if not callable(getattr(context, name)):
            continue
        if name.startswith("__") or not name.isupper():
            continue
        names.append(name)
    return names
......@@ -49,7 +52,9 @@ def _rules_to_str(rules: Union[CFG.Rules, RegGrammar.Rules],
for nonterminal in nonterminals:
if nonterminal not in rules:
continue
rewritten = ' | '.join(set(map(lambda x: _rewrite_variant(x), rules[nonterminal])))
rewritten = ' | '.join(set(map(lambda x:
_rewrite_variant(x),
rules[nonterminal])))
out += f"{nonterminal.name} -> {rewritten}\n"
return out[:-1]
......@@ -60,7 +65,8 @@ def _rewrite_variant(variant: Union[Eps, Terminal,
return ''.join(map(lambda x: x.name, variant))
return variant.name
def dfa_to_str(dfa: DFA, full : bool = False) -> str:
def dfa_to_str(dfa: DFA, full: bool = False) -> str:
transition = ""
for key, dest_state in dfa.transition.items():
state_1, character = key
......@@ -71,7 +77,8 @@ def dfa_to_str(dfa: DFA, full : bool = False) -> str:
# full - verbose description of DFA - only for development, dismiss later
if full:
return f"DFA = ({_names_to_str(dfa.states)}, {_names_to_str(dfa.characters)}, " \
return f"DFA = ({_names_to_str(dfa.states)}, " \
f"{_names_to_str(dfa.characters)}, " \
f"d, {init}, {final})\n{transition}"
return f"{init} {transition} {final}"
......@@ -86,7 +93,9 @@ def reggrammar_to_str(reg: RegGrammar, full: bool = False) -> str:
# full - verbose description of the grammar - only for development, dismiss later
nonterminals_names = _names_to_str(reg.nonterminals)
terminals = _names_to_str(reg.terminals)
return f"Grammar: ({nonterminals_names}, {terminals}, P, {reg.init.name})\n{_rules_to_str(reg.rules, nonterminals)}"
return f"Grammar: ({nonterminals_names}, {terminals}, P, " \
f"{reg.init.name})\n{_rules_to_str(reg.rules, nonterminals)}"
def cfg_to_str(gra: CFG, full: bool = False) -> str:
nonterminals = deepcopy(gra.nonterminals).difference({gra.init})
......@@ -98,26 +107,32 @@ def cfg_to_str(gra: CFG, full: bool = False) -> str:
# full - verbose description of the grammar - only for development, dismiss later
nonterminals_names = _names_to_str(gra.nonterminals)
terminals = _names_to_str(gra.terminals)
return f"Grammar: ({nonterminals_names}, {terminals}, P, {gra.init.name})\n{_rules_to_str(gra.rules, nonterminals)}"
return f"Grammar: ({nonterminals_names}, {terminals}, P, " \
f"{gra.init.name})\n{_rules_to_str(gra.rules, nonterminals)}"
def nfa_to_str(nfa: NFA, full: bool = False) -> str:
    """Render an NFA as text.

    The short form is ``init=<name> <transitions> final=<names>``. With
    ``full=True`` a verbose header listing states and characters is emitted,
    followed by the transitions on the next line.
    """
    pieces = []
    for (state, character), dest_states in nfa.transition.items():
        # Each entry keeps its trailing space; it acts as the separator.
        pieces.append(f"({state.name},{character.name})="
                      f"{_names_to_str(dest_states)} ")
    transition = "".join(pieces)

    init = f"init={nfa.init.name}"
    final = f"final={_names_to_str(nfa.final)}"

    if not full:
        return f"{init} {transition} {final}"

    states = _names_to_str(nfa.states)
    characters = _names_to_str(nfa.characters)
    return f"NFA = ({states}, {characters}, d, {init}, {final})\n{transition}"
def regex_to_str(reg: RegEx) -> str:
    """Return the text produced by ``astprint()`` of ``reg.expression``."""
    expression = reg.expression
    return expression.astprint()
def _common_parse(string: str, given_lexer, given_parser, given_builder):
error_listener = ErrorShouter()
chars = antlr4.InputStream(string)
......@@ -137,7 +152,8 @@ def _common_parse(string: str, given_lexer, given_parser, given_builder):
def cfg(string: str) -> CFG:
    """Parse ``string`` into a ``CFG``.

    Runs the shared parsing routine with the CFG lexer, parser and builder,
    then constructs the grammar from the builder's collected nonterminals,
    terminals, rules and initial nonterminal.

    Raises:
        ParsingError: on any exception raised while parsing or building;
            it carries the original exception's ``args`` and is chained to
            the original exception as its cause.
    """
    try:
        builder = _common_parse(string, CFGLexer, CFGParser, CFGBuilder)
        return CFG(builder.nonterminals, builder.terminals, builder.rules,
                   builder.init)
    except Exception as e:
        # Explicit chaining keeps the underlying failure in the traceback.
        raise ParsingError(e.args) from e
......@@ -159,9 +175,11 @@ def dfa(string: str) -> DFA:
if builder.init is None:
builder.init = builder.first_state
if builder.init is None:
raise ParsingError("Automat musí obsahovat alespoň jeden stav.")
raise ParsingError(
"Automat musí obsahovat alespoň jeden stav.")
dfa = DFA(builder.states, builder.characters, builder.transition, builder.init, builder.final)
dfa = DFA(builder.states, builder.characters, builder.transition,
builder.init, builder.final)
return dfa
except Exception as e:
......@@ -175,9 +193,11 @@ def nfa(string: str) -> NFA:
if builder.init is None:
builder.init = builder.first_state
if builder.init is None:
raise ParsingError("Automat musí obsahovat alespoň jeden stav.")
raise ParsingError(
"Automat musí obsahovat alespoň jeden stav.")
return NFA(builder.states, builder.characters, builder.transition, builder.init, builder.final)
return NFA(builder.states, builder.characters, builder.transition,
builder.init, builder.final)
except Exception as e:
raise ParsingError(e.args)
......@@ -252,7 +272,8 @@ class DFABuilder(DFAListener, StateVisitor):
self.characters.add(character)
if (state, character) in self.transition:
print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, {character.name}).")
print("Upozornění: v textovém zápisu se objevilo více přechodů "
f"pro stejnou dvojici ({state.name}, {character.name}).")
self.transition[state, character] = dest_state
if self.first_state is None:
......@@ -295,21 +316,25 @@ class NFABuilder(NFAListener, StateVisitor):
dest_states = set()
i = 0
while ctx.stateset().statename(i) is not None:
dest_state = State(self.visitStatename(ctx.stateset().statename(i)))
dest_state = State(
self.visitStatename(ctx.stateset().statename(i)))
self.states.add(dest_state)
dest_states.add(dest_state)
i += 1
if ctx.EPSILON():
if (state, Eps()) in self.transition:
print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, ε).")
print("Upozornění: v textovém zápisu se objevilo více "
f"přechodů pro stejnou dvojici ({state.name}, ε).")
self.transition[state, Eps()] = dest_states
self.efa = True
else:
character = Character(self.visitStatename(ctx.statename(1)))
self.characters.add(character)
if (state, character) in self.transition:
print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({state.name}, {character.name}).")
print("Upozornění: v textovém zápisu se objevilo více "
f"přechodů pro stejnou dvojici ({state.name}, "
f"{character.name}).")
self.transition[state, character] = dest_states
if self.first_state is None:
......@@ -342,10 +367,13 @@ class RegExBuilder(RegExVisitor):
# Binary operation: union or explicit concatenation
if ctx.UNION() or ctx.CONCAT():
op = Bin.Union if ctx.UNION() is not None else Bin.Concat
return BinOp(self.visitExpr(ctx.expr(0)), op, self.visitExpr(ctx.expr(1)))
return BinOp(self.visitExpr(ctx.expr(0)), op,
self.visitExpr(ctx.expr(1)))
# Implicit concatenation of (iterated) symbols or expressions in parentheses
expressions = list(map(lambda x: self.visitConcatenable(x), ctx.concatenated()))
# Implicit concatenation of (iterated) symbols or expressions in
# parentheses
expressions = list(map(lambda x:
self.visitConcatenable(x), ctx.concatenated()))
return self.implicit_concat(expressions)
def visitConcatenable(self, ctx):
......@@ -396,7 +424,8 @@ class RegExBuilder(RegExVisitor):
elif ctx.parentheses():
expression = self.visitParentheses(ctx.parentheses())
return IterOp(expression, Iter.Positive) if positive else IterOp(expression, Iter.Iteration)
return IterOp(expression, Iter.Positive) if positive \
else IterOp(expression, Iter.Iteration)
def implicit_concat(self, to_concat):
ast = to_concat[0]
......@@ -405,11 +434,11 @@ class RegExBuilder(RegExVisitor):
ast = BinOp(ast, Bin.Concat, expression)
return ast
# Reserved for future support of comments.
def exitComment(self, ctx) -> None:
    """Intentionally do nothing; placeholder for future comment support."""
    return
class CFGBuilder(CFGListener):
anyvalue_attributes = _anyvalue_attributes(CFGParser)
......@@ -431,11 +460,13 @@ class CFGBuilder(CFGListener):
if ctx.CAPS():
name = str(ctx.CAPS())
elif ctx.LEFT_ANGLE():
name = '<' + ''.join(map(lambda x: self.visitSymbol(x), ctx.symbol())) + '>'
name = '<' + ''.join(map(lambda x: self.visitSymbol(x),
ctx.symbol())) + '>'
if ctx.APOSTROPHE():
name = name + len(ctx.APOSTROPHE())*"'"
name = name + len(ctx.APOSTROPHE()) * "'"
elif ctx.APOSTROPHE():
name = self.visitSymbol(ctx.symbol(0)) + len(ctx.APOSTROPHE())*"'"
name = self.visitSymbol(ctx.symbol(0)) \
+ len(ctx.APOSTROPHE()) * "'"
nonterminal = Nonterminal(name)
self.nonterminals.add(nonterminal)
......@@ -465,7 +496,8 @@ class CFGBuilder(CFGListener):
sequence.append(terminal)
else:
sequence.append(self.visitNonterminal(ctx.term_or_nonterm(i).nonterminal()))
sequence.append(self.visitNonterminal(
ctx.term_or_nonterm(i).nonterminal()))
i += 1
return sequence
......@@ -473,7 +505,7 @@ class CFGBuilder(CFGListener):
def exitOnerule(self, ctx):
nonterminal = self.visitNonterminal(ctx.nonterminal())
self.nonterminals.add(nonterminal)
if self.init == None:
if self.init is None:
self.init = nonterminal
# multiple lines for one nonterminal are possible this way
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment