Commit e1e7942d authored by Vladimír Štill's avatar Vladimír Štill
Browse files

lib: Split-off regular grammar parser

in preparation for ANTLR-based validators, #9
parent 125cfec7
Pipeline #95269 failed with stage
in 19 seconds
......@@ -21,7 +21,7 @@ class RegGrammar:
self.terminals = terminals
self.rules = rules
self.init = init
assert self.check()
self.check()
@staticmethod
def from_cfg(cfg: CFG) -> RegGrammar:
......@@ -38,23 +38,31 @@ class RegGrammar:
return RegGrammar(cfg.nonterminals, cfg.terminals, reg_rules, cfg.init)
# exception
# unused formal requirements check, regular grammars specific (rule variants)
def check(self) -> bool:
def check(self) -> None:
assert len(self.nonterminals) > 0, "empty grammar"
has_eps_start = False
has_start_loop = False
for nonterminal in self.rules:
assert nonterminal in self.nonterminals, "unknown nonterminal " + nonterminal.name
assert nonterminal in self.nonterminals, \
f"unknown nonterminal {nonterminal.name}"
for rule in self.rules[nonterminal]:
if isinstance(rule, Terminal):
if isinstance(rule, Eps):
assert nonterminal == self.init, \
"ε can only appear for initial nonterminal, " \
f"appears for {nonterminal}"
has_eps_start = True
elif isinstance(rule, Terminal):
assert rule in self.terminals, "unknown terminal " + rule.name
elif not isinstance(rule, Eps):
else:
assert rule[0] in self.terminals, "unknown terminal " + rule[0].name
assert rule[1] in self.nonterminals, "unknown nonterminal " + rule[1].name
has_start_loop |= rule[1] == self.init
assert self.init in self.nonterminals, "init not in nonterminals"
return True
assert not has_eps_start or not has_start_loop, \
"since ε is present the start nonterminal must not appear on any " \
"right-hand side of a rule"
def reggrammar_to_nfa(self) -> NFA:
states: Set[State] = set()
......
......@@ -4,15 +4,15 @@ grammar CFG;
start: (onerule NEWLINE+)* onerule (NEWLINE+ | ) comment;
onerule: nonterminal ARROW (rewrite DELIMITER)* rewrite;
onerule: nonterm ARROW (rewrite DELIMITER)* rewrite;
rewrite: (term_or_nonterm+ | EPSILON);
term_or_nonterm: (terminal | nonterminal);
term_or_nonterm: (term | nonterm);
terminal: (TERMINAL | QUOTE anyvalue+ QUOTE);
term: (TERMINAL | QUOTE anyvalue+ QUOTE);
nonterminal: (CAPS | (LEFT_ANGLE symbol+ RIGHT_ANGLE (APOSTROPHE*)) | (symbol APOSTROPHE+));
nonterm: (CAPS | (LEFT_ANGLE symbol+ RIGHT_ANGLE (APOSTROPHE*)) | (symbol APOSTROPHE+));
symbol: (TERMINAL | CAPS | UNDERSCORE);
......@@ -41,3 +41,7 @@ QUOTE : '"';
WS : [ \r\t]+ -> skip ;
ANYCHAR : .;
/*
vim: ft=antlr
*/
......@@ -44,21 +44,21 @@ class CFGListener(ParseTreeListener):
pass
# Enter a parse tree produced by CFGParser#terminal.
def enterTerminal(self, ctx:CFGParser.TerminalContext):
# Enter a parse tree produced by CFGParser#term.
def enterTerm(self, ctx:CFGParser.TermContext):
pass
# Exit a parse tree produced by CFGParser#terminal.
def exitTerminal(self, ctx:CFGParser.TerminalContext):
# Exit a parse tree produced by CFGParser#term.
def exitTerm(self, ctx:CFGParser.TermContext):
pass
# Enter a parse tree produced by CFGParser#nonterminal.
def enterNonterminal(self, ctx:CFGParser.NonterminalContext):
# Enter a parse tree produced by CFGParser#nonterm.
def enterNonterm(self, ctx:CFGParser.NontermContext):
pass
# Exit a parse tree produced by CFGParser#nonterminal.
def exitNonterminal(self, ctx:CFGParser.NonterminalContext):
# Exit a parse tree produced by CFGParser#nonterm.
def exitNonterm(self, ctx:CFGParser.NontermContext):
pass
......
......@@ -73,14 +73,14 @@ class CFGParser ( Parser ):
RULE_onerule = 1
RULE_rewrite = 2
RULE_term_or_nonterm = 3
RULE_terminal = 4
RULE_nonterminal = 5
RULE_term = 4
RULE_nonterm = 5
RULE_symbol = 6
RULE_comment = 7
RULE_anyvalue = 8
ruleNames = [ "start", "onerule", "rewrite", "term_or_nonterm", "terminal",
"nonterminal", "symbol", "comment", "anyvalue" ]
ruleNames = [ "start", "onerule", "rewrite", "term_or_nonterm", "term",
"nonterm", "symbol", "comment", "anyvalue" ]
EOF = Token.EOF
LEFT_ANGLE=1
......@@ -222,8 +222,8 @@ class CFGParser ( Parser ):
super().__init__(parent, invokingState)
self.parser = parser
def nonterminal(self):
return self.getTypedRuleContext(CFGParser.NonterminalContext,0)
def nonterm(self):
return self.getTypedRuleContext(CFGParser.NontermContext,0)
def ARROW(self):
......@@ -269,7 +269,7 @@ class CFGParser ( Parser ):
try:
self.enterOuterAlt(localctx, 1)
self.state = 40
self.nonterminal()
self.nonterm()
self.state = 41
self.match(CFGParser.ARROW)
self.state = 47
......@@ -380,12 +380,12 @@ class CFGParser ( Parser ):
super().__init__(parent, invokingState)
self.parser = parser
def terminal(self):
return self.getTypedRuleContext(CFGParser.TerminalContext,0)
def term(self):
return self.getTypedRuleContext(CFGParser.TermContext,0)
def nonterminal(self):
return self.getTypedRuleContext(CFGParser.NonterminalContext,0)
def nonterm(self):
return self.getTypedRuleContext(CFGParser.NontermContext,0)
def getRuleIndex(self):
......@@ -419,12 +419,12 @@ class CFGParser ( Parser ):
la_ = self._interp.adaptivePredict(self._input,7,self._ctx)
if la_ == 1:
self.state = 60
self.terminal()
self.term()
pass
elif la_ == 2:
self.state = 61
self.nonterminal()
self.nonterm()
pass
......@@ -437,7 +437,7 @@ class CFGParser ( Parser ):
return localctx
class TerminalContext(ParserRuleContext):
class TermContext(ParserRuleContext):
__slots__ = 'parser'
def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
......@@ -461,29 +461,29 @@ class CFGParser ( Parser ):
def getRuleIndex(self):
return CFGParser.RULE_terminal
return CFGParser.RULE_term
def enterRule(self, listener:ParseTreeListener):
if hasattr( listener, "enterTerminal" ):
listener.enterTerminal(self)
if hasattr( listener, "enterTerm" ):
listener.enterTerm(self)
def exitRule(self, listener:ParseTreeListener):
if hasattr( listener, "exitTerminal" ):
listener.exitTerminal(self)
if hasattr( listener, "exitTerm" ):
listener.exitTerm(self)
def accept(self, visitor:ParseTreeVisitor):
if hasattr( visitor, "visitTerminal" ):
return visitor.visitTerminal(self)
if hasattr( visitor, "visitTerm" ):
return visitor.visitTerm(self)
else:
return visitor.visitChildren(self)
def terminal(self):
def term(self):
localctx = CFGParser.TerminalContext(self, self._ctx, self.state)
self.enterRule(localctx, 8, self.RULE_terminal)
localctx = CFGParser.TermContext(self, self._ctx, self.state)
self.enterRule(localctx, 8, self.RULE_term)
self._la = 0 # Token type
try:
self.enterOuterAlt(localctx, 1)
......@@ -524,7 +524,7 @@ class CFGParser ( Parser ):
return localctx
class NonterminalContext(ParserRuleContext):
class NontermContext(ParserRuleContext):
__slots__ = 'parser'
def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
......@@ -554,29 +554,29 @@ class CFGParser ( Parser ):
return self.getToken(CFGParser.APOSTROPHE, i)
def getRuleIndex(self):
return CFGParser.RULE_nonterminal
return CFGParser.RULE_nonterm
def enterRule(self, listener:ParseTreeListener):
if hasattr( listener, "enterNonterminal" ):
listener.enterNonterminal(self)
if hasattr( listener, "enterNonterm" ):
listener.enterNonterm(self)
def exitRule(self, listener:ParseTreeListener):
if hasattr( listener, "exitNonterminal" ):
listener.exitNonterminal(self)
if hasattr( listener, "exitNonterm" ):
listener.exitNonterm(self)
def accept(self, visitor:ParseTreeVisitor):
if hasattr( visitor, "visitNonterminal" ):
return visitor.visitNonterminal(self)
if hasattr( visitor, "visitNonterm" ):
return visitor.visitNonterm(self)
else:
return visitor.visitChildren(self)
def nonterminal(self):
def nonterm(self):
localctx = CFGParser.NonterminalContext(self, self._ctx, self.state)
self.enterRule(localctx, 10, self.RULE_nonterminal)
localctx = CFGParser.NontermContext(self, self._ctx, self.state)
self.enterRule(localctx, 10, self.RULE_nonterm)
self._la = 0 # Token type
try:
self.enterOuterAlt(localctx, 1)
......
grammar RegG;
import CFG;
rewrite: (term nonterm | term | EPSILON);
/*
vim: ft=antlr
*/
......@@ -18,6 +18,9 @@ from lib.parser.RegExVisitor import RegExVisitor
from lib.parser.CFGLexer import CFGLexer
from lib.parser.CFGParser import CFGParser
from lib.parser.CFGListener import CFGListener
from lib.parser.RegGLexer import RegGLexer
from lib.parser.RegGParser import RegGParser
from lib.parser.RegGListener import RegGListener
class ParsingError(Exception):
......@@ -162,8 +165,9 @@ def cfg(string: str) -> CFG:
def reggrammar(string: str) -> RegGrammar:
try:
cfg_ = cfg(string)
return RegGrammar.from_cfg(cfg_)
builder = _common_parse(string, RegGLexer, RegGParser, RegGBuilder)
return RegGrammar(builder.nonterminals, builder.terminals,
builder.rules, builder.init)
except Exception as e:
raise ParsingError(e.args)
......@@ -224,7 +228,7 @@ def regex(string: str) -> RegEx:
raise ParsingError(e.args)
class CommonVisitor:
class CommonFABuilder(object):
def visitStatename(self, ctx: Any) -> str:
if ctx.QUOTE():
......@@ -248,8 +252,8 @@ class CommonVisitor:
# The order of inheritance is important – we need the ‹exit*› functions from
# ‹CommonVisitor› to precede the empty versions from ‹DFAListener›
class DFABuilder(CommonVisitor, DFAListener):
# ‹CommonFABuilder› to precede the empty versions from ‹DFAListener›
class DFABuilder(CommonFABuilder, DFAListener):
def __init__(self) -> None:
self.states = set()
......@@ -276,7 +280,7 @@ class DFABuilder(CommonVisitor, DFAListener):
self.first_state = state
class NFABuilder(CommonVisitor, NFAListener):
class NFABuilder(CommonFABuilder, NFAListener):
def __init__(self) -> None:
self.states = set()
......@@ -392,7 +396,7 @@ class RegExBuilder(RegExVisitor):
return None
class CFGBuilder(CFGListener):
class CommonGrammarBuilder(object):
def __init__(self) -> None:
self.terminals = set()
self.nonterminals = set()
......@@ -407,7 +411,7 @@ class CFGBuilder(CFGListener):
else:
return '_'
def visitNonterminal(self, ctx: Any) -> None:
def visitNonterm(self, ctx: Any) -> None:
if ctx.CAPS():
name = str(ctx.CAPS())
elif ctx.LEFT_ANGLE():
......@@ -423,28 +427,18 @@ class CFGBuilder(CFGListener):
self.nonterminals.add(nonterminal)
return nonterminal
def visitRewrite(self, ctx: Any) -> None:
sequence = []
for subctx in ctx.term_or_nonterm():
if subctx.terminal():
term_ctx = subctx.terminal()
if term_ctx.TERMINAL():
name = str(term_ctx.TERMINAL())
elif term_ctx.QUOTE():
name = _unquote(term_ctx.getText())
terminal = Terminal(name)
self.terminals.add(terminal)
sequence.append(terminal)
else:
sequence.append(self.visitNonterminal(
subctx.nonterminal()))
def visitTerm(self, term_ctx: Any) -> None:
if term_ctx.TERMINAL():
name = str(term_ctx.TERMINAL())
elif term_ctx.QUOTE():
name = _unquote(term_ctx.getText())
return sequence
terminal = Terminal(name)
self.terminals.add(terminal)
return terminal
def exitOnerule(self, ctx: Any) -> None:
nonterminal = self.visitNonterminal(ctx.nonterminal())
nonterminal = self.visitNonterm(ctx.nonterm())
self.nonterminals.add(nonterminal)
if self.init is None:
self.init = nonterminal
......@@ -454,12 +448,36 @@ class CFGBuilder(CFGListener):
self.rules[nonterminal] = set()
for subctx in ctx.rewrite():
if subctx.EPSILON():
self.rules[nonterminal].add(Eps())
if subctx.term_or_nonterm():
sequence = self.visitRewrite(subctx)
self.rules[nonterminal].add(tuple(sequence))
self.rules[nonterminal].add(self.visitRewrite(subctx))
# for future support of comments
def exitComment(self, ctx: Any) -> None:
return None
class CFGBuilder(CommonGrammarBuilder, CFGListener):
    """Grammar builder for general context-free grammars."""

    def visitRewrite(self, ctx: Any) \
            -> Union[Eps, Tuple[Union[Terminal, Nonterminal], ...]]:
        """Convert one ``rewrite`` parse node into a rule right-hand side.

        Returns ``Eps()`` when the node carries the EPSILON token; otherwise
        a tuple holding, in source order, the result of ``visitTerm`` or
        ``visitNonterm`` for every ``term_or_nonterm`` child.
        """
        if ctx.EPSILON():
            return Eps()

        # Each child is either a term or a nonterm; dispatch accordingly.
        return tuple(
            self.visitTerm(item.term()) if item.term()
            else self.visitNonterm(item.nonterm())
            for item in ctx.term_or_nonterm()
        )
class RegGBuilder(CommonGrammarBuilder, RegGListener):
    """Grammar builder for regular grammars (the ``RegG`` ANTLR grammar)."""

    def visitRewrite(self, ctx: Any) \
            -> Union[Eps, Terminal, Tuple[Terminal, Nonterminal]]:
        """Convert one ``rewrite`` parse node into a regular-grammar rule.

        The ``RegG`` grammar allows three shapes for a rewrite:
        ``term nonterm``, ``term`` or ``EPSILON``.

        Returns:
            ``Eps()`` for an EPSILON rewrite, a ``(terminal, nonterminal)``
            pair when a nonterm child is present, and the bare terminal
            otherwise.
        """
        if ctx.EPSILON():
            return Eps()

        # A non-epsilon rewrite always starts with a term.
        assert ctx.term() is not None
        term = self.visitTerm(ctx.term())
        if ctx.nonterm():
            return (term, self.visitNonterm(ctx.nonterm()))
        return term
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment