Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
e1e7942d
Commit
e1e7942d
authored
Jun 10, 2021
by
Vladimír Štill
Browse files
lib: Split-off regular grammar parser
in preparation for ANTLR-based validators,
#9
parent
125cfec7
Pipeline
#95269
failed with stage
in 19 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
lib/grammars.py
View file @
e1e7942d
...
...
@@ -21,7 +21,7 @@ class RegGrammar:
self
.
terminals
=
terminals
self
.
rules
=
rules
self
.
init
=
init
assert
self
.
check
()
self
.
check
()
@
staticmethod
def
from_cfg
(
cfg
:
CFG
)
->
RegGrammar
:
...
...
@@ -38,23 +38,31 @@ class RegGrammar:
return
RegGrammar
(
cfg
.
nonterminals
,
cfg
.
terminals
,
reg_rules
,
cfg
.
init
)
# exception
# unused formal requirements check, regular grammars specific (rule variants)
def
check
(
self
)
->
bool
:
def
check
(
self
)
->
None
:
assert
len
(
self
.
nonterminals
)
>
0
,
"empty grammar"
has_eps_start
=
False
has_start_loop
=
False
for
nonterminal
in
self
.
rules
:
assert
nonterminal
in
self
.
nonterminals
,
"unknown nonterminal "
+
nonterminal
.
name
assert
nonterminal
in
self
.
nonterminals
,
\
f
"unknown nonterminal
{
nonterminal
.
name
}
"
for
rule
in
self
.
rules
[
nonterminal
]:
if
isinstance
(
rule
,
Terminal
):
if
isinstance
(
rule
,
Eps
):
assert
nonterminal
==
self
.
init
,
\
"ε can only appear for initial nonterminal, "
\
f
"appears for
{
nonterminal
}
"
has_eps_start
=
True
elif
isinstance
(
rule
,
Terminal
):
assert
rule
in
self
.
terminals
,
"unknown terminal "
+
rule
.
name
el
if
not
isinstance
(
rule
,
Eps
)
:
el
se
:
assert
rule
[
0
]
in
self
.
terminals
,
"unknown terminal "
+
rule
[
0
].
name
assert
rule
[
1
]
in
self
.
nonterminals
,
"unknown nonterminal "
+
rule
[
1
].
name
has_start_loop
|=
rule
[
1
]
==
self
.
init
assert
self
.
init
in
self
.
nonterminals
,
"init not in nonterminals"
return
True
assert
not
has_eps_start
or
not
has_start_loop
,
\
"since ε is present the start nonterminal must not appear on any "
\
"right-hand side of a rule"
def
reggrammar_to_nfa
(
self
)
->
NFA
:
states
:
Set
[
State
]
=
set
()
...
...
lib/parser/CFG.g4
View file @
e1e7942d
...
...
@@ -4,15 +4,15 @@ grammar CFG;
start: (onerule NEWLINE+)* onerule (NEWLINE+ | ) comment;
onerule: nonterm
inal
ARROW (rewrite DELIMITER)* rewrite;
onerule: nonterm ARROW (rewrite DELIMITER)* rewrite;
rewrite: (term_or_nonterm+ | EPSILON);
term_or_nonterm: (term
inal
| nonterm
inal
);
term_or_nonterm: (term | nonterm);
term
inal:
(TERMINAL | QUOTE anyvalue+ QUOTE);
term
:
(TERMINAL | QUOTE anyvalue+ QUOTE);
nonterm
inal:
(CAPS | (LEFT_ANGLE symbol+ RIGHT_ANGLE (APOSTROPHE*)) | (symbol APOSTROPHE+));
nonterm
:
(CAPS | (LEFT_ANGLE symbol+ RIGHT_ANGLE (APOSTROPHE*)) | (symbol APOSTROPHE+));
symbol: (TERMINAL | CAPS | UNDERSCORE);
...
...
@@ -41,3 +41,7 @@ QUOTE : '"';
WS : [ \r\t]+ -> skip ;
ANYCHAR : .;
/*
vim: ft=antlr
*/
lib/parser/CFGListener.py
View file @
e1e7942d
...
...
@@ -44,21 +44,21 @@ class CFGListener(ParseTreeListener):
pass
# Enter a parse tree produced by CFGParser#term
inal
.
def
enterTerm
inal
(
self
,
ctx
:
CFGParser
.
Term
inal
Context
):
# Enter a parse tree produced by CFGParser#term.
def
enterTerm
(
self
,
ctx
:
CFGParser
.
TermContext
):
pass
# Exit a parse tree produced by CFGParser#term
inal
.
def
exitTerm
inal
(
self
,
ctx
:
CFGParser
.
Term
inal
Context
):
# Exit a parse tree produced by CFGParser#term.
def
exitTerm
(
self
,
ctx
:
CFGParser
.
TermContext
):
pass
# Enter a parse tree produced by CFGParser#nonterm
inal
.
def
enterNonterm
inal
(
self
,
ctx
:
CFGParser
.
Nonterm
inal
Context
):
# Enter a parse tree produced by CFGParser#nonterm.
def
enterNonterm
(
self
,
ctx
:
CFGParser
.
NontermContext
):
pass
# Exit a parse tree produced by CFGParser#nonterm
inal
.
def
exitNonterm
inal
(
self
,
ctx
:
CFGParser
.
Nonterm
inal
Context
):
# Exit a parse tree produced by CFGParser#nonterm.
def
exitNonterm
(
self
,
ctx
:
CFGParser
.
NontermContext
):
pass
...
...
lib/parser/CFGParser.py
View file @
e1e7942d
...
...
@@ -73,14 +73,14 @@ class CFGParser ( Parser ):
RULE_onerule
=
1
RULE_rewrite
=
2
RULE_term_or_nonterm
=
3
RULE_term
inal
=
4
RULE_nonterm
inal
=
5
RULE_term
=
4
RULE_nonterm
=
5
RULE_symbol
=
6
RULE_comment
=
7
RULE_anyvalue
=
8
ruleNames
=
[
"start"
,
"onerule"
,
"rewrite"
,
"term_or_nonterm"
,
"term
inal
"
,
"nonterm
inal
"
,
"symbol"
,
"comment"
,
"anyvalue"
]
ruleNames
=
[
"start"
,
"onerule"
,
"rewrite"
,
"term_or_nonterm"
,
"term"
,
"nonterm"
,
"symbol"
,
"comment"
,
"anyvalue"
]
EOF
=
Token
.
EOF
LEFT_ANGLE
=
1
...
...
@@ -222,8 +222,8 @@ class CFGParser ( Parser ):
super
().
__init__
(
parent
,
invokingState
)
self
.
parser
=
parser
def
nonterm
inal
(
self
):
return
self
.
getTypedRuleContext
(
CFGParser
.
Nonterm
inal
Context
,
0
)
def
nonterm
(
self
):
return
self
.
getTypedRuleContext
(
CFGParser
.
NontermContext
,
0
)
def
ARROW
(
self
):
...
...
@@ -269,7 +269,7 @@ class CFGParser ( Parser ):
try
:
self
.
enterOuterAlt
(
localctx
,
1
)
self
.
state
=
40
self
.
nonterm
inal
()
self
.
nonterm
()
self
.
state
=
41
self
.
match
(
CFGParser
.
ARROW
)
self
.
state
=
47
...
...
@@ -380,12 +380,12 @@ class CFGParser ( Parser ):
super
().
__init__
(
parent
,
invokingState
)
self
.
parser
=
parser
def
term
inal
(
self
):
return
self
.
getTypedRuleContext
(
CFGParser
.
Term
inal
Context
,
0
)
def
term
(
self
):
return
self
.
getTypedRuleContext
(
CFGParser
.
TermContext
,
0
)
def
nonterm
inal
(
self
):
return
self
.
getTypedRuleContext
(
CFGParser
.
Nonterm
inal
Context
,
0
)
def
nonterm
(
self
):
return
self
.
getTypedRuleContext
(
CFGParser
.
NontermContext
,
0
)
def
getRuleIndex
(
self
):
...
...
@@ -419,12 +419,12 @@ class CFGParser ( Parser ):
la_
=
self
.
_interp
.
adaptivePredict
(
self
.
_input
,
7
,
self
.
_ctx
)
if
la_
==
1
:
self
.
state
=
60
self
.
term
inal
()
self
.
term
()
pass
elif
la_
==
2
:
self
.
state
=
61
self
.
nonterm
inal
()
self
.
nonterm
()
pass
...
...
@@ -437,7 +437,7 @@ class CFGParser ( Parser ):
return
localctx
class
Term
inal
Context
(
ParserRuleContext
):
class
TermContext
(
ParserRuleContext
):
__slots__
=
'parser'
def
__init__
(
self
,
parser
,
parent
:
ParserRuleContext
=
None
,
invokingState
:
int
=-
1
):
...
...
@@ -461,29 +461,29 @@ class CFGParser ( Parser ):
def
getRuleIndex
(
self
):
return
CFGParser
.
RULE_term
inal
return
CFGParser
.
RULE_term
def
enterRule
(
self
,
listener
:
ParseTreeListener
):
if
hasattr
(
listener
,
"enterTerm
inal
"
):
listener
.
enterTerm
inal
(
self
)
if
hasattr
(
listener
,
"enterTerm"
):
listener
.
enterTerm
(
self
)
def
exitRule
(
self
,
listener
:
ParseTreeListener
):
if
hasattr
(
listener
,
"exitTerm
inal
"
):
listener
.
exitTerm
inal
(
self
)
if
hasattr
(
listener
,
"exitTerm"
):
listener
.
exitTerm
(
self
)
def
accept
(
self
,
visitor
:
ParseTreeVisitor
):
if
hasattr
(
visitor
,
"visitTerm
inal
"
):
return
visitor
.
visitTerm
inal
(
self
)
if
hasattr
(
visitor
,
"visitTerm"
):
return
visitor
.
visitTerm
(
self
)
else
:
return
visitor
.
visitChildren
(
self
)
def
term
inal
(
self
):
def
term
(
self
):
localctx
=
CFGParser
.
Term
inal
Context
(
self
,
self
.
_ctx
,
self
.
state
)
self
.
enterRule
(
localctx
,
8
,
self
.
RULE_term
inal
)
localctx
=
CFGParser
.
TermContext
(
self
,
self
.
_ctx
,
self
.
state
)
self
.
enterRule
(
localctx
,
8
,
self
.
RULE_term
)
self
.
_la
=
0
# Token type
try
:
self
.
enterOuterAlt
(
localctx
,
1
)
...
...
@@ -524,7 +524,7 @@ class CFGParser ( Parser ):
return
localctx
class
Nonterm
inal
Context
(
ParserRuleContext
):
class
NontermContext
(
ParserRuleContext
):
__slots__
=
'parser'
def
__init__
(
self
,
parser
,
parent
:
ParserRuleContext
=
None
,
invokingState
:
int
=-
1
):
...
...
@@ -554,29 +554,29 @@ class CFGParser ( Parser ):
return
self
.
getToken
(
CFGParser
.
APOSTROPHE
,
i
)
def
getRuleIndex
(
self
):
return
CFGParser
.
RULE_nonterm
inal
return
CFGParser
.
RULE_nonterm
def
enterRule
(
self
,
listener
:
ParseTreeListener
):
if
hasattr
(
listener
,
"enterNonterm
inal
"
):
listener
.
enterNonterm
inal
(
self
)
if
hasattr
(
listener
,
"enterNonterm"
):
listener
.
enterNonterm
(
self
)
def
exitRule
(
self
,
listener
:
ParseTreeListener
):
if
hasattr
(
listener
,
"exitNonterm
inal
"
):
listener
.
exitNonterm
inal
(
self
)
if
hasattr
(
listener
,
"exitNonterm"
):
listener
.
exitNonterm
(
self
)
def
accept
(
self
,
visitor
:
ParseTreeVisitor
):
if
hasattr
(
visitor
,
"visitNonterm
inal
"
):
return
visitor
.
visitNonterm
inal
(
self
)
if
hasattr
(
visitor
,
"visitNonterm"
):
return
visitor
.
visitNonterm
(
self
)
else
:
return
visitor
.
visitChildren
(
self
)
def
nonterm
inal
(
self
):
def
nonterm
(
self
):
localctx
=
CFGParser
.
Nonterm
inal
Context
(
self
,
self
.
_ctx
,
self
.
state
)
self
.
enterRule
(
localctx
,
10
,
self
.
RULE_nonterm
inal
)
localctx
=
CFGParser
.
NontermContext
(
self
,
self
.
_ctx
,
self
.
state
)
self
.
enterRule
(
localctx
,
10
,
self
.
RULE_nonterm
)
self
.
_la
=
0
# Token type
try
:
self
.
enterOuterAlt
(
localctx
,
1
)
...
...
lib/parser/RegG.g4
0 → 100644
View file @
e1e7942d
grammar RegG;
import CFG;
rewrite: (term nonterm | term | EPSILON);
/*
vim: ft=antlr
*/
lib/parser/__init__.py
View file @
e1e7942d
...
...
@@ -18,6 +18,9 @@ from lib.parser.RegExVisitor import RegExVisitor
from
lib.parser.CFGLexer
import
CFGLexer
from
lib.parser.CFGParser
import
CFGParser
from
lib.parser.CFGListener
import
CFGListener
from
lib.parser.RegGLexer
import
RegGLexer
from
lib.parser.RegGParser
import
RegGParser
from
lib.parser.RegGListener
import
RegGListener
class
ParsingError
(
Exception
):
...
...
@@ -162,8 +165,9 @@ def cfg(string: str) -> CFG:
def
reggrammar
(
string
:
str
)
->
RegGrammar
:
try
:
cfg_
=
cfg
(
string
)
return
RegGrammar
.
from_cfg
(
cfg_
)
builder
=
_common_parse
(
string
,
RegGLexer
,
RegGParser
,
RegGBuilder
)
return
RegGrammar
(
builder
.
nonterminals
,
builder
.
terminals
,
builder
.
rules
,
builder
.
init
)
except
Exception
as
e
:
raise
ParsingError
(
e
.
args
)
...
...
@@ -224,7 +228,7 @@ def regex(string: str) -> RegEx:
raise
ParsingError
(
e
.
args
)
class
Common
Visitor
:
class
Common
FABuilder
(
object
)
:
def
visitStatename
(
self
,
ctx
:
Any
)
->
str
:
if
ctx
.
QUOTE
():
...
...
@@ -248,8 +252,8 @@ class CommonVisitor:
# The order of inheritance is important – we need the ‹exit*› functions from
# ‹Common
Visito
r› to precede the empty versions from ‹DFAListener›
class
DFABuilder
(
Common
Visito
r
,
DFAListener
):
# ‹Common
FABuilde
r› to precede the empty versions from ‹DFAListener›
class
DFABuilder
(
Common
FABuilde
r
,
DFAListener
):
def
__init__
(
self
)
->
None
:
self
.
states
=
set
()
...
...
@@ -276,7 +280,7 @@ class DFABuilder(CommonVisitor, DFAListener):
self
.
first_state
=
state
class
NFABuilder
(
Common
Visito
r
,
NFAListener
):
class
NFABuilder
(
Common
FABuilde
r
,
NFAListener
):
def
__init__
(
self
)
->
None
:
self
.
states
=
set
()
...
...
@@ -392,7 +396,7 @@ class RegExBuilder(RegExVisitor):
return
None
class
C
FGBuilder
(
CFGListener
):
class
C
ommonGrammarBuilder
(
object
):
def
__init__
(
self
)
->
None
:
self
.
terminals
=
set
()
self
.
nonterminals
=
set
()
...
...
@@ -407,7 +411,7 @@ class CFGBuilder(CFGListener):
else
:
return
'_'
def
visitNonterm
inal
(
self
,
ctx
:
Any
)
->
None
:
def
visitNonterm
(
self
,
ctx
:
Any
)
->
None
:
if
ctx
.
CAPS
():
name
=
str
(
ctx
.
CAPS
())
elif
ctx
.
LEFT_ANGLE
():
...
...
@@ -423,28 +427,18 @@ class CFGBuilder(CFGListener):
self
.
nonterminals
.
add
(
nonterminal
)
return
nonterminal
def
visitRewrite
(
self
,
ctx
:
Any
)
->
None
:
sequence
=
[]
for
subctx
in
ctx
.
term_or_nonterm
():
if
subctx
.
terminal
():
term_ctx
=
subctx
.
terminal
()
if
term_ctx
.
TERMINAL
():
name
=
str
(
term_ctx
.
TERMINAL
())
elif
term_ctx
.
QUOTE
():
name
=
_unquote
(
term_ctx
.
getText
())
terminal
=
Terminal
(
name
)
self
.
terminals
.
add
(
terminal
)
sequence
.
append
(
terminal
)
else
:
sequence
.
append
(
self
.
visitNonterminal
(
subctx
.
nonterminal
()))
def
visitTerm
(
self
,
term_ctx
:
Any
)
->
None
:
if
term_ctx
.
TERMINAL
():
name
=
str
(
term_ctx
.
TERMINAL
())
elif
term_ctx
.
QUOTE
():
name
=
_unquote
(
term_ctx
.
getText
())
return
sequence
terminal
=
Terminal
(
name
)
self
.
terminals
.
add
(
terminal
)
return
terminal
def
exitOnerule
(
self
,
ctx
:
Any
)
->
None
:
nonterminal
=
self
.
visitNonterm
inal
(
ctx
.
nonterm
inal
())
nonterminal
=
self
.
visitNonterm
(
ctx
.
nonterm
())
self
.
nonterminals
.
add
(
nonterminal
)
if
self
.
init
is
None
:
self
.
init
=
nonterminal
...
...
@@ -454,12 +448,36 @@ class CFGBuilder(CFGListener):
self
.
rules
[
nonterminal
]
=
set
()
for
subctx
in
ctx
.
rewrite
():
if
subctx
.
EPSILON
():
self
.
rules
[
nonterminal
].
add
(
Eps
())
if
subctx
.
term_or_nonterm
():
sequence
=
self
.
visitRewrite
(
subctx
)
self
.
rules
[
nonterminal
].
add
(
tuple
(
sequence
))
self
.
rules
[
nonterminal
].
add
(
self
.
visitRewrite
(
subctx
))
# for future support of comments
def
exitComment
(
self
,
ctx
:
Any
)
->
None
:
return
None
class
CFGBuilder
(
CommonGrammarBuilder
,
CFGListener
):
def
visitRewrite
(
self
,
ctx
:
Any
)
\
->
Union
[
Eps
,
Tuple
[
Union
[
Terminal
,
Nonterminal
],
...]]:
if
ctx
.
EPSILON
():
return
Eps
()
sequence
=
[]
for
subctx
in
ctx
.
term_or_nonterm
():
if
subctx
.
term
():
sequence
.
append
(
self
.
visitTerm
(
subctx
.
term
()))
else
:
sequence
.
append
(
self
.
visitNonterm
(
subctx
.
nonterm
()))
return
tuple
(
sequence
)
class
RegGBuilder
(
CommonGrammarBuilder
,
RegGListener
):
def
visitRewrite
(
self
,
ctx
:
Any
)
->
None
:
if
ctx
.
EPSILON
():
return
Eps
()
assert
ctx
.
term
()
is
not
None
term
=
self
.
visitTerm
(
ctx
.
term
())
if
ctx
.
nonterm
():
return
tuple
([
term
,
self
.
visitNonterm
(
ctx
.
nonterm
())])
return
term
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment