Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
a1b80779
Commit
a1b80779
authored
Jun 09, 2021
by
Vladimír Štill
Browse files
lib: Deduplicate & simplify parser (main for FA)
parent
61decabf
Changes
17
Expand all
Hide whitespace changes
Inline
Side-by-side
lib/parser/CFGLexer.py
View file @
a1b80779
# Generated from lib/pars
ing
/CFG.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/CFG.g4 by ANTLR 4.9.2
from
antlr4
import
*
from
io
import
StringIO
import
sys
...
...
lib/parser/CFGListener.py
View file @
a1b80779
# Generated from lib/pars
ing
/CFG.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/CFG.g4 by ANTLR 4.9.2
from
antlr4
import
*
if
__name__
is
not
None
and
"."
in
__name__
:
from
.CFGParser
import
CFGParser
...
...
lib/parser/CFGParser.py
View file @
a1b80779
# Generated from lib/pars
ing
/CFG.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/CFG.g4 by ANTLR 4.9.2
# encoding: utf-8
from
antlr4
import
*
from
io
import
StringIO
...
...
lib/parser/DFA.g4
View file @
a1b80779
grammar DFA;
/* Parser Rules */
start: init production* final comment;
init: (INIT EQUALS statename | );
import FA;
production: LEFT_PARENTHESIS statename COMMA statename RIGHT_PARENTHESIS EQUALS statename;
final: FINAL EQUALS LEFT_BRACKET (statename (COMMA statename)* | ) RIGHT_BRACKET;
statename: (INIT | FINAL | STATE | QUOTE anyvalue+ QUOTE);
comment: (HASH anyvalue* | );
anyvalue: INIT | EQUALS | LEFT_PARENTHESIS | RIGHT_PARENTHESIS | LEFT_BRACKET | RIGHT_BRACKET | COMMA | FINAL | STATE | ANYCHAR | HASH;
/* Lexer Rules */
/* Tokens */
INIT : 'init';
EQUALS : '=';
LEFT_PARENTHESIS : '(';
RIGHT_PARENTHESIS : ')';
LEFT_BRACKET : '{';
RIGHT_BRACKET : '}';
COMMA : ',';
FINAL : 'final';
STATE : ([a-zA-Z0-9] | '_' | '-' | '\'' | '<' | '>' )+;
HASH : '#';
QUOTE : '"';
/* Characters to be ignored */
WS : [ \r\t\n]+ -> skip ;
ANYCHAR : .;
lib/parser/DFALexer.py
View file @
a1b80779
# Generated from lib/pars
ing
/DFA.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/DFA.g4 by ANTLR 4.9.2
from
antlr4
import
*
from
io
import
StringIO
import
sys
...
...
@@ -11,30 +11,33 @@ else:
def
serializedATN
():
with
StringIO
()
as
buf
:
buf
.
write
(
"
\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\
17
"
)
buf
.
write
(
"
F
\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7
"
)
buf
.
write
(
"
\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\
20
"
)
buf
.
write
(
"
M
\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7
"
)
buf
.
write
(
"
\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t\13\4\f\t\f\4\r\t\r\4\16
"
)
buf
.
write
(
"
\t\16\3\2\3\2\3\2\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3\5\3\6
"
)
buf
.
write
(
"
\3\6\3\7\3\7\3\b\3\b\3\t\3\t\3\t\3\t\3\t\3\t\3\n\6\n\66
"
)
buf
.
write
(
"
\n\n\r\n\16\n\67\3\13\3\13\3\f\3\f\3\r\6\r
?
\n\r\r\r\16
"
)
buf
.
write
(
"
\r
@
\3\r\3\r\3\16\3\16\2\2\17\3\3\5\4\7\5\t\6\13\7\r\b
"
)
buf
.
write
(
"
\17\t\21\n\23\13\25\f\27\r\31\16\33\17\3\2\4\n\2
))//
\62
"
)
buf
.
write
(
";>>@@C
\\
aac|
\5\2\13\f\17\17\"\"\2
G
\2\3\3\2\2\2\2\5\3\2
"
)
buf
.
write
(
"
\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2
"
)
buf
.
write
(
"
\2\17\3\2\2\2\2\21\3\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2
"
)
buf
.
write
(
"
\27\3\2\2\2\2\31\3\2\2\2\2\33\3\2\2\2\3\35\3\2\2\2\5\"
"
)
buf
.
write
(
"
\3\2\2\2\7
$
\3\2\2\2\t
&
\3\2\2\2\13
(
\3\2\2\2\r
*
\3\2\2\2
"
)
buf
.
write
(
"
\17
,
\3\2\2\2\21
.
\3\2\2\2\23\65\3\2\2\2\25
9
\3\2\2\2\27
"
)
buf
.
write
(
";
\3\2\2\2\31
>
\3\2\2\2\33
D
\3\2\2\2\35\36\7
k
\2\2\36\37\7
"
)
buf
.
write
(
"p
\2\2\37
\7
k
\2\2
!
\7
v
\2\2
!
\4\3\2\2\2\"
#
\7
?
\2\2
#
\6\3\2
"
)
buf
.
write
(
"
\2\2
$%
\7
*
\2\2
%
\b\3\2\2\2
&
\'\7
+
\2\2\'\n\3\2\2\2
()
\7
}
\2
"
)
buf
.
write
(
"
\2
)
\f\3\2\2\2
*+
\7\177\2\2
+
\16\3\2\2\2
,-
\7
.
\2\2
-
\20\3\2
"
)
buf
.
write
(
"
\2\2
./
\7
h
\2\2
/
\60\7
k
\2\2\60\61\7
p
\2\2\61\62\7
c
\2\2\62
"
)
buf
.
write
(
"
\63\7
n
\2\2\63\22\3\2\2\2\64\66\t\2\2\2\65\64\3\2\2\2\66
"
)
buf
.
write
(
"
\67\3\2\2\2\67\65\3\2\2\2\67
8
\3\2\2\2
8
\24\3\2\2\2
9:
\7
"
)
buf
.
write
(
"%
\2\2
:
\26\3\2\2\2
;<
\7
$
\2\2
<
\30\3\2\2\2
=?
\t\3\2\2
>=
\3\2
"
)
buf
.
write
(
"
\2\2
?@
\3\2\2\2
@>
\3\2\2\2
@A
\3\2\2\2
AB
\3\2\2\2
BC
\b\r\2\2
"
)
buf
.
write
(
"C
\32\3\2\2\2
DE
\13\2\2\2
E
\34\3\2\2\2\6\2\65\67
@
\3\b\2\2
"
)
buf
.
write
(
"
\t\16\4\17\t\17\3\2\3\2\3\2\3\2\3\2\3\3\3\3\3\4\3\4\3
"
)
buf
.
write
(
"
\5\3\5\3\6\3\6\3\7\3\7\3\b\3\b\3\t\3\t\3\t\3\t\3\t\3\t
"
)
buf
.
write
(
"
\3\n\3\n\3\n\5\n
:
\n\n\3\13\6\13
=
\n\13\r\13\16\13
>
\3\f
"
)
buf
.
write
(
"
\3\f\3\r\3\r\3\16\6\16
F
\n\16\r\16\16\16
G
\3\16\3\16\3\17
"
)
buf
.
write
(
"
\3\17\2\2\20\3\3\5\4\7\5\t\6\13\7\r\b\17\t\21\n\23\13
"
)
buf
.
write
(
"
\25\f\27\r\31\16\33\17\35\20\3\2\4\n\2
))//
\62
;>>@@C
\\
"
)
buf
.
write
(
"aac|
\5\2\13\f\17\17\"\"\2
O
\2\3\3\2\2\2\2\5\3\2\2\2\2\7
"
)
buf
.
write
(
"
\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2
"
)
buf
.
write
(
"
\2\2\2\21\3\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2\27\3\2\2
"
)
buf
.
write
(
"
\2\2\31\3\2\2\2\2\33\3\2\2\2\2\35\3\2\2\2\3\37\3\2\2\2
"
)
buf
.
write
(
"
\5
$
\3\2\2\2\7
&
\3\2\2\2\t
(
\3\2\2\2\13
*
\3\2\2\2\r
,
\3\2\2
"
)
buf
.
write
(
"
\2\17
.
\3\2\2\2\21\60\3\2\2\2\23
9
\3\2\2\2\25
<
\3\2\2\2\27
"
)
buf
.
write
(
"@
\3\2\2\2\31
B
\3\2\2\2\33
E
\3\2\2\2\35
K
\3\2\2\2\37
\7
k
\2
"
)
buf
.
write
(
"
\2
!
\7
p
\2\2
!
\"\7
k
\2\2\"
#
\7
v
\2\2
#
\4\3\2\2\2
$%
\7
?
\2\2
%
\6
"
)
buf
.
write
(
"
\3\2\2\2
&
\'\7
*
\2\2\'\b\3\2\2\2
()
\7
+
\2\2
)
\n\3\2\2\2
*+
\7
"
)
buf
.
write
(
"}
\2\2
+
\f\3\2\2\2
,-
\7\177\2\2
-
\16\3\2\2\2
./
\7
.
\2\2
/
\20
"
)
buf
.
write
(
"
\3\2\2\2\60\61\7
h
\2\2\61\62\7
k
\2\2\62\63\7
p
\2\2\63\64
"
)
buf
.
write
(
"
\7
c
\2\2\64\65\7
n
\2\2\65\22\3\2\2\2\66
:
\7\u03b7\2\2\67
"
)
buf
.
write
(
"8
\7
^
\2\2
8:
\7
g
\2\2
9
\66\3\2\2\2
9
\67\3\2\2\2
:
\24\3\2\2\2
"
)
buf
.
write
(
";=
\t\2\2\2
<;
\3\2\2\2
=>
\3\2\2\2
><
\3\2\2\2
>?
\3\2\2\2
?
\26
"
)
buf
.
write
(
"
\3\2\2\2
@A
\7
%
\2\2
A
\30\3\2\2\2
BC
\7
$
\2\2
C
\32\3\2\2\2
DF
\t
"
)
buf
.
write
(
"
\3\2\2
ED
\3\2\2\2
FG
\3\2\2\2
GE
\3\2\2\2
GH
\3\2\2\2
HI
\3\2\2
"
)
buf
.
write
(
"
\2
IJ
\b\16\2\2
J
\34\3\2\2\2
KL
\13\2\2\2
L
\36\3\2\2\2\7\2
9"
)
buf
.
write
(
"<>G
\3\b\2\2
"
)
return
buf
.
getvalue
()
...
...
@@ -52,11 +55,12 @@ class DFALexer(Lexer):
RIGHT_BRACKET
=
6
COMMA
=
7
FINAL
=
8
STATE
=
9
HASH
=
10
QUOTE
=
11
WS
=
12
ANYCHAR
=
13
EPSILON
=
9
STATE
=
10
HASH
=
11
QUOTE
=
12
WS
=
13
ANYCHAR
=
14
channelNames
=
[
u
"DEFAULT_TOKEN_CHANNEL"
,
u
"HIDDEN"
]
...
...
@@ -68,12 +72,12 @@ class DFALexer(Lexer):
symbolicNames
=
[
"<INVALID>"
,
"INIT"
,
"EQUALS"
,
"LEFT_PARENTHESIS"
,
"RIGHT_PARENTHESIS"
,
"LEFT_BRACKET"
,
"RIGHT_BRACKET"
,
"COMMA"
,
"FINAL"
,
"STATE"
,
"HASH"
,
"QUOTE"
,
"WS"
,
"ANYCHAR"
]
"RIGHT_BRACKET"
,
"COMMA"
,
"FINAL"
,
"EPSILON"
,
"STATE"
,
"HASH"
,
"QUOTE"
,
"WS"
,
"ANYCHAR"
]
ruleNames
=
[
"INIT"
,
"EQUALS"
,
"LEFT_PARENTHESIS"
,
"RIGHT_PARENTHESIS"
,
"LEFT_BRACKET"
,
"RIGHT_BRACKET"
,
"COMMA"
,
"FINAL"
,
"
STATE
"
,
"HASH"
,
"QUOTE"
,
"WS"
,
"ANYCHAR"
]
"LEFT_BRACKET"
,
"RIGHT_BRACKET"
,
"COMMA"
,
"FINAL"
,
"
EPSILON
"
,
"STATE"
,
"HASH"
,
"QUOTE"
,
"WS"
,
"ANYCHAR"
]
grammarFileName
=
"DFA.g4"
...
...
lib/parser/DFAListener.py
View file @
a1b80779
# Generated from lib/pars
ing
/DFA.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/DFA.g4 by ANTLR 4.9.2
from
antlr4
import
*
if
__name__
is
not
None
and
"."
in
__name__
:
from
.DFAParser
import
DFAParser
...
...
@@ -8,6 +8,15 @@ else:
# This class defines a complete listener for a parse tree produced by DFAParser.
class
DFAListener
(
ParseTreeListener
):
# Enter a parse tree produced by DFAParser#production.
def
enterProduction
(
self
,
ctx
:
DFAParser
.
ProductionContext
):
pass
# Exit a parse tree produced by DFAParser#production.
def
exitProduction
(
self
,
ctx
:
DFAParser
.
ProductionContext
):
pass
# Enter a parse tree produced by DFAParser#start.
def
enterStart
(
self
,
ctx
:
DFAParser
.
StartContext
):
pass
...
...
@@ -26,12 +35,12 @@ class DFAListener(ParseTreeListener):
pass
# Enter a parse tree produced by DFAParser#
production
.
def
enter
Production
(
self
,
ctx
:
DFAParser
.
Production
Context
):
# Enter a parse tree produced by DFAParser#
stateset
.
def
enter
Stateset
(
self
,
ctx
:
DFAParser
.
Stateset
Context
):
pass
# Exit a parse tree produced by DFAParser#
production
.
def
exit
Production
(
self
,
ctx
:
DFAParser
.
Production
Context
):
# Exit a parse tree produced by DFAParser#
stateset
.
def
exit
Stateset
(
self
,
ctx
:
DFAParser
.
Stateset
Context
):
pass
...
...
lib/parser/DFAParser.py
View file @
a1b80779
This diff is collapsed.
Click to expand it.
lib/parser/FA.g4
0 → 100644
View file @
a1b80779
grammar FA;
start: init production* final comment;
init: (INIT EQUALS statename | );
stateset: LEFT_BRACKET (statename (COMMA statename)* | ) RIGHT_BRACKET;
final: FINAL EQUALS stateset;
statename: (INIT | FINAL | STATE | QUOTE anyvalue+ QUOTE);
comment: (HASH (anyvalue | QUOTE)* | );
anyvalue: INIT | EQUALS | LEFT_PARENTHESIS | RIGHT_PARENTHESIS | LEFT_BRACKET | RIGHT_BRACKET | COMMA | FINAL | EPSILON | STATE | HASH | ANYCHAR;
INIT : 'init';
EQUALS : '=';
LEFT_PARENTHESIS : '(';
RIGHT_PARENTHESIS : ')';
LEFT_BRACKET : '{';
RIGHT_BRACKET : '}';
COMMA : ',';
FINAL : 'final';
EPSILON : ('ε' | '\\''e');
STATE : ([a-zA-Z0-9] | '_' | '-' | '\'' | '<' | '>' )+;
HASH : '#';
QUOTE : '"';
/* Characters to be ignored */
WS : [ \r\t\n]+ -> skip ;
ANYCHAR : .;
/*
vim: ft=antlr
*/
lib/parser/NFA.g4
View file @
a1b80779
grammar NFA;
/* Parser Rules */
start: init production* final comment;
init: (INIT EQUALS statename | );
import FA;
production: LEFT_PARENTHESIS statename COMMA (statename | EPSILON) RIGHT_PARENTHESIS EQUALS stateset;
stateset: LEFT_BRACKET (statename (COMMA statename)* | ) RIGHT_BRACKET;
final: FINAL EQUALS stateset;
statename: (STATE | INIT | FINAL | QUOTE anyvalue+ QUOTE);
comment: (HASH anyvalue* | );
anyvalue: INIT | EQUALS | LEFT_PARENTHESIS | RIGHT_PARENTHESIS | LEFT_BRACKET | RIGHT_BRACKET | COMMA | FINAL | STATE | ANYCHAR | HASH;
/* Lexer Rules */
/* Tokens */
INIT : 'init';
EQUALS : '=';
LEFT_PARENTHESIS : '(';
RIGHT_PARENTHESIS : ')';
LEFT_BRACKET : '{';
RIGHT_BRACKET : '}';
COMMA : ',';
FINAL : ('final');
EPSILON : ('ε' | '\\''e');
STATE : ([a-zA-Z0-9] | '_' | '-' | '\'' | '<' | '>' )+;
HASH : '#';
QUOTE : '"';
/* Characters to be ignored */
WS : [ \r\t\n]+ -> skip ;
ANYCHAR : .;
lib/parser/NFALexer.py
View file @
a1b80779
# Generated from lib/pars
ing
/NFA.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/NFA.g4 by ANTLR 4.9.2
from
antlr4
import
*
from
io
import
StringIO
import
sys
...
...
@@ -67,7 +67,8 @@ class NFALexer(Lexer):
modeNames
=
[
"DEFAULT_MODE"
]
literalNames
=
[
"<INVALID>"
,
"'init'"
,
"'='"
,
"'('"
,
"')'"
,
"'{'"
,
"'}'"
,
"','"
,
"'#'"
,
"'
\"
'"
]
"'init'"
,
"'='"
,
"'('"
,
"')'"
,
"'{'"
,
"'}'"
,
"','"
,
"'final'"
,
"'#'"
,
"'
\"
'"
]
symbolicNames
=
[
"<INVALID>"
,
"INIT"
,
"EQUALS"
,
"LEFT_PARENTHESIS"
,
"RIGHT_PARENTHESIS"
,
"LEFT_BRACKET"
,
...
...
lib/parser/NFAListener.py
View file @
a1b80779
# Generated from lib/pars
ing
/NFA.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/NFA.g4 by ANTLR 4.9.2
from
antlr4
import
*
if
__name__
is
not
None
and
"."
in
__name__
:
from
.NFAParser
import
NFAParser
...
...
@@ -8,6 +8,15 @@ else:
# This class defines a complete listener for a parse tree produced by NFAParser.
class
NFAListener
(
ParseTreeListener
):
# Enter a parse tree produced by NFAParser#production.
def
enterProduction
(
self
,
ctx
:
NFAParser
.
ProductionContext
):
pass
# Exit a parse tree produced by NFAParser#production.
def
exitProduction
(
self
,
ctx
:
NFAParser
.
ProductionContext
):
pass
# Enter a parse tree produced by NFAParser#start.
def
enterStart
(
self
,
ctx
:
NFAParser
.
StartContext
):
pass
...
...
@@ -26,15 +35,6 @@ class NFAListener(ParseTreeListener):
pass
# Enter a parse tree produced by NFAParser#production.
def
enterProduction
(
self
,
ctx
:
NFAParser
.
ProductionContext
):
pass
# Exit a parse tree produced by NFAParser#production.
def
exitProduction
(
self
,
ctx
:
NFAParser
.
ProductionContext
):
pass
# Enter a parse tree produced by NFAParser#stateset.
def
enterStateset
(
self
,
ctx
:
NFAParser
.
StatesetContext
):
pass
...
...
lib/parser/NFAParser.py
View file @
a1b80779
This diff is collapsed.
Click to expand it.
lib/parser/RegExLexer.py
View file @
a1b80779
# Generated from lib/pars
ing
/RegEx.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/RegEx.g4 by ANTLR 4.9.2
from
antlr4
import
*
from
io
import
StringIO
import
sys
...
...
lib/parser/RegExListener.py
View file @
a1b80779
# Generated from lib/pars
ing
/RegEx.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/RegEx.g4 by ANTLR 4.9.2
from
antlr4
import
*
if
__name__
is
not
None
and
"."
in
__name__
:
from
.RegExParser
import
RegExParser
...
...
lib/parser/RegExParser.py
View file @
a1b80779
# Generated from lib/pars
ing
/RegEx.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/RegEx.g4 by ANTLR 4.9.2
# encoding: utf-8
from
antlr4
import
*
from
io
import
StringIO
...
...
lib/parser/RegExVisitor.py
View file @
a1b80779
# Generated from lib/pars
ing
/RegEx.g4 by ANTLR 4.9.2
# Generated from lib/pars
er
/RegEx.g4 by ANTLR 4.9.2
from
antlr4
import
*
if
__name__
is
not
None
and
"."
in
__name__
:
from
.RegExParser
import
RegExParser
...
...
lib/parser/__init__.py
View file @
a1b80779
from
typing
import
List
,
Tuple
,
Union
,
Set
from
typing
import
List
,
Tuple
,
Union
,
Set
,
Any
from
copy
import
deepcopy
from
lib.common
import
State
,
Character
,
Eps
,
Terminal
,
Nonterminal
,
Emptyset
from
lib.reg
import
DFA
,
NFA
,
RegGrammar
...
...
@@ -29,18 +29,12 @@ class ParsingError(Exception):
# possible even when input formalism and given type don't match. This way it
# aborts on any parsing problem.
class
ErrorShouter
(
ErrorListener
):
def
syntaxError
(
self
,
recognizer
,
offendingSymbol
,
line
,
column
,
msg
,
e
):
def
syntaxError
(
self
,
recognizer
:
Any
,
offendingSymbol
:
Any
,
line
:
int
,
column
:
int
,
msg
:
str
,
e
:
Any
)
->
None
:
raise
Exception
(
f
"ERROR: when parsing line
{
line
}
column
{
column
}
:
{
msg
}
"
)
def
_anyvalue_attributes
(
parser
:
Union
[
DFAParser
,
NFAParser
,
RegExParser
,
CFGParser
])
->
List
:
return
[
func
for
func
in
dir
(
parser
.
AnyvalueContext
)
if
callable
(
getattr
(
parser
.
AnyvalueContext
,
func
))
and
not
func
.
startswith
(
"__"
)
and
func
.
isupper
()]
def
_names_to_str
(
collection
:
Union
[
Set
[
State
],
Set
[
Character
],
Set
[
Terminal
],
Set
[
Nonterminal
]])
->
str
:
return
"{"
+
','
.
join
(
set
(
map
(
lambda
x
:
x
.
name
,
collection
)))
+
"}"
...
...
@@ -66,6 +60,12 @@ def _rewrite_variant(variant: Union[Eps, Terminal,
return
variant
.
name
def
_unquote
(
txt
:
str
)
->
str
:
assert
txt
[
0
]
==
'"'
assert
txt
[
-
1
]
==
'"'
return
txt
[
1
:
-
1
]
def
dfa_to_str
(
dfa
:
DFA
,
full
:
bool
=
False
)
->
str
:
transition
=
""
for
key
,
dest_state
in
dfa
.
transition
.
items
():
...
...
@@ -133,7 +133,8 @@ def regex_to_str(reg: RegEx) -> str:
return
reg
.
expression
.
astprint
()
def
_common_parse
(
string
:
str
,
given_lexer
,
given_parser
,
given_builder
):
def
_common_parse
(
string
:
str
,
given_lexer
:
Any
,
given_parser
:
Any
,
given_builder
:
Any
)
->
Any
:
error_listener
=
ErrorShouter
()
chars
=
antlr4
.
InputStream
(
string
)
lexer
=
given_lexer
(
chars
)
...
...
@@ -223,33 +224,34 @@ def regex(string: str) -> RegEx:
raise
ParsingError
(
e
.
args
)
class
StateVisitor
:
def
visitStatename
(
self
,
ctx
)
->
str
:
if
ctx
.
STATE
():
return
str
(
ctx
.
STATE
())
elif
ctx
.
FINAL
():
return
"final"
elif
ctx
.
INIT
():
return
"init"
elif
ctx
.
QUOTE
():
name
=
""
i
=
0
while
ctx
.
anyvalue
(
i
)
is
not
None
:
for
attribute
in
self
.
anyvalue_attributes
:
value
=
getattr
(
ctx
.
anyvalue
(
i
),
attribute
)()
if
value
is
not
None
:
name
+=
str
(
value
)
i
+=
1
class
CommonVisitor
:
return
name
assert
False
,
"invalid context in visitStatename"
def
visitStatename
(
self
,
ctx
:
Any
)
->
str
:
if
ctx
.
QUOTE
():
return
_unquote
(
ctx
.
getText
())
return
ctx
.
getText
()
def
exitInit
(
self
,
ctx
:
Any
)
->
None
:
if
ctx
.
statename
()
is
not
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
statename
()))
self
.
init
=
state
self
.
states
.
add
(
state
)
def
exitFinal
(
self
,
ctx
:
Any
)
->
None
:
for
stctx
in
ctx
.
stateset
().
statename
():
state
=
State
(
self
.
visitStatename
(
stctx
))
self
.
states
.
add
(
state
)
self
.
final
.
add
(
state
)
def
exitComment
(
self
,
ctx
:
Any
)
->
None
:
pass
class
DFABuilder
(
DFAListener
,
StateVisitor
):
# anyvalue possibilities
anyvalue_attributes
=
_anyvalue_attributes
(
DFAParser
)
def
__init__
(
self
):
# The order of inheritance is important – we need the ‹exit*› functions from
# ‹CommonVisitor› to precede the empty versions from ‹DFAListener›
class
DFABuilder
(
CommonVisitor
,
DFAListener
):
def
__init__
(
self
)
->
None
:
self
.
states
=
set
()
self
.
characters
=
set
()
self
.
transition
=
{}
...
...
@@ -257,13 +259,7 @@ class DFABuilder(DFAListener, StateVisitor):
self
.
first_state
=
None
self
.
final
=
set
()
def
exitInit
(
self
,
ctx
):
if
ctx
.
statename
()
is
not
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
statename
()))
self
.
init
=
state
self
.
states
.
add
(
state
)
def
exitProduction
(
self
,
ctx
):
def
exitProduction
(
self
,
ctx
:
Any
)
->
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
statename
(
0
)))
character
=
Character
(
self
.
visitStatename
(
ctx
.
statename
(
1
)))
dest_state
=
State
(
self
.
visitStatename
(
ctx
.
statename
(
2
)))
...
...
@@ -279,23 +275,10 @@ class DFABuilder(DFAListener, StateVisitor):
if
self
.
first_state
is
None
:
self
.
first_state
=
state
def
exitFinal
(
self
,
ctx
):
i
=
0
while
ctx
.
statename
(
i
)
is
not
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
statename
(
i
)))
self
.
states
.
add
(
state
)
self
.
final
.
add
(
state
)
i
+=
1
# for future support of comments
def
exitComment
(
self
,
ctx
):
return
None
class
NFABuilder
(
CommonVisitor
,
NFAListener
):
class
NFABuilder
(
NFAListener
,
StateVisitor
):
anyvalue_attributes
=
_anyvalue_attributes
(
NFAParser
)
def
__init__
(
self
):
def
__init__
(
self
)
->
None
:
self
.
states
=
set
()
self
.
characters
=
set
()
self
.
transition
=
{}
...
...
@@ -304,23 +287,14 @@ class NFABuilder(NFAListener, StateVisitor):
self
.
final
=
set
()
self
.
efa
=
False
def
exitInit
(
self
,
ctx
):
if
ctx
.
statename
()
is
not
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
statename
()))
self
.
init
=
state
self
.
states
.
add
(
state
)
def
exitProduction
(
self
,
ctx
):
def
exitProduction
(
self
,
ctx
:
Any
)
->
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
statename
(
0
)))
self
.
states
.
add
(
state
)
dest_states
=
set
()
i
=
0
while
ctx
.
stateset
().
statename
(
i
)
is
not
None
:
dest_state
=
State
(
self
.
visitStatename
(
ctx
.
stateset
().
statename
(
i
)))
for
stctx
in
ctx
.
stateset
().
statename
():
dest_state
=
State
(
self
.
visitStatename
(
stctx
))
self
.
states
.
add
(
dest_state
)
dest_states
.
add
(
dest_state
)
i
+=
1
if
ctx
.
EPSILON
():
if
(
state
,
Eps
())
in
self
.
transition
:
...
...
@@ -340,30 +314,17 @@ class NFABuilder(NFAListener, StateVisitor):
if
self
.
first_state
is
None
:
self
.
first_state
=
state
def
exitFinal
(
self
,
ctx
)
->
None
:
i
=
0
while
ctx
.
stateset
().
statename
(
i
)
is
not
None
:
state
=
State
(
self
.
visitStatename
(
ctx
.
stateset
().
statename
(
i
)))
self
.
states
.
add
(
state
)
self
.
final
.
add
(
state
)
i
+=
1
# for future support of comments
def
exitComment
(
self
,
ctx
)
->
None
:
return
None
class
RegExBuilder
(
RegExVisitor
):
anyvalue_attributes
=
_anyvalue_attributes
(
RegExParser
)
def
__init__
(
self
):
def
__init__
(
self
)
->
None
:
self
.
characters
:
Set
[
Character
]
=
set
()
self
.
expression
:
AST
def
visitStart
(
self
,
ctx
)
:
def
visitStart
(
self
,
ctx
:
Any
)
->
None
:
self
.
expression
=
self
.
visitExpr
(
ctx
.
expr
())
def
visitExpr
(
self
,
ctx
)
:
def
visitExpr
(
self
,
ctx
:
Any
)
->
None
:
# Binary operation: union or explicit concatenation
if
ctx
.
UNION
()
or
ctx
.
CONCAT
():
op
=
Bin
.
Union
if
ctx
.
UNION
()
is
not
None
else
Bin
.
Concat
...
...
@@ -376,7 +337,7 @@ class RegExBuilder(RegExVisitor):
self
.
visitConcatenable
(
x
),
ctx
.
concatenated
()))
return
self
.
implicit_concat
(
expressions
)
def
visitConcatenable
(
self
,
ctx
)
:
def
visitConcatenable
(
self
,
ctx
:
Any
)
->
None
:
if
ctx
.
symbol
():
return
self
.
visitSymbol
(
ctx
.
symbol
())
...
...
@@ -390,7 +351,7 @@ class RegExBuilder(RegExVisitor):
elif
ctx
.
parentheses
():
return
self
.
visitParentheses
(
ctx
.
parentheses
())
def
visitSymbol
(
self
,
ctx
)
:
def
visitSymbol
(
self
,
ctx
:
Any
)
->
None
:
if
ctx
.
ALPHABET
():
self
.
characters
.
add
(
Character
(
str
(
ctx
.
ALPHABET
())))
return
CharNode
(
Character
(
str
(
ctx
.
ALPHABET
())))
...
...
@@ -402,22 +363,14 @@ class RegExBuilder(RegExVisitor):
return
CharNode
(
Emptyset
())
elif
ctx
.
QUOTE
():
name
=
""
i
=
0
while
ctx
.
anyvalue
(
i
)
is
not
None
:
for
attribute
in
self
.
anyvalue_attributes
:
value
=
getattr
(
ctx
.
anyvalue
(
i
),
attribute
)()
if
value
is
not
None
:
name
+=
str
(
value
)
i
+=
1
self
.
characters
.
add
(
Character
(
name
))
return
CharNode
(
Character
(
name
))
def
visitParentheses
(
self
,
ctx
):
char
=
Character
(
_unquote
(
self
.
getText
()))
self
.
characters
.
add
(
char
)
return
CharNode
(
char
)
def
visitParentheses
(
self
,
ctx
:
Any
)
->
None
:
return
self
.
visitExpr
(
ctx
.
expr
())
def
visitIterable
(
self
,
ctx
,
positive
=
False
)
: