Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
1feddd15
Commit
1feddd15
authored
Jun 08, 2021
by
Vladimír Štill
Browse files
lib: Drop .parser for the parser imports
parent
fa0c432b
Changes
3
Hide whitespace changes
Inline
Side-by-side
lib/checker.py
View file @
1feddd15
from
typing
import
Tuple
,
Union
from
lib
import
reg
from
lib.parsing
.parser
import
Parser
,
ParsingError
from
lib.parsing
import
Parser
,
ParsingError
# support functions common for both fja_checker and web_checker
...
...
lib/parsing/__init__.py
View file @
1feddd15
from
typing
import
List
,
Dict
,
Tuple
,
Optional
,
Union
,
Set
,
TypeVar
from
copy
import
deepcopy
from
lib.common
import
State
,
Character
,
Eps
,
Terminal
,
Nonterminal
,
Emptyset
from
lib.reg
import
DFA
,
NFA
,
RegGrammar
from
lib.grammars_cfg
import
CFG
from
lib.regex
import
RegEx
,
AST
,
Bin
,
Iter
,
BinOp
,
IterOp
,
CharNode
import
antlr4
# type: ignore
from
antlr4.error.ErrorListener
import
ErrorListener
from
lib.parsing.DFALexer
import
DFALexer
from
lib.parsing.DFAParser
import
DFAParser
from
lib.parsing.DFAListener
import
DFAListener
from
lib.parsing.NFALexer
import
NFALexer
from
lib.parsing.NFAParser
import
NFAParser
from
lib.parsing.NFAListener
import
NFAListener
from
lib.parsing.RegExLexer
import
RegExLexer
from
lib.parsing.RegExParser
import
RegExParser
from
lib.parsing.RegExVisitor
import
RegExVisitor
from
lib.parsing.CFGLexer
import
CFGLexer
from
lib.parsing.CFGParser
import
CFGParser
from
lib.parsing.CFGListener
import
CFGListener
class ParsingError(Exception):
    """Raised when the textual description of a formalism cannot be parsed.

    ``args`` is either a single message string or an iterable of message
    parts (typically the ``args`` tuple of the exception being wrapped).
    It is exposed through the standard ``Exception.args`` attribute.
    """

    def __init__(self, args):
        # Exception.args converts whatever is assigned to it into a tuple, so a
        # bare string would be split into single characters. Wrap it first so
        # that the message survives as one element.
        if isinstance(args, str):
            args = (args,)
        self.args = args
# This is needed because ANTLR is too lenient: it recovers and parses whatever it can
# even when the input formalism and the given type don't match. This way parsing aborts on any syntax problem.
class ErrorShouter(ErrorListener):
    """Error listener that turns every reported syntax error into an exception."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Raise ``Exception`` carrying the line, column and message of the error.

        ``recognizer``, ``offendingSymbol`` and ``e`` are accepted to match the
        listener signature but are not used.
        """
        details = (line, column, msg)
        raise Exception("ERROR: when parsing line %d column %d: %s\n" % details)
def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """Return the names of the upper-case callables of ``parser.AnyvalueContext``.

    Names are taken from ``dir()`` (so they come back in sorted order); names
    starting with a double underscore and names that are not fully upper-case
    are left out.
    """
    context = parser.AnyvalueContext
    selected = []
    for candidate in dir(context):
        if candidate.startswith("__") or not candidate.isupper():
            continue
        if callable(getattr(context, candidate)):
            selected.append(candidate)
    return selected
class Parser:
    """Render formalisms as text and build them back from text.

    The ``*_to_str`` methods serialise DFAs, NFAs, grammars and regular
    expressions.  The ``str_to_*`` methods run the matching ANTLR lexer and
    parser over a string, build the corresponding object and report every
    failure as ``ParsingError``.
    """

    def __init__(self):
        """Nothing to initialise; no attributes are set."""
        pass

    def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
        """Return the distinct ``name`` values of ``collection`` as ``{a,b,c}``.

        Names are sorted so that the same input always yields the same text
        (plain set iteration order of strings differs between processes).
        """
        return "{" + ','.join(sorted({item.name for item in collection})) + "}"

    def _ordered_nonterminals(self, grammar) -> List[Nonterminal]:
        """Return the initial nonterminal followed by the others sorted by name."""
        remaining = set(grammar.nonterminals) - {grammar.init}
        return [grammar.init] + sorted(remaining, key=lambda nonterminal: nonterminal.name)

    def _grammar_to_str(self, grammar, full: bool) -> str:
        """Shared renderer for regular and context-free grammars."""
        nonterminals = self._ordered_nonterminals(grammar)
        rules = self.rules_to_str(grammar.rules, nonterminals)
        if not full:
            return rules

        # full - verbose description of the grammar - only for development, dismiss later
        nonterminals_names = self.names_to_str(grammar.nonterminals)
        terminals = self.names_to_str(grammar.terminals)
        return f"Grammar: ({nonterminals_names}, {terminals}, P, {grammar.init.name})\n{rules}"

    def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
        """Render a regular grammar; with ``full`` prepend the ``Grammar: (...)`` header."""
        return self._grammar_to_str(reg, full)

    def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
        """Render a context-free grammar; with ``full`` prepend the ``Grammar: (...)`` header."""
        return self._grammar_to_str(gra, full)

    def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
        """Render one ``N -> alt | alt`` line per nonterminal that has rules.

        Lines follow the order of ``nonterminals``; nonterminals missing from
        ``rules`` are skipped.  Alternatives are de-duplicated and sorted.
        Returns an empty string when no line is produced.
        """
        lines = []
        for nonterminal in nonterminals:
            if nonterminal not in rules:
                continue
            alternatives = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
            lines.append(f"{nonterminal.name} -> {' | '.join(alternatives)}")
        return "\n".join(lines)

    def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
        """Render one right-hand side: the concatenated names of a tuple, else ``variant.name``."""
        if isinstance(variant, tuple):
            return ''.join(symbol.name for symbol in variant)
        return variant.name

    def dfa_to_str(self, dfa: DFA, full: bool = False) -> str:
        """Render a DFA as ``init=... (state,char)=state ... final={...}``.

        With ``full`` a ``DFA = (...)`` header line is emitted and the
        transitions follow on the next line.
        """
        transition = "".join(
            f"({state.name},{character.name})={dest_state.name} "
            for (state, character), dest_state in dfa.transition.items()
        )
        init = f"init={dfa.init.name} "
        final = f"final={self.names_to_str(dfa.final)}"

        # full - verbose description of DFA - only for development, dismiss later
        if full:
            return f"DFA = ({self.names_to_str(dfa.states)}, {self.names_to_str(dfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init}{transition}{final}"

    def nfa_to_str(self, nfa: NFA, full: bool = False) -> str:
        """Render an NFA like ``dfa_to_str``, with a set of target states per transition."""
        transition = "".join(
            f"({state.name},{character.name})={self.names_to_str(dest_states)} "
            for (state, character), dest_states in nfa.transition.items()
        )
        init = f"init={nfa.init.name} "
        final = f"final={self.names_to_str(nfa.final)}"

        if full:
            return f"NFA = ({self.names_to_str(nfa.states)}, {self.names_to_str(nfa.characters)}, " \
                   f"d, {init}, {final})\n{transition}"

        return f"{init}{transition}{final}"

    def regex_to_str(self, reg: RegEx) -> str:
        """Return the textual form produced by the expression's ``astprint()``."""
        return reg.expression.astprint()

    def _parse_tree(self, string: str, given_lexer, given_parser):
        """Lex and parse ``string`` and return the tree of the ``start`` rule.

        An ``ErrorShouter`` is attached to both the lexer and the parser so
        that any syntax error raises instead of being recovered from.
        """
        error_listener = ErrorShouter()
        lexer = given_lexer(antlr4.InputStream(string))
        lexer.addErrorListener(error_listener)
        parser = given_parser(antlr4.CommonTokenStream(lexer))
        parser.addErrorListener(error_listener)
        return parser.start()

    def common_parse(self, string: str, given_lexer, given_parser, given_builder):
        """Parse ``string`` and walk the tree with a fresh ``given_builder()``.

        Returns the builder (a parse-tree listener) after the walk.
        """
        tree = self._parse_tree(string, given_lexer, given_parser)
        builder = given_builder()
        antlr4.ParseTreeWalker().walk(builder, tree)
        return builder

    def _ensure_init(self, builder) -> None:
        """Fall back to the first seen state when no initial state was given.

        Raises ``ParsingError`` when the automaton has no state at all.
        """
        if builder.init is None:
            builder.init = builder.first_state
        if builder.init is None:
            raise ParsingError("Automat musí obsahovat alespoň jeden stav.")

    def str_to_cfg(self, string: str) -> CFG:
        """Parse ``string`` as a context-free grammar; raise ``ParsingError`` on failure."""
        try:
            builder = self.common_parse(string, CFGLexer, CFGParser, CFGBuilder)
            return CFG(builder.nonterminals, builder.terminals, builder.rules, builder.init)
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_reggrammar(self, string: str) -> RegGrammar:
        """Parse ``string`` as a CFG and convert it with ``RegGrammar.from_cfg``."""
        try:
            cfg = self.str_to_cfg(string)
            return RegGrammar.from_cfg(cfg)
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_dfa(self, string: str) -> DFA:
        """Parse ``string`` as a DFA; raise ``ParsingError`` on failure."""
        try:
            builder = self.common_parse(string, DFALexer, DFAParser, DFABuilder)
            self._ensure_init(builder)
            return DFA(builder.states, builder.characters, builder.transition, builder.init, builder.final)
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_nfa(self, string: str) -> NFA:
        """Parse ``string`` as an NFA; raise ``ParsingError`` on failure."""
        try:
            builder = self.common_parse(string, NFALexer, NFAParser, NFABuilder)
            self._ensure_init(builder)
            return NFA(builder.states, builder.characters, builder.transition, builder.init, builder.final)
        except Exception as e:
            raise ParsingError(e.args) from e

    def str_to_regex(self, string: str) -> RegEx:
        """Parse ``string`` as a regular expression; raise ``ParsingError`` on failure.

        Unlike the other formalisms the tree is processed by a visitor
        (``RegExBuilder.visitStart``) rather than by a listener walk.
        """
        try:
            tree = self._parse_tree(string, RegExLexer, RegExParser)
            ast = RegExBuilder()
            ast.visitStart(tree)
            return RegEx(ast.characters, ast.expression)
        except Exception as e:
            raise ParsingError(e.args) from e
class DFABuilder(DFAListener):
    """Parse-tree listener that gathers the components of a DFA.

    After the walk the instance exposes ``states``, ``characters``,
    ``transition`` (``(state, character) -> state``), ``init``, ``final`` and
    ``first_state`` (source state of the first production seen).
    """

    # anyvalue possibilities: names of the upper-case callables of
    # DFAParser.AnyvalueContext, used to read the pieces of a quoted name
    anyvalue_attributes = anyvalue_attributes(DFAParser)

    def __init__(self):
        self.init = None
        self.first_state = None
        self.states = set()
        self.characters = set()
        self.final = set()
        self.transition = {}

    @staticmethod
    def _children(getter):
        """Yield ``getter(0)``, ``getter(1)``, ... until ``None`` is returned."""
        index = 0
        while True:
            child = getter(index)
            if child is None:
                return
            yield child
            index += 1

    def exitInit(self, ctx):
        """Record the explicitly given initial state, if there is one."""
        name_ctx = ctx.statename()
        if name_ctx is None:
            return
        initial = State(self.visitStatename(name_ctx))
        self.init = initial
        self.states.add(initial)

    def visitStatename(self, ctx) -> str:
        """Return the name as text, either a plain ``STATE`` token or a quoted name.

        A quoted name is the concatenation of every non-``None`` token found on
        its ``anyvalue`` children.  Returns ``None`` when neither form is present.
        """
        if ctx.STATE():
            return str(ctx.STATE())
        if not ctx.QUOTE():
            return None
        pieces = []
        for part in self._children(ctx.anyvalue):
            for attribute in self.anyvalue_attributes:
                token = getattr(part, attribute)()
                if token is not None:
                    pieces.append(str(token))
        return "".join(pieces)

    def exitProduction(self, ctx):
        """Record one transition ``(state, character) -> dest_state``.

        A repeated ``(state, character)`` pair prints a warning and the later
        transition overwrites the earlier one.
        """
        source = State(self.visitStatename(ctx.statename(0)))
        symbol = Character(self.visitStatename(ctx.statename(1)))
        target = State(self.visitStatename(ctx.statename(2)))

        self.states.update((source, target))
        self.characters.add(symbol)

        key = (source, symbol)
        if key in self.transition:
            print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, {symbol.name}).")
        self.transition[key] = target

        if self.first_state is None:
            self.first_state = source

    def exitFinal(self, ctx):
        """Record every listed state as a state and as a final state."""
        for name_ctx in self._children(ctx.statename):
            accepting = State(self.visitStatename(name_ctx))
            self.states.add(accepting)
            self.final.add(accepting)

    # for future support of comments
    def exitComment(self, ctx):
        return None
class NFABuilder(NFAListener):
    """Parse-tree listener that gathers the components of an NFA.

    After the walk the instance exposes ``states``, ``characters``,
    ``transition`` (``(state, character or Eps()) -> set of states``), ``init``,
    ``final``, ``first_state`` (source state of the first production seen) and
    ``efa`` (set to ``True`` once an epsilon transition has been recorded).
    """

    # names of the upper-case callables of NFAParser.AnyvalueContext, used to
    # read the pieces of a quoted name
    anyvalue_attributes = anyvalue_attributes(NFAParser)

    def __init__(self):
        self.init = None
        self.first_state = None
        self.efa = False
        self.states = set()
        self.characters = set()
        self.final = set()
        self.transition = {}

    @staticmethod
    def _children(getter):
        """Yield ``getter(0)``, ``getter(1)``, ... until ``None`` is returned."""
        index = 0
        while True:
            child = getter(index)
            if child is None:
                return
            yield child
            index += 1

    def exitInit(self, ctx):
        """Record the explicitly given initial state, if there is one."""
        name_ctx = ctx.statename()
        if name_ctx is None:
            return
        initial = State(self.visitStatename(name_ctx))
        self.init = initial
        self.states.add(initial)

    def visitStatename(self, ctx) -> str:
        """Return the name as text, either a plain ``STATE`` token or a quoted name.

        A quoted name is the concatenation of every non-``None`` token found on
        its ``anyvalue`` children.  Returns ``None`` when neither form is present.
        """
        if ctx.STATE():
            return str(ctx.STATE())
        if not ctx.QUOTE():
            return None
        pieces = []
        for part in self._children(ctx.anyvalue):
            for attribute in self.anyvalue_attributes:
                token = getattr(part, attribute)()
                if token is not None:
                    pieces.append(str(token))
        return "".join(pieces)

    def exitProduction(self, ctx):
        """Record one transition from a state to a set of destination states.

        The label is ``Eps()`` when the production has an ``EPSILON`` token,
        otherwise the character named by ``statename(1)``.  A repeated
        ``(state, label)`` pair prints a warning and is overwritten.
        """
        source = State(self.visitStatename(ctx.statename(0)))
        self.states.add(source)

        targets = set()
        for name_ctx in self._children(ctx.stateset().statename):
            target = State(self.visitStatename(name_ctx))
            self.states.add(target)
            targets.add(target)

        if ctx.EPSILON():
            key = (source, Eps())
            if key in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, ε).")
            self.transition[key] = targets
            self.efa = True
        else:
            symbol = Character(self.visitStatename(ctx.statename(1)))
            self.characters.add(symbol)
            key = (source, symbol)
            if key in self.transition:
                print(f"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici ({source.name}, {symbol.name}).")
            self.transition[key] = targets

        if self.first_state is None:
            self.first_state = source

    def exitFinal(self, ctx) -> None:
        """Record every state of the listed state set as a state and as final."""
        for name_ctx in self._children(ctx.stateset().statename):
            accepting = State(self.visitStatename(name_ctx))
            self.states.add(accepting)
            self.final.add(accepting)

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
class RegExBuilder(RegExVisitor):
    """Visitor that turns a regular-expression parse tree into an AST.

    ``visitStart`` stores the resulting tree in ``expression``; every
    character met on the way is collected in ``characters``.
    """

    # names of the upper-case callables of RegExParser.AnyvalueContext, used
    # to read the pieces of a quoted symbol
    anyvalue_attributes = anyvalue_attributes(RegExParser)

    def __init__(self):
        self.characters: Set[Character] = set()
        # assigned by visitStart
        self.expression: AST

    def visitStart(self, ctx):
        """Build the AST of the whole expression and keep it in ``expression``."""
        self.expression = self.visitExpr(ctx.expr())

    def visitExpr(self, ctx):
        """Return the AST of one expression node."""
        if not (ctx.UNION() or ctx.CONCAT()):
            # Implicit concatenation of (iterated) symbols or expressions in parentheses
            parts = [self.visitConcatenable(item) for item in ctx.concatenated()]
            return self.implicit_concat(parts)

        # Binary operation: union or explicit concatenation
        if ctx.UNION() is not None:
            operator = Bin.Union
        else:
            operator = Bin.Concat
        left = self.visitExpr(ctx.expr(0))
        right = self.visitExpr(ctx.expr(1))
        return BinOp(left, operator, right)

    def visitConcatenable(self, ctx):
        """Return the AST of a symbol, an iterated item or a parenthesised expression.

        Returns ``None`` when the node matches none of these, including an
        iterable that has neither an ``ITER`` nor a ``POS_ITER`` token.
        """
        if ctx.symbol():
            return self.visitSymbol(ctx.symbol())
        if ctx.iterable():
            if ctx.ITER():
                return self.visitIterable(ctx.iterable())
            if ctx.POS_ITER():
                return self.visitIterable(ctx.iterable(), True)
            return None
        if ctx.parentheses():
            return self.visitParentheses(ctx.parentheses())
        return None

    def visitSymbol(self, ctx):
        """Return the ``CharNode`` of a symbol and register real characters.

        Alphabet and quoted symbols are added to ``characters``; epsilon and
        the empty set are not.  Returns ``None`` for any other node.
        """
        if ctx.ALPHABET():
            text = str(ctx.ALPHABET())
            self.characters.add(Character(text))
            return CharNode(Character(text))
        if ctx.EPSILON():
            return CharNode(Eps())
        if ctx.EMPTYSET():
            return CharNode(Emptyset())
        if not ctx.QUOTE():
            return None

        # quoted symbol: glue together every non-None token of its anyvalue children
        pieces = []
        index = 0
        while ctx.anyvalue(index) is not None:
            part = ctx.anyvalue(index)
            for attribute in self.anyvalue_attributes:
                token = getattr(part, attribute)()
                if token is not None:
                    pieces.append(str(token))
            index += 1
        text = "".join(pieces)
        self.characters.add(Character(text))
        return CharNode(Character(text))

    def visitParentheses(self, ctx):
        """Return the AST of the expression inside the parentheses."""
        return self.visitExpr(ctx.expr())

    def visitIterable(self, ctx, positive=False):
        """Wrap the iterated symbol or parenthesised expression in an ``IterOp``.

        ``positive`` selects ``Iter.Positive`` instead of ``Iter.Iteration``.
        """
        if ctx.symbol():
            operand = self.visitSymbol(ctx.symbol())
        elif ctx.parentheses():
            operand = self.visitParentheses(ctx.parentheses())
        kind = Iter.Positive if positive else Iter.Iteration
        return IterOp(operand, kind)

    def implicit_concat(self, to_concat):
        """Fold the list into a left-nested chain of ``Bin.Concat`` operations."""
        result = to_concat[0]
        for following in to_concat[1:]:
            result = BinOp(result, Bin.Concat, following)
        return result

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
class CFGBuilder(CFGListener):
    """Parse-tree listener that gathers the components of a context-free grammar.

    After the walk the instance exposes ``terminals``, ``nonterminals``,
    ``rules`` (``nonterminal -> set`` of ``Eps()`` or tuples of symbols) and
    ``init`` (the left-hand side of the first rule seen).
    """

    # names of the upper-case callables of CFGParser.AnyvalueContext, used to
    # read the pieces of a quoted terminal
    anyvalue_attributes = anyvalue_attributes(CFGParser)

    def __init__(self):
        self.terminals = set()
        self.nonterminals = set()
        self.rules = dict()
        self.init = None

    def visitSymbol(self, ctx):
        """Return the text of a ``TERMINAL`` or ``CAPS`` token, or ``'_'`` otherwise."""
        if ctx.TERMINAL():
            return str(ctx.TERMINAL())
        elif ctx.CAPS():
            return str(ctx.CAPS())
        else:
            return '_'

    def visitNonterminal(self, ctx):
        """Build the ``Nonterminal`` for ``ctx`` and register it in ``nonterminals``.

        The name is a ``CAPS`` token, a ``<...>`` group of symbols optionally
        followed by apostrophes, or a single symbol followed by apostrophes.
        """
        if ctx.CAPS():
            name = str(ctx.CAPS())
        elif ctx.LEFT_ANGLE():
            name = '<' + ''.join(map(lambda x: self.visitSymbol(x), ctx.symbol())) + '>'
            if ctx.APOSTROPHE():
                name = name + len(ctx.APOSTROPHE()) * "'"
        elif ctx.APOSTROPHE():
            name = self.visitSymbol(ctx.symbol(0)) + len(ctx.APOSTROPHE()) * "'"

        nonterminal = Nonterminal(name)
        self.nonterminals.add(nonterminal)
        return nonterminal

    def visitRewrite(self, ctx):
        """Return the right-hand side as a list of ``Terminal``/``Nonterminal`` objects.

        Terminals are added to ``terminals``; nonterminals are registered by
        ``visitNonterminal``.
        """
        i = 0
        sequence = []
        while ctx.term_or_nonterm(i) is not None:
            if ctx.term_or_nonterm(i).terminal():
                term_ctx = ctx.term_or_nonterm(i).terminal()
                if term_ctx.TERMINAL():
                    name = str(term_ctx.TERMINAL())
                elif term_ctx.QUOTE():
                    # quoted terminal: glue together every non-None token of
                    # its anyvalue children
                    name = ""
                    j = 0
                    while term_ctx.anyvalue(j) is not None:
                        for attribute in self.anyvalue_attributes:
                            value = getattr(term_ctx.anyvalue(j), attribute)()
                            if value is not None:
                                name += str(value)
                        j += 1
                terminal = Terminal(name)
                self.terminals.add(terminal)
                sequence.append(terminal)
            else:
                sequence.append(self.visitNonterminal(ctx.term_or_nonterm(i).nonterminal()))
            i += 1
        return sequence

    def exitOnerule(self, ctx):
        """Record every alternative of one rule line under its left-hand side."""
        nonterminal = self.visitNonterminal(ctx.nonterminal())
        self.nonterminals.add(nonterminal)
        # identity test: never routes through Nonterminal.__eq__
        if self.init is None:
            self.init = nonterminal

        # multiple lines for one nonterminal are possible this way
        if nonterminal not in self.rules:
            self.rules[nonterminal] = set()

        i = 0
        while ctx.rewrite(i) is not None:
            if ctx.rewrite(i).EPSILON():
                self.rules[nonterminal].add(Eps())
            if ctx.rewrite(i).term_or_nonterm():
                sequence = self.visitRewrite(ctx.rewrite(i))
                self.rules[nonterminal].add(tuple(sequence))
            i += 1

    # for future support of comments
    def exitComment(self, ctx) -> None:
        return None
lib/parsing/parser.py
deleted
100644 → 0
View file @
fa0c432b
from
typing
import
List
,
Dict
,
Tuple
,
Optional
,
Union
,
Set
,
TypeVar
from
copy
import
deepcopy
from
lib.common
import
State
,
Character
,
Eps
,
Terminal
,
Nonterminal
,
Emptyset
from
lib.reg
import
DFA
,
NFA
,
RegGrammar
from
lib.grammars_cfg
import
CFG
from
lib.regex
import
RegEx
,
AST
,
Bin
,
Iter
,
BinOp
,
IterOp
,
CharNode
import
antlr4
# type: ignore
from
antlr4.error.ErrorListener
import
ErrorListener
from
lib.parsing.DFALexer
import
DFALexer
from
lib.parsing.DFAParser
import
DFAParser
from
lib.parsing.DFAListener
import
DFAListener
from
lib.parsing.NFALexer
import
NFALexer
from
lib.parsing.NFAParser
import
NFAParser
from
lib.parsing.NFAListener
import
NFAListener
from
lib.parsing.RegExLexer
import
RegExLexer
from
lib.parsing.RegExParser
import
RegExParser
from
lib.parsing.RegExVisitor
import
RegExVisitor
from
lib.parsing.CFGLexer
import
CFGLexer
from
lib.parsing.CFGParser
import
CFGParser
from
lib.parsing.CFGListener
import
CFGListener
class ParsingError(Exception):
    """Raised when the textual description of a formalism cannot be parsed.

    ``args`` is either a single message string or an iterable of message
    parts (typically the ``args`` tuple of the exception being wrapped).
    It is exposed through the standard ``Exception.args`` attribute.
    """

    def __init__(self, args):
        # Exception.args converts whatever is assigned to it into a tuple, so a
        # bare string would be split into single characters. Wrap it first so
        # that the message survives as one element.
        if isinstance(args, str):
            args = (args,)
        self.args = args
# This is needed because ANTLR is too lenient: it recovers and parses whatever it can
# even when the input formalism and the given type don't match. This way parsing aborts on any syntax problem.
class ErrorShouter(ErrorListener):
    """Error listener that turns every reported syntax error into an exception."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Raise ``Exception`` carrying the line, column and message of the error.

        ``recognizer``, ``offendingSymbol`` and ``e`` are accepted to match the
        listener signature but are not used.
        """
        details = (line, column, msg)
        raise Exception("ERROR: when parsing line %d column %d: %s\n" % details)
def anyvalue_attributes(parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]) -> List:
    """Return the names of the upper-case callables of ``parser.AnyvalueContext``.

    Names are taken from ``dir()`` (so they come back in sorted order); names
    starting with a double underscore and names that are not fully upper-case
    are left out.
    """
    context = parser.AnyvalueContext
    selected = []
    for candidate in dir(context):
        if candidate.startswith("__") or not candidate.isupper():
            continue
        if callable(getattr(context, candidate)):
            selected.append(candidate)
    return selected
class
Parser
:
def __init__(self):
    # Nothing to set up: no attributes are initialised here.
    pass
def names_to_str(self, collection: Union[Set[State], Set[Character], Set[Terminal], Set[Nonterminal]]) -> str:
    """Return the distinct ``name`` values of ``collection`` as ``{a,b,c}``.

    Names are sorted so that the same input always yields the same text
    (plain set iteration order of strings differs between processes).
    """
    return "{" + ','.join(sorted({item.name for item in collection})) + "}"
def reggrammar_to_str(self, reg: RegGrammar, full: bool = False) -> str:
    """Render a regular grammar as text.

    Rules are listed with the initial nonterminal first and the remaining
    nonterminals sorted by name, so the output is reproducible.  With
    ``full`` a ``Grammar: (...)`` header line precedes the rules.
    """
    # set difference already builds a new set, so reg.nonterminals is untouched
    remaining = set(reg.nonterminals) - {reg.init}
    nonterminals = [reg.init] + sorted(remaining, key=lambda nonterminal: nonterminal.name)
    rules = self.rules_to_str(reg.rules, nonterminals)

    if not full:
        return rules

    # full - verbose description of the grammar - only for development, dismiss later
    nonterminals_names = self.names_to_str(reg.nonterminals)
    terminals = self.names_to_str(reg.terminals)
    return f"Grammar: ({nonterminals_names}, {terminals}, P, {reg.init.name})\n{rules}"
def cfg_to_str(self, gra: CFG, full: bool = False) -> str:
    """Render a context-free grammar as text.

    Rules are listed with the initial nonterminal first and the remaining
    nonterminals sorted by name, so the output is reproducible.  With
    ``full`` a ``Grammar: (...)`` header line precedes the rules.
    """
    # set difference already builds a new set, so gra.nonterminals is untouched
    remaining = set(gra.nonterminals) - {gra.init}
    nonterminals = [gra.init] + sorted(remaining, key=lambda nonterminal: nonterminal.name)
    rules = self.rules_to_str(gra.rules, nonterminals)

    if not full:
        return rules

    # full - verbose description of the grammar - only for development, dismiss later
    nonterminals_names = self.names_to_str(gra.nonterminals)
    terminals = self.names_to_str(gra.terminals)
    return f"Grammar: ({nonterminals_names}, {terminals}, P, {gra.init.name})\n{rules}"
def rules_to_str(self, rules: Union[CFG.Rules, RegGrammar.Rules], nonterminals: List[Nonterminal]) -> str:
    """Render one ``N -> alt | alt`` line per nonterminal that has rules.

    Lines follow the order of ``nonterminals``; nonterminals missing from
    ``rules`` are skipped.  Alternatives are de-duplicated and sorted so the
    output is reproducible.  Returns an empty string when no line is produced.
    """
    lines = []
    for nonterminal in nonterminals:
        if nonterminal not in rules:
            continue
        alternatives = sorted({self.rewrite_variant(variant) for variant in rules[nonterminal]})
        lines.append(f"{nonterminal.name} -> {' | '.join(alternatives)}")
    return "\n".join(lines)
def rewrite_variant(self, variant: Union[Eps, Terminal, Tuple[Union[Terminal, Nonterminal], ...]]) -> str:
    """Render one right-hand side of a rule.

    A tuple yields the concatenated names of its symbols; anything else
    yields its own ``name``.
    """
    if not isinstance(variant, tuple):
        return variant.name
    return ''.join(symbol.name for symbol in variant)