Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
fja
eval
Commits
61decabf
Commit
61decabf
authored
Jun 09, 2021
by
Vladimír Štill
Browse files
lib: Polishing parser
parent
44bd0954
Pipeline
#95125
passed with stage
in 1 minute and 25 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
lib/parser/__init__.py
View file @
61decabf
from
typing
import
List
,
Dict
,
Tuple
,
Optional
,
Union
,
Set
,
TypeVar
from
typing
import
List
,
Tuple
,
Union
,
Set
from
copy
import
deepcopy
from
lib.common
import
State
,
Character
,
Eps
,
Terminal
,
Nonterminal
,
Emptyset
from
lib.reg
import
DFA
,
NFA
,
RegGrammar
from
lib.grammars_cfg
import
CFG
from
lib.regex
import
RegEx
,
AST
,
Bin
,
Iter
,
BinOp
,
IterOp
,
CharNode
import
antlr4
# type: ignore
import
antlr4
# type: ignore
from
antlr4.error.ErrorListener
import
ErrorListener
from
lib.parser.DFALexer
import
DFALexer
from
lib.parser.DFAParser
import
DFAParser
...
...
@@ -19,20 +19,23 @@ from lib.parser.CFGLexer import CFGLexer
from
lib.parser.CFGParser
import
CFGParser
from
lib.parser.CFGListener
import
CFGListener
class ParsingError(Exception):
    """Error raised when a textual formalism description cannot be parsed.

    The single constructor argument is either a ready-made ``args`` tuple
    (typically ``e.args`` of the exception being wrapped) or a plain message
    string.
    """

    def __init__(self, args):
        """Store ``args`` as the exception arguments.

        Args:
            args: Tuple (or other iterable) of exception arguments, or a
                single message string.
        """
        # ``BaseException.args`` coerces its value with ``tuple()``, so a bare
        # string would be split into single characters. Wrap it so the message
        # stays in one piece.
        if isinstance(args, str):
            args = (args,)
        super().__init__(*args)
# This is needed because antlr is too smart and parses at least something
# possible even when the input formalism and the given type don't match. This
# way it aborts on any parsing problem.
class ErrorShouter(ErrorListener):
    """Error listener that turns every reported syntax error into an exception.

    Raising from the listener callback makes parsing abort on the first
    problem.
    """

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Raise an ``Exception`` describing the position of a syntax error.

        Only ``line``, ``column`` and ``msg`` are used; the remaining
        parameters are accepted to match the listener callback signature.

        Raises:
            Exception: Always, with the line, column and message of the
                reported error.
        """
        raise Exception(
            f"ERROR: when parsing line {line} column {column}: {msg}")
def _anyvalue_attributes(
        parser: Union[DFAParser, NFAParser, RegExParser, CFGParser]
) -> List[str]:
    """List the upper-case callable attribute names of ``AnyvalueContext``.

    Args:
        parser: Object exposing an ``AnyvalueContext`` attribute (callers in
            this module pass a generated parser class, e.g. ``CFGParser``).

    Returns:
        Names from ``dir(parser.AnyvalueContext)`` that are callable, do not
        start with a double underscore and are entirely upper-case, in the
        order ``dir()`` yields them.
    """
    context = parser.AnyvalueContext
    return [name for name in dir(context)
            if callable(getattr(context, name))
            and not name.startswith("__")
            and name.isupper()]
...
...
@@ -49,7 +52,9 @@ def _rules_to_str(rules: Union[CFG.Rules, RegGrammar.Rules],
for
nonterminal
in
nonterminals
:
if
nonterminal
not
in
rules
:
continue
rewritten
=
' | '
.
join
(
set
(
map
(
lambda
x
:
_rewrite_variant
(
x
),
rules
[
nonterminal
])))
rewritten
=
' | '
.
join
(
set
(
map
(
lambda
x
:
_rewrite_variant
(
x
),
rules
[
nonterminal
])))
out
+=
f
"
{
nonterminal
.
name
}
->
{
rewritten
}
\n
"
return
out
[:
-
1
]
...
...
@@ -60,7 +65,8 @@ def _rewrite_variant(variant: Union[Eps, Terminal,
return
''
.
join
(
map
(
lambda
x
:
x
.
name
,
variant
))
return
variant
.
name
def
dfa_to_str
(
dfa
:
DFA
,
full
:
bool
=
False
)
->
str
:
def
dfa_to_str
(
dfa
:
DFA
,
full
:
bool
=
False
)
->
str
:
transition
=
""
for
key
,
dest_state
in
dfa
.
transition
.
items
():
state_1
,
character
=
key
...
...
@@ -71,7 +77,8 @@ def dfa_to_str(dfa: DFA, full : bool = False) -> str:
# full - verbose description of DFA - only for development, dismiss later
if
full
:
return
f
"DFA = (
{
_names_to_str
(
dfa
.
states
)
}
,
{
_names_to_str
(
dfa
.
characters
)
}
, "
\
return
f
"DFA = (
{
_names_to_str
(
dfa
.
states
)
}
, "
\
f
"
{
_names_to_str
(
dfa
.
characters
)
}
, "
\
f
"d,
{
init
}
,
{
final
}
)
\n
{
transition
}
"
return
f
"
{
init
}
{
transition
}
{
final
}
"
...
...
@@ -86,7 +93,9 @@ def reggrammar_to_str(reg: RegGrammar, full: bool = False) -> str:
# full - verbose description of the grammar - only for development, dismiss later
nonterminals_names
=
_names_to_str
(
reg
.
nonterminals
)
terminals
=
_names_to_str
(
reg
.
terminals
)
return
f
"Grammar: (
{
nonterminals_names
}
,
{
terminals
}
, P,
{
reg
.
init
.
name
}
)
\n
{
_rules_to_str
(
reg
.
rules
,
nonterminals
)
}
"
return
f
"Grammar: (
{
nonterminals_names
}
,
{
terminals
}
, P, "
\
f
"
{
reg
.
init
.
name
}
)
\n
{
_rules_to_str
(
reg
.
rules
,
nonterminals
)
}
"
def
cfg_to_str
(
gra
:
CFG
,
full
:
bool
=
False
)
->
str
:
nonterminals
=
deepcopy
(
gra
.
nonterminals
).
difference
({
gra
.
init
})
...
...
@@ -98,26 +107,32 @@ def cfg_to_str(gra: CFG, full: bool = False) -> str:
# full - verbose description of the grammar - only for development, dismiss later
nonterminals_names
=
_names_to_str
(
gra
.
nonterminals
)
terminals
=
_names_to_str
(
gra
.
terminals
)
return
f
"Grammar: (
{
nonterminals_names
}
,
{
terminals
}
, P,
{
gra
.
init
.
name
}
)
\n
{
_rules_to_str
(
gra
.
rules
,
nonterminals
)
}
"
return
f
"Grammar: (
{
nonterminals_names
}
,
{
terminals
}
, P, "
\
f
"
{
gra
.
init
.
name
}
)
\n
{
_rules_to_str
(
gra
.
rules
,
nonterminals
)
}
"
def
nfa_to_str
(
nfa
:
NFA
,
full
:
bool
=
False
)
->
str
:
def
nfa_to_str
(
nfa
:
NFA
,
full
:
bool
=
False
)
->
str
:
transition
=
""
for
key
,
set_states
in
nfa
.
transition
.
items
():
state
,
character
=
key
dest_states
=
nfa
.
transition
[
state
,
character
]
transition
+=
f
"(
{
state
.
name
}
,
{
character
.
name
}
)=
{
_names_to_str
(
dest_states
)
}
"
transition
+=
f
"(
{
state
.
name
}
,
{
character
.
name
}
)="
\
f
"
{
_names_to_str
(
dest_states
)
}
"
init
=
f
"init=
{
nfa
.
init
.
name
}
"
final
=
f
"final=
{
_names_to_str
(
nfa
.
final
)
}
"
if
full
:
return
f
"NFA = (
{
_names_to_str
(
nfa
.
states
)
}
,
{
_names_to_str
(
nfa
.
characters
)
}
, "
\
return
f
"NFA = (
{
_names_to_str
(
nfa
.
states
)
}
, "
\
f
"
{
_names_to_str
(
nfa
.
characters
)
}
, "
\
f
"d,
{
init
}
,
{
final
}
)
\n
{
transition
}
"
return
f
"
{
init
}
{
transition
}
{
final
}
"
def regex_to_str(reg: RegEx) -> str:
    """Return the textual form of a regular expression.

    Args:
        reg: Regular expression whose ``expression`` attribute provides
            ``astprint()``.

    Returns:
        Whatever ``reg.expression.astprint()`` produces.
    """
    return reg.expression.astprint()
def
_common_parse
(
string
:
str
,
given_lexer
,
given_parser
,
given_builder
):
error_listener
=
ErrorShouter
()
chars
=
antlr4
.
InputStream
(
string
)
...
...
@@ -137,7 +152,8 @@ def _common_parse(string: str, given_lexer, given_parser, given_builder):
def cfg(string: str) -> CFG:
    """Parse the textual description of a context-free grammar.

    Args:
        string: Grammar description to parse.

    Returns:
        A ``CFG`` built from the nonterminals, terminals, rules and initial
        nonterminal collected by ``CFGBuilder``.

    Raises:
        ParsingError: If parsing or grammar construction fails for any
            reason; it carries the ``args`` of the original exception.
    """
    try:
        builder = _common_parse(string, CFGLexer, CFGParser, CFGBuilder)
        return CFG(builder.nonterminals, builder.terminals, builder.rules,
                   builder.init)
    except Exception as e:
        # Report every failure uniformly as ParsingError, but keep the
        # original exception chained so its traceback is not lost.
        raise ParsingError(e.args) from e
...
...
@@ -159,9 +175,11 @@ def dfa(string: str) -> DFA:
if
builder
.
init
is
None
:
builder
.
init
=
builder
.
first_state
if
builder
.
init
is
None
:
raise
ParsingError
(
"Automat musí obsahovat alespoň jeden stav."
)
raise
ParsingError
(
"Automat musí obsahovat alespoň jeden stav."
)
dfa
=
DFA
(
builder
.
states
,
builder
.
characters
,
builder
.
transition
,
builder
.
init
,
builder
.
final
)
dfa
=
DFA
(
builder
.
states
,
builder
.
characters
,
builder
.
transition
,
builder
.
init
,
builder
.
final
)
return
dfa
except
Exception
as
e
:
...
...
@@ -175,9 +193,11 @@ def nfa(string: str) -> NFA:
if
builder
.
init
is
None
:
builder
.
init
=
builder
.
first_state
if
builder
.
init
is
None
:
raise
ParsingError
(
"Automat musí obsahovat alespoň jeden stav."
)
raise
ParsingError
(
"Automat musí obsahovat alespoň jeden stav."
)
return
NFA
(
builder
.
states
,
builder
.
characters
,
builder
.
transition
,
builder
.
init
,
builder
.
final
)
return
NFA
(
builder
.
states
,
builder
.
characters
,
builder
.
transition
,
builder
.
init
,
builder
.
final
)
except
Exception
as
e
:
raise
ParsingError
(
e
.
args
)
...
...
@@ -252,7 +272,8 @@ class DFABuilder(DFAListener, StateVisitor):
self
.
characters
.
add
(
character
)
if
(
state
,
character
)
in
self
.
transition
:
print
(
f
"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici (
{
state
.
name
}
,
{
character
.
name
}
)."
)
print
(
"Upozornění: v textovém zápisu se objevilo více přechodů "
f
"pro stejnou dvojici (
{
state
.
name
}
,
{
character
.
name
}
)."
)
self
.
transition
[
state
,
character
]
=
dest_state
if
self
.
first_state
is
None
:
...
...
@@ -295,21 +316,25 @@ class NFABuilder(NFAListener, StateVisitor):
dest_states
=
set
()
i
=
0
while
ctx
.
stateset
().
statename
(
i
)
is
not
None
:
dest_state
=
State
(
self
.
visitStatename
(
ctx
.
stateset
().
statename
(
i
)))
dest_state
=
State
(
self
.
visitStatename
(
ctx
.
stateset
().
statename
(
i
)))
self
.
states
.
add
(
dest_state
)
dest_states
.
add
(
dest_state
)
i
+=
1
if
ctx
.
EPSILON
():
if
(
state
,
Eps
())
in
self
.
transition
:
print
(
f
"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici (
{
state
.
name
}
, ε)."
)
print
(
"Upozornění: v textovém zápisu se objevilo více "
f
"přechodů pro stejnou dvojici (
{
state
.
name
}
, ε)."
)
self
.
transition
[
state
,
Eps
()]
=
dest_states
self
.
efa
=
True
else
:
character
=
Character
(
self
.
visitStatename
(
ctx
.
statename
(
1
)))
self
.
characters
.
add
(
character
)
if
(
state
,
character
)
in
self
.
transition
:
print
(
f
"Upozornění: v textovém zápisu se objevilo více přechodů pro stejnou dvojici (
{
state
.
name
}
,
{
character
.
name
}
)."
)
print
(
"Upozornění: v textovém zápisu se objevilo více "
f
"přechodů pro stejnou dvojici (
{
state
.
name
}
, "
f
"
{
character
.
name
}
)."
)
self
.
transition
[
state
,
character
]
=
dest_states
if
self
.
first_state
is
None
:
...
...
@@ -342,10 +367,13 @@ class RegExBuilder(RegExVisitor):
# Binary operation: union or explicit concatenation
if
ctx
.
UNION
()
or
ctx
.
CONCAT
():
op
=
Bin
.
Union
if
ctx
.
UNION
()
is
not
None
else
Bin
.
Concat
return
BinOp
(
self
.
visitExpr
(
ctx
.
expr
(
0
)),
op
,
self
.
visitExpr
(
ctx
.
expr
(
1
)))
return
BinOp
(
self
.
visitExpr
(
ctx
.
expr
(
0
)),
op
,
self
.
visitExpr
(
ctx
.
expr
(
1
)))
# Implicit concatenation of (iterated) symbols or expressions in parentheses
expressions
=
list
(
map
(
lambda
x
:
self
.
visitConcatenable
(
x
),
ctx
.
concatenated
()))
# Implicit concatenation of (iterated) symbols or expressions in
# parentheses
expressions
=
list
(
map
(
lambda
x
:
self
.
visitConcatenable
(
x
),
ctx
.
concatenated
()))
return
self
.
implicit_concat
(
expressions
)
def
visitConcatenable
(
self
,
ctx
):
...
...
@@ -396,7 +424,8 @@ class RegExBuilder(RegExVisitor):
elif
ctx
.
parentheses
():
expression
=
self
.
visitParentheses
(
ctx
.
parentheses
())
return
IterOp
(
expression
,
Iter
.
Positive
)
if
positive
else
IterOp
(
expression
,
Iter
.
Iteration
)
return
IterOp
(
expression
,
Iter
.
Positive
)
if
positive
\
else
IterOp
(
expression
,
Iter
.
Iteration
)
def
implicit_concat
(
self
,
to_concat
):
ast
=
to_concat
[
0
]
...
...
@@ -405,11 +434,11 @@ class RegExBuilder(RegExVisitor):
ast
=
BinOp
(
ast
,
Bin
.
Concat
,
expression
)
return
ast
# for future support of comments
def exitComment(self, ctx) -> None:
    """Placeholder for comment handling; intentionally does nothing.

    Args:
        ctx: Parse-tree context of the comment (currently ignored).

    Returns:
        Always ``None``.
    """
    # NOTE(review): the name follows the listener ``exit*`` convention;
    # confirm it is actually invoked from the enclosing builder.
    return None
class
CFGBuilder
(
CFGListener
):
anyvalue_attributes
=
_anyvalue_attributes
(
CFGParser
)
...
...
@@ -431,11 +460,13 @@ class CFGBuilder(CFGListener):
if
ctx
.
CAPS
():
name
=
str
(
ctx
.
CAPS
())
elif
ctx
.
LEFT_ANGLE
():
name
=
'<'
+
''
.
join
(
map
(
lambda
x
:
self
.
visitSymbol
(
x
),
ctx
.
symbol
()))
+
'>'
name
=
'<'
+
''
.
join
(
map
(
lambda
x
:
self
.
visitSymbol
(
x
),
ctx
.
symbol
()))
+
'>'
if
ctx
.
APOSTROPHE
():
name
=
name
+
len
(
ctx
.
APOSTROPHE
())
*
"'"
name
=
name
+
len
(
ctx
.
APOSTROPHE
())
*
"'"
elif
ctx
.
APOSTROPHE
():
name
=
self
.
visitSymbol
(
ctx
.
symbol
(
0
))
+
len
(
ctx
.
APOSTROPHE
())
*
"'"
name
=
self
.
visitSymbol
(
ctx
.
symbol
(
0
))
\
+
len
(
ctx
.
APOSTROPHE
())
*
"'"
nonterminal
=
Nonterminal
(
name
)
self
.
nonterminals
.
add
(
nonterminal
)
...
...
@@ -465,7 +496,8 @@ class CFGBuilder(CFGListener):
sequence
.
append
(
terminal
)
else
:
sequence
.
append
(
self
.
visitNonterminal
(
ctx
.
term_or_nonterm
(
i
).
nonterminal
()))
sequence
.
append
(
self
.
visitNonterminal
(
ctx
.
term_or_nonterm
(
i
).
nonterminal
()))
i
+=
1
return
sequence
...
...
@@ -473,7 +505,7 @@ class CFGBuilder(CFGListener):
def
exitOnerule
(
self
,
ctx
):
nonterminal
=
self
.
visitNonterminal
(
ctx
.
nonterminal
())
self
.
nonterminals
.
add
(
nonterminal
)
if
self
.
init
==
None
:
if
self
.
init
is
None
:
self
.
init
=
nonterminal
# multiple lines for one nonterminal are possible this way
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment