Commit 597b1e4e authored by Vladimír Štill

lib: Add WIP for set expression

parent 400667e6
Pipeline #95769 passed in 1 minute and 34 seconds
grammar SET;
/* Parser Rules */
start: expr[0] alph? comment EOF;
alph : SIGSEP SIGMA EQUALS atom_set;
expr[int pr]
    : unop ( {5 >= $pr}? CONCAT expr[6]
           | {4 >= $pr}? INTERS expr[5]
           | {3 >= $pr}? UNION expr[4]
           | {2 >= $pr}? DIFF expr[3]
           )*
    ;
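/* Precedence climbing: each binary alternative is guarded by a predicate on the
   inherited pr argument, so an input such as "{a} \u {b} . {c}" groups as
   "{a} \u ({b} . {c})": concatenation (level 5) binds tighter than union (level 3). */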
unop
    : parentheses
    | atom_set
    | COMPL unop
    | unop POS_COMPL
    | unop ITER
    | unop POS_ITER
    ;
parentheses: LEFT_PAR expr[0] RIGHT_PAR;
atom_set
    : LEFT_BR ( symbol (COMMA symbol)* )? RIGHT_BR
    | EMPTYSET
    ;
symbol : (ALPHABET | EPSILON | QUOTE anyvalue+ QUOTE);
comment: (HASH anyvalue* | );
anyvalue : LEFT_PAR | RIGHT_PAR | LEFT_BR | RIGHT_BR | COMMA | ITER | POS_ITER | CONCAT | UNION | INTERS | COMPL | POS_COMPL | ALPHABET | EPSILON | EMPTYSET | QUOTE | SIGMA | SIGSEP | EQUALS | DIFF | HASH | ANYCHAR;
LEFT_PAR : '(';
RIGHT_PAR: ')';
LEFT_BR : '{';
RIGHT_BR : '}';
COMMA : ',';
ITER : ('*' | '^*');
POS_ITER : ('+' | '^+');
CONCAT : ('.' | '·');
UNION : ('υ' | '\\u' | '\\union' | '\\cup');
INTERS : ('∩' | '\\i' | '\\intersection' | '\\cap');
COMPL : ('\\co''-'? | '¬' | '!');
POS_COMPL: ('^c' | '\'');
ALPHABET : [a-zA-Z0-9];
EPSILON : ('ε' | '\\''e');
EMPTYSET : ('∅' | '\\''0');
SIGMA : ('Σ' | '\\Sigma');
SIGSEP : ('\n' | ';');
EQUALS : '=';
/* lower priority than the other \\ uses */
DIFF : ('∖' | '\\' | '\\setminus');
HASH : '#';
QUOTE : '"';
/* Characters to be ignored */
WS : [ \r\t\n]+ -> skip ;
ANYCHAR : .;
/*
vim: syntax=antlr
*/
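The expr rule above handles operator precedence with an inherited pr argument and semantic predicates instead of one rule per level; concatenation binds tightest, then intersection, union, and set difference. Below is a minimal driver sketch for the generated parser. It is not part of this commit and assumes the generated modules are importable from lib.parser, as the "Generated from lib/parser/SET.g4" headers suggest.

from antlr4 import InputStream, CommonTokenStream
from lib.parser.SETLexer import SETLexer    # import path is an assumption
from lib.parser.SETParser import SETParser  # import path is an assumption

def parse_set_expr(text: str):
    lexer = SETLexer(InputStream(text))
    parser = SETParser(CommonTokenStream(lexer))
    return parser.start()  # 'start' is the entry rule of the grammar

tree = parse_set_expr("{a} \\u {b} . {c}  # concatenation binds tighter than union")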
# Generated from lib/parser/SET.g4 by ANTLR 4.9.2
from antlr4 import *
from io import StringIO
import sys
if sys.version_info[1] > 5:
from typing import TextIO
else:
from typing.io import TextIO
def serializedATN():
with StringIO() as buf:
buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\31")
buf.write("\u00ad\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7")
buf.write("\t\7\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t\13\4\f\t\f\4\r\t\r")
buf.write("\4\16\t\16\4\17\t\17\4\20\t\20\4\21\t\21\4\22\t\22\4\23")
buf.write("\t\23\4\24\t\24\4\25\t\25\4\26\t\26\4\27\t\27\4\30\t\30")
buf.write("\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3\5\3\6\3\6\3\7\3\7\3\7\5")
buf.write("\7?\n\7\3\b\3\b\3\b\5\bD\n\b\3\t\3\t\3\n\3\n\3\n\3\n\3")
buf.write("\n\3\n\3\n\3\n\3\n\3\n\3\n\3\n\3\n\5\nU\n\n\3\13\3\13")
buf.write("\3\13\3\13\3\13\3\13\3\13\3\13\3\13\3\13\3\13\3\13\3\13")
buf.write("\3\13\3\13\3\13\3\13\3\13\3\13\3\13\5\13k\n\13\3\f\3\f")
buf.write("\3\f\3\f\3\f\5\fr\n\f\3\f\5\fu\n\f\3\r\3\r\3\r\5\rz\n")
buf.write("\r\3\16\3\16\3\17\3\17\3\17\5\17\u0081\n\17\3\20\3\20")
buf.write("\3\20\5\20\u0086\n\20\3\21\3\21\3\21\3\21\3\21\3\21\3")
buf.write("\21\5\21\u008f\n\21\3\22\3\22\3\23\3\23\3\24\3\24\3\24")
buf.write("\3\24\3\24\3\24\3\24\3\24\3\24\3\24\5\24\u009f\n\24\3")
buf.write("\25\3\25\3\26\3\26\3\27\6\27\u00a6\n\27\r\27\16\27\u00a7")
buf.write("\3\27\3\27\3\30\3\30\2\2\31\3\3\5\4\7\5\t\6\13\7\r\b\17")
buf.write("\t\21\n\23\13\25\f\27\r\31\16\33\17\35\20\37\21!\22#\23")
buf.write("%\24\'\25)\26+\27-\30/\31\3\2\b\4\2\60\60\u00b9\u00b9")
buf.write("\4\2##\u00ae\u00ae\5\2\62;C\\c|\4\2\f\f==\4\2^^\u2218")
buf.write("\u2218\5\2\13\f\17\17\"\"\2\u00bc\2\3\3\2\2\2\2\5\3\2")
buf.write("\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2")
buf.write("\2\17\3\2\2\2\2\21\3\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2")
buf.write("\27\3\2\2\2\2\31\3\2\2\2\2\33\3\2\2\2\2\35\3\2\2\2\2\37")
buf.write("\3\2\2\2\2!\3\2\2\2\2#\3\2\2\2\2%\3\2\2\2\2\'\3\2\2\2")
buf.write("\2)\3\2\2\2\2+\3\2\2\2\2-\3\2\2\2\2/\3\2\2\2\3\61\3\2")
buf.write("\2\2\5\63\3\2\2\2\7\65\3\2\2\2\t\67\3\2\2\2\139\3\2\2")
buf.write("\2\r>\3\2\2\2\17C\3\2\2\2\21E\3\2\2\2\23T\3\2\2\2\25j")
buf.write("\3\2\2\2\27t\3\2\2\2\31y\3\2\2\2\33{\3\2\2\2\35\u0080")
buf.write("\3\2\2\2\37\u0085\3\2\2\2!\u008e\3\2\2\2#\u0090\3\2\2")
buf.write("\2%\u0092\3\2\2\2\'\u009e\3\2\2\2)\u00a0\3\2\2\2+\u00a2")
buf.write("\3\2\2\2-\u00a5\3\2\2\2/\u00ab\3\2\2\2\61\62\7*\2\2\62")
buf.write("\4\3\2\2\2\63\64\7+\2\2\64\6\3\2\2\2\65\66\7}\2\2\66\b")
buf.write("\3\2\2\2\678\7\177\2\28\n\3\2\2\29:\7.\2\2:\f\3\2\2\2")
buf.write(";?\7,\2\2<=\7`\2\2=?\7,\2\2>;\3\2\2\2><\3\2\2\2?\16\3")
buf.write("\2\2\2@D\7-\2\2AB\7`\2\2BD\7-\2\2C@\3\2\2\2CA\3\2\2\2")
buf.write("D\20\3\2\2\2EF\t\2\2\2F\22\3\2\2\2GU\7\u03c7\2\2HI\7^")
buf.write("\2\2IU\7w\2\2JK\7^\2\2KL\7w\2\2LM\7p\2\2MN\7k\2\2NO\7")
buf.write("q\2\2OU\7p\2\2PQ\7^\2\2QR\7e\2\2RS\7w\2\2SU\7r\2\2TG\3")
buf.write("\2\2\2TH\3\2\2\2TJ\3\2\2\2TP\3\2\2\2U\24\3\2\2\2Vk\7\u222b")
buf.write("\2\2WX\7^\2\2Xk\7k\2\2YZ\7^\2\2Z[\7k\2\2[\\\7p\2\2\\]")
buf.write("\7v\2\2]^\7g\2\2^_\7t\2\2_`\7u\2\2`a\7g\2\2ab\7e\2\2b")
buf.write("c\7v\2\2cd\7k\2\2de\7q\2\2ek\7p\2\2fg\7^\2\2gh\7e\2\2")
buf.write("hi\7c\2\2ik\7r\2\2jV\3\2\2\2jW\3\2\2\2jY\3\2\2\2jf\3\2")
buf.write("\2\2k\26\3\2\2\2lm\7^\2\2mn\7e\2\2no\7q\2\2oq\3\2\2\2")
buf.write("pr\7/\2\2qp\3\2\2\2qr\3\2\2\2ru\3\2\2\2su\t\3\2\2tl\3")
buf.write("\2\2\2ts\3\2\2\2u\30\3\2\2\2vw\7`\2\2wz\7e\2\2xz\7)\2")
buf.write("\2yv\3\2\2\2yx\3\2\2\2z\32\3\2\2\2{|\t\4\2\2|\34\3\2\2")
buf.write("\2}\u0081\7\u03b7\2\2~\177\7^\2\2\177\u0081\7g\2\2\u0080")
buf.write("}\3\2\2\2\u0080~\3\2\2\2\u0081\36\3\2\2\2\u0082\u0086")
buf.write("\7\u2207\2\2\u0083\u0084\7^\2\2\u0084\u0086\7\62\2\2\u0085")
buf.write("\u0082\3\2\2\2\u0085\u0083\3\2\2\2\u0086 \3\2\2\2\u0087")
buf.write("\u008f\7\u03a5\2\2\u0088\u0089\7^\2\2\u0089\u008a\7U\2")
buf.write("\2\u008a\u008b\7k\2\2\u008b\u008c\7i\2\2\u008c\u008d\7")
buf.write("o\2\2\u008d\u008f\7c\2\2\u008e\u0087\3\2\2\2\u008e\u0088")
buf.write("\3\2\2\2\u008f\"\3\2\2\2\u0090\u0091\t\5\2\2\u0091$\3")
buf.write("\2\2\2\u0092\u0093\7?\2\2\u0093&\3\2\2\2\u0094\u009f\t")
buf.write("\6\2\2\u0095\u0096\7^\2\2\u0096\u0097\7u\2\2\u0097\u0098")
buf.write("\7g\2\2\u0098\u0099\7v\2\2\u0099\u009a\7o\2\2\u009a\u009b")
buf.write("\7k\2\2\u009b\u009c\7p\2\2\u009c\u009d\7w\2\2\u009d\u009f")
buf.write("\7u\2\2\u009e\u0094\3\2\2\2\u009e\u0095\3\2\2\2\u009f")
buf.write("(\3\2\2\2\u00a0\u00a1\7%\2\2\u00a1*\3\2\2\2\u00a2\u00a3")
buf.write("\7$\2\2\u00a3,\3\2\2\2\u00a4\u00a6\t\7\2\2\u00a5\u00a4")
buf.write("\3\2\2\2\u00a6\u00a7\3\2\2\2\u00a7\u00a5\3\2\2\2\u00a7")
buf.write("\u00a8\3\2\2\2\u00a8\u00a9\3\2\2\2\u00a9\u00aa\b\27\2")
buf.write("\2\u00aa.\3\2\2\2\u00ab\u00ac\13\2\2\2\u00ac\60\3\2\2")
buf.write("\2\17\2>CTjqty\u0080\u0085\u008e\u009e\u00a7\3\b\2\2")
return buf.getvalue()
class SETLexer(Lexer):
atn = ATNDeserializer().deserialize(serializedATN())
decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
LEFT_PAR = 1
RIGHT_PAR = 2
LEFT_BR = 3
RIGHT_BR = 4
COMMA = 5
ITER = 6
POS_ITER = 7
CONCAT = 8
UNION = 9
INTERS = 10
COMPL = 11
POS_COMPL = 12
ALPHABET = 13
EPSILON = 14
EMPTYSET = 15
SIGMA = 16
SIGSEP = 17
EQUALS = 18
DIFF = 19
HASH = 20
QUOTE = 21
WS = 22
ANYCHAR = 23
channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ]
modeNames = [ "DEFAULT_MODE" ]
literalNames = [ "<INVALID>",
"'('", "')'", "'{'", "'}'", "','", "'='", "'#'", "'\"'" ]
symbolicNames = [ "<INVALID>",
"LEFT_PAR", "RIGHT_PAR", "LEFT_BR", "RIGHT_BR", "COMMA", "ITER",
"POS_ITER", "CONCAT", "UNION", "INTERS", "COMPL", "POS_COMPL",
"ALPHABET", "EPSILON", "EMPTYSET", "SIGMA", "SIGSEP", "EQUALS",
"DIFF", "HASH", "QUOTE", "WS", "ANYCHAR" ]
ruleNames = [ "LEFT_PAR", "RIGHT_PAR", "LEFT_BR", "RIGHT_BR", "COMMA",
"ITER", "POS_ITER", "CONCAT", "UNION", "INTERS", "COMPL",
"POS_COMPL", "ALPHABET", "EPSILON", "EMPTYSET", "SIGMA",
"SIGSEP", "EQUALS", "DIFF", "HASH", "QUOTE", "WS", "ANYCHAR" ]
grammarFileName = "SET.g4"
def __init__(self, input=None, output:TextIO = sys.stdout):
super().__init__(input, output)
self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
self._actions = None
self._predicates = None
# Generated from lib/parser/SET.g4 by ANTLR 4.9.2
from antlr4 import *
if __name__ is not None and "." in __name__:
    from .SETParser import SETParser
else:
    from SETParser import SETParser

# This class defines a complete listener for a parse tree produced by SETParser.
class SETListener(ParseTreeListener):

    # Enter a parse tree produced by SETParser#start.
    def enterStart(self, ctx:SETParser.StartContext):
        pass

    # Exit a parse tree produced by SETParser#start.
    def exitStart(self, ctx:SETParser.StartContext):
        pass

    # Enter a parse tree produced by SETParser#alph.
    def enterAlph(self, ctx:SETParser.AlphContext):
        pass

    # Exit a parse tree produced by SETParser#alph.
    def exitAlph(self, ctx:SETParser.AlphContext):
        pass

    # Enter a parse tree produced by SETParser#expr.
    def enterExpr(self, ctx:SETParser.ExprContext):
        pass

    # Exit a parse tree produced by SETParser#expr.
    def exitExpr(self, ctx:SETParser.ExprContext):
        pass

    # Enter a parse tree produced by SETParser#unop.
    def enterUnop(self, ctx:SETParser.UnopContext):
        pass

    # Exit a parse tree produced by SETParser#unop.
    def exitUnop(self, ctx:SETParser.UnopContext):
        pass

    # Enter a parse tree produced by SETParser#parentheses.
    def enterParentheses(self, ctx:SETParser.ParenthesesContext):
        pass

    # Exit a parse tree produced by SETParser#parentheses.
    def exitParentheses(self, ctx:SETParser.ParenthesesContext):
        pass

    # Enter a parse tree produced by SETParser#atom_set.
    def enterAtom_set(self, ctx:SETParser.Atom_setContext):
        pass

    # Exit a parse tree produced by SETParser#atom_set.
    def exitAtom_set(self, ctx:SETParser.Atom_setContext):
        pass

    # Enter a parse tree produced by SETParser#symbol.
    def enterSymbol(self, ctx:SETParser.SymbolContext):
        pass

    # Exit a parse tree produced by SETParser#symbol.
    def exitSymbol(self, ctx:SETParser.SymbolContext):
        pass

    # Enter a parse tree produced by SETParser#comment.
    def enterComment(self, ctx:SETParser.CommentContext):
        pass

    # Exit a parse tree produced by SETParser#comment.
    def exitComment(self, ctx:SETParser.CommentContext):
        pass

    # Enter a parse tree produced by SETParser#anyvalue.
    def enterAnyvalue(self, ctx:SETParser.AnyvalueContext):
        pass

    # Exit a parse tree produced by SETParser#anyvalue.
    def exitAnyvalue(self, ctx:SETParser.AnyvalueContext):
        pass


del SETParser
# Generated from lib/parser/SET.g4 by ANTLR 4.9.2
from antlr4 import *
if __name__ is not None and "." in __name__:
    from .SETParser import SETParser
else:
    from SETParser import SETParser

# This class defines a complete generic visitor for a parse tree produced by SETParser.
class SETVisitor(ParseTreeVisitor):

    # Visit a parse tree produced by SETParser#start.
    def visitStart(self, ctx:SETParser.StartContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#alph.
    def visitAlph(self, ctx:SETParser.AlphContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#expr.
    def visitExpr(self, ctx:SETParser.ExprContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#unop.
    def visitUnop(self, ctx:SETParser.UnopContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#parentheses.
    def visitParentheses(self, ctx:SETParser.ParenthesesContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#atom_set.
    def visitAtom_set(self, ctx:SETParser.Atom_setContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#symbol.
    def visitSymbol(self, ctx:SETParser.SymbolContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#comment.
    def visitComment(self, ctx:SETParser.CommentContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SETParser#anyvalue.
    def visitAnyvalue(self, ctx:SETParser.AnyvalueContext):
        return self.visitChildren(ctx)


del SETParser
from __future__ import annotations

from collections import defaultdict
from enum import Enum, auto
from lib.common import State, Character, Emptyset, Eps
from lib.nfa import NFA
from lib.dfa import DFA
from typing import Set, Tuple, Union, Optional


class BinOp(Enum):
    Concat = auto()
    Intersection = auto()
    Union = auto()
    Difference = auto()


class UnOp(Enum):
    Complement = auto()
    Iteration = auto()
    PositiveIteration = auto()
class SetEx(object):
    # A finite set of words given explicitly, e.g. {a, ab}.
    class AtomSet(object):
        def __init__(self, words: Set[Tuple[Character, ...]]) -> None:
            self.words = words

        def _extract_alphabet(self) -> Set[Character]:
            # use set() as the receiver so this also works for an empty word set
            return set().union(*(set(w) for w in self.words))

        def to_nfa(self, alphabet: Set[Character]) -> NFA:
            # Build one chain of states per word; all chains share the same
            # initial state, and the last state of each chain is connected to
            # the shared final state by an ε-transition.
            ini = State("init")
            fin = State("final")
            trans = defaultdict(set)
            states = {ini, fin}
            for w, word in enumerate(self.words):
                last_st = ini
                for c, char in enumerate(word):
                    st = State(f"{w}-{c}")
                    states.add(st)
                    trans[(last_st, char)].add(st)
                    last_st = st
                trans[(last_st, Eps())].add(fin)
            return NFA(states=states, transition=dict(trans),
                       characters=alphabet, init=ini, final={fin})

        def to_dfa(self, alphabet: Set[Character]) -> DFA:
            return self.to_nfa(alphabet).determinize().minimize()
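
    # For example, AtomSet({(a,), (a, b)}) (with a, b being Characters) yields,
    # up to the enumeration order of the word set:
    #     init -a-> 0-0 -ε-> final
    #     init -a-> 1-0 -b-> 1-1 -ε-> final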
    class Bin(object):
        def __init__(self, left: Node, right: Node, op: BinOp) -> None:
            self.left = left
            self.right = right
            self.op = op

        def _extract_alphabet(self) -> Set[Character]:
            return self.left._extract_alphabet() \
                 | self.right._extract_alphabet()

        def to_nfa(self, alphabet: Set[Character]) -> NFA:
            # TODO: union (and intersection) can also be done directly on NFAs
            if self.op == BinOp.Concat:
                # Concatenation: rename the states of both operands apart (via
                # the two defaultdict state maps) and connect every final state
                # of the left operand to the initial state of the right operand
                # with an ε-transition.
                next_state = 0

                def state_factory() -> State:
                    nonlocal next_state
                    next_state += 1
                    return State(str(next_state))

                lstatemap = defaultdict(state_factory)
                rstatemap = defaultdict(state_factory)
                left = self.left.to_nfa(alphabet)
                right = self.right.to_nfa(alphabet)
                new_trans = defaultdict(set)
                new_init = lstatemap[left.init]
                new_st = {lstatemap[f] for f in left.states} \
                       | {rstatemap[f] for f in right.states}
                new_final = {rstatemap[f] for f in right.final}
                for (sst, ch), dsts in left.transition.items():
                    # debug traces of the original and the renamed transition
                    print(f"δ({sst}, {ch.name}) = {dsts}")
                    print(f"δ'({lstatemap[sst]}, {ch.name}) = {set(lstatemap[s] for s in dsts)}")
                    new_trans[(lstatemap[sst], ch)] = \
                        {lstatemap[dst] for dst in dsts}
                for f in left.final:
                    new_trans[(lstatemap[f], Eps())].add(rstatemap[right.init])
                for (sst, ch), dsts in right.transition.items():
                    new_trans[(rstatemap[sst], ch)] = \
                        {rstatemap[dst] for dst in dsts}
                return NFA(init=new_init, characters=alphabet,
                           states=new_st, transition=new_trans,
                           final=new_final)
            else:
                # everything else goes through the DFA constructions below
                return self.to_dfa(alphabet).to_nfa()

        def to_dfa(self, alphabet: Set[Character]) -> DFA:
            left = self.left.to_dfa(alphabet)
            right = self.right.to_dfa(alphabet)
            if self.op == BinOp.Intersection:
                return DFA.intersection(left, right)
            if self.op == BinOp.Union:
                return DFA.union(left, right)
            if self.op == BinOp.Difference:
                return DFA.subtraction(left, right)
            if self.op == BinOp.Concat:
                return self.to_nfa(alphabet).determinize().minimize()
            assert False, f"invalid operation {self.op}"
    class Un(object):
        def __init__(self, node: Node, op: UnOp) -> None:
            self.node = node
            self.op = op

        def _extract_alphabet(self) -> Set[Character]:
            return self.node._extract_alphabet()

        def to_nfa(self, alphabet: Set[Character]) -> NFA:
            # WIP: the unary constructions are not implemented yet
            pass
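
        # A possible sketch for the Iteration case (not part of this commit),
        # reusing the ε-transition style of Bin's concatenation. Given
        # inner = self.node.to_nfa(alphabet), add a fresh state ini that is
        # both initial and accepting, an ε-edge from ini to inner.init, and an
        # ε-edge from every final state of inner back to ini:
        #
        #     ini = State("iter-init")
        #     trans = defaultdict(set)
        #     for key, dsts in inner.transition.items():
        #         trans[key] = set(dsts)        # copy, do not mutate inner
        #     trans[(ini, Eps())].add(inner.init)
        #     for f in inner.final:
        #         trans[(f, Eps())].add(ini)
        #     return NFA(states=inner.states | {ini}, transition=dict(trans),
        #                characters=alphabet, init=ini, final=inner.final | {ini})
        #
        # PositiveIteration is the same without accepting ini; Complement would
        # first need a DFA (determinize, then complement), which is why it is
        # left out of this NFA-level sketch.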
    def __init__(self, node: Node,
                 alphabet: Optional[Set[Character]] = None) -> None:
        self.node = node
        if alphabet is not None:
            self.alphabet = alphabet
        else:
            self.alphabet = self.node._extract_alphabet()

    def to_nfa(self) -> NFA:
        return self.node.to_nfa(self.alphabet)


Node = Union[SetEx.AtomSet, SetEx.Bin, SetEx.Un]
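
# Example use (not part of this commit), assuming Character can be built from a
# single-letter string such as Character("a"), which is an assumption about
# lib.common that this diff does not show:
#
#     a = SetEx.AtomSet({(Character("a"),)})
#     b = SetEx.AtomSet({(Character("b"),), (Character("b"), Character("b"))})
#     ex = SetEx(SetEx.Bin(a, b, BinOp.Union))   # alphabet inferred as {a, b}
#     nfa = ex.to_nfa()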