Commit 757ac88b authored by Kateřina Sloupová
Browse files

Added support for empty sets in regular expressions

parent 8d08ef21
Pipeline #56838 failed in 24 seconds
......@@ -295,6 +295,7 @@ def main():
regex_test("(b)(a)(c)")
regex_test("b+a+c+a+c")
regex_test("∅")
regex_test("(∅ + ∅^*).(∅^* + ∅∅^+)")
main()
\ No newline at end of file
......@@ -2,7 +2,7 @@ from typing import List, Dict, Tuple, Optional, Union, Set, TypeVar
from common import State, Character, Eps, Terminal, Nonterminal, Emptyset
from reg_automata import DFA, NFA, EFA
from reg_grammars import GRA
from reg_regex import REG, AST, Bin, BinOp, IterOp, CharNode
from reg_regex import REG, AST, Bin, Iter, BinOp, IterOp, CharNode
import antlr4 # type: ignore
from DFA_grammarLexer import DFA_grammarLexer
from DFA_grammarParser import DFA_grammarParser
......@@ -61,7 +61,6 @@ class Parser:
return "{" + out[2:len(out) - 1] + "}"
def dfa_to_str(self, dfa: DFA, full : bool = False) -> str:
transition = ""
for key, state_2 in dfa.transition.items():
state_1, character = key
......@@ -74,13 +73,10 @@ class Parser:
if full:
return "DFA = (" + self.names_to_str(dfa.states) + ", " + self.names_to_str(dfa.characters) + \
", d, " + init + ", " + final + ")\n" + transition
else:
return init + " " + transition + final
def nfa_to_str(self, nfa: Union[NFA, EFA], full : bool = False) -> str:
transition = ""
for key, set_states in nfa.transition.items():
state, character = key
......@@ -94,23 +90,26 @@ class Parser:
if full:
return "DFA = (" + self.names_to_str(nfa.states) + ", " + self.names_to_str(nfa.characters) + \
", d, " + init + ", " + final + ")\n" + transition
else:
return init + " " + transition + " " + final
def reg_to_str(self, reg: REG) -> str:
return reg.expression.astprint()
def str_to_dfa(self, string: str) -> DFA:
def common_parse(self, string: str, given_lexer, given_parser, given_listener):
chars = antlr4.InputStream(string)
lexer = DFA_grammarLexer(chars)
lexer = given_lexer(chars)
tokens = antlr4.CommonTokenStream(lexer)
parser = DFA_grammarParser(tokens)
parser = given_parser(tokens)
tree = parser.start()
listener = DFAListener()
listener = given_listener()
walker = antlr4.ParseTreeWalker()
walker.walk(listener, tree)
return listener
def str_to_dfa(self, string: str) -> DFA:
listener = self.common_parse(string, DFA_grammarLexer, DFA_grammarParser, DFAListener)
if listener.init is None:
listener.init = listener.first_state
......@@ -121,15 +120,7 @@ class Parser:
return dfa
def str_to_nfa(self, string: str) -> NFA:
chars = antlr4.InputStream(string)
lexer = NFA_grammarLexer(chars)
tokens = antlr4.CommonTokenStream(lexer)
parser = NFA_grammarParser(tokens)
tree = parser.start()
listener = NFAListener()
walker = antlr4.ParseTreeWalker()
walker.walk(listener, tree)
listener = self.common_parse(string, NFA_grammarLexer, NFA_grammarParser, NFAListener)
if listener.init is None:
listener.init = listener.first_state
......@@ -280,7 +271,7 @@ class REGVisitor(REG_grammarVisitor):
elif ctx.parentheses():
expression = self.visitParentheses(ctx.parentheses())
return BinOp(expression, Bin.Concat, IterOp(expression)) if positive else IterOp(expression)
return IterOp(expression, Iter.Positive) if positive else IterOp(expression, Iter.Iteration)
def implicit_concat(self, to_concat):
ast = to_concat[0]
......
from __future__ import annotations
from typing import Set, FrozenSet, List, Dict, Union, Tuple, Deque, Optional, TypeVar, Any
from typing import Set, Dict, Union, Tuple
from ast import AST
from reg_automata import EFA
from common import State, Character, Eps, Emptyset
from copy import deepcopy
from enum import Enum
import enum
......@@ -13,8 +12,9 @@ class Bin(Enum):
Concat = enum.auto()
class Iter():
pass
class Iter(Enum):
Iteration = enum.auto()
Positive = enum.auto()
class AST(object):
......@@ -25,25 +25,28 @@ class AST(object):
class BinOp(AST):
def __init__(self, left: AST, op: Bin, right: AST):
self.left = left
self.op = op
self.right = right
self.token = op
def astprint(self):
if self.op == Bin.Union:
if self.token == Bin.Union:
return "(" + self.left.astprint() + "+" + self.right.astprint() + ")"
else:
return self.left.astprint() + "." + self.right.astprint()
class IterOp(AST):
def __init__(self, node: AST):
def __init__(self, node: AST, op: Iter):
self.node = node
self.op = Iter
self.token = Iter
self.token = op
def astprint(self):
return self.node.astprint() + "*"
# If we want to print positive iterations (a^+) with asterisk (a.a^*), just uncomment this:
# positive = self.node.astprint() + "." if self.token == Iter.Positive else ""
# return positive + self.node.astprint() + "^*"
sign = "^*" if self.token == Iter.Iteration else "^+"
return self.node.astprint() + sign
class CharNode(AST):
......@@ -79,20 +82,26 @@ class REG:
changed = True
elif ast.token == Bin.Concat:
new_state = State(str(new_name))
states.add(new_state)
new_name += 1
self.transition_add(transition, state, ast.left, new_state)
self.transition_add(transition, new_state, ast.right, dest_state)
if ast.left.token != Emptyset() and ast.right.token != Emptyset():
new_state = State(str(new_name))
states.add(new_state)
new_name += 1
self.transition_add(transition, state, ast.left, new_state)
self.transition_add(transition, new_state, ast.right, dest_state)
changed = True
elif ast.token == Iter:
new_state = State(str(new_name))
states.add(new_state)
new_name += 1
self.transition_add(transition, state, CharNode(Eps()), new_state)
self.transition_add(transition, new_state, ast.node, new_state)
self.transition_add(transition, new_state, CharNode(Eps()), dest_state)
elif type(ast.token) == Iter:
if ast.node == Emptyset():
if ast.token == Iter.Iteration:
self.transition_add(transition, state, CharNode(Eps()), dest_state)
else:
new_state = State(str(new_name))
states.add(new_state)
new_name += 1
incoming = ast.node if ast.token == Iter.Positive else CharNode(Eps())
self.transition_add(transition, state, incoming, new_state)
self.transition_add(transition, new_state, ast.node, new_state)
self.transition_add(transition, new_state, CharNode(Eps()), dest_state)
changed = True
if changed:
......@@ -101,11 +110,15 @@ class REG:
new_transition: Dict[Tuple[State, Character], State] = dict()
for (state, char) in transition:
new_transition[state, char.token] = transition[state, char]
if type(char.token) != Emptyset:
new_transition[state, char.token] = transition[state, char]
return EFA(states, self.characters, new_transition, init, {final})
def transition_add(self, transition, state, ast, dest_state):
if ast.token == Emptyset():
return
if (state, ast) in transition:
transition[state, ast].add(dest_state)
else:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment