CFG: Merge the random and nonrandom generator for now (9d4203ee) · Commits · fja / eval

grammars_cfg.py

+80 −82

Original line number	Original line	Diff line number	Diff line
	@@ -520,9 +520,6 @@ class CFG:
	left_gen = WordGenerator(left)		left_gen = WordGenerator(left)
	right_gen = WordGenerator(right)		right_gen = WordGenerator(right)

	left_rnd = CFGRandom(left)
	right_rnd = CFGRandom(right)

	left_ce: Optional[CFG.Word] = None		left_ce: Optional[CFG.Word] = None
	right_ce: Optional[CFG.Word] = None		right_ce: Optional[CFG.Word] = None

	@@ -544,7 +541,7 @@ class CFG:
	if right_ce is None:		if right_ce is None:
	right_ce = may_pop(right_words - left_words)		right_ce = may_pop(right_words - left_words)

	def try_word(maybe_ce: Optional[CFG.Word], rng: CFGRandom,		def try_word(maybe_ce: Optional[CFG.Word], rng: WordGenerator,
	other: CachedCYK, length: int) -> Optional[CFG.Word]:		other: CachedCYK, length: int) -> Optional[CFG.Word]:
	if maybe_ce is not None:		if maybe_ce is not None:
	return maybe_ce		return maybe_ce
	@@ -573,8 +570,8 @@ class CFG:
	return sum(len(x) for k, x in words.items()		return sum(len(x) for k, x in words.items()
	if k <= last_min_size)		if k <= last_min_size)

	def nxderivations(words: Dict[int, Set[CFG.Word]], rng: CFGRandom,		def nxderivations(words: Dict[int, Set[CFG.Word]],
	new_size: int) -> int:		rng: WordGenerator, new_size: int) -> int:
	return total(words) + \		return total(words) + \
	sum(rng.derivations_count(l) for l in		sum(rng.derivations_count(l) for l in
	range(last_min_size + 1, new_size + 1))		range(last_min_size + 1, new_size + 1))
	@@ -596,8 +593,8 @@ class CFG:
	fill_ces(left_words.get(sz, set()),		fill_ces(left_words.get(sz, set()),
	right_words.get(sz, set()))		right_words.get(sz, set()))
	last_checked_len = sz		last_checked_len = sz
	nxl = nxderivations(left_words, left_rnd, min_size)		nxl = nxderivations(left_words, left_gen, min_size)
	nxr = nxderivations(right_words, right_rnd, min_size)		nxr = nxderivations(right_words, right_gen, min_size)

	last_min_size = min_size		last_min_size = min_size

	@@ -615,8 +612,8 @@ class CFG:

	for length in range(last_checked_len + 1, max_cmp_len + 1):		for length in range(last_checked_len + 1, max_cmp_len + 1):
	for _ in range(random_samples):		for _ in range(random_samples):
	left_ce = try_word(left_ce, left_rnd, right_cyk, length)		left_ce = try_word(left_ce, left_gen, right_cyk, length)
	right_ce = try_word(right_ce, right_rnd, left_cyk, length)		right_ce = try_word(right_ce, right_gen, left_cyk, length)
	if left_ce is not None and right_ce is not None:		if left_ce is not None and right_ce is not None:
	return mkres()		return mkres()

	@@ -626,25 +623,90 @@ class CFG:
	return self.to_string()		return self.to_string()


	class CFGRandom:		# TODO: split into word counter, generator and random generator
			class WordGenerator:

	CountMap = Dict[Nonterminal, int]		CountMap = Dict[Nonterminal, int]
	ProdCountMap = Dict[CFG.Production, int]		ProdCountMap = Dict[CFG.Production, int]

	def __init__(self, cfg):		def __init__(self, cfg: CFG):
	self.cfg = cfg.proper()		self.cfg = cfg.proper()
			self._seen: Set[CFG.Symbols] = set()
			self._stack: List[CFG.Symbols] = [(self.cfg.init,)]
			self._cur_lenght = 1
			self._next_length: Optional[int] = 2
			self.last: Optional[CFG.Word] = None \
			if Eps() not in self.cfg.rules.get(self.cfg.init, []) \
			else ()

	# init and productions of length 0		# init and productions of length 0
	self.counts: List[CFGRandom.CountMap] \		self.counts: List[WordGenerator.CountMap] \
	= [{n: 0 for n in self.cfg.nonterminals},		= [{n: 0 for n in self.cfg.nonterminals},
	{n: 0 for n in self.cfg.nonterminals}]		{n: 0 for n in self.cfg.nonterminals}]
	if Eps() in self.cfg.rules.get(self.cfg.init, []):		if Eps() in self.cfg.rules.get(self.cfg.init, []):
	self.counts[0][self.cfg.init] = 1		self.counts[0][self.cfg.init] = 1
	self.prod_counts: List[CFGRandom.ProdCountMap] = [dict(), dict()]		self.prod_counts: List[WordGenerator.ProdCountMap] = [dict(), dict()]

	for src, prod in self.cfg.productions():		for src, prod in self.cfg.productions():
	if len(prod) == 1: # No A -> B (simple) rules		if len(prod) == 1: # No A -> B (simple) rules
	self.counts[1][src] += 1		self.counts[1][src] += 1

			def get_and_shift(self) -> Optional[CFG.Word]:
			"""
			Generates all words of the grammar in ascending order, or random words
			of given length.
			No words are repeated.
			"""
			word = self.last if self.last is not None else self._next()
			self.last = None
			return word

			def get(self) -> Optional[CFG.Word]:
			if self.last is None:
			return self._next()
			return self.last

			def _next(self) -> Optional[CFG.Word]:
			# Iterative-Deepening pseudo DFS (as a generator)
			while self._stack or self._next_length is not None:
			while self._stack:
			sentence = self._stack.pop()

			if CFG.all_terminal(sentence):
			if len(sentence) == self._cur_lenght:
			self.last = typing.cast(CFG.Word, sentence)
			return self.last
			continue

			for i in range(len(sentence)):
			if isinstance(sentence[i], Nonterminal):
			for p in self.cfg.rules.get(
			typing.cast(Nonterminal, sentence[i]), []):
			if isinstance(p, Eps):
			continue
			new_sentence = sentence[:i] + p + sentence[i + 1:]
			nlen = len(new_sentence)

			# prune seen and too long
			if new_sentence not in self._seen \
			and nlen <= self._cur_lenght:
			self._seen.add(new_sentence)
			self._stack.append(new_sentence) # recursion
			elif nlen > self._cur_lenght \
			and (self._next_length is None
			or nlen < self._next_length):
			self._next_length = nlen
			break # it suffices to perform left derivations

			if self._next_length is not None \
			and self._next_length > self._cur_lenght:
			self._cur_lenght = self._next_length
			self._next_length = None
			self._stack = [(self.cfg.init,)]
			self._seen = set()

			return None

	def derivations_count(self, length: int,		def derivations_count(self, length: int,
	nterm: Optional[Nonterminal] = None) -> int:		nterm: Optional[Nonterminal] = None) -> int:
	self._materialize(length)		self._materialize(length)
	@@ -706,6 +768,10 @@ class CFGRandom:
	return count		return count

	def rnd_word(self, length: int) -> Optional[CFG.Word]:		def rnd_word(self, length: int) -> Optional[CFG.Word]:
			"""
			Returns a random word uniformly selected from all the derivations of
			given length or None if such a word does not exist.
			"""
	if self.derivations_count(length) == 0:		if self.derivations_count(length) == 0:
	return None		return None

	@@ -779,71 +845,3 @@ class CachedCYK:

	self.cache[word] = out		self.cache[word] = out
	return out		return out


	class WordGenerator:
	"""
	Generates all words of the grammar in ascending order.
	No words are repeated.
	"""

	def __init__(self, cfg: CFG):
	self.cfg = cfg.proper()
	self._seen: Set[CFG.Symbols] = set()
	self._stack: List[CFG.Symbols] = [(self.cfg.init,)]
	self._cur_lenght = 1
	self._next_length: Optional[int] = 2
	self.last: Optional[CFG.Word] = None \
	if Eps() not in self.cfg.rules.get(self.cfg.init, []) \
	else ()

	def get_and_shift(self) -> Optional[CFG.Word]:
	word = self.last if self.last is not None else self._next()
	self.last = None
	return word

	def get(self) -> Optional[CFG.Word]:
	if self.last is None:
	return self._next()
	return self.last

	def _next(self) -> Optional[CFG.Word]:
	# Iterative-Deepening pseudo DFS (as a generator)
	while self._stack or self._next_length is not None:
	while self._stack:
	sentence = self._stack.pop()

	if CFG.all_terminal(sentence):
	if len(sentence) == self._cur_lenght:
	self.last = typing.cast(CFG.Word, sentence)
	return self.last
	continue

	for i in range(len(sentence)):
	if isinstance(sentence[i], Nonterminal):
	for p in self.cfg.rules.get(
	typing.cast(Nonterminal, sentence[i]), []):
	if isinstance(p, Eps):
	continue
	new_sentence = sentence[:i] + p + sentence[i + 1:]
	nlen = len(new_sentence)

	# prune seen and too long
	if new_sentence not in self._seen \
	and nlen <= self._cur_lenght:
	self._seen.add(new_sentence)
	self._stack.append(new_sentence) # recursion
	elif nlen > self._cur_lenght \
	and (self._next_length is None
	or nlen < self._next_length):
	self._next_length = nlen
	break # it suffices to perform left derivations

	if self._next_length is not None \
	and self._next_length > self._cur_lenght:
	self._cur_lenght = self._next_length
	self._next_length = None
	self._stack = [(self.cfg.init,)]
	self._seen = set()

	return None