merging trfs with overlap (3fedaecf) · Commits · Matej Lexa / nested

nested/core/tools/tandem_repeat_finder.py

+17 −0

Original line number	Diff line number	Diff line
		@@ -67,6 +67,23 @@ class TandemRepeatFinder(ToolInterface):
		result = []
		candidates.sort(key=lambda repeat: repeat.location[0])

		for candidate in candidates:
		if not result:
		result.append(candidate)
		else:
		last = result[-1]
		if intervals.intersect(last.location, candidate.location):
		right = max(last.location[1], candidate.location[1])
		last.location[1] = right
		else:
		result.append(candidate)

		return result

		def _filter_candidates_assign_overlap_strategy(self, candidates: List[TandemRepeat]) -> List[TandemRepeat]:
		result = []
		candidates.sort(key=lambda repeat: repeat.location[0])

		for candidate in candidates:
		if not result:
		result.append(candidate)

+6 −6

Original line number	Diff line number	Diff line
		@@ -13,8 +13,7 @@ class TrfOutputGenerator(OutputGeneratorInterface):
		def generate_output(self, output_object: TrfOutputObject) -> NoReturn:
		self.create_data_directory(output_object.sequence_id)

		with open(f"{self.directory}/{output_object.sequence_id}/{output_object.sequence_id}_trf.gff", "w+")\
		as gff:
		with open(f"{self.directory}/{output_object.sequence_id}/{output_object.sequence_id}_trf.gff", "w+") as gff:
		i = 0
		gff.write("##gff-version 3\n")
		for repeat in output_object.elements:
		@@ -23,9 +22,10 @@ class TrfOutputGenerator(OutputGeneratorInterface):
		+ str(repeat.location[0]) + "\t"
		+ str(repeat.location[1]) + "\t"
		+ ".\t.\t.\t"
		+ "ID=tandem_repeat_" + str(i)
		+ ";monomer_count=" + str(repeat.copies)
		+ ";monomer=" + repeat.monomer
		+ ";color=" + config["igv_colors"]["trf"][repeat.type]
		# + "ID=tandem_repeat_" + str(i)
		# + ";monomer_count=" + str(repeat.copies)
		# + ";monomer=" + repeat.monomer
		# + ";color=" + config["igv_colors"]["trf"][repeat.type]
		+ "."
		+ "\n")
		i += 1