modules: fixed sketcher; fixed bug in output generators (9328f294) · Commits · Matej Lexa / nested

nested/cli/nester.py

+3 −3

Original line number	Diff line number	Diff line
		@@ -6,7 +6,7 @@ import glob
		from concurrent import futures
		from datetime import datetime
		from subprocess import CalledProcessError
		from typing import NoReturn, Optional, Any
		from typing import NoReturn, Optional, Any, List

		from Bio import SeqIO
		from Bio.Seq import Seq
		@@ -73,7 +73,7 @@ def main(input_fasta: str, sketch: bool, format: str, output_fasta_offset: int,

		def process_sequence(sequence: Seq, sketcher, sketch: bool, format: str,
		output_fasta_offset: int, output_folder: str, initial_threshold: int,
		threshold_multiplier: float, errors: int,
		threshold_multiplier: float, errors: List[int],
		dependency_resolver: DependencyResolver) -> NoReturn:
		sequence.id = sequence.id.replace('/', '--')
		seq_start_time = datetime.now()
		@@ -85,7 +85,7 @@ def process_sequence(sequence: Seq, sketcher, sketch: bool, format: str,
		output = [NesterOutputGenerator(output_folder, output_fasta_offset)]

		if sketch:
		output.append(NesterSketcherOutputGenerator(output_folder))
		output.append(NesterSketcherOutputGenerator(output_folder, output_fasta_offset))

		type = TE
		module_nester = Module("nester", nester, type, output)

+2 −4

Original line number	Diff line number	Diff line
		@@ -40,7 +40,6 @@ class TeFinder(DiscoveryToolInterface):
		tmp_file,
		'fasta')

		print("STARTING LTR FINDER")
		# call LTR finder and feed stdin to it
		process = subprocess.Popen(
		[config['ltr']['path']] + args_dict_to_list(config['ltr']['args']) + [
		@@ -51,8 +50,7 @@ class TeFinder(DiscoveryToolInterface):
		# os.remove('/tmp/nested/ltr/{}.fa'.format(seqid))

		stdout, stderr = process.communicate()
		print("FINISHED LTR FINDER")
		print(stdout)

		parsed_output = self._parse_raw_output(stdout)
		for entry in parsed_output:
		params = {
		@@ -78,7 +76,7 @@ class TeFinder(DiscoveryToolInterface):
		dict: parsed raw output
		"""
		entries = raw_output.decode('utf-8').split('>Sequence: ')
		print(raw_output.decode('utf-8'))

		for e in entries:
		entry_name = e.split(' ')[0]
		if entry_name == 'Program':

+0 −4

Original line number	Diff line number	Diff line
		@@ -55,10 +55,6 @@ class Nester(ToolInterface):
		return []
		candidates = [best_candidate]

		print(f"iteration {self._iteration}")
		for c in candidates:
		print(c)

		# sort by score (from highest)
		candidates.sort(key=lambda x: x.score, reverse=True)
		# remove intersections

+3 −7

Original line number	Diff line number	Diff line
		@@ -44,12 +44,10 @@ class TandemRepeatFinder(ToolInterface):

		repeats = self._filter_candidates(self._get_candidates(self.seqId))
		repeats.sort(key=lambda repeat: repeat.location[0], reverse=True)
		print("CHOSEN")
		for r in repeats:
		print(r)

		self.output_object = TrfOutputObject(self.seqId, sequence, repeats)

		#self.cleanup()
		self.cleanup()

		return repeats

		@@ -62,9 +60,7 @@ class TandemRepeatFinder(ToolInterface):
		candidates.append(TandemRepeat([int(split[0]), int(split[1])], int(split[2]), float(split[3]),
		int(split[5]), int(split[6]), int(split[7]), float(split[12]),
		split[-1]))
		print("CANDIDATES")
		for c in candidates:
		print(c)

		return candidates

		def _filter_candidates(self, candidates: List[TandemRepeat]) -> List[TandemRepeat]:

+6 −2

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		import os
		from typing import Type, NoReturn
		from typing import NoReturn

		from nested.output.output_objects.base_output_object import BaseOutputObject

		@@ -11,5 +11,9 @@ class BaseOutputGenerator:
		def __init__(self, directory: str):
		self.directory = os.path.join(directory, self.default_directory)

		def generate_output(self, output_object: Type[BaseOutputObject]) -> NoReturn:
		def generate_output(self, output_object: BaseOutputObject) -> NoReturn:
		pass

		def create_data_directory(self, sequence_id: str) -> NoReturn:
		if not os.path.exists('{}/{}'.format(self.directory, sequence_id)):
		os.makedirs('{}/{}'.format(self.directory, sequence_id))