Loading nested/cli/nester.py +3 −3 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ import glob from concurrent import futures from datetime import datetime from subprocess import CalledProcessError from typing import NoReturn, Optional, Any from typing import NoReturn, Optional, Any, List from Bio import SeqIO from Bio.Seq import Seq Loading Loading @@ -73,7 +73,7 @@ def main(input_fasta: str, sketch: bool, format: str, output_fasta_offset: int, def process_sequence(sequence: Seq, sketcher, sketch: bool, format: str, output_fasta_offset: int, output_folder: str, initial_threshold: int, threshold_multiplier: float, errors: int, threshold_multiplier: float, errors: List[int], dependency_resolver: DependencyResolver) -> NoReturn: sequence.id = sequence.id.replace('/', '--') seq_start_time = datetime.now() Loading @@ -85,7 +85,7 @@ def process_sequence(sequence: Seq, sketcher, sketch: bool, format: str, output = [NesterOutputGenerator(output_folder, output_fasta_offset)] if sketch: output.append(NesterSketcherOutputGenerator(output_folder)) output.append(NesterSketcherOutputGenerator(output_folder, output_fasta_offset)) type = TE module_nester = Module("nester", nester, type, output) Loading nested/core/tools/ltr/te_finder.py +2 −4 Original line number Diff line number Diff line Loading @@ -40,7 +40,6 @@ class TeFinder(DiscoveryToolInterface): tmp_file, 'fasta') print("STARTING LTR FINDER") # call LTR finder and feed stdin to it process = subprocess.Popen( [config['ltr']['path']] + args_dict_to_list(config['ltr']['args']) + [ Loading @@ -51,8 +50,7 @@ class TeFinder(DiscoveryToolInterface): # os.remove('/tmp/nested/ltr/{}.fa'.format(seqid)) stdout, stderr = process.communicate() print("FINISHED LTR FINDER") print(stdout) parsed_output = self._parse_raw_output(stdout) for entry in parsed_output: params = { Loading @@ -78,7 +76,7 @@ class TeFinder(DiscoveryToolInterface): dict: parsed raw output """ entries = raw_output.decode('utf-8').split('>Sequence: ') print(raw_output.decode('utf-8')) for e in entries: entry_name = e.split(' ')[0] if entry_name == 'Program': Loading nested/core/tools/nester.py +0 −4 Original line number Diff line number Diff line Loading @@ -55,10 +55,6 @@ class Nester(ToolInterface): return [] candidates = [best_candidate] print(f"iteration {self._iteration}") for c in candidates: print(c) # sort by score (from highest) candidates.sort(key=lambda x: x.score, reverse=True) # remove intersections Loading nested/core/tools/tandem_repeat_finder.py +3 −7 Original line number Diff line number Diff line Loading @@ -44,12 +44,10 @@ class TandemRepeatFinder(ToolInterface): repeats = self._filter_candidates(self._get_candidates(self.seqId)) repeats.sort(key=lambda repeat: repeat.location[0], reverse=True) print("CHOSEN") for r in repeats: print(r) self.output_object = TrfOutputObject(self.seqId, sequence, repeats) #self.cleanup() self.cleanup() return repeats Loading @@ -62,9 +60,7 @@ class TandemRepeatFinder(ToolInterface): candidates.append(TandemRepeat([int(split[0]), int(split[1])], int(split[2]), float(split[3]), int(split[5]), int(split[6]), int(split[7]), float(split[12]), split[-1])) print("CANDIDATES") for c in candidates: print(c) return candidates def _filter_candidates(self, candidates: List[TandemRepeat]) -> List[TandemRepeat]: Loading nested/output/output_generators/base_output_generator.py +6 −2 Original line number Diff line number Diff line #!/usr/bin/env python3 import os from typing import Type, NoReturn from typing import NoReturn from nested.output.output_objects.base_output_object import BaseOutputObject Loading @@ -11,5 +11,9 @@ class BaseOutputGenerator: def __init__(self, directory: str): self.directory = os.path.join(directory, self.default_directory) def generate_output(self, output_object: Type[BaseOutputObject]) -> NoReturn: def generate_output(self, output_object: BaseOutputObject) -> NoReturn: pass def create_data_directory(self, sequence_id: str) -> NoReturn: if not os.path.exists('{}/{}'.format(self.directory, sequence_id)): os.makedirs('{}/{}'.format(self.directory, sequence_id)) Loading
nested/cli/nester.py +3 −3 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ import glob from concurrent import futures from datetime import datetime from subprocess import CalledProcessError from typing import NoReturn, Optional, Any from typing import NoReturn, Optional, Any, List from Bio import SeqIO from Bio.Seq import Seq Loading Loading @@ -73,7 +73,7 @@ def main(input_fasta: str, sketch: bool, format: str, output_fasta_offset: int, def process_sequence(sequence: Seq, sketcher, sketch: bool, format: str, output_fasta_offset: int, output_folder: str, initial_threshold: int, threshold_multiplier: float, errors: int, threshold_multiplier: float, errors: List[int], dependency_resolver: DependencyResolver) -> NoReturn: sequence.id = sequence.id.replace('/', '--') seq_start_time = datetime.now() Loading @@ -85,7 +85,7 @@ def process_sequence(sequence: Seq, sketcher, sketch: bool, format: str, output = [NesterOutputGenerator(output_folder, output_fasta_offset)] if sketch: output.append(NesterSketcherOutputGenerator(output_folder)) output.append(NesterSketcherOutputGenerator(output_folder, output_fasta_offset)) type = TE module_nester = Module("nester", nester, type, output) Loading
nested/core/tools/ltr/te_finder.py +2 −4 Original line number Diff line number Diff line Loading @@ -40,7 +40,6 @@ class TeFinder(DiscoveryToolInterface): tmp_file, 'fasta') print("STARTING LTR FINDER") # call LTR finder and feed stdin to it process = subprocess.Popen( [config['ltr']['path']] + args_dict_to_list(config['ltr']['args']) + [ Loading @@ -51,8 +50,7 @@ class TeFinder(DiscoveryToolInterface): # os.remove('/tmp/nested/ltr/{}.fa'.format(seqid)) stdout, stderr = process.communicate() print("FINISHED LTR FINDER") print(stdout) parsed_output = self._parse_raw_output(stdout) for entry in parsed_output: params = { Loading @@ -78,7 +76,7 @@ class TeFinder(DiscoveryToolInterface): dict: parsed raw output """ entries = raw_output.decode('utf-8').split('>Sequence: ') print(raw_output.decode('utf-8')) for e in entries: entry_name = e.split(' ')[0] if entry_name == 'Program': Loading
nested/core/tools/nester.py +0 −4 Original line number Diff line number Diff line Loading @@ -55,10 +55,6 @@ class Nester(ToolInterface): return [] candidates = [best_candidate] print(f"iteration {self._iteration}") for c in candidates: print(c) # sort by score (from highest) candidates.sort(key=lambda x: x.score, reverse=True) # remove intersections Loading
nested/core/tools/tandem_repeat_finder.py +3 −7 Original line number Diff line number Diff line Loading @@ -44,12 +44,10 @@ class TandemRepeatFinder(ToolInterface): repeats = self._filter_candidates(self._get_candidates(self.seqId)) repeats.sort(key=lambda repeat: repeat.location[0], reverse=True) print("CHOSEN") for r in repeats: print(r) self.output_object = TrfOutputObject(self.seqId, sequence, repeats) #self.cleanup() self.cleanup() return repeats Loading @@ -62,9 +60,7 @@ class TandemRepeatFinder(ToolInterface): candidates.append(TandemRepeat([int(split[0]), int(split[1])], int(split[2]), float(split[3]), int(split[5]), int(split[6]), int(split[7]), float(split[12]), split[-1])) print("CANDIDATES") for c in candidates: print(c) return candidates def _filter_candidates(self, candidates: List[TandemRepeat]) -> List[TandemRepeat]: Loading
nested/output/output_generators/base_output_generator.py +6 −2 Original line number Diff line number Diff line #!/usr/bin/env python3 import os from typing import Type, NoReturn from typing import NoReturn from nested.output.output_objects.base_output_object import BaseOutputObject Loading @@ -11,5 +11,9 @@ class BaseOutputGenerator: def __init__(self, directory: str): self.directory = os.path.join(directory, self.default_directory) def generate_output(self, output_object: Type[BaseOutputObject]) -> NoReturn: def generate_output(self, output_object: BaseOutputObject) -> NoReturn: pass def create_data_directory(self, sequence_id: str) -> NoReturn: if not os.path.exists('{}/{}'.format(self.directory, sequence_id)): os.makedirs('{}/{}'.format(self.directory, sequence_id))