Loading .gitignore +5 −0 Original line number Original line Diff line number Diff line Loading @@ -7,3 +7,8 @@ tmp images images results results config.yml config.yml LTR_Finder/ build/ nested.egg-info/ dist/ .vscode/ config_template.yml +11 −1 Original line number Original line Diff line number Diff line Loading @@ -15,6 +15,16 @@ ltr: a: *prosite_path a: *prosite_path s: *tRNAdb_path s: *tRNAdb_path ltr_harvest: path: 'gt' command: 'ltrharvest' args: similar: 40 maxlenltr: 3000 maxdistltr: 20000 mintsd: 4 maxtsd: 10 gt: gt: path: 'gt' path: 'gt' command: 'sketch' command: 'sketch' Loading nested/cli/nester.py +18 −9 Original line number Original line Diff line number Diff line Loading @@ -12,7 +12,8 @@ from Bio import SeqIO from nested.core.nester import Nester from nested.core.nester import Nester from nested.output.sketcher import Sketcher from nested.output.sketcher import Sketcher from nested.core.sequence_thread import SequenceThread from nested.core.discovery_tool import DiscoveryTool from nested.utils.dependency_resolver import DependencyResolver @click.command() @click.command() Loading @@ -24,28 +25,36 @@ from nested.core.sequence_thread import SequenceThread @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') @click.option('--threads', '-n', type=int, default=1, help='Number of threads') @click.option('--threads', '-n', type=int, default=1, help='Number of threads') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads): @click.option('--discovery_tool', '-dt', default=DiscoveryTool.LTR_finder.name, type=click.Choice(list(map(lambda val: val, 
DiscoveryTool.__members__))), help='Determines which tool is used for retrotransoson discovery. Default: LTR_finder') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads, discovery_tool): check_permissions(output_folder, os.W_OK | os.X_OK) check_permissions(output_folder, os.W_OK | os.X_OK) number_of_errors = [0] number_of_errors = [0] start_time = datetime.now() start_time = datetime.now() sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sketcher = Sketcher() sketcher = Sketcher() futuress = [] futuress = [] dependencyResolver = DependencyResolver(discovery_tool) with futures.ThreadPoolExecutor(threads) as executor: with futures.ThreadPoolExecutor(threads) as executor: for sequence in sequences: for sequence in sequences: futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors)) futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors, dependencyResolver)) futures.wait(futuress) futures.wait(futuress) end_time = datetime.now() end_time = datetime.now() print('Total time: {}'.format(end_time - start_time)) print('Total time: {}'.format(end_time - start_time)) print('Number of errors: {}'.format(number_of_errors[0])) print('Number of errors: {}'.format(number_of_errors[0])) def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, errors): def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, errors, dependency_resolver): sequence.id = sequence.id.replace('/', '--') sequence.id = sequence.id.replace('/', '--') seq_start_time = 
datetime.now() seq_start_time = datetime.now() strlen = 15 strlen = 15 print("Processing {}".format(sequence.id)) print("Processing {}".format(sequence.id)) try: try: nester = Nester(sequence, initial_threshold, threshold_multiplier) nester = Nester(sequence, initial_threshold, threshold_multiplier, dependency_resolver) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) if sketch: if sketch: Loading @@ -57,14 +66,14 @@ def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, ou except KeyboardInterrupt: except KeyboardInterrupt: cleanup() cleanup() raise raise except CalledProcessError: except CalledProcessError as ex: cleanup() cleanup() errors[0] += 1 errors[0] += 1 print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen])) print('Processing {}: SUBPROCESS ERROR: {}'.format(sequence.id[:strlen], ex)) except Exception: except Exception as ex: cleanup() cleanup() errors[0] += 1 errors[0] += 1 print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0]) print('Processing {}: UNEXPECTED ERROR: {}'.format(sequence.id[:strlen], ex)) def check_permissions(path, permissions): def check_permissions(path, permissions): path = os.getcwd() if len(path) == 0 else path path = os.getcwd() if len(path) == 0 else path Loading nested/core/discovery_tool.py 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line #!/usr/bin/env python3 from enum import Enum class DiscoveryTool(Enum): LTR_finder = 1 LTRharvest = 2 #add aliases with same value as the tool above finder = 1 harvest = 2 nested/core/gene.py +5 −3 Original line number Original line Diff line number Diff line 
#!/usr/bin/env python3 #!/usr/bin/env python3 from nested.core.te import run_ltr_finder #from nested.core.te import run_ltr_finder # from nested.core.domain import run_blastx # from nested.core.domain import run_blastx # from nested.core.domain_dante import run_last # from nested.core.domain_dante import run_last from nested.core.domain_dante import run_blastx from nested.core.domain_dante import run_blastx from nested.scoring.graph import Graph from nested.scoring.graph import Graph from nested.utils import intervals from nested.utils import intervals from nested.utils.dependency_resolver import DependencyResolver class Gene(object): class Gene(object): Loading @@ -20,10 +21,11 @@ class Gene(object): """ """ def __init__(self, seqid=None, sequence=None): def __init__(self, dependency_resolver, seqid=None, sequence=None): self.seqid = seqid self.seqid = seqid self.sequence = sequence self.sequence = sequence self.te_list = run_ltr_finder(seqid, sequence) self.te_list = dependency_resolver.discovery_tool.run(seqid, sequence) #self.te_list = run_ltr_finder(seqid, sequence) self.domain_list = run_blastx(sequence) self.domain_list = run_blastx(sequence) # self.domain_list = run_last(sequence) # self.domain_list = run_last(sequence) self.scores = [] self.scores = [] Loading Loading
.gitignore +5 −0 Original line number Original line Diff line number Diff line Loading @@ -7,3 +7,8 @@ tmp images images results results config.yml config.yml LTR_Finder/ build/ nested.egg-info/ dist/ .vscode/
config_template.yml +11 −1 Original line number Original line Diff line number Diff line Loading @@ -15,6 +15,16 @@ ltr: a: *prosite_path a: *prosite_path s: *tRNAdb_path s: *tRNAdb_path ltr_harvest: path: 'gt' command: 'ltrharvest' args: similar: 40 maxlenltr: 3000 maxdistltr: 20000 mintsd: 4 maxtsd: 10 gt: gt: path: 'gt' path: 'gt' command: 'sketch' command: 'sketch' Loading
nested/cli/nester.py +18 −9 Original line number Original line Diff line number Diff line Loading @@ -12,7 +12,8 @@ from Bio import SeqIO from nested.core.nester import Nester from nested.core.nester import Nester from nested.output.sketcher import Sketcher from nested.output.sketcher import Sketcher from nested.core.sequence_thread import SequenceThread from nested.core.discovery_tool import DiscoveryTool from nested.utils.dependency_resolver import DependencyResolver @click.command() @click.command() Loading @@ -24,28 +25,36 @@ from nested.core.sequence_thread import SequenceThread @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') @click.option('--threads', '-n', type=int, default=1, help='Number of threads') @click.option('--threads', '-n', type=int, default=1, help='Number of threads') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads): @click.option('--discovery_tool', '-dt', default=DiscoveryTool.LTR_finder.name, type=click.Choice(list(map(lambda val: val, DiscoveryTool.__members__))), help='Determines which tool is used for retrotransoson discovery. 
Default: LTR_finder') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads, discovery_tool): check_permissions(output_folder, os.W_OK | os.X_OK) check_permissions(output_folder, os.W_OK | os.X_OK) number_of_errors = [0] number_of_errors = [0] start_time = datetime.now() start_time = datetime.now() sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sketcher = Sketcher() sketcher = Sketcher() futuress = [] futuress = [] dependencyResolver = DependencyResolver(discovery_tool) with futures.ThreadPoolExecutor(threads) as executor: with futures.ThreadPoolExecutor(threads) as executor: for sequence in sequences: for sequence in sequences: futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors)) futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors, dependencyResolver)) futures.wait(futuress) futures.wait(futuress) end_time = datetime.now() end_time = datetime.now() print('Total time: {}'.format(end_time - start_time)) print('Total time: {}'.format(end_time - start_time)) print('Number of errors: {}'.format(number_of_errors[0])) print('Number of errors: {}'.format(number_of_errors[0])) def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, errors): def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, errors, dependency_resolver): sequence.id = sequence.id.replace('/', '--') sequence.id = sequence.id.replace('/', '--') seq_start_time = datetime.now() seq_start_time = datetime.now() strlen = 15 strlen = 15 print("Processing 
{}".format(sequence.id)) print("Processing {}".format(sequence.id)) try: try: nester = Nester(sequence, initial_threshold, threshold_multiplier) nester = Nester(sequence, initial_threshold, threshold_multiplier, dependency_resolver) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) if sketch: if sketch: Loading @@ -57,14 +66,14 @@ def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, ou except KeyboardInterrupt: except KeyboardInterrupt: cleanup() cleanup() raise raise except CalledProcessError: except CalledProcessError as ex: cleanup() cleanup() errors[0] += 1 errors[0] += 1 print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen])) print('Processing {}: SUBPROCESS ERROR: {}'.format(sequence.id[:strlen], ex)) except Exception: except Exception as ex: cleanup() cleanup() errors[0] += 1 errors[0] += 1 print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0]) print('Processing {}: UNEXPECTED ERROR: {}'.format(sequence.id[:strlen], ex)) def check_permissions(path, permissions): def check_permissions(path, permissions): path = os.getcwd() if len(path) == 0 else path path = os.getcwd() if len(path) == 0 else path Loading
# nested/core/discovery_tool.py -- new file added by this change (+11 -0)
#!/usr/bin/env python3
from enum import Enum


class DiscoveryTool(Enum):
    """Tools that can be selected for retrotransposon discovery.

    The command-line front end builds the accepted values of its
    ``--discovery_tool`` option from ``DiscoveryTool.__members__``.  That
    mapping contains aliases as well as canonical members, so every name
    defined here (long or short) is a valid option value.
    """

    # Canonical members; ``LTR_finder`` is the CLI default.
    LTR_finder = 1
    LTRharvest = 2

    # Short aliases.  Inside an Enum body a previously assigned name still
    # evaluates to its raw value, so binding an alias to the canonical name
    # (rather than repeating the literal) keeps the two in lock-step if the
    # numbering is ever changed.  Order matters: ``__members__`` preserves
    # definition order, and the CLI lists its choices in that order.
    finder = LTR_finder
    harvest = LTRharvest
nested/core/gene.py +5 −3 Original line number Original line Diff line number Diff line #!/usr/bin/env python3 #!/usr/bin/env python3 from nested.core.te import run_ltr_finder #from nested.core.te import run_ltr_finder # from nested.core.domain import run_blastx # from nested.core.domain import run_blastx # from nested.core.domain_dante import run_last # from nested.core.domain_dante import run_last from nested.core.domain_dante import run_blastx from nested.core.domain_dante import run_blastx from nested.scoring.graph import Graph from nested.scoring.graph import Graph from nested.utils import intervals from nested.utils import intervals from nested.utils.dependency_resolver import DependencyResolver class Gene(object): class Gene(object): Loading @@ -20,10 +21,11 @@ class Gene(object): """ """ def __init__(self, seqid=None, sequence=None): def __init__(self, dependency_resolver, seqid=None, sequence=None): self.seqid = seqid self.seqid = seqid self.sequence = sequence self.sequence = sequence self.te_list = run_ltr_finder(seqid, sequence) self.te_list = dependency_resolver.discovery_tool.run(seqid, sequence) #self.te_list = run_ltr_finder(seqid, sequence) self.domain_list = run_blastx(sequence) self.domain_list = run_blastx(sequence) # self.domain_list = run_last(sequence) # self.domain_list = run_last(sequence) self.scores = [] self.scores = [] Loading