Loading nested/cli/nester.py +13 −9 Original line number Original line Diff line number Diff line #!/usr/bin/env python3 #!/usr/bin/env python3 import os import os import sys import click import click import glob import glob import tracemalloc from concurrent import futures from concurrent import futures from datetime import datetime from datetime import datetime from subprocess import CalledProcessError from subprocess import CalledProcessError Loading @@ -31,25 +29,21 @@ from nested.utils.dependency_resolver import DependencyResolver help='Determines which tool is used for retrotransoson discovery. Default: LTR_finder') help='Determines which tool is used for retrotransoson discovery. Default: LTR_finder') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads, discovery_tool): threshold_multiplier, threads, discovery_tool): check_ram(input_fasta, threads) check_permissions(output_folder, os.W_OK | os.X_OK) check_permissions(output_folder, os.W_OK | os.X_OK) tracemalloc.start() number_of_errors = [0] number_of_errors = [0] start_time = datetime.now() start_time = datetime.now() sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sketcher = Sketcher() sketcher = Sketcher() futuress = [] futuress = [] dependencyResolver = DependencyResolver(discovery_tool) dependencyResolver = DependencyResolver(discovery_tool) with futures.ThreadPoolExecutor(threads) as executor: with futures.ThreadPoolExecutor(threads) as executor: for sequence in sequences: for sequence in SeqIO.parse(input_fasta, 'fasta'): futuress.append(executor.submit(process_sequence, sequence, sketcher, futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors, initial_threshold, threshold_multiplier, number_of_errors, dependencyResolver)) dependencyResolver)) print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB") futures.wait(futuress) futures.wait(futuress) end_time = datetime.now() end_time = datetime.now() print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB") tracemalloc.stop() print('Total time: {}'.format(end_time - start_time)) print('Total time: {}'.format(end_time - start_time)) print('Number of errors: {}'.format(number_of_errors[0])) print('Number of errors: {}'.format(number_of_errors[0])) Loading Loading @@ -90,6 +84,16 @@ def check_permissions(path, permissions): except Exception: except Exception: return return def check_ram(input_file, n): mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') mem_gib = mem_bytes/(1024**3) seq_sizes = [] for seq in SeqIO.parse(input_file, 'fasta'): seq_sizes.append(len(seq) / (1024**3)) seq_sizes.sort() if (sum(seq_sizes[-n:]) * 15) > (0.8 * mem_gib): print("\033[93m" + "During the analysis memory usage may exceed 80% of system's total physical memory." + "\033[0m") def cleanup(): def cleanup(): for file in glob.glob("*ltrs.fa"): for file in glob.glob("*ltrs.fa"): os.remove(file) os.remove(file) Loading Loading
nested/cli/nester.py +13 −9 Original line number Original line Diff line number Diff line #!/usr/bin/env python3 #!/usr/bin/env python3 import os import os import sys import click import click import glob import glob import tracemalloc from concurrent import futures from concurrent import futures from datetime import datetime from datetime import datetime from subprocess import CalledProcessError from subprocess import CalledProcessError Loading @@ -31,25 +29,21 @@ from nested.utils.dependency_resolver import DependencyResolver help='Determines which tool is used for retrotransoson discovery. Default: LTR_finder') help='Determines which tool is used for retrotransoson discovery. Default: LTR_finder') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads, discovery_tool): threshold_multiplier, threads, discovery_tool): check_ram(input_fasta, threads) check_permissions(output_folder, os.W_OK | os.X_OK) check_permissions(output_folder, os.W_OK | os.X_OK) tracemalloc.start() number_of_errors = [0] number_of_errors = [0] start_time = datetime.now() start_time = datetime.now() sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sketcher = Sketcher() sketcher = Sketcher() futuress = [] futuress = [] dependencyResolver = DependencyResolver(discovery_tool) dependencyResolver = DependencyResolver(discovery_tool) with futures.ThreadPoolExecutor(threads) as executor: with futures.ThreadPoolExecutor(threads) as executor: for sequence in sequences: for sequence in SeqIO.parse(input_fasta, 'fasta'): futuress.append(executor.submit(process_sequence, sequence, sketcher, futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors, initial_threshold, threshold_multiplier, number_of_errors, dependencyResolver)) dependencyResolver)) print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB") futures.wait(futuress) futures.wait(futuress) end_time = datetime.now() end_time = datetime.now() print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB") tracemalloc.stop() print('Total time: {}'.format(end_time - start_time)) print('Total time: {}'.format(end_time - start_time)) print('Number of errors: {}'.format(number_of_errors[0])) print('Number of errors: {}'.format(number_of_errors[0])) Loading Loading @@ -90,6 +84,16 @@ def check_permissions(path, permissions): except Exception: except Exception: return return def check_ram(input_file, n): mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') mem_gib = mem_bytes/(1024**3) seq_sizes = [] for seq in SeqIO.parse(input_file, 'fasta'): seq_sizes.append(len(seq) / (1024**3)) seq_sizes.sort() if (sum(seq_sizes[-n:]) * 15) > (0.8 * mem_gib): print("\033[93m" + "During the analysis memory usage may exceed 80% of system's total physical memory." + "\033[0m") def cleanup(): def cleanup(): for file in glob.glob("*ltrs.fa"): for file in glob.glob("*ltrs.fa"): os.remove(file) os.remove(file) Loading