Commit 35c664d9 authored by Pavel Jedlicka's avatar Pavel Jedlicka
Browse files

added ram tracker

parent 64f80f3b
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -138,6 +138,7 @@ Options:
  -s, --sketch_only       If true, nesting is not computed. Genes are sketched
                          only from existing gff files.
  -d, --data_folder TEXT  Output data folder.
  -dt, --discovery-tool TEXT      Used discovery tool. Default is LTR-finder.
  --help                  Show this message and exit.
```

@@ -159,3 +160,11 @@ Options:
  -d, --data_folder TEXT          Output data folder.
  --help                          Show this message and exit.
```

## Adding new LTR discovery tool

* Implement the ``run()`` method in a new TE class based on core/TE_template.py.

* Add a new member to core/discovery_tool.py, including any aliases. Make sure its numeric value differs from those of the already implemented tools and is the same for all of your aliases. The member name (or any of its aliases) is what you pass when calling nester: ``-dt <name you set>``.

* Import your newly implemented class in utils/dependency_resolver.py and add it to the ``valueToClassDict`` attribute, using the numeric value you set in core/discovery_tool.py as the key.
+5 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ import os
import sys
import click
import glob
import tracemalloc
from concurrent import futures
from datetime import datetime
from subprocess import CalledProcessError
@@ -31,6 +32,7 @@ from nested.utils.dependency_resolver import DependencyResolver
def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, 
        threshold_multiplier, threads, discovery_tool):
    check_permissions(output_folder, os.W_OK | os.X_OK)
    tracemalloc.start()
    number_of_errors = [0]
    start_time = datetime.now()
    sequences = list(SeqIO.parse(open(input_fasta), 'fasta'))
@@ -43,8 +45,11 @@ def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initia
                sketch, format, output_fasta_offset, output_folder, 
                initial_threshold, threshold_multiplier, number_of_errors,
                dependencyResolver))
            print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB")
        futures.wait(futuress)
    end_time = datetime.now()
    print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB")
    tracemalloc.stop()
    print('Total time: {}'.format(end_time - start_time))
    print('Number of errors: {}'.format(number_of_errors[0]))

+64 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import os
import subprocess
import re

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

from nested.config.config import config, args_dict_to_list


class TE(object):
    """Template class representing a TE. Every location is in format [from, to].

    This class is a template for discovery-tool specific TE classes: the
    data container and string form are ready to use, while ``run`` is a
    placeholder that a concrete tool class is expected to implement.

    Attributes:
        ppt (list): location of ppt
        pbs (list): location of pbs
        location (list): position of TE in gene
        ltr_left_location (list): location of left ltr
        ltr_right_location (list): location of right ltr
        tsr_left (list): location of left tsr
        tsr_right (list): location of right tsr
        features (dict): dictionary of features assigned to TE (i.e. list of domains on the optimal path in graph)
        score (float): evaluated score of TE
    """

    def __init__(self, ppt=None, pbs=None, location=None,
                 ltr_left_location=None, ltr_right_location=None,
                 tsr_left=None, tsr_right=None, features=None, score=None):
        """Create a TE; every omitted location defaults to a fresh ``[0, 0]``.

        Arguments:
            ppt (list): location of ppt
            pbs (list): location of pbs
            location (list): position of TE in gene
            ltr_left_location (list): location of left ltr
            ltr_right_location (list): location of right ltr
            tsr_left (list): location of left tsr
            tsr_right (list): location of right tsr
            features (dict): features assigned to TE, defaults to a fresh ``{}``
            score (float): evaluated score of TE
        """
        # None is used as the "not given" sentinel instead of list/dict
        # literals in the signature: default values are evaluated once at
        # definition time, so literal defaults would be shared (and mutated)
        # across every instance created without that argument.
        self.ppt = [0, 0] if ppt is None else ppt
        self.pbs = [0, 0] if pbs is None else pbs
        self.location = [0, 0] if location is None else location
        self.ltr_left_location = (
            [0, 0] if ltr_left_location is None else ltr_left_location)
        self.ltr_right_location = (
            [0, 0] if ltr_right_location is None else ltr_right_location)
        self.tsr_left = [0, 0] if tsr_left is None else tsr_left
        self.tsr_right = [0, 0] if tsr_right is None else tsr_right
        self.features = {} if features is None else features
        self.score = score

    def __str__(self):
        """Return a multi-line, brace-delimited summary of the TE.

        The features dictionary is not included in the printed form.
        """
        lines = ['{{location = {},'.format(self.location),
                 ' left ltr = {},'.format(self.ltr_left_location),
                 ' right ltr = {},'.format(self.ltr_right_location),
                 ' left tsr = {},'.format(self.tsr_left),
                 ' right tsr = {},'.format(self.tsr_right),
                 ' ppt = {},'.format(self.ppt),
                 ' pbs = {},'.format(self.pbs),
                 ' score = {}}}'.format(self.score)]
        return '\n'.join(lines)

    @staticmethod
    def run(seqid, sequence):
        """Run discovery tool of choice on sequence and return list of transposons

        This template provides no implementation: as written the method has
        an empty body and returns None. A concrete tool class must supply
        the body.

        Arguments:
            seqid (str): sequence id
            sequence (Bio.Seq.Seq): sequence

        Returns:
            list[TE]: list of found ltr pairs as a TE class
        """
        
+3 −3
Original line number Diff line number Diff line
@@ -6,11 +6,11 @@ from nested.core.discovery_tool import DiscoveryTool

class DependencyResolver():
    def __init__(self, tool_name):
        self.valueToClassDict = {1: TE, 2: TE_harvest}
        self.discovery_tool = self.resolve_discovery_tool(tool_name)

    def resolve_discovery_tool(self, tool_name):        
        valueToClassDict = {1: TE, 2: TE_harvest}
        value = 1
        if (tool_name in DiscoveryTool.__members__):
            value = DiscoveryTool[tool_name].value
        return valueToClassDict[value]
 No newline at end of file
        return self.valueToClassDict[value]