Commit 35c664d9 authored by Pavel Jedlicka's avatar Pavel Jedlicka
Browse files

added ram tracker

parent 64f80f3b
Loading
Loading
Loading
Loading
+9 −0
Original line number Original line Diff line number Diff line
@@ -138,6 +138,7 @@ Options:
  -s, --sketch_only       If true, nesting is not computed. Genes are sketched
  -s, --sketch_only       If true, nesting is not computed. Genes are sketched
                          only from existing gff files.
                          only from existing gff files.
  -d, --data_folder TEXT  Output data folder.
  -d, --data_folder TEXT  Output data folder.
  -dt, --discovery-tool TEXT      Used discovery tool. Default is LTR-finder.
  --help                  Show this message and exit.
  --help                  Show this message and exit.
```
```


@@ -159,3 +160,11 @@ Options:
  -d, --data_folder TEXT          Output data folder.
  -d, --data_folder TEXT          Output data folder.
  --help                          Show this message and exit.
  --help                          Show this message and exit.
```
```

## Adding a new LTR discovery tool

* Implement the ``run()`` method in a new TE class based on core/TE_template.py.

* Add a new value to core/discovery_tool.py, including any aliases. Make sure the numeric value differs from those of the already implemented tools and that it is the same for all of your aliases. (The name or alias is what you pass when calling nester: ``-dt <value you set>``.)

* Import your newly implemented class in utils/dependency_resolver.py and add it to the ``valueToClassDict`` attribute. (The key is the numeric value you set in core/discovery_tool.py.)
+5 −0
Original line number Original line Diff line number Diff line
@@ -4,6 +4,7 @@ import os
import sys
import sys
import click
import click
import glob
import glob
import tracemalloc
from concurrent import futures
from concurrent import futures
from datetime import datetime
from datetime import datetime
from subprocess import CalledProcessError
from subprocess import CalledProcessError
@@ -31,6 +32,7 @@ from nested.utils.dependency_resolver import DependencyResolver
def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, 
def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, 
        threshold_multiplier, threads, discovery_tool):
        threshold_multiplier, threads, discovery_tool):
    check_permissions(output_folder, os.W_OK | os.X_OK)
    check_permissions(output_folder, os.W_OK | os.X_OK)
    tracemalloc.start()
    number_of_errors = [0]
    number_of_errors = [0]
    start_time = datetime.now()
    start_time = datetime.now()
    sequences = list(SeqIO.parse(open(input_fasta), 'fasta'))
    sequences = list(SeqIO.parse(open(input_fasta), 'fasta'))
@@ -43,8 +45,11 @@ def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initia
                sketch, format, output_fasta_offset, output_folder, 
                sketch, format, output_fasta_offset, output_folder, 
                initial_threshold, threshold_multiplier, number_of_errors,
                initial_threshold, threshold_multiplier, number_of_errors,
                dependencyResolver))
                dependencyResolver))
            print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB")
        futures.wait(futuress)
        futures.wait(futuress)
    end_time = datetime.now()
    end_time = datetime.now()
    print("max RAM usage: " + str(tracemalloc.get_traced_memory()[1] / 10**6) + " MB")
    tracemalloc.stop()
    print('Total time: {}'.format(end_time - start_time))
    print('Total time: {}'.format(end_time - start_time))
    print('Number of errors: {}'.format(number_of_errors[0]))
    print('Number of errors: {}'.format(number_of_errors[0]))


+64 −0
Original line number Original line Diff line number Diff line
#!/usr/bin/env python3

import os
import subprocess
import re

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

from nested.config.config import config, args_dict_to_list


class TE(object):
    """Template class representing a TE. Every location is in format [from, to].

    Attributes:
        ppt (list): location of ppt
        pbs (list): location of pbs
        location (list): position of TE in gene
        ltr_left_location (list): location of left ltr
        ltr_right_location (list): location of right ltr
        tsr_left (list): location of left tsr
        tsr_right (list): location of right tsr
        features (dict): dictionary of features assigned to TE (i.e. list of domains on the optimal path in graph)
        score (float): evaluated score of TE
    """

    def __init__(self, ppt=None, pbs=None, location=None,
                 ltr_left_location=None, ltr_right_location=None,
                 tsr_left=None, tsr_right=None, features=None, score=None):
        """Create a TE record.

        Every location argument that is omitted (or passed as None) is
        initialised to a fresh ``[0, 0]`` list and ``features`` to a fresh
        empty dict. Objects passed in explicitly are stored as-is.

        Arguments:
            ppt (list): location of ppt
            pbs (list): location of pbs
            location (list): position of TE in gene
            ltr_left_location (list): location of left ltr
            ltr_right_location (list): location of right ltr
            tsr_left (list): location of left tsr
            tsr_right (list): location of right tsr
            features (dict): features assigned to TE
            score (float): evaluated score of TE
        """
        # Defaults are built per call: a literal list/dict in the signature is
        # evaluated once and would be shared (and mutated) across instances.
        self.ppt = [0, 0] if ppt is None else ppt
        self.pbs = [0, 0] if pbs is None else pbs
        self.location = [0, 0] if location is None else location
        self.ltr_left_location = (
            [0, 0] if ltr_left_location is None else ltr_left_location)
        self.ltr_right_location = (
            [0, 0] if ltr_right_location is None else ltr_right_location)
        self.tsr_left = [0, 0] if tsr_left is None else tsr_left
        self.tsr_right = [0, 0] if tsr_right is None else tsr_right
        self.features = {} if features is None else features
        self.score = score

    def __str__(self):
        """Return a multi-line, brace-delimited summary of the TE.

        The ``features`` attribute is intentionally not part of the output.
        """
        lines = ['{{location = {},'.format(self.location),
                 ' left ltr = {},'.format(self.ltr_left_location),
                 ' right ltr = {},'.format(self.ltr_right_location),
                 ' left tsr = {},'.format(self.tsr_left),
                 ' right tsr = {},'.format(self.tsr_right),
                 ' ppt = {},'.format(self.ppt),
                 ' pbs = {},'.format(self.pbs),
                 ' score = {}}}'.format(self.score)]
        return '\n'.join(lines)

    @staticmethod
    def run(seqid, sequence):
        """Run discovery tool of choice on sequence and return list of transposons

        This is the template placeholder: it has no implementation and
        returns None. A concrete discovery-tool class is expected to replace
        the body and return the list described below.

        Arguments:
            seqid (str): sequence id
            sequence (Bio.Seq.Seq): sequence

        Returns:
            list[TE]: list of found ltr pairs as a TE class
        """

+3 −3
Original line number Original line Diff line number Diff line
@@ -6,11 +6,11 @@ from nested.core.discovery_tool import DiscoveryTool


class DependencyResolver():
class DependencyResolver():
    def __init__(self, tool_name):
    def __init__(self, tool_name):
        self.valueToClassDict = {1: TE, 2: TE_harvest}
        self.discovery_tool = self.resolve_discovery_tool(tool_name)
        self.discovery_tool = self.resolve_discovery_tool(tool_name)


    def resolve_discovery_tool(self, tool_name):
    def resolve_discovery_tool(self, tool_name):        
        valueToClassDict = {1: TE, 2: TE_harvest}
        value = 1
        value = 1
        if (tool_name in DiscoveryTool.__members__):
        if (tool_name in DiscoveryTool.__members__):
            value = DiscoveryTool[tool_name].value
            value = DiscoveryTool[tool_name].value
        return valueToClassDict[value]
        return self.valueToClassDict[value]
 No newline at end of file