Loading nested/core/generator/generator.py +3 −2 Original line number Diff line number Diff line Loading @@ -4,14 +4,15 @@ import random from Bio import SeqIO from Bio.SeqRecord import SeqRecord from nested.core.gene import Gene from nested.core.tools.nester.gene import Gene from nested.core.generator.te_generator import TEGenerator from nested.core.generator.nested_element import NestedElement from nested.utils import intervals class Generator(object): """Class used to generate artificial nested elements for testing purposes """ Class used to generate artificial nested elements for testing purposes Attributes: source_db (str): path to source database of TE's in fasta format Loading nested/core/tools/ltr/discovery_tool.py +1 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ from enum import Enum class DiscoveryTool(Enum): LTR_finder = 1 LTRharvest = 2 # add aliases with same value as the tool above finder = 1 harvest = 2 nested/core/tools/ltr/te_finder.py +0 −9 Original line number Diff line number Diff line Loading @@ -18,15 +18,6 @@ class TeFinder(DiscoveryToolInterface): super().__init__() def run(self, sequence_id: str, sequence: Seq) -> List[TE]: """Run LTR finder on sequence and return list of transposons Arguments: sequence_id (str): sequence id sequence (Bio.Seq.Seq): sequence Returns: list[TE]: list of found ltr pairs as a TE class """ transposons = [] if not os.path.exists("/tmp/nested"): Loading nested/core/tools/nester/gene.py +11 −8 Original line number Diff line number Diff line Loading @@ -11,15 +11,17 @@ from nested.entities.te import TE class Gene(object): """Class representing a genereic gene in the program. Used in recursive calls, contain information about the gene domains and found TE's """ Class representing one iteration of nested-nester module recursive call. Attributes: seqid (str): id of sequence sequence (Bio.Seq.Seq): sequence te_list (list[TE]): list of pairs of LTR's found from LTR finder domain_list (list[Domain]): list of found domains scores (list[float]): list of evaluations of each TE seqid: id of sequence sequence: sequence discovery_tool: implementation of DiscoveryToolInterface currently used te_list: list of pairs of LTR's found from LTR finder domain_finder: class used for identification of domains domain_list: list of found domains scores: list of evaluations of each TE """ copia_pattern = ["ltr_left", "pbs", "GAG", "PROT", "INT", "RT", "RH", "ppt", "ltr_right"] Loading Loading @@ -47,7 +49,8 @@ class Gene(object): return '\n'.join(lines) def get_best_candidate(self) -> Optional[TE]: """Evaluate all LTR pairs and return best scored pair, or None if no pair found """ Evaluate all LTR pairs and return best scored pair, or None if no pair found Returns: TE: best evaluated pair Loading nested/core/tools/nester/nester.py +0 −13 Original line number Diff line number Diff line Loading @@ -7,30 +7,18 @@ from Bio.Seq import Seq from nested.utils import intervals from nested.core.tools.nester.gene import Gene from nested.core.tools.solo_ltrs import SoloLtrs from nested.logging.logger import NesterLogger from nested.config.config import config from nested.core.tools.tool_interface import ToolInterface from nested.entities.te import TE from nested.utils.dependency_resolver import DependencyResolver from nested.output.output_objects.nester_output_object import NesterOutputObject from nested.core.module_management.module import Module class Nester(ToolInterface): """Class represent nesting in sequence, recursively find best evaluated transposon and crop it until no new transposons found Attributes: seqid (str): id of sequence sequence (Bio.Seq.Seq): sequence nested_element (NestedElement): nested element """ def __init__(self, sequence: SeqRecord, threshold: float, multiplier: float, discovery_tool: str): super().__init__(sequence) self.solo_ltrs: SoloLtrs = None self._iteration: int = 0 #self._logger: NesterLogger = NesterLogger('{}/{}'.format(config['logdir'], self.seqId)) self.threshold = threshold self.multiplier = multiplier self.discovery_tool = discovery_tool Loading @@ -43,7 +31,6 @@ class Nester(ToolInterface): return nested_list def _get_unexpanded_transposon_list(self, sequence: Seq, threshold: float) -> List[TE]: # recursivelly find and crop best evaluated transposon, return unexpanded list of found transposons self._iteration += 1 gene = Gene(self.discovery_tool, self.seqId, sequence) candidates = gene.get_candidates_above_threshold(threshold) Loading Loading
nested/core/generator/generator.py +3 −2 Original line number Diff line number Diff line Loading @@ -4,14 +4,15 @@ import random from Bio import SeqIO from Bio.SeqRecord import SeqRecord from nested.core.gene import Gene from nested.core.tools.nester.gene import Gene from nested.core.generator.te_generator import TEGenerator from nested.core.generator.nested_element import NestedElement from nested.utils import intervals class Generator(object): """Class used to generate artificial nested elements for testing purposes """ Class used to generate artificial nested elements for testing purposes Attributes: source_db (str): path to source database of TE's in fasta format Loading
nested/core/tools/ltr/discovery_tool.py +1 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ from enum import Enum class DiscoveryTool(Enum): LTR_finder = 1 LTRharvest = 2 # add aliases with same value as the tool above finder = 1 harvest = 2
nested/core/tools/ltr/te_finder.py +0 −9 Original line number Diff line number Diff line Loading @@ -18,15 +18,6 @@ class TeFinder(DiscoveryToolInterface): super().__init__() def run(self, sequence_id: str, sequence: Seq) -> List[TE]: """Run LTR finder on sequence and return list of transposons Arguments: sequence_id (str): sequence id sequence (Bio.Seq.Seq): sequence Returns: list[TE]: list of found ltr pairs as a TE class """ transposons = [] if not os.path.exists("/tmp/nested"): Loading
nested/core/tools/nester/gene.py +11 −8 Original line number Diff line number Diff line Loading @@ -11,15 +11,17 @@ from nested.entities.te import TE class Gene(object): """Class representing a genereic gene in the program. Used in recursive calls, contain information about the gene domains and found TE's """ Class representing one iteration of nested-nester module recursive call. Attributes: seqid (str): id of sequence sequence (Bio.Seq.Seq): sequence te_list (list[TE]): list of pairs of LTR's found from LTR finder domain_list (list[Domain]): list of found domains scores (list[float]): list of evaluations of each TE seqid: id of sequence sequence: sequence discovery_tool: implementation of DiscoveryToolInterface currently used te_list: list of pairs of LTR's found from LTR finder domain_finder: class used for identification of domains domain_list: list of found domains scores: list of evaluations of each TE """ copia_pattern = ["ltr_left", "pbs", "GAG", "PROT", "INT", "RT", "RH", "ppt", "ltr_right"] Loading Loading @@ -47,7 +49,8 @@ class Gene(object): return '\n'.join(lines) def get_best_candidate(self) -> Optional[TE]: """Evaluate all LTR pairs and return best scored pair, or None if no pair found """ Evaluate all LTR pairs and return best scored pair, or None if no pair found Returns: TE: best evaluated pair Loading
nested/core/tools/nester/nester.py +0 −13 Original line number Diff line number Diff line Loading @@ -7,30 +7,18 @@ from Bio.Seq import Seq from nested.utils import intervals from nested.core.tools.nester.gene import Gene from nested.core.tools.solo_ltrs import SoloLtrs from nested.logging.logger import NesterLogger from nested.config.config import config from nested.core.tools.tool_interface import ToolInterface from nested.entities.te import TE from nested.utils.dependency_resolver import DependencyResolver from nested.output.output_objects.nester_output_object import NesterOutputObject from nested.core.module_management.module import Module class Nester(ToolInterface): """Class represent nesting in sequence, recursively find best evaluated transposon and crop it until no new transposons found Attributes: seqid (str): id of sequence sequence (Bio.Seq.Seq): sequence nested_element (NestedElement): nested element """ def __init__(self, sequence: SeqRecord, threshold: float, multiplier: float, discovery_tool: str): super().__init__(sequence) self.solo_ltrs: SoloLtrs = None self._iteration: int = 0 #self._logger: NesterLogger = NesterLogger('{}/{}'.format(config['logdir'], self.seqId)) self.threshold = threshold self.multiplier = multiplier self.discovery_tool = discovery_tool Loading @@ -43,7 +31,6 @@ class Nester(ToolInterface): return nested_list def _get_unexpanded_transposon_list(self, sequence: Seq, threshold: float) -> List[TE]: # recursivelly find and crop best evaluated transposon, return unexpanded list of found transposons self._iteration += 1 gene = Gene(self.discovery_tool, self.seqId, sequence) candidates = gene.get_candidates_above_threshold(threshold) Loading