Commit 64f80f3b authored by Pavel Jedlicka
Browse files

LTRharvest implemented; can add more discovery tools

parent ec377525
Loading
Loading
Loading
Loading
+5 −0
Original line number Original line Diff line number Diff line
@@ -7,3 +7,8 @@ tmp
images
images
results
results
config.yml
config.yml
LTR_Finder/
build/
nested.egg-info/
dist/
.vscode/
+11 −1
Original line number Original line Diff line number Diff line
@@ -15,6 +15,16 @@ ltr:
    a: *prosite_path
    a: *prosite_path
    s: *tRNAdb_path
    s: *tRNAdb_path


# Settings for the LTRharvest discovery tool: the executable (`path`), the
# sub-command to invoke (`command`), and the option values (`args`).
# NOTE(review): each key under `args` presumably becomes a `-<key> <value>`
# option of `gt ltrharvest` — confirm against the code that builds the command.
ltr_harvest:
  path: 'gt'
  command: 'ltrharvest'
  args:
    # presumably the LTR similarity threshold in percent — TODO confirm
    similar: 40
    # presumably the maximum length of a single LTR — TODO confirm units (bp)
    maxlenltr: 3000
    # presumably the maximum distance between the two LTR starts — TODO confirm
    maxdistltr: 20000
    # presumably the minimum / maximum target site duplication length — TODO confirm
    mintsd: 4
    maxtsd: 10
       
gt:
gt:
  path: 'gt'
  path: 'gt'
  command: 'sketch'
  command: 'sketch'
+18 −9
Original line number Original line Diff line number Diff line
@@ -12,7 +12,8 @@ from Bio import SeqIO


from nested.core.nester import Nester
from nested.core.nester import Nester
from nested.output.sketcher import Sketcher
from nested.output.sketcher import Sketcher
from nested.core.sequence_thread import SequenceThread
from nested.core.discovery_tool import DiscoveryTool
from nested.utils.dependency_resolver import DependencyResolver




@click.command()
@click.command()
@@ -24,28 +25,36 @@ from nested.core.sequence_thread import SequenceThread
@click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.')
@click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.')
@click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.')
@click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.')
@click.option('--threads', '-n', type=int, default=1, help='Number of threads')
@click.option('--threads', '-n', type=int, default=1, help='Number of threads')
def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads):    
@click.option('--discovery_tool', '-dt', default=DiscoveryTool.LTR_finder.name,
    type=click.Choice(list(map(lambda val: val, DiscoveryTool.__members__))),
    help='Determines which tool is used for retrotransposon discovery. Default: LTR_finder')
def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, 
        threshold_multiplier, threads, discovery_tool):
    check_permissions(output_folder, os.W_OK | os.X_OK)
    check_permissions(output_folder, os.W_OK | os.X_OK)
    number_of_errors = [0]
    number_of_errors = [0]
    start_time = datetime.now()
    start_time = datetime.now()
    sequences = list(SeqIO.parse(open(input_fasta), 'fasta'))
    sequences = list(SeqIO.parse(open(input_fasta), 'fasta'))
    sketcher = Sketcher()  
    sketcher = Sketcher()  
    futuress = []
    futuress = []
    dependencyResolver = DependencyResolver(discovery_tool)
    with futures.ThreadPoolExecutor(threads) as executor:
    with futures.ThreadPoolExecutor(threads) as executor:
        for sequence in sequences:
        for sequence in sequences:
            futuress.append(executor.submit(process_sequence, sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, number_of_errors))
            futuress.append(executor.submit(process_sequence, sequence, sketcher, 
                sketch, format, output_fasta_offset, output_folder, 
                initial_threshold, threshold_multiplier, number_of_errors,
                dependencyResolver))
        futures.wait(futuress)
        futures.wait(futuress)
    end_time = datetime.now()
    end_time = datetime.now()
    print('Total time: {}'.format(end_time - start_time))
    print('Total time: {}'.format(end_time - start_time))
    print('Number of errors: {}'.format(number_of_errors[0]))
    print('Number of errors: {}'.format(number_of_errors[0]))


def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, errors):
def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, errors, dependency_resolver):
    sequence.id = sequence.id.replace('/', '--')
    sequence.id = sequence.id.replace('/', '--')
    seq_start_time = datetime.now()
    seq_start_time = datetime.now()
    strlen = 15
    strlen = 15
    print("Processing {}".format(sequence.id))
    print("Processing {}".format(sequence.id))
    try:
    try:
        nester = Nester(sequence, initial_threshold, threshold_multiplier)
        nester = Nester(sequence, initial_threshold, threshold_multiplier, dependency_resolver)
        sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format)
        sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format)
        sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder)  
        sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder)  
        if sketch:
        if sketch:
@@ -57,14 +66,14 @@ def process_sequence(sequence, sketcher, sketch, format, output_fasta_offset, ou
    except KeyboardInterrupt:
    except KeyboardInterrupt:
        cleanup()
        cleanup()
        raise
        raise
    except CalledProcessError:
    except CalledProcessError as ex:
        cleanup()
        cleanup()
        errors[0] += 1
        errors[0] += 1
        print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen]))
        print('Processing {}: SUBPROCESS ERROR: {}'.format(sequence.id[:strlen], ex))
    except Exception:
    except Exception as ex:
        cleanup()
        cleanup()
        errors[0] += 1
        errors[0] += 1
        print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0])
        print('Processing {}: UNEXPECTED ERROR: {}'.format(sequence.id[:strlen], ex))


def check_permissions(path, permissions):
def check_permissions(path, permissions):
    path = os.getcwd() if len(path) == 0 else path
    path = os.getcwd() if len(path) == 0 else path
+11 −0
Original line number Original line Diff line number Diff line
#!/usr/bin/env python3

from enum import Enum

class DiscoveryTool(Enum):
    """Tools that can be selected for retrotransposon discovery.

    ``LTR_finder`` and ``LTRharvest`` are the canonical members. ``finder``
    and ``harvest`` share their values, so they are enum aliases: they are
    listed in ``__members__`` but resolve to the canonical member and are
    skipped when iterating over the class.
    """

    # Canonical members.
    LTR_finder = 1
    LTRharvest = 2

    # Short aliases. Inside the class body a member name still refers to its
    # raw value, so reusing it keeps each alias tied to the tool it stands for.
    finder = LTR_finder
    harvest = LTRharvest
+5 −3
Original line number Original line Diff line number Diff line
#!/usr/bin/env python3
#!/usr/bin/env python3


from nested.core.te import run_ltr_finder
#from nested.core.te import run_ltr_finder
# from nested.core.domain import run_blastx
# from nested.core.domain import run_blastx
# from nested.core.domain_dante import run_last
# from nested.core.domain_dante import run_last
from nested.core.domain_dante import run_blastx
from nested.core.domain_dante import run_blastx
from nested.scoring.graph import Graph
from nested.scoring.graph import Graph
from nested.utils import intervals
from nested.utils import intervals
from nested.utils.dependency_resolver import DependencyResolver




class Gene(object):
class Gene(object):
@@ -20,10 +21,11 @@ class Gene(object):


    """
    """


    def __init__(self, seqid=None, sequence=None):
    def __init__(self, dependency_resolver, seqid=None, sequence=None):
        self.seqid = seqid
        self.seqid = seqid
        self.sequence = sequence
        self.sequence = sequence
        self.te_list = run_ltr_finder(seqid, sequence)
        self.te_list = dependency_resolver.discovery_tool.run(seqid, sequence)
        #self.te_list = run_ltr_finder(seqid, sequence)
        self.domain_list = run_blastx(sequence)
        self.domain_list = run_blastx(sequence)
        # self.domain_list = run_last(sequence)
        # self.domain_list = run_last(sequence)
        self.scores = []
        self.scores = []
Loading