Loading nested/cli/nester.py +39 −28 Original line number Original line Diff line number Diff line Loading @@ -2,6 +2,8 @@ import sys import sys import click import click import glob from concurrent import futures from datetime import datetime from datetime import datetime from subprocess import CalledProcessError from subprocess import CalledProcessError Loading @@ -9,6 +11,7 @@ from Bio import SeqIO from nested.core.nester import Nester from nested.core.nester import Nester from nested.output.sketcher import Sketcher from nested.output.sketcher import Sketcher from nested.core.sequence_thread import SequenceThread @click.command() @click.command() Loading @@ -19,43 +22,51 @@ from nested.output.sketcher import Sketcher @click.option('--output_folder', '-d', default="", help='Output data folder.') @click.option('--output_folder', '-d', default="", help='Output data folder.') @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') # TODO DATA_FOLDER @click.option('--threads', '-n', type=int, default=1, help='Number of threads') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier): def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads): number_of_errors = 0 number_of_errors = 0 start_time = datetime.now() start_time = datetime.now() sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sketcher = Sketcher() sketcher = Sketcher() i = 0 i = 0 for sequence in sequences: with futures.ThreadPoolExecutor(threads) as executor: i += 1 for sequence in sequences: sequence.id = sequence.id.replace('/', '--') executor.submit(process_sequence, sequence, sketcher, i, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier) seq_start_time = datetime.now() strlen = 15 print('Processing {a}...'.format(a=sequence.id[:strlen]), end='\r') try: nester = Nester(sequence, i, initial_threshold, threshold_multiplier) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) if sketch: if format != 'default': sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset) sketcher.sketch(sequence.id, dirpath=output_folder) seq_end_time = datetime.now() print('Processing {a}: DONE [{b}]'.format(a=sequence.id[:strlen], b=seq_end_time - seq_start_time)) except KeyboardInterrupt: # delete ltrs.fa raise except CalledProcessError: number_of_errors += 1 print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen])) except: number_of_errors += 1 print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0]) end_time = datetime.now() end_time = datetime.now() print('Total time: {}'.format(end_time - start_time)) print('Total time: {}'.format(end_time - start_time)) print('Number of errors: {}'.format(number_of_errors)) print('Number of errors: {}'.format(number_of_errors)) def process_sequence(sequence, sketcher, i, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier): i += 1 sequence.id = sequence.id.replace('/', '--') seq_start_time = datetime.now() strlen = 15 print("Processing {}".format(sequence.id)) try: nester = Nester(sequence, i, initial_threshold, threshold_multiplier) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) if sketch: if format != 'default': sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset) sketcher.sketch(sequence.id, dirpath=output_folder) seq_end_time = datetime.now() print('Processing {a}: DONE [{b}]'.format(a=sequence.id, b=seq_end_time - seq_start_time)) except KeyboardInterrupt: cleanup() raise except CalledProcessError: cleanup() number_of_errors += 1 print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen])) except: cleanup() number_of_errors += 1 print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0]) def cleanup(): for file in glob.glob("*ltrs.fa"): os.remove(file) if __name__ == '__main__': if __name__ == '__main__': main() main() nested/config/gt.style +51 −3 Original line number Original line Diff line number Diff line Loading @@ -68,7 +68,7 @@ style = max_show_width = nil, max_show_width = nil, }, }, -------------------------------------- -------------------------------------- AP = { PROT = { -- Color definitions -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, Loading Loading @@ -116,7 +116,55 @@ style = max_show_width = nil, max_show_width = nil, }, }, -------------------------------------- -------------------------------------- RNaseH = { RH = { -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, fill = {red=0.0510, green=0.2784, blue=0.6314, alpha = 5.0}, style = "box", -- Collapsing options collapse_to_parent = true, split_lines = true, -- Caption options max_capt_show_width= nil, -- Display this track only if the viewport is not wider than this -- number of nucleotides. Set to 0 to disable type track. max_show_width = nil, }, -------------------------------------- aRH = { -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, fill = {red=0.0510, green=0.2784, blue=0.6314, alpha = 5.0}, style = "box", -- Collapsing options collapse_to_parent = true, split_lines = true, -- Caption options max_capt_show_width= nil, -- Display this track only if the viewport is not wider than this -- number of nucleotides. Set to 0 to disable type track. max_show_width = nil, }, -------------------------------------- CHD = { -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, fill = {red=0.0510, green=0.2784, blue=0.6314, alpha = 5.0}, style = "box", -- Collapsing options collapse_to_parent = true, split_lines = true, -- Caption options max_capt_show_width= nil, -- Display this track only if the viewport is not wider than this -- number of nucleotides. Set to 0 to disable type track. max_show_width = nil, }, -------------------------------------- CHDCR = { -- Color definitions -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, Loading Loading @@ -246,4 +294,4 @@ style = default_stroke_color = {red=0.1, green=0.1, blue=0.1, alpha = 1.0}, default_stroke_color = {red=0.1, green=0.1, blue=0.1, alpha = 1.0}, background_color = {red=1.0, green=1.0, blue=1.0, alpha = 1.0}, background_color = {red=1.0, green=1.0, blue=1.0, alpha = 1.0}, }, }, } } No newline at end of file nested/core/domain_dante.py +2 −24 Original line number Original line Diff line number Diff line Loading @@ -52,19 +52,10 @@ def run_blastx(sequence): Returns: Returns: list[Domain]: list of matched domains list[Domain]: list of matched domains """ """ domains = [] domains = [] # args = dict(config['blastx']['args']) # args['db'] = "/opt/nester/dbs/gydb_arh_chdcr/gydb_arh_chdcr.fa" # args['max_target_seqs'] = 1950 # args['num_threads'] = 6 # args['evalue'] = 1e-5 #blastx_cline = NcbiblastxCommandline(**args) blastx_cline = NcbiblastxCommandline(**config['blastx']['args']) blastx_cline = NcbiblastxCommandline(**config['blastx']['args']) xml_out, stderr = blastx_cline(stdin=str(sequence)) xml_out, stderr = blastx_cline(stdin=str(sequence)) #output = open("xml_output_blast.xml", "a+").write(str(xml_out)) blast_records = NCBIXML.parse(StringIO(xml_out)) blast_records = NCBIXML.parse(StringIO(xml_out)) try: try: blast_record = next(blast_records).alignments blast_record = next(blast_records).alignments Loading @@ -85,10 +76,8 @@ def run_blastx(sequence): else: else: domain.location = [hsp.query_start, hsp.query_start + 3 * (hsp.align_length - hsp.gaps)] domain.location = [hsp.query_start, hsp.query_start + 3 * (hsp.align_length - hsp.gaps)] #domain.type = alignment.title.split(' ')[1].split('_')[0] domain.type = alignment.title.split('__')[0].split('-')[1] domain.type = alignment.title.split('__')[0].split('-')[1] domain.annotation = alignment.title.split('__')[1] domain.annotation = alignment.title.split('__')[1] #print(domain.type, domain.location) domains.append(domain) domains.append(domain) return domains return domains Loading Loading @@ -133,15 +122,4 @@ def parse_last_output(raw_output): domain.annotation = split_line[1].split('__')[1] domain.annotation = split_line[1].split('__')[1] domains.append(domain) domains.append(domain) print(len(domains)) print(len(domains)) return domains return domains nested/core/gene.py +0 −1 Original line number Original line Diff line number Diff line Loading @@ -67,7 +67,6 @@ class Gene(object): gypsy_minus = Graph(te, self.domain_list, True, True) gypsy_minus = Graph(te, self.domain_list, True, True) graphs = [copia_plus, copia_minus, gypsy_plus, gypsy_minus] graphs = [copia_plus, copia_minus, gypsy_plus, gypsy_minus] te.score, te.features = self._best_score(graphs) te.score, te.features = self._best_score(graphs) # te.score /= float(intervals.length(te.location)) ''' ''' # evaluate LTR pair using graph, set up score and features # evaluate LTR pair using graph, set up score and features Loading nested/core/nester.py +2 −12 Original line number Original line Diff line number Diff line Loading @@ -33,13 +33,10 @@ class Nester(object): def _find_nesting(self): def _find_nesting(self): nested_list = self._get_unexpanded_transposon_list(self.sequence, self.threshold) # find list of nested transposons nested_list = self._get_unexpanded_transposon_list(self.sequence, self.threshold) # find list of nested transposons nested_list = self._expand_transposon_list(nested_list) nested_list = self._expand_transposon_list(nested_list) nested_list = self._filter_nested_list(nested_list) nested_list = self._filter_nested_list(nested_list) self.nested_element = NestedElement(self.seqid, self.sequence, nested_list) self.nested_element = NestedElement(self.seqid, self.sequence, nested_list) # threshold causes loss of elements def _get_unexpanded_transposon_list(self, sequence, threshold): def _get_unexpanded_transposon_list(self, sequence, threshold): # recursivelly find and crop best evaluated transposon, return unexpanded list of found transposons # recursivelly find and crop best evaluated transposon, return unexpanded list of found transposons self._iteration += 1 self._iteration += 1 Loading @@ -48,7 +45,6 @@ class Nester(object): if not candidates: if not candidates: best_candidate = gene.get_best_candidate() best_candidate = gene.get_best_candidate() if not best_candidate: if not best_candidate: # call blastn here self.solo_ltrs.find_solo_ltrs(sequence) self.solo_ltrs.find_solo_ltrs(sequence) self.solo_ltrs.solo_ltrs.sort(key=lambda x: x.location[0], reverse=True) self.solo_ltrs.solo_ltrs.sort(key=lambda x: x.location[0], reverse=True) return self.solo_ltrs.solo_ltrs return self.solo_ltrs.solo_ltrs Loading @@ -63,16 +59,11 @@ class Nester(object): for element in nested_list: for element in nested_list: if intervals.intersect(candidate.location, element.location): if intervals.intersect(candidate.location, element.location): choose = False choose = False # not sure about the impact # choose = intervals.contains(candidate.location, element.location) break break for other_candidate in candidates: for other_candidate in candidates: if candidate.location == other_candidate.location: if candidate.location == other_candidate.location: continue continue if intervals.contains(candidate.location, other_candidate.location): if intervals.contains(candidate.location, other_candidate.location): # if candidate.score >= other_candidate.score: # choose = True # else: choose = False choose = False break break if choose: if choose: Loading Loading @@ -110,7 +101,6 @@ class Nester(object): nested_list[j].features['ppt']) nested_list[j].features['ppt']) nested_list[j].features['pbs'] = intervals.expand(nested_list[i].location, nested_list[j].features['pbs'] = intervals.expand(nested_list[i].location, nested_list[j].features['pbs']) nested_list[j].features['pbs']) #TODO check if works in corner case nested_list[j].tsr_left = intervals.expand(nested_list[i].location, nested_list[j].tsr_left = intervals.expand(nested_list[i].location, nested_list[j].tsr_left) nested_list[j].tsr_left) nested_list[j].tsr_right = intervals.expand(nested_list[i].location, nested_list[j].tsr_right = intervals.expand(nested_list[i].location, Loading Loading
nested/cli/nester.py +39 −28 Original line number Original line Diff line number Diff line Loading @@ -2,6 +2,8 @@ import sys import sys import click import click import glob from concurrent import futures from datetime import datetime from datetime import datetime from subprocess import CalledProcessError from subprocess import CalledProcessError Loading @@ -9,6 +11,7 @@ from Bio import SeqIO from nested.core.nester import Nester from nested.core.nester import Nester from nested.output.sketcher import Sketcher from nested.output.sketcher import Sketcher from nested.core.sequence_thread import SequenceThread @click.command() @click.command() Loading @@ -19,43 +22,51 @@ from nested.output.sketcher import Sketcher @click.option('--output_folder', '-d', default="", help='Output data folder.') @click.option('--output_folder', '-d', default="", help='Output data folder.') @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--initial_threshold', '-t', type=int, default=500, help='Initial threshold value.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') @click.option('--threshold_multiplier', '-m', type=float, default=1, help='Threshold multiplier.') # TODO DATA_FOLDER @click.option('--threads', '-n', type=int, default=1, help='Number of threads') def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier): def main(input_fasta, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier, threads): number_of_errors = 0 number_of_errors = 0 start_time = datetime.now() start_time = datetime.now() sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sequences = list(SeqIO.parse(open(input_fasta), 'fasta')) sketcher = Sketcher() sketcher = Sketcher() i = 0 i = 0 for sequence in sequences: with futures.ThreadPoolExecutor(threads) as executor: i += 1 for sequence in sequences: sequence.id = sequence.id.replace('/', '--') executor.submit(process_sequence, sequence, sketcher, i, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier) seq_start_time = datetime.now() strlen = 15 print('Processing {a}...'.format(a=sequence.id[:strlen]), end='\r') try: nester = Nester(sequence, i, initial_threshold, threshold_multiplier) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) if sketch: if format != 'default': sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset) sketcher.sketch(sequence.id, dirpath=output_folder) seq_end_time = datetime.now() print('Processing {a}: DONE [{b}]'.format(a=sequence.id[:strlen], b=seq_end_time - seq_start_time)) except KeyboardInterrupt: # delete ltrs.fa raise except CalledProcessError: number_of_errors += 1 print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen])) except: number_of_errors += 1 print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0]) end_time = datetime.now() end_time = datetime.now() print('Total time: {}'.format(end_time - start_time)) print('Total time: {}'.format(end_time - start_time)) print('Number of errors: {}'.format(number_of_errors)) print('Number of errors: {}'.format(number_of_errors)) def process_sequence(sequence, sketcher, i, sketch, format, output_fasta_offset, output_folder, initial_threshold, threshold_multiplier): i += 1 sequence.id = sequence.id.replace('/', '--') seq_start_time = datetime.now() strlen = 15 print("Processing {}".format(sequence.id)) try: nester = Nester(sequence, i, initial_threshold, threshold_multiplier) sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset, format=format) sketcher.create_solo_ltr_gff(nester.solo_ltrs, dirpath=output_folder) if sketch: if format != 'default': sketcher.create_gff(nester.nested_element, dirpath=output_folder, output_fasta_offset=output_fasta_offset) sketcher.sketch(sequence.id, dirpath=output_folder) seq_end_time = datetime.now() print('Processing {a}: DONE [{b}]'.format(a=sequence.id, b=seq_end_time - seq_start_time)) except KeyboardInterrupt: cleanup() raise except CalledProcessError: cleanup() number_of_errors += 1 print('Processing {}: SUBPROCESS ERROR'.format(sequence.id[:strlen])) except: cleanup() number_of_errors += 1 print('Processing {}: UNEXPECTED ERROR:'.format(sequence.id[:strlen]), sys.exc_info()[0]) def cleanup(): for file in glob.glob("*ltrs.fa"): os.remove(file) if __name__ == '__main__': if __name__ == '__main__': main() main()
nested/config/gt.style +51 −3 Original line number Original line Diff line number Diff line Loading @@ -68,7 +68,7 @@ style = max_show_width = nil, max_show_width = nil, }, }, -------------------------------------- -------------------------------------- AP = { PROT = { -- Color definitions -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, Loading Loading @@ -116,7 +116,55 @@ style = max_show_width = nil, max_show_width = nil, }, }, -------------------------------------- -------------------------------------- RNaseH = { RH = { -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, fill = {red=0.0510, green=0.2784, blue=0.6314, alpha = 5.0}, style = "box", -- Collapsing options collapse_to_parent = true, split_lines = true, -- Caption options max_capt_show_width= nil, -- Display this track only if the viewport is not wider than this -- number of nucleotides. Set to 0 to disable type track. max_show_width = nil, }, -------------------------------------- aRH = { -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, fill = {red=0.0510, green=0.2784, blue=0.6314, alpha = 5.0}, style = "box", -- Collapsing options collapse_to_parent = true, split_lines = true, -- Caption options max_capt_show_width= nil, -- Display this track only if the viewport is not wider than this -- number of nucleotides. Set to 0 to disable type track. max_show_width = nil, }, -------------------------------------- CHD = { -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, fill = {red=0.0510, green=0.2784, blue=0.6314, alpha = 5.0}, style = "box", -- Collapsing options collapse_to_parent = true, split_lines = true, -- Caption options max_capt_show_width= nil, -- Display this track only if the viewport is not wider than this -- number of nucleotides. Set to 0 to disable type track. max_show_width = nil, }, -------------------------------------- CHDCR = { -- Color definitions -- Color definitions stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke = {red=0.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, stroke_marked = {red=1.0, green=0.0, blue=0.0, alpha = 1.0}, Loading Loading @@ -246,4 +294,4 @@ style = default_stroke_color = {red=0.1, green=0.1, blue=0.1, alpha = 1.0}, default_stroke_color = {red=0.1, green=0.1, blue=0.1, alpha = 1.0}, background_color = {red=1.0, green=1.0, blue=1.0, alpha = 1.0}, background_color = {red=1.0, green=1.0, blue=1.0, alpha = 1.0}, }, }, } } No newline at end of file
nested/core/domain_dante.py +2 −24 Original line number Original line Diff line number Diff line Loading @@ -52,19 +52,10 @@ def run_blastx(sequence): Returns: Returns: list[Domain]: list of matched domains list[Domain]: list of matched domains """ """ domains = [] domains = [] # args = dict(config['blastx']['args']) # args['db'] = "/opt/nester/dbs/gydb_arh_chdcr/gydb_arh_chdcr.fa" # args['max_target_seqs'] = 1950 # args['num_threads'] = 6 # args['evalue'] = 1e-5 #blastx_cline = NcbiblastxCommandline(**args) blastx_cline = NcbiblastxCommandline(**config['blastx']['args']) blastx_cline = NcbiblastxCommandline(**config['blastx']['args']) xml_out, stderr = blastx_cline(stdin=str(sequence)) xml_out, stderr = blastx_cline(stdin=str(sequence)) #output = open("xml_output_blast.xml", "a+").write(str(xml_out)) blast_records = NCBIXML.parse(StringIO(xml_out)) blast_records = NCBIXML.parse(StringIO(xml_out)) try: try: blast_record = next(blast_records).alignments blast_record = next(blast_records).alignments Loading @@ -85,10 +76,8 @@ def run_blastx(sequence): else: else: domain.location = [hsp.query_start, hsp.query_start + 3 * (hsp.align_length - hsp.gaps)] domain.location = [hsp.query_start, hsp.query_start + 3 * (hsp.align_length - hsp.gaps)] #domain.type = alignment.title.split(' ')[1].split('_')[0] domain.type = alignment.title.split('__')[0].split('-')[1] domain.type = alignment.title.split('__')[0].split('-')[1] domain.annotation = alignment.title.split('__')[1] domain.annotation = alignment.title.split('__')[1] #print(domain.type, domain.location) domains.append(domain) domains.append(domain) return domains return domains Loading Loading @@ -133,15 +122,4 @@ def parse_last_output(raw_output): domain.annotation = split_line[1].split('__')[1] domain.annotation = split_line[1].split('__')[1] domains.append(domain) domains.append(domain) print(len(domains)) print(len(domains)) return domains return domains
nested/core/gene.py +0 −1 Original line number Original line Diff line number Diff line Loading @@ -67,7 +67,6 @@ class Gene(object): gypsy_minus = Graph(te, self.domain_list, True, True) gypsy_minus = Graph(te, self.domain_list, True, True) graphs = [copia_plus, copia_minus, gypsy_plus, gypsy_minus] graphs = [copia_plus, copia_minus, gypsy_plus, gypsy_minus] te.score, te.features = self._best_score(graphs) te.score, te.features = self._best_score(graphs) # te.score /= float(intervals.length(te.location)) ''' ''' # evaluate LTR pair using graph, set up score and features # evaluate LTR pair using graph, set up score and features Loading
nested/core/nester.py +2 −12 Original line number Original line Diff line number Diff line Loading @@ -33,13 +33,10 @@ class Nester(object): def _find_nesting(self): def _find_nesting(self): nested_list = self._get_unexpanded_transposon_list(self.sequence, self.threshold) # find list of nested transposons nested_list = self._get_unexpanded_transposon_list(self.sequence, self.threshold) # find list of nested transposons nested_list = self._expand_transposon_list(nested_list) nested_list = self._expand_transposon_list(nested_list) nested_list = self._filter_nested_list(nested_list) nested_list = self._filter_nested_list(nested_list) self.nested_element = NestedElement(self.seqid, self.sequence, nested_list) self.nested_element = NestedElement(self.seqid, self.sequence, nested_list) # threshold causes loss of elements def _get_unexpanded_transposon_list(self, sequence, threshold): def _get_unexpanded_transposon_list(self, sequence, threshold): # recursivelly find and crop best evaluated transposon, return unexpanded list of found transposons # recursivelly find and crop best evaluated transposon, return unexpanded list of found transposons self._iteration += 1 self._iteration += 1 Loading @@ -48,7 +45,6 @@ class Nester(object): if not candidates: if not candidates: best_candidate = gene.get_best_candidate() best_candidate = gene.get_best_candidate() if not best_candidate: if not best_candidate: # call blastn here self.solo_ltrs.find_solo_ltrs(sequence) self.solo_ltrs.find_solo_ltrs(sequence) self.solo_ltrs.solo_ltrs.sort(key=lambda x: x.location[0], reverse=True) self.solo_ltrs.solo_ltrs.sort(key=lambda x: x.location[0], reverse=True) return self.solo_ltrs.solo_ltrs return self.solo_ltrs.solo_ltrs Loading @@ -63,16 +59,11 @@ class Nester(object): for element in nested_list: for element in nested_list: if intervals.intersect(candidate.location, element.location): if intervals.intersect(candidate.location, element.location): choose = False choose = False # not sure about the impact # choose = intervals.contains(candidate.location, element.location) break break for other_candidate in candidates: for other_candidate in candidates: if candidate.location == other_candidate.location: if candidate.location == other_candidate.location: continue continue if intervals.contains(candidate.location, other_candidate.location): if intervals.contains(candidate.location, other_candidate.location): # if candidate.score >= other_candidate.score: # choose = True # else: choose = False choose = False break break if choose: if choose: Loading Loading @@ -110,7 +101,6 @@ class Nester(object): nested_list[j].features['ppt']) nested_list[j].features['ppt']) nested_list[j].features['pbs'] = intervals.expand(nested_list[i].location, nested_list[j].features['pbs'] = intervals.expand(nested_list[i].location, nested_list[j].features['pbs']) nested_list[j].features['pbs']) #TODO check if works in corner case nested_list[j].tsr_left = intervals.expand(nested_list[i].location, nested_list[j].tsr_left = intervals.expand(nested_list[i].location, nested_list[j].tsr_left) nested_list[j].tsr_left) nested_list[j].tsr_right = intervals.expand(nested_list[i].location, nested_list[j].tsr_right = intervals.expand(nested_list[i].location, Loading