Skip to content
Snippets Groups Projects
Commit c600325b authored by rlapar's avatar rlapar
Browse files

generator

parent 38027b64
No related branches found
No related tags found
1 merge request!8Generator
*.pyc
python/*.pyc
data
tmp
#!/usr/bin/env python3
import random
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from nested.core.te import TE
from nested.core.nested_element import NestedElement
from nested.utils import intervals
class Generator(object):
    """Class used to generate artificial nested elements for testing purposes

    Attributes:
        elements (list): nested elements generated so far, in order of creation
    """

    def __init__(self, source_db):
        """Create a generator with no elements yet

        Arguments:
            source_db (str): path to source database of TE's in fasta format
        """
        self._source_db = source_db
        self.elements = []

    def _load_sequences(self, filter_string):
        """Return the records of the source database whose id contains filter_string"""
        # The context manager closes the database even when parsing fails.
        with open(self._source_db) as source_handle:
            return [record for record in SeqIO.parse(source_handle, 'fasta')
                    if filter_string in record.id]

    @staticmethod
    def _random_insert_position(length):
        """Pick a random index inside a sequence of the given length (0 if it is empty)"""
        # randint(0, -1) raises ValueError, so an empty sequence (the default
        # baselength=0 case) has to be handled explicitly.
        if length <= 0:
            return 0
        return random.randint(0, length - 1)

    def _build_element(self, sequences, baselength, number_of_iterations):
        """Build one nested element from already loaded records and store it in self.elements"""
        # random initial sequence
        element_sequence = ''.join(random.choice('atgc') for _ in range(baselength))
        nested_intervals = [[0, len(element_sequence) - 1]]

        if number_of_iterations > 0 and not sequences:
            raise ValueError(
                'no source sequences available to insert '
                '(check the source database and filter_string)')

        for _ in range(number_of_iterations):
            chosen_seq = sequences[random.randint(0, len(sequences) - 1)].seq
            insert_position = self._random_insert_position(len(element_sequence))
            # The most recent insertion goes first; expand_list relies on that order.
            nested_intervals.insert(
                0, [insert_position, insert_position + len(chosen_seq) - 1])
            element_sequence = (element_sequence[:insert_position]
                                + chosen_seq
                                + element_sequence[insert_position:])

        # expand intervals
        nested_intervals = intervals.expand_list(nested_intervals)
        nested_tes = [TE(location=interval) for interval in nested_intervals]

        element_id = 'GENERATED_{}'.format(len(self.elements) + 1)
        self.elements.append(NestedElement(element_id, element_sequence, nested_tes))

    def generate_random_nested_element(self, baselength=0, number_of_iterations=1, filter_string='LTR'):
        """Generate one random nested element and append it to self.elements

        Arguments:
            baselength (int): length of the random 'atgc' sequence to start from
            number_of_iterations (int): how many source sequences to insert
            filter_string (str): only records whose id contains this string are used

        Raises:
            ValueError: if an insertion is requested but no record matches filter_string
        """
        self._build_element(
            self._load_sequences(filter_string), baselength, number_of_iterations)

    def generate_random_nested_elements(self, number_of_elements=1, baselength=0, number_of_iterations=1, filter_string='LTR'):
        """Generate number_of_elements random nested elements

        Arguments:
            number_of_elements (int): how many elements to generate
            baselength (int): length of the random 'atgc' sequence to start from
            number_of_iterations (int): how many source sequences to insert per element
            filter_string (str): only records whose id contains this string are used

        Raises:
            ValueError: if an insertion is requested but no record matches filter_string
        """
        if number_of_elements <= 0:
            return
        # Parse the database once instead of once per generated element.
        sequences = self._load_sequences(filter_string)
        for _ in range(number_of_elements):
            self._build_element(sequences, baselength, number_of_iterations)

    def save_elements_to_fasta(self, filepath):
        """Write all generated elements to filepath in fasta format

        Arguments:
            filepath (str): path of the output file (overwritten if it exists)
        """
        records = [SeqRecord(element.sequence, id=element.id, description='')
                   for element in self.elements]
        with open(filepath, 'w') as output_handle:
            SeqIO.write(records, output_handle, 'fasta')
\ No newline at end of file
......@@ -20,7 +20,7 @@ class TE(object):
features (dict): dictionary of features assigned to TE (i.e. list of domains on the optimal path in graph)
score (float): evaluated score of TE
"""
def __init__(self, ppt=None, pbs=None, location=None, ltr_left_location=None, ltr_right_location=None, features=None, score=None):
def __init__(self, ppt=None, pbs=None, location=None, ltr_left_location=None, ltr_right_location=None, features={}, score=None):
self.ppt = ppt
self.pbs = pbs
self.location = location
......
......@@ -77,13 +77,13 @@ def expand(source, target):
target[1] += length
return target
def expandList(intervalList):
def expand_list(interval_list):
"""Expand intervals by the previous ones starting from the end
Arguments:
intervalList(list): list of intervals to expand
interval_list(list): list of intervals to expand
"""
for i in reversed(range(len(intervalList) - 1)):
for j in range(i + 1, len(intervalList)):
intervalList[j] = expand(intervalList[i], intervalList[j])
return intervalList
\ No newline at end of file
for i in reversed(range(len(interval_list) - 1)):
for j in range(i + 1, len(interval_list)):
interval_list[j] = expand(interval_list[i], interval_list[j])
return interval_list
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment