Skip to content
Snippets Groups Projects
sketch.py 4.31 KiB
Newer Older
import pprint
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation

rlapar's avatar
rlapar committed
from gt.core import *
from gt.extended import *
from gt.annotationsketch import *
from gt.annotationsketch.custom_track import CustomTrack
from gt.core.gtrange import Range

#a contains b as subinterval
def contains(a,b):
    return a[0] <= b[0] and a[1] >= b[1]
#remove a from b
def remove(a,b):
    return [[b[0],a[0]], [a[1], b[1]]]

#get intervals of a cropped by b
def crop(a, b):
    if not len(b):
        return [a]
    #sort children
    b.sort(key=lambda x: x[0])

    cropped = [[a[0], b[0][0]]]
    for i in range(len(b) - 1):
        cropped.append([b[i][1], b[i+1][0]])
    cropped.append([b[-1][1], a[1]])

    return cropped

def sketch(nested, genes):
    print 'Running GT annotation sketch...'
    for g in genes:
        createGFF(genes[g], nested[g])
        visualize(g)

def createGFF(gene, nested):
    #find closest parent
    if nested:
        nested[-1]['parent'] = -1
    for i in range(len(nested) - 1):
        nested[i]['parent'] = -1
        for j in range(i+1, len(nested)):
            if contains(nested[j]['location'], nested[i]['location']):
                nested[i]['parent'] = j
                break

    #append direct children  
    for i in reversed(range(len(nested))):
        nested[i]['direct_children_locations'] = []        
        parent = nested[i]['parent']
        if parent != -1:
            nested[parent]['direct_children_locations'].append(nested[i]['location'])
          
    #GFF
rlapar's avatar
rlapar committed
    rec = SeqRecord(gene['sequence'], gene['id'])
rlapar's avatar
rlapar committed
    features = []

    for i in range(len(nested)):
        #insert BaseLine
        features.append(SeqFeature(FeatureLocation(nested[i]['location'][0], nested[i]['location'][1]),
                    type='te_base', 
                    strand=0, 
                    qualifiers={'ID': 'TE_BASE {}'.format(i)}))

        #Insert croppedIntervals
        children = nested[i]['direct_children_locations']
        cropped = crop(nested[i]['location'], children)
        for subinterval in cropped:
            features.append(SeqFeature(FeatureLocation(subinterval[0],subinterval[1]), 
                                        type='te', 
                                        strand=0,
                                        qualifiers={'ID': 'TE {}'.format(i), 'Parent': 'TE_BASE {}'.format(i)}))

        #Insert elements (domains, ppt, pbs, ...)
        j = 0
        for domain in nested[i]['features']['domains']:
            sign = (lambda x: x and (1, -1)[x < 0])(domain['frame'][0])
            if sign < 0:
                domain['location'] = [domain['location'][1], domain['location'][0]]
            
            overlap = [x for x in children if contains(domain['location'], x)]

            cropped_domain = crop(domain['location'], overlap)
            for part in cropped_domain:
                features.append(SeqFeature(FeatureLocation(part[0], part[1]),
rlapar's avatar
rlapar committed
                                                    type=domain['type'],
                                                    strand=sign,
                                                    #qualifiers={'Parent': 'DOMAIN {}-{}'.format(i,j)}))
rlapar's avatar
rlapar committed
                                                    qualifiers={'ID': 'DOMAIN {}-{}'.format(i,j),'Parent': 'TE_BASE {}'.format(i)}))
rlapar's avatar
rlapar committed
    rec.features = features
rlapar's avatar
rlapar committed
    #possible other formats as well
rlapar's avatar
rlapar committed
    with open('data/' + gene['id'] + '.gff', 'w+') as out_handle:
rlapar's avatar
rlapar committed
        gff = GFF.write([rec], out_handle)
rlapar's avatar
rlapar committed

def visualize(gene_id):
rlapar's avatar
rlapar committed
    #Style
rlapar's avatar
rlapar committed
    style = Style()
rlapar's avatar
rlapar committed
    style.load_file('gt.style')

    #Feature index
    feature_index = FeatureIndexMemory()
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
    #add gff file
    feature_index.add_gff3file('data/' + gene_id + '.gff')
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
    #create diagram for first sequence ID
    seqid = feature_index.get_first_seqid()
    range = feature_index.get_range_for_seqid(seqid)
    diagram = Diagram.from_index(feature_index, seqid, range, style)

    #create layout
rlapar's avatar
rlapar committed
    layout = Layout(diagram, 1400, style)
rlapar's avatar
rlapar committed
    height = layout.get_height()
rlapar's avatar
rlapar committed

    #create canvas
rlapar's avatar
rlapar committed
    canvas = CanvasCairoFile(style, 1400, height)
    #canvas = CanvasCairoFilePDF(style, 1400, height)
rlapar's avatar
rlapar committed

    #sketch layout on canvas
rlapar's avatar
rlapar committed
    layout.sketch(canvas)

rlapar's avatar
rlapar committed
    #write to file
    pngfile = 'data/' + gene_id + '.png'
rlapar's avatar
rlapar committed
    canvas.to_file(pngfile)