Skip to content
Snippets Groups Projects
sketch.py 5.67 KiB
Newer Older
rlapar's avatar
rlapar committed
import math
import pprint
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation

rlapar's avatar
rlapar committed
from gt.core import *
from gt.extended import *
from gt.annotationsketch import *
from gt.annotationsketch.custom_track import CustomTrack
from gt.core.gtrange import Range

def containsSubinterval(a, b):
    #a contains b as subinterval
    return b[0] >= a[0] and b[1] <= a[1] 

def removeSubinterval(a,b):
    #remove subinterval b from a
    return [[a[0], b[0]], [b[1], a[1]]]

rlapar's avatar
rlapar committed
def intervalsToGFF(gene, tree):
    #crop intervals
    croppedTree = []
    for i in range(len(tree)):
        #find direct children
        directChildren = []
        for j in reversed(range(0,i)):
            directChild = True
            for dc in directChildren:
                if containsSubinterval(dc, tree[j]):
                    directChild = False
            if not directChild:
                continue
            if containsSubinterval(tree[i], tree[j]):
                directChildren.append(tree[j])
        directChildren.sort()
        node = [tree[i]]
        for child in directChildren:
            node = node[:-1] + removeSubinterval(node[-1], child)
        croppedTree.append(node)
    
    #GFF
    rec = SeqRecord(gene['sequence'], gene['id'])
    features = []
    i = 0
    for interval in croppedTree:
        #insert base
        feat = SeqFeature(FeatureLocation(interval[0][0], interval[-1][1]), type='gene', strand=0, qualifiers={'ID': 'Transposon {}'.format(i)})
        feat.sub_features = []
        #insert all intervals
        for subinterval in interval:
            feat.sub_features.append(SeqFeature(FeatureLocation(subinterval[0],subinterval[1]), type='te_base', strand=0))

        features.append(feat)
        i += 1

    rec.features = features
    #possible other formats as well
    with open('data/' + gene['id'] + '.gff', 'w+') as out_handle:
        gff = GFF.write([rec], out_handle)

def sketch(genes):
    print 'Running GT annotation sketch...'
    for g in genes:
rlapar's avatar
rlapar committed
        intervalsToGFF(genes[g], genes[g]['nested']['intervals'])
rlapar's avatar
rlapar committed
        #treeToGFF(genes[g]['nested']['tree'])
        visualize(genes[g])

rlapar's avatar
rlapar committed
def geneToGFF(gene):
    #insert gene
rlapar's avatar
rlapar committed
    rec = SeqRecord(gene['sequence'], gene['id'])
    qualifiers = {'ID': 'Gene'}
rlapar's avatar
rlapar committed
    features = []
rlapar's avatar
rlapar committed
    features.append(SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='gene', strand=0, qualifiers=qualifiers))
rlapar's avatar
rlapar committed

    #insert LTR finder results
rlapar's avatar
rlapar committed
    for transposon in gene['ltr_finder']:
        LTR_feature = SeqFeature(FeatureLocation(transposon['location'][0], transposon['location'][1]), 
rlapar's avatar
rlapar committed
            type='LTR_finder_TE', strand=1)
        LTR_feature.sub_features = []
        if not math.isnan(transposon['ppt'][0]):
rlapar's avatar
rlapar committed
            strand = 1
            if transposon['ppt'][0] > transposon['ppt'][1]:
rlapar's avatar
rlapar committed
                strand = -1
            LTR_feature.sub_features.append(SeqFeature(FeatureLocation(transposon['ppt'][0], transposon['ppt'][1]), 
                type='LTR_ppt', strand=1))
        if not math.isnan(transposon['pbs'][0]):
rlapar's avatar
rlapar committed
            strand = 1
            if transposon['pbs'][0] > transposon['pbs'][1]:
rlapar's avatar
rlapar committed
                strand = -1
            LTR_feature.sub_features.append(SeqFeature(FeatureLocation(transposon['pbs'][0], transposon['pbs'][1]), 
                type='LTR_pbs', strand=1))
rlapar's avatar
rlapar committed
        features.append(LTR_feature)        

    #domain type: INT, GAG, AP, RT, RNaseH, Unkwnown
rlapar's avatar
rlapar committed
    domain_features = {}
    domain_features['INT'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='INT', strand=0)
    domain_features['GAG'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='GAG', strand=0)
    domain_features['AP'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='AP', strand=0)
    domain_features['RNaseH'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='RNaseH', strand=0)
    domain_features['RT'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='RT', strand=0)
    domain_features['XX'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='XX', strand=0)
    for df in domain_features:
        domain_features[df].sub_features = []
    #insert domains
    for domain in gene['domains']:
        strand = 1
        x,y = domain['location'][0], domain['location'][1]
        if domain['location'][0] > domain['location'][1]:
rlapar's avatar
rlapar committed
            strand = -1
            x,y = y,x
        domain_type = domain['Type'].split('_')[0]
        if domain_type not in ['INT', 'GAG', 'AP', 'RT', 'RNaseH']:
            domain_type = 'XX'
        domain_features[domain_type].sub_features.append(SeqFeature(FeatureLocation(x, y), type=domain_type, strand=strand))

    for df in domain_features:
        features.append(domain_features[df])
rlapar's avatar
rlapar committed
    rec.features = features
    #possible other formats as well
rlapar's avatar
rlapar committed
    with open('data/' + gene['id'] + '.gff', 'w+') as out_handle:
rlapar's avatar
rlapar committed
        gff = GFF.write([rec], out_handle)
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
def visualize(gene):
    #geneToGFF(gene)
rlapar's avatar
rlapar committed

    #Style
rlapar's avatar
rlapar committed
    style = Style()
rlapar's avatar
rlapar committed
    style.load_file('gt.style')

    #Feature index
    feature_index = FeatureIndexMemory()
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
    #add gff file
rlapar's avatar
rlapar committed
    feature_index.add_gff3file('data/' + gene['id'] + '.gff')
rlapar's avatar
rlapar committed

rlapar's avatar
rlapar committed
    #create diagram for first sequence ID
    seqid = feature_index.get_first_seqid()
    range = feature_index.get_range_for_seqid(seqid)
    diagram = Diagram.from_index(feature_index, seqid, range, style)

    #create layout
rlapar's avatar
rlapar committed
    layout = Layout(diagram, 1400, style)
rlapar's avatar
rlapar committed
    height = layout.get_height()
rlapar's avatar
rlapar committed

    #create canvas
rlapar's avatar
rlapar committed
    canvas = CanvasCairoFile(style, 1400, height)
    #canvas = CanvasCairoFilePDF(style, 1400, height)
rlapar's avatar
rlapar committed

    #sketch layout on canvas
rlapar's avatar
rlapar committed
    layout.sketch(canvas)

rlapar's avatar
rlapar committed
    #write to file
rlapar's avatar
rlapar committed
    pngfile = 'data/' + gene['id'] + '.png'
rlapar's avatar
rlapar committed
    canvas.to_file(pngfile)