import math from BCBio import GFF from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation from gt.core import * from gt.extended import * from gt.annotationsketch import * from gt.annotationsketch.custom_track import CustomTrack from gt.core.gtrange import Range def geneToGFF(gene): #insert gene rec = SeqRecord(gene['sequence'], gene['id']) qualifiers = {'ID': 'Gene'} features = [] features.append(SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='gene', strand=0, qualifiers=qualifiers)) #insert LTR finder results for transposon in gene['ltr_finder']: LTR_feature = SeqFeature(FeatureLocation(transposon['Location'][0], transposon['Location'][1]), type='LTR_finder_TE', strand=1) LTR_feature.sub_features = [] if not math.isnan(transposon['PPT'][0]): strand = 1 if transposon['PPT'][0] > transposon['PPT'][1]: strand = -1 LTR_feature.sub_features.append(SeqFeature(FeatureLocation(transposon['PPT'][0], transposon['PPT'][1]), type='LTR_PPT', strand=1)) if not math.isnan(transposon['PBS'][0]): strand = 1 if transposon['PBS'][0] > transposon['PBS'][1]: strand = -1 LTR_feature.sub_features.append(SeqFeature(FeatureLocation(transposon['PBS'][0], transposon['PBS'][1]), type='LTR_PBS', strand=1)) features.append(LTR_feature) #domain type: INT, GAG, AP, RT, RNaseH, Unkwnown domain_features = {} domain_features['INT'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='INT', strand=0) domain_features['GAG'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='GAG', strand=0) domain_features['AP'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='AP', strand=0) domain_features['RNaseH'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='RNaseH', strand=0) domain_features['RT'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='RT', strand=0) domain_features['XX'] = SeqFeature(FeatureLocation(0, len(gene['sequence'])), type='XX', strand=0) for df in domain_features: domain_features[df].sub_features = [] #insert domains for domain in gene['domains']: strand = 1 x,y = domain['Location'][0], domain['Location'][1] if domain['Location'][0] > domain['Location'][1]: strand = -1 x,y = y,x domain_type = domain['Type'].split('_')[0] if domain_type not in ['INT', 'GAG', 'AP', 'RT', 'RNaseH']: domain_type = 'XX' domain_features[domain_type].sub_features.append(SeqFeature(FeatureLocation(x, y), type=domain_type, strand=strand)) for df in domain_features: features.append(domain_features[df]) rec.features = features #possible other formats as well with open('data/' + gene['id'] + '.gff', 'w+') as out_handle: gff = GFF.write([rec], out_handle) def visualize(gene): geneToGFF(gene) #Style style = Style() style.load_file('gt.style') #Feature index feature_index = FeatureIndexMemory() #add gff file feature_index.add_gff3file('data/' + gene['id'] + '.gff') #create diagram for first sequence ID seqid = feature_index.get_first_seqid() range = feature_index.get_range_for_seqid(seqid) diagram = Diagram.from_index(feature_index, seqid, range, style) #create layout layout = Layout(diagram, 1400, style) height = layout.get_height() #create canvas canvas = CanvasCairoFile(style, 1400, height) #canvas = CanvasCairoFilePDF(style, 1400, height) #sketch layout on canvas layout.sketch(canvas) #write to file pngfile = 'data/' + gene['id'] + '.png' canvas.to_file(pngfile)