Skip to content
Snippets Groups Projects
domains.py 1.82 KiB
Newer Older
rlapar's avatar
rlapar committed
from StringIO import StringIO

from Bio import SeqIO
from Bio.Blast import NCBIXML
from Bio.Blast.Applications import NcbiblastxCommandline

from python import config

rlapar's avatar
rlapar committed
def runBlastx(genes):
    #print 'Running blastx...'
    for g in genes:
        genes[g]['domains'] = findDomains(genes[g])
rlapar's avatar
rlapar committed
    return genes

rlapar's avatar
rlapar committed
def findDomains(gene):
    domains = {'INT':[],'GAG':[],'AP':[],'RNaseH':[],'RT':[],'XX':[]}
rlapar's avatar
rlapar committed
    blastx_cline = NcbiblastxCommandline(db=config.blastx_gydb_protein_db, outfmt=5, 
        num_threads=config.blastx_args['num_threads'], dbsize=config.blastx_args['dbsize'], 
        word_size=config.blastx_args['word_size'], evalue=config.blastx_args['evalue'])
    
    xml_out, stderr = blastx_cline(stdin=str(gene['sequence']))
    blast_records = NCBIXML.parse(StringIO(xml_out))
    try:
        blast_record = next(blast_records).alignments
    except ValueError:
        return domains
rlapar's avatar
rlapar committed
    #print len(blast_record)
    if blast_record:
        for alignment in blast_record:
            for hsp in alignment.hsps:
                domain = {
rlapar's avatar
rlapar committed
                    'evalue': hsp.expect,
                    'frame': hsp.frame,
                    'score': hsp.score,
                    'title': alignment.title
rlapar's avatar
rlapar committed
                }
                if hsp.frame[0] < 0: #complementary strand
rlapar's avatar
rlapar committed
                    domain['location'] = [hsp.query_start, hsp.query_start - 3 * hsp.align_length]
rlapar's avatar
rlapar committed
                else: #forward strand
rlapar's avatar
rlapar committed
                    domain['location'] =[hsp.query_start, hsp.query_start + 3 * hsp.align_length]               
rlapar's avatar
rlapar committed
                
rlapar's avatar
rlapar committed
                domain_type = alignment.title.split(' ')[1].split('_')[0]
                if domain_type not in domains.keys():
                    domains['XX'].append(domain)
                else:
                    domains[domain_type].append(domain)
rlapar's avatar
rlapar committed
        
    return domains