import itertools

from Bio.Data import CodonTable
from Bio.Data import IUPACData
from Bio.Seq import Seq
def generateProtFromAmbiguousDNA(s):
    """Return every protein that an ambiguous DNA sequence could encode.

    The sequence is read in frame +1, three letters at a time. Each codon is
    expanded using the IUPAC ambiguous-DNA values and looked up in the
    "Standard" codon table; the result is the Cartesian product of the
    per-codon amino-acid choices.

    Args:
        s: The DNA sequence, a ``Bio.Seq.Seq`` object (any object whose
            ``str()`` is the nucleotide string works as well).

    Returns:
        A list of protein strings, one for each combination of possible
        amino acids. An empty sequence yields ``[""]``.

    Raises:
        ValueError: If the sequence length is not a multiple of three.

    Stop codons and gap characters such as ``-`` are not handled; whatever
    error the Biopython codon lookup raises for them propagates unchanged.
    """
    # str() replaces the legacy Seq.tostring() call and is evaluated once
    # instead of once per codon.
    dna = str(s)
    if len(dna) % 3:
        raise ValueError(
            "sequence length %d is not a multiple of 3" % len(dna)
        )

    forward_table = CodonTable.unambiguous_dna_by_name["Standard"].forward_table
    ambiguity = IUPACData.ambiguous_dna_values

    # One list of candidate amino acids per codon position.
    choices = [
        CodonTable.list_possible_proteins(dna[i:i + 3], forward_table, ambiguity)
        for i in range(0, len(dna), 3)
    ]
    return ["".join(combo) for combo in itertools.product(*choices)]
def main():
    """Print a demo ambiguous DNA sequence and every protein it can encode."""
    a = Seq('ATGGCARTTGTAHAC')
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    # The two spaces after "DNA:" reproduce the output of the original
    # `print "DNA: ", value` statement.
    print("DNA:  " + str(a))
    print("Proteins:")
    for protein in generateProtFromAmbiguousDNA(a):
        print(protein)
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Wednesday, January 19, 2011
Generate all possible proteins from ambiguous DNA
This had me stumped for a while, but this approach works pretty well. It does NOT handle stop codons or gap characters like '-'. Requires Biopython.
Labels:
bioinformatics,
BioPython,
python
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment