Hello!

I have this code that I wrote, and it works the way I want it to.

from Bio import SeqIO
import collections as C
import requests as R
import itertools

def extract_go_terms(dbxrefs):
    """Return the GO terms found in a sequence of database cross-references.

    Every entry that starts with ``'GO:'`` has its first three characters
    removed and is kept, in input order; all other entries are ignored.

    NOTE(review): presumably Biopython's uniprot-xml parser stores these
    entries as ``'GO:GO:0005737'``, so the result keeps one ``'GO:'``
    prefix -- confirm against the parser output.
    """
    return [ref[3:] for ref in dbxrefs if ref.startswith('GO:')]


def main():
    """Print the BLAST hit ids of each record and the GO terms of those hits.

    For every ``BLAST`` feature that carries a ``db_xref`` qualifier, the
    ids are printed first, then ``<id>.xml`` is read for each id (relative
    to the current working directory) and the GO terms of all those files
    are printed as one flat list.
    """
    for protein in SeqIO.parse("2-proteins-tryout.gb", "gb"):
        for feature in protein.features:
            if feature.type != "BLAST" or "db_xref" not in feature.qualifiers:
                continue

            idss = feature.qualifiers["db_xref"]
            print(idss)

            # Collect whole GO ids across all hits. Flattening a list of
            # strings would split every id into single characters.
            allTerms = []
            for idd in idss:
                fullProtein = SeqIO.read(f'{idd}.xml', 'uniprot-xml')
                allTerms.extend(extract_go_terms(fullProtein.dbxrefs))
            print(allTerms)


if __name__ == "__main__":
    main()

Here is the output:

['P52326', 'P26480', 'P52327', 'P0A2E3', 'P00579', 'O24744', 'P32001', 'P57163', 'Q89B10', 'Q83BB6', 'Q9PDM9', 'Q87DT7', 'Q8PG33', 'P43766', 'Q8P4H2', 'P52325', 'P33452', 'Q59753', 'Q2K619', 'P52324', 'D5AQI9', 'P17531', 'P0CZ15', 'Q1RKH7', 'Q92FZ8', 'Q4UJT1', 'P33451', 'Q68VQ5']
['GO:0001123', 'GO:0003677', 'GO:0003700', 'GO:0005737', 'GO:00 ...



Source link