Search on uniprot¶
You can search UniProt to find UniProt accessions and the identifiers of related structures (PDB, AlphaFold and EMDB entries).
In [23]:
Copied!
# Set up logging for this notebook.
import logging
from pprint import pprint
logging.basicConfig(level=logging.WARNING)
# Level to pass to basicConfig above:
#   WARNING - see only warnings
#   INFO    - additionally see the SPARQL queries
#   DEBUG   - additionally see the raw results
# Set up logging for this notebook.
import logging
from pprint import pprint
logging.basicConfig(level=logging.WARNING)
# Level to pass to basicConfig above:
#   WARNING - see only warnings
#   INFO    - additionally see the SPARQL queries
#   DEBUG   - additionally see the raw results
Search for uniprot accessions based on a query¶
In [24]:
Copied!
from protein_quest.uniprot import Query, search4uniprot
from protein_quest.uniprot import Query, search4uniprot
In [25]:
Copied!
# Search criteria: reviewed entries for taxon 9606, filtered on nucleus
# location (Uniprot term and GO term) and on DNA-binding molecular function.
# NOTE(review): 9606 is presumably the NCBI taxonomy ID for human - confirm.
query = Query(
taxon_id="9606",
reviewed=True,
subcellular_location_uniprot="nucleus",
subcellular_location_go=["GO:0005634"], # Cellular component - Nucleus
molecular_function_go=["GO:0003677"], # Molecular function - DNA binding
)
# Search criteria: reviewed entries for taxon 9606, filtered on nucleus
# location (Uniprot term and GO term) and on DNA-binding molecular function.
# NOTE(review): 9606 is presumably the NCBI taxonomy ID for human - confirm.
query = Query(
taxon_id="9606",
reviewed=True,
subcellular_location_uniprot="nucleus",
subcellular_location_go=["GO:0005634"], # Cellular component - Nucleus
molecular_function_go=["GO:0003677"], # Molecular function - DNA binding
)
In [26]:
Copied!
# Run the query with limit=200; when the limit is reached the library logs a
# warning that more results may exist (see the output below).
uniprot_accessions = search4uniprot(query, limit=200)
# Run the query with limit=200; when the limit is reached the library logs a
# warning that more results may exist (see the output below).
uniprot_accessions = search4uniprot(query, limit=200)
WARNING:protein_quest.uniprot:Search for uniprot accessions returned 200 results. There may be more results available, but they are not returned due to the limit of 200. Consider increasing the limit to get more results.
In [27]:
Copied!
# Report the total, then a sample from each end of the accession collection.
print(f"Number of Uniprot accessions: {len(uniprot_accessions)}")
accession_list = list(uniprot_accessions)
for heading, sample in (("First 5:", accession_list[:5]), ("Last 5:", accession_list[-5:])):
    print(heading)
    pprint(sample)
# Report the total, then a sample from each end of the accession collection.
print(f"Number of Uniprot accessions: {len(uniprot_accessions)}")
accession_list = list(uniprot_accessions)
for heading, sample in (("First 5:", accession_list[:5]), ("Last 5:", accession_list[-5:])):
    print(heading)
    pprint(sample)
Number of Uniprot accessions: 200 First 5: ['O43435', 'O43364', 'O14627', 'O43151', 'O43913'] Last 5: ['O15266', 'O60393', 'A8K8V0', 'A8MUV8', 'O14746']
Find Protein Data Bank (PDB) entries for uniprot entries¶
In [28]:
Copied!
from protein_quest.uniprot import search4pdb
from protein_quest.uniprot import search4pdb
In [29]:
Copied!
# Look up PDB entries for the accessions found above, again with limit=200;
# the warning in the output below shows the limit being reached.
pdb_results = search4pdb(uniprot_accessions, limit=200)
# Look up PDB entries for the accessions found above, again with limit=200;
# the warning in the output below shows the limit being reached.
pdb_results = search4pdb(uniprot_accessions, limit=200)
WARNING:protein_quest.uniprot:Search for pdbs on uniprot returned 200 results. There may be more results available, but they are not returned due to the limit of 200. Consider increasing the limit to get more results.
In [30]:
Copied!
# pdb_results is keyed by Uniprot accession (see the item printed below), so
# len() counts accessions in the result, not individual PDB entries.
pprint(f"Number of Uniprot accessions with PDB entries: {len(pdb_results)}")
pprint("First 5 PDBs of first Uniprot entry:")
# Each item is an (accession, PDB entries) pair.
first_uniprot = next(iter(pdb_results.items()))
pprint(first_uniprot[0])
pprint(list(first_uniprot[1])[:5])
# pdb_results is keyed by Uniprot accession (see the item printed below), so
# len() counts accessions in the result, not individual PDB entries.
pprint(f"Number of Uniprot accessions with PDB entries: {len(pdb_results)}")
pprint("First 5 PDBs of first Uniprot entry:")
# Each item is an (accession, PDB entries) pair.
first_uniprot = next(iter(pdb_results.items()))
pprint(first_uniprot[0])
pprint(list(first_uniprot[1])[:5])
'Number of Uniprot accessions with PDB entries: 46' 'First 5 PDBs of first Uniprot entry:' 'O15350' [PdbResult(id='2WQI', method='X-Ray_Crystallography', uniprot_chains='A/B/C/D=351-399', resolution='1.7'), PdbResult(id='4G83', method='X-Ray_Crystallography', uniprot_chains='A/B=115-312', resolution='4'), PdbResult(id='2XWC', method='X-Ray_Crystallography', uniprot_chains='A=112-311', resolution='1.82'), PdbResult(id='2KBY', method='NMR_Spectroscopy', uniprot_chains='A/B/C/D=351-398', resolution=None), PdbResult(id='2WQJ', method='X-Ray_Crystallography', uniprot_chains='1/2/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z=351-383', resolution='2')]
Find AlphaFold models for uniprot entries¶
In [31]:
Copied!
from protein_quest.uniprot import search4af
from protein_quest.uniprot import search4af
In [32]:
Copied!
# Look up AlphaFold entries for the same accessions, with limit=200.
afresults = search4af(uniprot_accessions, limit=200)
# Look up AlphaFold entries for the same accessions, with limit=200.
afresults = search4af(uniprot_accessions, limit=200)
In [33]:
Copied!
# Show how many keys the result holds and its first item; the output below
# shows an item is a (Uniprot accession, set of identifiers) pair.
pprint(f"Number of AlphaFold results: {len(afresults)}")
first_af = next(iter(afresults.items()))
pprint(first_af)
# Show how many keys the result holds and its first item; the output below
# shows an item is a (Uniprot accession, set of identifiers) pair.
pprint(f"Number of AlphaFold results: {len(afresults)}")
first_af = next(iter(afresults.items()))
pprint(first_af)
'Number of AlphaFold results: 198' ('A0A087WUV0', {'A0A087WUV0'})
Find Electron Microscopy Data Bank (EMDB) entries for uniprot entries¶
In [34]:
Copied!
from protein_quest.uniprot import search4emdb
from protein_quest.uniprot import search4emdb
In [35]:
Copied!
uniprot_accessions = search4emdb(uniprot_accessions, limit=200)
uniprot_accessions = search4emdb(uniprot_accessions, limit=200)
In [36]:
Copied!
pprint(f"Number of Uniprot accessions with EMDB entries: {len(uniprot_accessions)}")
first_uniprot = next(iter(uniprot_accessions.items()))
pprint(first_uniprot)
pprint(f"Number of Uniprot accessions with EMDB entries: {len(uniprot_accessions)}")
first_uniprot = next(iter(uniprot_accessions.items()))
pprint(first_uniprot)
'Number of Uniprot accessions with EMDB entries: 22' ('O14497', {'EMD-0974'})