In [6]:
Copied!
from pprint import pprint
from protein_detective.uniprot import Query, search4af, search4emdb, search4pdb, search4uniprot
from pprint import pprint
from protein_detective.uniprot import Query, search4af, search4emdb, search4pdb, search4uniprot
Search on UniProt¶
In [2]:
Copied!
import logging

# Configure logging once for the whole notebook.
# Level guide:
#   WARNING - see only warnings (e.g. a search that hit its result limit)
#   INFO    - also see the SPARQL queries
#   DEBUG   - also see the raw results
logging.basicConfig(level=logging.WARNING)
In [3]:
Copied!
# Search criteria shared by every lookup below.
query = Query(
    taxon_id="9606",  # presumably the NCBI taxonomy ID for Homo sapiens - confirm
    reviewed=True,
    subcellular_location_uniprot="nucleus",
    subcellular_location_go="GO:0005634",  # Cellular component - Nucleus
    molecular_function_go="GO:0003677",  # Molecular function - DNA binding
)
In [4]:
Copied!
# Look up the UniProt accessions that match `query`.
# At most `limit` accessions are returned; when that cap is reached a warning
# is logged, so raise `limit` if the complete result set is needed.
uniprot_accessions = search4uniprot(query, limit=200)
pprint(uniprot_accessions)
WARNING:protein_detective.uniprot:Search for uniprot accessions returned 200 results. There may be more results available, but they are not returned due to the limit of 200. Consider increasing the limit to get more results.
{'A0A087WUV0', 'A0A0C5B5G6', 'A0A0U1RQI7', 'A0A1B0GTS1', 'A0A1B0GVZ6', 'A0A1B0GWH4', 'A0A1W2PPF3', 'A0A1W2PPK0', 'A0A1W2PPM1', 'A0A1W2PQ73', 'A0A1W2PQL4', 'A0A1W2PRP0', 'A0A2R8Y619', 'A0A2Z4LIS9', 'A0A3B3IU63', 'A0A5F9ZHS7', 'A1A519', 'A1YPR0', 'A2RRD8', 'A2RU54', 'A3KN83', 'A6NCS4', 'A6NDR6', 'A6NDX5', 'A6NDZ8', 'A6NE82', 'A6NFD8', 'A6NFI3', 'A6NFQ7', 'A6NGD5', 'A6NHJ4', 'A6NHT5', 'A6NI15', 'A6NJ08', 'A6NJ46', 'A6NJG6', 'A6NJL1', 'A6NJT0', 'A6NK53', 'A6NK75', 'A6NKF2', 'A6NLW8', 'A6NM28', 'A6NMT0', 'A6NN14', 'A6NNA5', 'A6NNF4', 'A6NP11', 'A8K0S8', 'A8K830', 'A8K8V0', 'A8MPP1', 'A8MQ14', 'A8MT65', 'A8MT69', 'A8MTJ6', 'A8MTQ0', 'A8MTY0', 'A8MUV8', 'A8MUZ8', 'A8MWA4', 'A8MXY4', 'A8MYZ6', 'A8MZ59', 'A9YTQ3', 'B1APH4', 'B2RD01', 'B2RPK0', 'B2RXF5', 'B4DU55', 'B4DX44', 'B4DXR9', 'C9JN71', 'C9JSJ3', 'E7ETH6', 'E9PAV3', 'E9PGG2', 'O00110', 'O00255', 'O00257', 'O00268', 'O00287', 'O00321', 'O00327', 'O00358', 'O00409', 'O00470', 'O00479', 'O00482', 'O00570', 'O00571', 'O00712', 'O00716', 'O14497', 'O14503', 'O14529', 'O14593', 'O14627', 'O14628', 'O14646', 'O14647', 'O14686', 'O14709', 'O14737', 'O14744', 'O14746', 'O14753', 'O14770', 'O14771', 'O14802', 'O14813', 'O14867', 'O14896', 'O14901', 'O14948', 'O14978', 'O14979', 'O15015', 'O15054', 'O15055', 'O15062', 'O15090', 'O15119', 'O15160', 'O15162', 'O15164', 'O15178', 'O15198', 'O15209', 'O15226', 'O15266', 'O15294'}
Find Protein Data Bank (PDB) entries for UniProt entries¶
In [5]:
Copied!
# Find the PDB entries that belong to the UniProt accessions.
# The printed result maps each accession to its set of PdbResult records
# (id, method, chain, resolution). `limit` caps the number of results and a
# warning is logged when the cap is reached.
pdb_results = search4pdb(uniprot_accessions, limit=200)
pprint(pdb_results)
WARNING:protein_detective.uniprot:Search for pdbs on uniprot returned 200 results. There may be more results available, but they are not returned due to the limit of 200. Consider increasing the limit to get more results.
{'A8MT69': {PdbResult(id='4DRA', method='X-Ray_Crystallography', chain='E/F/G/H=1-81', resolution='2.41'), PdbResult(id='4DRB', method='X-Ray_Crystallography', chain='J/K/L/M/N/O=1-81', resolution='2.63'), PdbResult(id='4E44', method='X-Ray_Crystallography', chain='B/D=1-81', resolution='2.1'), PdbResult(id='4E45', method='X-Ray_Crystallography', chain='B/D/G/I/L/N=1-81', resolution='2'), PdbResult(id='4NDY', method='X-Ray_Crystallography', chain='B/D/H/L/M/N/U/V/W/X=8-81', resolution='7'), PdbResult(id='4NE1', method='X-Ray_Crystallography', chain='B/D/H/L/M/N/U/V/W/X/Z/b/d/h/i/j/o/p/q/r=8-81', resolution='6.5'), PdbResult(id='4NE3', method='X-Ray_Crystallography', chain='B=8-81', resolution='1.8'), PdbResult(id='4NE5', method='X-Ray_Crystallography', chain='B/D/F/H=8-81', resolution='2.5'), PdbResult(id='4NE6', method='X-Ray_Crystallography', chain='B/D=8-81', resolution='2.1')}, 'O00255': {PdbResult(id='4GPQ', method='X-Ray_Crystallography', chain='A=74-386,A=399-459,A=1-53,A=537-593', resolution='1.46'), PdbResult(id='4GQ3', method='X-Ray_Crystallography', chain='A=1-53,A=74-386,A=537-593,A=399-459', resolution='1.56'), PdbResult(id='4GQ4', method='X-Ray_Crystallography', chain='A=1-53,A=399-459,A=537-593,A=74-386', resolution='1.27'), PdbResult(id='4GQ6', method='X-Ray_Crystallography', chain='A=1-53,A=74-386,A=537-593,A=399-459', resolution='1.55'), PdbResult(id='4I80', method='X-Ray_Crystallography', chain='A=2-458,A=520-610', resolution='3.1'), PdbResult(id='4OG3', method='X-Ray_Crystallography', chain='A=74-386,A=549-593,A=399-459,A=1-53', resolution='2.01'), PdbResult(id='4OG4', method='X-Ray_Crystallography', chain='A=74-386,A=1-53,A=399-459,A=549-593', resolution='1.45'), PdbResult(id='4OG5', method='X-Ray_Crystallography', chain='A=399-459,A=1-53,A=74-386,A=549-593', resolution='1.63'), PdbResult(id='4OG6', method='X-Ray_Crystallography', chain='A=74-386,A=549-593,A=399-459,A=1-53', resolution='1.49'), PdbResult(id='4OG7', 
method='X-Ray_Crystallography', chain='A=74-386,A=1-53,A=549-593,A=399-459', resolution='2.08'), PdbResult(id='4OG8', method='X-Ray_Crystallography', chain='A=1-53,A=74-386,A=549-593,A=399-459', resolution='1.53'), PdbResult(id='4X5Y', method='X-Ray_Crystallography', chain='A=1-53,A=74-386,A=537-593,A=399-459', resolution='1.59'), PdbResult(id='4X5Z', method='X-Ray_Crystallography', chain='A=1-53,A=399-459,A=537-593,A=74-386', resolution='1.86'), PdbResult(id='5DB0', method='X-Ray_Crystallography', chain='A=1-53,A=399-459,A=74-386,A=537-593', resolution='1.5'), PdbResult(id='5DB1', method='X-Ray_Crystallography', chain='A=537-593,A=74-386,A=399-459,A=1-53', resolution='1.86'), PdbResult(id='5DB2', method='X-Ray_Crystallography', chain='A=537-593,A=399-459,A=1-53,A=74-386', resolution='1.54'), PdbResult(id='5DB3', method='X-Ray_Crystallography', chain='A=74-386,A=537-593,A=399-459,A=1-53', resolution='1.71'), PdbResult(id='5DD9', method='X-Ray_Crystallography', chain='A=1-53,A=399-459,A=537-593,A=74-386', resolution='1.62'), PdbResult(id='5DDA', method='X-Ray_Crystallography', chain='A=1-53,A=399-459,A=537-593,A=74-386', resolution='1.83'), PdbResult(id='5DDB', method='X-Ray_Crystallography', chain='A=399-459,A=74-386,A=537-593,A=1-53', resolution='1.54'), PdbResult(id='5DDC', method='X-Ray_Crystallography', chain='A=74-386,A=1-53,A=537-593,A=399-459', resolution='1.62'), PdbResult(id='5DDD', method='X-Ray_Crystallography', chain='A=399-459,A=537-593,A=1-53,A=74-386', resolution='2.14'), PdbResult(id='5DDE', method='X-Ray_Crystallography', chain='A=399-459,A=1-53,A=74-386,A=537-593', resolution='1.78'), PdbResult(id='5DDF', method='X-Ray_Crystallography', chain='A=399-459,A=74-386,A=537-593,A=1-53', resolution='1.66'), PdbResult(id='3U85', method='X-Ray_Crystallography', chain='A=2-459,A=520-610', resolution='3'), PdbResult(id='3U86', method='X-Ray_Crystallography', chain='A=2-459,A=520-610', resolution='2.84'), PdbResult(id='3U88', method='X-Ray_Crystallography', 
chain='A/B=520-610,A/B=2-459', resolution='3'), PdbResult(id='3U84', method='X-Ray_Crystallography', chain='A/B=2-459,A/B=520-610', resolution='2.5')}, 'O00257': {PdbResult(id='5EPL', method='X-Ray_Crystallography', chain='A/B=8-65', resolution='1.81'), PdbResult(id='3I8Z', method='X-Ray_Crystallography', chain='A=8-62', resolution='1.51'), PdbResult(id='2K28', method='NMR_Spectroscopy', chain='A=8-65', resolution=None)}, 'O00268': {PdbResult(id='2P6V', method='X-Ray_Crystallography', chain='A=575-688', resolution='2'), PdbResult(id='1H3O', method='X-Ray_Crystallography', chain='A/C=872-945', resolution='2.3')}, 'O00287': {PdbResult(id='2KW3', method='NMR_Spectroscopy', chain='C=214-272', resolution=None)}, 'O00327': {PdbResult(id='4H10', method='X-Ray_Crystallography', chain='A=66-128', resolution='2.4')}, 'O00470': {PdbResult(id='4XRS', method='X-Ray_Crystallography', chain='A/B=279-336', resolution='3.5'), PdbResult(id='5EGO', method='X-Ray_Crystallography', chain='A=279-333', resolution='2.54')}, 'O00482': {PdbResult(id='4DOR', method='X-Ray_Crystallography', chain='A/B=290-541', resolution='1.9'), PdbResult(id='4DOS', method='X-Ray_Crystallography', chain='A=299-538', resolution='2'), PdbResult(id='4IS8', method='X-Ray_Crystallography', chain='A/B=300-538', resolution='2.78'), PdbResult(id='4ONI', method='X-Ray_Crystallography', chain='A/B=291-541', resolution='1.8'), PdbResult(id='4PLD', method='X-Ray_Crystallography', chain='A=301-541', resolution='1.75'), PdbResult(id='4PLE', method='X-Ray_Crystallography', chain='A/C/E/G=301-541', resolution='1.75'), PdbResult(id='4RWV', method='X-Ray_Crystallography', chain='A=294-541', resolution='1.86'), PdbResult(id='5L0M', method='X-Ray_Crystallography', chain='A=79-187', resolution='2.2'), PdbResult(id='5L11', method='X-Ray_Crystallography', chain='A=299-541', resolution='1.85'), PdbResult(id='3TX7', method='X-Ray_Crystallography', chain='B=191-541', resolution='2.76'), PdbResult(id='3PLZ', 
method='X-Ray_Crystallography', chain='A/B=300-541', resolution='1.75'), PdbResult(id='2A66', method='X-Ray_Crystallography', chain='A=79-187', resolution='2.2'), PdbResult(id='1YOK', method='X-Ray_Crystallography', chain='A=300-541', resolution='2.5'), PdbResult(id='1YUC', method='X-Ray_Crystallography', chain='A/B=290-541', resolution='1.9'), PdbResult(id='1ZDU', method='X-Ray_Crystallography', chain='A=297-541', resolution='2.5')}, 'O00571': {PdbResult(id='4O2C', method='X-Ray_Crystallography', chain='C=2-10', resolution='1.8'), PdbResult(id='4O2E', method='X-Ray_Crystallography', chain='C/F=2-10', resolution='1.98'), PdbResult(id='4O2F', method='X-Ray_Crystallography', chain='C/F=3-10', resolution='1.9'), PdbResult(id='4PX9', method='X-Ray_Crystallography', chain='A/B/C=135-407', resolution='2.31'), PdbResult(id='4PXA', method='X-Ray_Crystallography', chain='A=135-582', resolution='3.2'), PdbResult(id='5E7I', method='X-Ray_Crystallography', chain='A/B/C=133-584', resolution='2.22'), PdbResult(id='5E7J', method='X-Ray_Crystallography', chain='A=133-584', resolution='2.23'), PdbResult(id='5E7M', method='X-Ray_Crystallography', chain='A=133-584', resolution='2.3'), PdbResult(id='3JRV', method='X-Ray_Crystallography', chain='C/D/E=71-90', resolution='1.6'), PdbResult(id='2JGN', method='X-Ray_Crystallography', chain='A/B/C=409-580', resolution='1.91'), PdbResult(id='2I4I', method='X-Ray_Crystallography', chain='A=168-582', resolution='2.2')}, 'O14497': {PdbResult(id='1RYU', method='NMR_Spectroscopy', chain='A=1000-1119', resolution=None)}, 'O14529': {PdbResult(id='1WH6', method='NMR_Spectroscopy', chain='A=887-974', resolution=None), PdbResult(id='1WH8', method='NMR_Spectroscopy', chain='A=1028-1125', resolution=None), PdbResult(id='1X2L', method='NMR_Spectroscopy', chain='A=544-631', resolution=None)}, 'O14593': {PdbResult(id='3V30', method='X-Ray_Crystallography', chain='A=90-260', resolution='1.57'), PdbResult(id='3UXG', method='X-Ray_Crystallography', 
chain='A=90-260', resolution='1.85')}, 'O14646': {PdbResult(id='2B2T', method='X-Ray_Crystallography', chain='C=268-373,A/B=268-443', resolution='2.45'), PdbResult(id='4B4C', method='X-Ray_Crystallography', chain='A=1119-1327', resolution='1.62'), PdbResult(id='4NW2', method='X-Ray_Crystallography', chain='A/C=268-443', resolution='1.9'), PdbResult(id='4O42', method='X-Ray_Crystallography', chain='A=268-443', resolution='1.87'), PdbResult(id='5AFW', method='X-Ray_Crystallography', chain='A=270-443', resolution='1.6'), PdbResult(id='2B2V', method='X-Ray_Crystallography', chain='C=268-373,A/B=268-443', resolution='2.65'), PdbResult(id='2N39', method='NMR_Spectroscopy', chain='A=1409-1511', resolution=None), PdbResult(id='2B2W', method='X-Ray_Crystallography', chain='C=268-373,A/B=268-443', resolution='2.4'), PdbResult(id='2B2U', method='X-Ray_Crystallography', chain='C=268-373,A/B=268-443', resolution='2.95'), PdbResult(id='2B2Y', method='X-Ray_Crystallography', chain='C=268-373,A/B=268-443', resolution='2.35')}, 'O14686': {PdbResult(id='4ERQ', method='X-Ray_Crystallography', chain='D/E/F=5333-5346', resolution='1.91'), PdbResult(id='4Z4P', method='X-Ray_Crystallography', chain='A=5382-5536', resolution='2.2'), PdbResult(id='3UVK', method='X-Ray_Crystallography', chain='B=5337-5347', resolution='1.4')}, 'O14737': {PdbResult(id='2CRU', method='NMR_Spectroscopy', chain='A=9-113', resolution=None), PdbResult(id='2K6B', method='NMR_Spectroscopy', chain='A=2-112', resolution=None), PdbResult(id='1YYB', method='NMR_Spectroscopy', chain='A=1-26', resolution=None)}, 'O14744': {PdbResult(id='4GQB', method='X-Ray_Crystallography', chain='A=1-637', resolution='2.06'), PdbResult(id='4X60', method='X-Ray_Crystallography', chain='A=2-637', resolution='2.35'), PdbResult(id='4X61', method='X-Ray_Crystallography', chain='A=2-637', resolution='2.85'), PdbResult(id='4X63', method='X-Ray_Crystallography', chain='A=2-637', resolution='3.05'), PdbResult(id='5C9Z', 
method='X-Ray_Crystallography', chain='A=2-637', resolution='2.36'), PdbResult(id='5EMJ', method='X-Ray_Crystallography', chain='A=2-637', resolution='2.27'), PdbResult(id='5EMK', method='X-Ray_Crystallography', chain='A=2-637', resolution='2.52'), PdbResult(id='5EML', method='X-Ray_Crystallography', chain='A=2-637', resolution='2.39'), PdbResult(id='5EMM', method='X-Ray_Crystallography', chain='A=2-637', resolution='2.37'), PdbResult(id='5FA5', method='X-Ray_Crystallography', chain='A=1-637', resolution='2.34')}, 'O14746': {PdbResult(id='4B18', method='X-Ray_Crystallography', chain='B=222-240', resolution='2.52'), PdbResult(id='4MNQ', method='X-Ray_Crystallography', chain='C=540-548', resolution='2.74'), PdbResult(id='5MEN', method='X-Ray_Crystallography', chain='C=540-548', resolution='2.81'), PdbResult(id='5MEO', method='X-Ray_Crystallography', chain='C=540-548', resolution='1.77'), PdbResult(id='5MEP', method='X-Ray_Crystallography', chain='C/F=540-548', resolution='2.71'), PdbResult(id='5MEQ', method='X-Ray_Crystallography', chain='C=540-546', resolution='2.27'), PdbResult(id='5MER', method='X-Ray_Crystallography', chain='C/F=540-546', resolution='1.88'), PdbResult(id='2BCK', method='X-Ray_Crystallography', chain='C/F=461-469', resolution='2.8')}, 'O14770': {PdbResult(id='4XRM', method='X-Ray_Crystallography', chain='A/B=281-342', resolution='1.6'), PdbResult(id='5BNG', method='X-Ray_Crystallography', chain='A/B=283-342', resolution='3.5'), PdbResult(id='5EG0', method='X-Ray_Crystallography', chain='A=284-338', resolution='3.1'), PdbResult(id='3K2A', method='X-Ray_Crystallography', chain='A/B=281-345', resolution='1.95')}, 'O14867': {PdbResult(id='2IHC', method='X-Ray_Crystallography', chain='A/B/C/D=7-128', resolution='2.44')}, 'O15054': {PdbResult(id='4ASK', method='X-Ray_Crystallography', chain='A/B=1141-1643', resolution='1.86'), PdbResult(id='5FP3', method='X-Ray_Crystallography', chain='A/B=1141-1643', resolution='2.05'), PdbResult(id='5OY3', 
method='X-Ray_Crystallography', chain='A=1141-1643', resolution='2.14'), PdbResult(id='2XXZ', method='X-Ray_Crystallography', chain='A/B=1176-1505', resolution='1.8'), PdbResult(id='2XUE', method='X-Ray_Crystallography', chain='A/B=1141-1643', resolution='2')}, 'O15119': {PdbResult(id='1H6F', method='X-Ray_Crystallography', chain='A/B=101-311', resolution='1.7')}, 'O15162': {PdbResult(id='1Y2A', method='X-Ray_Crystallography', chain='P=257-266', resolution='2.2')}, 'O15164': {PdbResult(id='4YAB', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='1.9'), PdbResult(id='4YAD', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='1.73'), PdbResult(id='4YAT', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='2.18'), PdbResult(id='4YAX', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='2.25'), PdbResult(id='4YBM', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='1.46'), PdbResult(id='4YBS', method='X-Ray_Crystallography', chain='A=824-1006', resolution='1.83'), PdbResult(id='4YBT', method='X-Ray_Crystallography', chain='A=824-1006', resolution='1.82'), PdbResult(id='4YC9', method='X-Ray_Crystallography', chain='A=824-1006', resolution='1.82'), PdbResult(id='4ZQL', method='X-Ray_Crystallography', chain='A/B=825-1006', resolution='1.79'), PdbResult(id='5H1T', method='X-Ray_Crystallography', chain='A/B/C/D=824-1006', resolution='1.95'), PdbResult(id='5H1U', method='X-Ray_Crystallography', chain='A/B/C/D=824-1006', resolution='1.9'), PdbResult(id='5H1V', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='2'), PdbResult(id='3O37', method='X-Ray_Crystallography', chain='A/B/C/D=824-1006', resolution='2'), PdbResult(id='3O35', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='1.76'), PdbResult(id='3O33', method='X-Ray_Crystallography', chain='A/B/C/D=824-1006', resolution='2'), PdbResult(id='3O34', method='X-Ray_Crystallography', chain='A=824-1006', resolution='1.9'), 
PdbResult(id='3O36', method='X-Ray_Crystallography', chain='A/B=824-1006', resolution='1.7'), PdbResult(id='2YYN', method='X-Ray_Crystallography', chain='A/B/C/D=891-1012', resolution='2.5')}, 'O15178': {PdbResult(id='5QRF', method='X-Ray_Crystallography', chain='A=41-211', resolution='2.03'), PdbResult(id='5QRG', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.95'), PdbResult(id='5QRH', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.81'), PdbResult(id='5QRI', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.83'), PdbResult(id='5QRJ', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.81'), PdbResult(id='5QRK', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.63'), PdbResult(id='5QRL', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.76'), PdbResult(id='5QRM', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.55'), PdbResult(id='5QRN', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.62'), PdbResult(id='5QRO', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.61'), PdbResult(id='5QRP', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.67'), PdbResult(id='5QRQ', method='X-Ray_Crystallography', chain='A=41-211', resolution='2.1'), PdbResult(id='5QRR', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.69'), PdbResult(id='5QRS', method='X-Ray_Crystallography', chain='A=41-211', resolution='2.06'), PdbResult(id='5QRT', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.77'), PdbResult(id='5QRU', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.76'), PdbResult(id='5QRV', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.67'), PdbResult(id='5QRW', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.74'), PdbResult(id='5QRX', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.87'), PdbResult(id='5QRY', method='X-Ray_Crystallography', 
chain='A=41-211', resolution='1.58'), PdbResult(id='5QRZ', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.96'), PdbResult(id='5QS0', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.6'), PdbResult(id='5QS1', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.66'), PdbResult(id='5QS2', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.68'), PdbResult(id='5QS3', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.71'), PdbResult(id='5QS4', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.65'), PdbResult(id='5QS5', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.81'), PdbResult(id='5QS6', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.67'), PdbResult(id='5QS7', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.66'), PdbResult(id='5QS8', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.47'), PdbResult(id='5QS9', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.43'), PdbResult(id='5QSA', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.55'), PdbResult(id='5QSB', method='X-Ray_Crystallography', chain='A=41-211', resolution='1.82')}, 'O15294': {PdbResult(id='4AY5', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='3.15'), PdbResult(id='4AY6', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='3.3'), PdbResult(id='4CDR', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='3.15'), PdbResult(id='4GYW', method='X-Ray_Crystallography', chain='A/C=323-1041', resolution='1.7'), PdbResult(id='4GYY', method='X-Ray_Crystallography', chain='A/C=323-1041', resolution='1.85'), PdbResult(id='4GZ3', method='X-Ray_Crystallography', chain='A/C=323-1041', resolution='1.9'), PdbResult(id='4GZ5', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='3.08'), PdbResult(id='4GZ6', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', 
resolution='2.98'), PdbResult(id='4N39', method='X-Ray_Crystallography', chain='A=323-1041', resolution='1.76'), PdbResult(id='4N3A', method='X-Ray_Crystallography', chain='A=323-1041', resolution='1.88'), PdbResult(id='4N3B', method='X-Ray_Crystallography', chain='A=323-1041', resolution='2.17'), PdbResult(id='4N3C', method='X-Ray_Crystallography', chain='A=323-1041', resolution='2.55'), PdbResult(id='4XI9', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='3.1'), PdbResult(id='4XIF', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='3.2'), PdbResult(id='5BNW', method='X-Ray_Crystallography', chain='A=323-1041', resolution='2.4'), PdbResult(id='5C1D', method='X-Ray_Crystallography', chain='A=323-1041', resolution='2.05'), PdbResult(id='5HGV', method='X-Ray_Crystallography', chain='A/C=323-1041', resolution='2.05'), PdbResult(id='5LVV', method='X-Ray_Crystallography', chain='A=325-1046', resolution='2.54'), PdbResult(id='5LWV', method='X-Ray_Crystallography', chain='A=325-1046', resolution='1.9'), PdbResult(id='5NPR', method='X-Ray_Crystallography', chain='A=325-1041', resolution='1.85'), PdbResult(id='5NPS', method='X-Ray_Crystallography', chain='A=324-1041', resolution='1.68'), PdbResult(id='3TAX', method='X-Ray_Crystallography', chain='A/C=323-1041', resolution='1.88'), PdbResult(id='3PE3', method='X-Ray_Crystallography', chain='A/B/C/D=323-1041', resolution='2.78'), PdbResult(id='3PE4', method='X-Ray_Crystallography', chain='A/C=323-1041', resolution='1.95'), PdbResult(id='1W3B', method='X-Ray_Crystallography', chain='A/B=26-410', resolution='2.85')}}
Find AlphaFold models for UniProt entries¶
In [7]:
Copied!
# Find the AlphaFold models that belong to the UniProt accessions.
# The printed result maps each accession to a set of AlphaFold identifiers.
afresults = search4af(uniprot_accessions, limit=200)
pprint(afresults)
{'A0A087WUV0': {'A0A087WUV0'}, 'A0A0C5B5G6': {'A0A0C5B5G6'}, 'A0A0U1RQI7': {'A0A0U1RQI7'}, 'A0A1B0GTS1': {'A0A1B0GTS1'}, 'A0A1B0GVZ6': {'A0A1B0GVZ6'}, 'A0A1B0GWH4': {'A0A1B0GWH4'}, 'A0A1W2PPF3': {'A0A1W2PPF3'}, 'A0A1W2PPK0': {'A0A1W2PPK0'}, 'A0A1W2PPM1': {'A0A1W2PPM1'}, 'A0A1W2PQ73': {'A0A1W2PQ73'}, 'A0A1W2PQL4': {'A0A1W2PQL4'}, 'A0A1W2PRP0': {'A0A1W2PRP0'}, 'A0A2R8Y619': {'A0A2R8Y619'}, 'A0A2Z4LIS9': {'A0A2Z4LIS9'}, 'A0A3B3IU63': {'A0A3B3IU63'}, 'A0A5F9ZHS7': {'A0A5F9ZHS7'}, 'A1A519': {'A1A519'}, 'A1YPR0': {'A1YPR0'}, 'A2RRD8': {'A2RRD8'}, 'A2RU54': {'A2RU54'}, 'A3KN83': {'A3KN83'}, 'A6NCS4': {'A6NCS4'}, 'A6NDR6': {'A6NDR6'}, 'A6NDX5': {'A6NDX5'}, 'A6NDZ8': {'A6NDZ8'}, 'A6NE82': {'A6NE82'}, 'A6NFD8': {'A6NFD8'}, 'A6NFI3': {'A6NFI3'}, 'A6NFQ7': {'A6NFQ7'}, 'A6NGD5': {'A6NGD5'}, 'A6NHJ4': {'A6NHJ4'}, 'A6NHT5': {'A6NHT5'}, 'A6NI15': {'A6NI15'}, 'A6NJ08': {'A6NJ08'}, 'A6NJ46': {'A6NJ46'}, 'A6NJG6': {'A6NJG6'}, 'A6NJL1': {'A6NJL1'}, 'A6NJT0': {'A6NJT0'}, 'A6NK53': {'A6NK53'}, 'A6NK75': {'A6NK75'}, 'A6NKF2': {'A6NKF2'}, 'A6NLW8': {'A6NLW8'}, 'A6NM28': {'A6NM28'}, 'A6NMT0': {'A6NMT0'}, 'A6NN14': {'A6NN14'}, 'A6NNA5': {'A6NNA5'}, 'A6NNF4': {'A6NNF4'}, 'A6NP11': {'A6NP11'}, 'A8K0S8': {'A8K0S8'}, 'A8K830': {'A8K830'}, 'A8K8V0': {'A8K8V0'}, 'A8MPP1': {'A8MPP1'}, 'A8MQ14': {'A8MQ14'}, 'A8MT65': {'A8MT65'}, 'A8MT69': {'A8MT69'}, 'A8MTJ6': {'A8MTJ6'}, 'A8MTQ0': {'A8MTQ0'}, 'A8MTY0': {'A8MTY0'}, 'A8MUV8': {'A8MUV8'}, 'A8MUZ8': {'A8MUZ8'}, 'A8MWA4': {'A8MWA4'}, 'A8MXY4': {'A8MXY4'}, 'A8MYZ6': {'A8MYZ6'}, 'A8MZ59': {'A8MZ59'}, 'A9YTQ3': {'A9YTQ3'}, 'B1APH4': {'B1APH4'}, 'B2RD01': {'B2RD01'}, 'B2RPK0': {'B2RPK0'}, 'B2RXF5': {'B2RXF5'}, 'B4DU55': {'B4DU55'}, 'B4DX44': {'B4DX44'}, 'B4DXR9': {'B4DXR9'}, 'C9JN71': {'C9JN71'}, 'C9JSJ3': {'C9JSJ3'}, 'E7ETH6': {'E7ETH6'}, 'E9PAV3': {'E9PAV3'}, 'E9PGG2': {'E9PGG2'}, 'O00110': {'O00110'}, 'O00255': {'O00255'}, 'O00257': {'O00257'}, 'O00268': {'O00268'}, 'O00287': {'O00287'}, 'O00321': {'O00321'}, 'O00327': {'O00327'}, 'O00358': {'O00358'}, 
'O00409': {'O00409'}, 'O00470': {'O00470'}, 'O00479': {'O00479'}, 'O00482': {'O00482'}, 'O00570': {'O00570'}, 'O00571': {'O00571'}, 'O00712': {'O00712'}, 'O00716': {'O00716'}, 'O14497': {'O14497'}, 'O14503': {'O14503'}, 'O14529': {'O14529'}, 'O14593': {'O14593'}, 'O14627': {'O14627'}, 'O14628': {'O14628'}, 'O14646': {'O14646'}, 'O14647': {'O14647'}, 'O14709': {'O14709'}, 'O14737': {'O14737'}, 'O14744': {'O14744'}, 'O14746': {'O14746'}, 'O14753': {'O14753'}, 'O14770': {'O14770'}, 'O14771': {'O14771'}, 'O14802': {'O14802'}, 'O14813': {'O14813'}, 'O14867': {'O14867'}, 'O14896': {'O14896'}, 'O14901': {'O14901'}, 'O14948': {'O14948'}, 'O14978': {'O14978'}, 'O14979': {'O14979'}, 'O15015': {'O15015'}, 'O15054': {'O15054'}, 'O15055': {'O15055'}, 'O15062': {'O15062'}, 'O15090': {'O15090'}, 'O15119': {'O15119'}, 'O15160': {'O15160'}, 'O15162': {'O15162'}, 'O15164': {'O15164'}, 'O15178': {'O15178'}, 'O15198': {'O15198'}, 'O15209': {'O15209'}, 'O15226': {'O15226'}, 'O15266': {'O15266'}, 'O15294': {'O15294'}}
Find Electron Microscopy Data Bank (EMDB) entries for UniProt entries¶
In [8]:
Copied!
# Find the EMDB entries that belong to the UniProt accessions.
# The result is stored under its own name: assigning it back to
# `uniprot_accessions` would replace the accession set with the
# accession -> EMDB-ids mapping, so re-running this cell (or any of the
# search cells that take `uniprot_accessions`) would receive the wrong input.
emdb_results = search4emdb(uniprot_accessions, limit=200)
pprint(emdb_results)
{'A8MT69': {'EMD-14351', 'EMD-33196', 'EMD-33197', 'EMD-14336'}, 'O00255': {'EMD-34195'}, 'O00268': {'EMD-31075', 'EMD-31107', 'EMD-31108', 'EMD-31109', 'EMD-31110', 'EMD-31111', 'EMD-31112', 'EMD-31113', 'EMD-31114', 'EMD-31115', 'EMD-31116', 'EMD-31118', 'EMD-31119', 'EMD-31204', 'EMD-31207', 'EMD-34359', 'EMD-34360', 'EMD-37395', 'EMD-37396', 'EMD-37398', 'EMD-37399', 'EMD-37400', 'EMD-37401', 'EMD-37402', 'EMD-37403', 'EMD-9298', 'EMD-9302', 'EMD-9305', 'EMD-9306'}, 'O00482': {'EMD-17740'}, 'O14497': {'EMD-0974'}, 'O14744': {'EMD-29677', 'EMD-27078', 'EMD-20764', 'EMD-7137', 'EMD-23609'}, 'O14746': {'EMD-12174', 'EMD-14196', 'EMD-14197', 'EMD-14198', 'EMD-14199', 'EMD-2310', 'EMD-2311', 'EMD-2312', 'EMD-26086', 'EMD-26087', 'EMD-26088', 'EMD-31811', 'EMD-31812'}, 'O14802': {'EMD-11673', 'EMD-11736', 'EMD-11738', 'EMD-11742', 'EMD-11904', 'EMD-30577', 'EMD-30578', 'EMD-30779', 'EMD-30865', 'EMD-31621', 'EMD-31622', 'EMD-35712', 'EMD-35719', 'EMD-35722'}, 'O14867': {'EMD-19766', 'EMD-42049', 'EMD-42051', 'EMD-42064', 'EMD-42102', 'EMD-42105', 'EMD-42106'}, 'O14979': {'EMD-14738'}, 'O15062': {'EMD-44389'}, 'O15160': {'EMD-11673', 'EMD-11736', 'EMD-11738', 'EMD-11742', 'EMD-11904', 'EMD-12795', 'EMD-12796', 'EMD-12797', 'EMD-15135', 'EMD-30577', 'EMD-30578', 'EMD-30779', 'EMD-30865', 'EMD-31621', 'EMD-31622', 'EMD-31876', 'EMD-31877', 'EMD-31878', 'EMD-35712', 'EMD-35719', 'EMD-35722'}, 'O15294': {'EMD-33768', 'EMD-12588', 'EMD-33773'}}
In [ ]:
Copied!