Source code for prody.database.uniprot
from prody import LOGGER
from prody.proteins import parsePDB
from prody.utilities import dictElement, dictElementLoop, openURL
import re
from xml.etree.cElementTree import XML
__all__ = ['UniprotRecord', 'searchUniprot', 'queryUniprot']
comma_splitter = re.compile(r'\s*,\s*').split
[docs]class UniprotRecord(object):
"""This class provides a wrapper for UniProt data including functions
for accessing particular fields and parsing associated PDB entries."""
def __init__(self, data):
self._rawdata = data
self._pdbids = []
# self._selstrs = []
self._parse()
def __repr__(self):
return '<UniprotRecord: %s>'%self.getTitle()
def __str__(self):
return self.getTitle()
def setData(self, value):
self._rawdata = value
self._parse()
def getData(self):
return self._rawdata
def getPDBs(self):
return self._pdbids
# def getSelstrs(self):
# return self._selstrs
def getSequence(self, index=0):
return self.getEntry('sequence', index)
def getAccession(self, index=0):
return self.getEntry('accession', index)
def getName(self, index=0):
return self.getEntry('name', index)
def getTitle(self):
uid = self.getAccession()
name = self.getName()
return '%s (%s)'%(uid, name)
def getEntry(self, item, index=0):
key = '%s%4d'%(item, index)
if key in self._rawdata:
return self._rawdata[key]
else:
raise KeyError('%s does not exist in the Uniprot record'%key)
def _parse(self):
data = self._rawdata
PDBIDs = []
# SELSTRs = []
for key, value in data.items():
if not key.startswith('dbReference'):
continue
try:
pdbid = value['PDB']
except (KeyError, TypeError) as e:
continue
pdbchains = value['chains']
# example chain strings: "A=27-139, B=140-150" or "A/B=27-150"
chains = []
ranges = []
pdbchains = comma_splitter(pdbchains)
for chain in pdbchains:
chids, resrange = chain.split('=')
chids = [chid.strip() for chid in chids.split('/')]
resrange = resrange.split('-')
for chid in chids:
chains.append(chid)
ranges.append(resrange)
for chid, rng in zip(chains, ranges):
pdbchid = pdbid + chid if chid != '@' else pdbid
PDBIDs.append(pdbchid)
# SELSTRs.append('resnum %s to %s'%tuple(rng))
self._pdbids = PDBIDs
# self._selstrs = SELSTRs
[docs] def parsePDBs(self, **kwargs):
"""Load PDB into memory as :class:`.AtomGroup` instances using :func:`.parsePDB` and
perform selection based on residue ranges given by CATH."""
pdbs = self.getPDBs()
# selstrs = self.getSelstrs()
header = kwargs.get('header', False)
model = kwargs.get('model', None)
LOGGER.timeit('_uniprot_parsePDB')
LOGGER.info('Parsing {0} PDB files...'.format(len(pdbs)))
ret = parsePDB(*pdbs, **kwargs)
if model != 0:
headers = None
if header:
prots, headers = ret
else:
prots = ret
if not isinstance(prots, list):
prots = [prots]
if header:
headers = [headers]
ret = (prots, headers)
else:
ret = prots
LOGGER.info('Extracting domains...')
# for i in range(len(prots)):
# sel = prots[i].select(selstrs[i])
# prots[i] = sel
LOGGER.report('Uniprot domains are parsed and extracted in %.2fs', '_uniprot_parsePDB')
return ret
[docs]def queryUniprot(id, expand=[], regex=True):
"""Query Uniprot with *id* and return a `dict` containing the raw results.
Regular users should use :func:`searchUniprot` instead.
:arg expand: entries through which you want to loop dictElements
until there aren't any elements left
:type expand: list
"""
if not isinstance(id, str):
raise TypeError('id should be a string')
try:
record_file = openURL('http://www.uniprot.org/uniprot/{0}.xml'.format(id))
except:
raise ValueError('No Uniprot record found with that id')
data = record_file.read()
record_file.close()
data = XML(data)
data = dictElement(data[0], '{http://uniprot.org/uniprot}', number_multiples=True)
for key in data:
value = data[key]
if not key.startswith('dbReference'):
continue
try:
if value.get('type') != 'PDB':
continue
except AttributeError:
continue
pdbid = value.get('id')
refdata = {'PDB': pdbid}
for prop in value:
prop_key = prop.get('type')
prop_val = prop.get('value')
refdata[prop_key] = prop_val
data[key] = refdata
if expand:
keys = []
if regex:
for lt in expand:
lt_re = re.compile(lt)
for key in data:
if lt_re.match(key):
keys.append(key)
else:
keys = expand
data = dictElementLoop(data, keys, '{http://uniprot.org/uniprot}')
return data
[docs]def searchUniprot(id):
"""Search Uniprot with *id* and return a :class:`UniprotRecord` containing the results.
"""
data = queryUniprot(id)
return UniprotRecord(data)