# -*- coding: utf-8 -*-
"""This module defines atom flags that are used in :ref:`selections`.
You can read this page in interactive sessions using ``help(flags)``.
Flag labels can be used in atom selections:
.. ipython:: python
from prody import *
p = parsePDB('1ubi')
p.select('protein')
Flag labels can be combined with dot operator as follows to make selections:
.. ipython:: python
p.protein
p.protein.acidic # selects acidic residues
Flag labels can be prefixed with ``'is'`` to check whether all atoms in
an :class:`.Atomic` instance are flagged the same way:
.. ipython:: python
p.protein.ishetero
p.water.ishetero
Flag labels can also be used to make quick atom counts:
.. ipython:: python
p.numAtoms()
p.numAtoms('protein')
p.numAtoms('water')"""
from re import compile as recompile
from time import time
from collections import defaultdict
from numpy import array, ones, zeros
from prody import SETTINGS, LOGGER
from prody.utilities import joinLinks, joinTerms, wrapText
__all__ = ['flagDefinition', 'listNonstdAAProps', 'getNonstdProperties',
'addNonstdAminoacid', 'delNonstdAminoacid']
TIMESTAMP_KEY = 'flags_timestamp'
DEFINITIONS_KEY = 'flags_definitions'
NONSTANDARD_KEY = 'flags_nonstandard'
ALIASES = {}
PLANTERS = {}
EDITORS = {}
FIELDS = defaultdict(set) # flags that fill be nulled when a field changes
FIELDSDEFAULT = ['name', 'resname', 'resnum']
TIMESTAMP = 0
PDBLIGSUM = ('.. _{0}: '
'http://www.pdb.org/pdb/ligand/ligandsummary.do?hetId={0}\n')
STANDARDAA = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS',
'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP',
'TYR', 'VAL']
NONSTANDARD = {
'ASX': set(['acyclic', 'surface', 'polar', 'medium']),
'GLX': set(['acyclic', 'surface', 'large', 'polar']),
'CSO': set(['acyclic', 'neutral', 'surface', 'medium', 'polar']),
'CYX': set(['acyclic', 'neutral', 'buried', 'medium', 'polar']),
'HIP': set(['cyclic', 'basic', 'surface', 'large', 'polar']),
'HID': set(['cyclic', 'basic', 'surface', 'large', 'polar']),
'HIE': set(['cyclic', 'basic', 'surface', 'large', 'polar']),
'HSD': set(['cyclic', 'basic', 'surface', 'large', 'polar']),
'HSE': set(['cyclic', 'basic', 'surface', 'large', 'polar']),
'HSP': set(['cyclic', 'acidic', 'surface', 'large', 'polar']),
'MSE': set(['acyclic', 'neutral', 'buried', 'large']),
'SEC': set(['acyclic', 'neutral', 'buried', 'polar', 'medium']),
'SEP': set(['acyclic', 'surface', 'acidic', 'large', 'polar']),
'TPO': set(['acyclic', 'surface', 'acidic', 'large', 'polar']),
'PTR': set(['cyclic', 'aromatic', 'surface', 'acidic', 'large', 'polar']),
'PHD': set(['acyclic', 'acidic', 'surface', 'polar', 'large']),
'XLE': set(['aliphatic', 'acyclic', 'buried', 'hydrophobic', 'large']),
'XAA': set(),
'MEN': set(),
'CSB': set()
}
DEFAULTS = {
'stdaa': set(['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
'THR', 'TRP', 'TYR', 'VAL']),
'nonstdaa': set(NONSTANDARD),
# GLY is excluded from this list
'aliphatic': set(['ALA', 'ILE', 'LEU', 'VAL', 'PRO', 'GLY']),
'aromatic': set(['HIS', 'PHE', 'TRP', 'TYR']),
'acidic': set(['ASP', 'GLU']),
'basic': set(['LYS', 'ARG', 'HIS']),
'neutral': set(['ALA', 'ASN', 'CYS', 'GLN', 'GLY', 'ILE', 'LEU', 'MET',
'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']),
'buried': set(['ALA', 'CYS', 'ILE', 'LEU', 'MET', 'PHE', 'TRP', 'VAL']),
'surface': set(['ARG', 'ASN', 'ASP', 'GLN', 'GLU', 'GLY', 'HIS', 'LYS',
'PRO', 'SER', 'THR', 'TYR']),
'cyclic': set(['HIS', 'PHE', 'PRO', 'TRP', 'TYR']),
'acyclic': set(['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
'ILE', 'LEU', 'LYS', 'MET', 'SER', 'THR', 'VAL']),
'hydrophobic': set(['ALA', 'ILE', 'LEU', 'MET', 'PHE', 'PRO', 'TRP',
'VAL']),
'polar': set(['ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS',
'LYS', 'SER', 'THR', 'TYR']),
'small': set(['ALA', 'GLY', 'SER']),
'medium': set(['ASN', 'ASP', 'CYS', 'PRO', 'THR', 'VAL']),
'large': set(['ARG', 'GLN', 'GLU', 'HIS', 'ILE', 'LEU', 'LYS', 'MET',
'PHE', 'TRP', 'TYR']),
'hydrogen': '[0-9]?H.*',
'carbon': 'C.*',
'nitrogen': 'N.*',
'oxygen': 'O.*',
'sulfur': 'S.*',
'nucleobase': set(['GUN', 'ADE', 'CYT', 'THY', 'URA']),
'nucleotide': set(['DA', 'DC', 'DG', 'DT', 'DU', 'A', 'C', 'G', 'T', 'U']),
'nucleoside': set(['AMP', 'ADP', 'ATP', 'CDP', 'CTP', 'GMP', 'GDP', 'GTP',
'TMP', 'TTP', 'UMP', 'UDP', 'UTP']),
'at': set(['ADE', 'A', 'THY', 'T']),
'cg': set(['CYT', 'C', 'GUN', 'G']),
'purine': set(['ADE', 'A', 'GUN', 'G']),
'pyrimidine': set(['CYT', 'C', 'THY', 'T', 'URA', 'U']),
'water': set(['HOH', 'DOD', 'WAT', 'TIP3', 'H2O', 'OH2', 'TIP', 'TIP2',
'TIP4']),
'ion': set(['AL', 'BA', 'CA', 'CD', 'CL', 'CO', 'CS', 'CU', 'CU1', 'CUA',
'HG', 'IN', 'IOD', 'K', 'MG', 'MN3', 'NA', 'PB', 'PT', 'RB',
'TB', 'TL', 'WO4', 'YB', 'ZN']),
'ion_other': set(['CAL', 'CES', 'CLA', 'POT', 'SOD', 'ZN2']),
'lipid': set(['GPE', 'LPP', 'OLA', 'SDS', 'STE']),
'lipid_other': set(['DLPE', 'DMPC', 'LPPC', 'OLEO', 'PALM', 'PCGL', 'POPC',
'POPE', 'STEA']),
'sugar': set(['GLC', 'GLO', 'BGC']),
'sugar_other': set(['AGLC']),
'heme': set(['1FH', '2FH', 'DDH', 'DHE', 'HAS', 'HDD', 'HDE', 'HDM',
'HEA', 'HEB', 'HEC', 'HEM', 'HEO', 'HES', 'HEV', 'NTE',
'SRM', 'VER']),
'heme_other': set(['HEMO', 'HEMR'])
}
DEFAULTS['backbone'] = DEFAULTS['bb'] = set(['CA', 'C', 'O', 'N'])
DEFAULTS['backbonefull'] = DEFAULTS['bbfull'] = set(['CA', 'C', 'O', 'N', 'H',
'H1', 'H2', 'H3', 'OXT'])
CATEGORIES = {
'aromaticity': set(['aromatic', 'aliphatic']),
'cyclic': set(['acyclic', 'cyclic']),
'charge': set(['acidic', 'basic', 'neutral']),
'depth': set(['buried', 'surface']),
'hydrophobicity': set(['hydrophobic', 'polar']),
'size': set(['small', 'medium', 'large']),
}
CATEGORIZED = {}
for catgroup in CATEGORIES.values():
for category in catgroup:
CATEGORIZED[category] = list(DEFAULTS[category])
INDEPENDENT = list(CATEGORIZED)
CATEGORIZED['charged'] = list(CATEGORIZED['acidic'])
CATEGORIZED['charged'].extend(CATEGORIZED['basic'])
for resi, cats in NONSTANDARD.items():
for cat in cats:
CATEGORIZED[cat].append(resi)
def addPlanter(func, *labels, **kwargs):
aliases = kwargs.get('aliases', True)
editor = kwargs.get('editor', False)
for label in labels:
PLANTERS[label] = func
if aliases:
ALIASES[label] = labels
else:
ALIASES[label] = [label]
if editor:
EDITORS[label] = editor
for field in kwargs.get('fields', FIELDSDEFAULT):
FIELDS[field].update(labels)
def updateNonstandard(nonstd):
SETTINGS[NONSTANDARD_KEY] = nonstd
SETTINGS[TIMESTAMP_KEY] = int(time())
SETTINGS.save()
updateDefinitions()
def changeDefinitions(**kwargs):
defs = SETTINGS.get(DEFINITIONS_KEY, {})
defs.update(kwargs)
SETTINGS[DEFINITIONS_KEY] = defs
SETTINGS[TIMESTAMP_KEY] = int(time())
SETTINGS.save()
updateDefinitions()
def resetDefinitions(flag):
if flag == 'all':
SETTINGS.pop(DEFINITIONS_KEY, None)
SETTINGS.pop(NONSTANDARD_KEY, None)
SETTINGS[TIMESTAMP_KEY] = int(time())
SETTINGS.save()
updateDefinitions()
elif flag == 'nonstdaa':
SETTINGS.pop(NONSTANDARD_KEY, None)
SETTINGS[TIMESTAMP_KEY] = int(time())
SETTINGS.save()
updateDefinitions()
else:
try:
SETTINGS.pop(DEFINITIONS_KEY, {}).pop(flag)
except KeyError:
pass
else:
SETTINGS[TIMESTAMP_KEY] = int(time())
SETTINGS.save()
updateDefinitions()
def changeResnames(flag, resnames):
"""Change the list of residue names associated with a flag. *flag* must
be a string, and *resnames* may be a list, tuple, or set of strings. The
existing list of residue names will be overwritten with the given residue
names."""
if flag not in EDITORS:
raise ValueError('{0} is not an editable flag'.format(repr(flag)))
resnames = [str(rn) for rn in resnames]
changeDefinitions(**{flag: resnames})
def changeNameRegex(flag, regex):
"""Set regular expression used for flagging elements based on atom names.
See :ref:`element-flags` for the default list of flags."""
if flag not in EDITORS:
raise ValueError('{0} is not an editable flag'.format(repr(flag)))
try:
recompile(regex)
except Exception as err:
raise ValueError('{0} is not a valid regular expression, {1}'
.format(repr(regex), str(err)))
else:
changeDefinitions(**{flag: regex})
def changeBackbone(flag, names):
"""Set protein :term:`backbone` or :term:`backbonefull` atom names.
*names* must be a list of atom names."""
if flag not in EDITORS:
raise ValueError('{0} is not an editable flag'.format(repr(flag)))
names = [str(nm) for nm in names]
if flag.endswith('full'):
changeDefinitions(bbfull=names, backbonefull=names)
else:
changeDefinitions(bb=names, backbone=names)
__doc__ += """
Protein
-------------------------------------------------------------------------------
.. glossary::
protein
aminoacid
indicates the twenty standard amino acids (:term:`stdaa`) and some
non-standard amino acids (:term:`nonstdaa`) described below. Residue
must also have an atom named ``'CA'`` in addition to having a qualifying
residue name.
stdaa
{stdaa}
nonstdaa
indicates one of the following residues:
========== ===================================================
`ASX`_ (B) asparagine or aspartic acid
`GLX`_ (Z) glutamine or glutamic acid
`CSO`_ (C) S-hydroxycysteine
`HIP`_ (H) ND1-phosphohistidine
HSD (H) prototropic tautomer of histidine, H on ND1 (CHARMM)
HSE (H) prototropic tautomer of histidine, H on NE2 (CHARMM)
HSP (H) protonated histidine
`MSE`_ selenomethionine
`SEC`_ (U) selenocysteine
`SEP`_ (S) phosphoserine
`TPO`_ (T) phosphothreonine
`PTR`_ (Y) O-phosphotyrosine
XLE (J) leucine or isoleucine
XAA (X) unspecified or unknown
========== ===================================================
You can modify the list of non-standard amino acids using
:func:`addNonstdAminoacid`, :func:`delNonstdAminoacid`, and
:func:`listNonstdAAProps`.
calpha
ca
Cα atoms of :term:`protein` residues, same as selection
``'name CA and protein'``
backbone
bb
non-hydrogen backbone atoms of :term:`protein` residues, same as
selection ``'name CA C O N and protein'``
backbonefull
bbfull
backbone atoms of :term:`protein` residues, same as selection
``'name CA C O N H H1 H2 H3 OXT and protein'``
sidechain
sc
side-chain atoms of :term:`protein` residues, same as selection
``'protein and not backbonefull'``
""".format(
stdaa = wrapText('indicates the standard amino acid residues: ' +
joinLinks(DEFAULTS['stdaa'], last=', and ', sort=True),
subsequent_indent=' '*6))
cats = list(CATEGORIZED)
cats.sort()
for cat in cats:
res = CATEGORIZED[cat]
res.sort()
__doc__ += """
{cat}
{res}
""".format(cat=cat, res=wrapText('residues ' + ', '.join(res),
subsequent_indent=' '*6))
for resi in DEFAULTS['stdaa']:
__doc__ += PDBLIGSUM.format(resi)
for resi in DEFAULTS['nonstdaa']:
__doc__ += PDBLIGSUM.format(resi)
__doc__ += """
Nucleic
-------------------------------------------------------------------------------
.. glossary::
nucleic
indicates :term:`nucleobase`, :term:`nucleotide`, and some
:term:`nucleoside` derivatives that are described below, so it is same
as ``'nucleobase or nucleotide or nucleoside'``.
nucleobase
indicates `ADE`_ (adenine), `GUN`_ (guanine), `CYT`_
(cytosine), `THY`_ (thymine), and `URA`_ (uracil).
nucleotide
indicates residues with the following names:
======= ==================================
`DA`_ 2'-deoxyadenosine-5'-monophosphate
`DC`_ 2'-deoxycytidine-5'-monophosphate
`DG`_ 2'-deoxyguanosine-5'-monophosphate
`DT`_ 2'-deoxythymidine-5'-monophosphate
`DU`_ 2'-deoxyuridine-5'-monophosphate
`A`_ adenosine-5'-monophosphate
`C`_ cytidine-5'-monophosphate
`G`_ guanosine-5'-monophosphate
`T`_ 2'-deoxythymidine-5'-monophosphate
`U`_ uridine-5'-monophosphate
======= ==================================
nucleoside
indicates following nucleoside derivatives that are recognized by *PDB*:
======= ================================
`AMP`_ adenosine monophosphate
`ADP`_ adenosine-5'-diphosphate
`ATP`_ adenosine-5'-triphosphate
`CDP`_ cytidine-5'-diphosphate
`CTP`_ cytidine-5'-triphosphate
`GMP`_ guanosine
`GDP`_ guanosine-5'-diphosphate
`GTP`_ guanosine-5'-triphosphate
`TMP`_ thymidine-5'-phosphate
`TTP`_ thymidine-5'-triphosphate
`UMP`_ 2'-deoxyuridine 5'-monophosphate
`UDP`_ uridine 5'-diphosphate
`UTP`_ uridine 5'-triphosphate
======= ================================
at
same as selection ``'resname ADE A THY T'``
cg
same as selection ``'resname CYT C GUN G'``
purine
same as selection ``'resname ADE A GUN G'``
pyrimidine
same as selection ``'resname CYT C THY T URA U'``
"""
for resi in DEFAULTS['nucleobase']:
__doc__ += PDBLIGSUM.format(resi)
for resi in DEFAULTS['nucleotide']:
__doc__ += PDBLIGSUM.format(resi)
for resi in DEFAULTS['nucleoside']:
__doc__ += PDBLIGSUM.format(resi)
__doc__ += """
Heteros
-------------------------------------------------------------------------------
.. glossary::
hetero
indicates anything other than a :term:`protein` or a
:term:`nucleic` residue, i.e. ``'not (protein or nucleic)'``.
hetatm
is available when atomic data is parsed from a PDB or similar
format file and indicates atoms that are marked ``'HETATM'`` in the file.
water
indices `HOH`_ and `DOD`_ recognized by *PDB* and also WAT, TIP3, H2O,
OH2, TIP, TIP2, and TIP4 recognized by molecular dynamics (MD) force
fields.
.. _HOH: http://www.pdb.org/pdb/ligand/ligandsummary.do?hetId=HOH
.. _DOD: http://www.pdb.org/pdb/ligand/ligandsummary.do?hetId=DOD
Previously used water types HH0, OHH, and SOL conflict with other
compounds in the *PDB*, so are removed from the definition of this flag.
ion
indicates the following ions most of which are recognized by the *PDB*
and others by MD force fields.
======= ================== ===== ======== ==========
\ *PDB* *Source* *Conflict*
`AL`_ aluminum Yes
`BA`_ barium Yes
`CA`_ calcium Yes
`CD`_ cadmium Yes
`CL`_ chloride Yes
`CO`_ cobalt (ii) Yes
`CS`_ cesium Yes
`CU`_ copper (ii) Yes
`CU1`_ copper (i) Yes
`CUA`_ dinuclear copper Yes
`HG`_ mercury (ii) Yes
`IN`_ indium (iii) Yes
`IOD`_ iodide Yes
`K`_ potassium Yes
`MG`_ magnesium Yes
`MN3`_ manganese (iii) Yes
`NA`_ sodium Yes
`PB`_ lead (ii) Yes
`PT`_ platinum (ii) Yes
`RB`_ rubidium Yes
`TB`_ terbium (iii) Yes
`TL`_ thallium (i) Yes
`WO4`_ thungstate (vi) Yes
`YB`_ ytterbium (iii) Yes
`ZN`_ zinc Yes
CAL calcium No CHARMM Yes
CES cesium No CHARMM Yes
CLA chloride No CHARMM Yes
POT potassium No CHARMM Yes
SOD sodium No CHARMM Yes
ZN2 zinc No CHARMM No
======= ================== ===== ======== ==========
Ion identifiers that are obsoleted by *PDB* (MO3, MO4, MO5, MO6, NAW,
OC7, and ZN1) are removed from this definition.
lipid
{lipid}
sugar
{sugar}
heme
{heme}
pdbter
is available when atomic data is parsed from a PDB format file and
indicates atoms that were followed by ``'TER'`` record.
""".format(
lipid = wrapText('indicates ' +
joinLinks(DEFAULTS['lipid'], last=', and ', sort=True) +
' from *PDB*, and also ' + ', '.join(DEFAULTS['lipid_other']) +
' from *CHARMM* force field.', subsequent_indent=' '*6),
sugar = wrapText('indicates ' +
joinLinks(DEFAULTS['sugar'], last=', and ', sort=True) +
' from *PDB*, and also AGLC from *CHARMM*.',
subsequent_indent=' '*6),
heme = wrapText('indicates ' +
joinLinks(DEFAULTS['heme'], last=', and ', sort=True) +
' from *PDB*, and also HEMO and HEMR from CHARMM.',
subsequent_indent=' '*6)
)
for resi in DEFAULTS['ion']:
__doc__ += PDBLIGSUM.format(resi)
for resi in DEFAULTS['lipid']:
__doc__ += PDBLIGSUM.format(resi)
for resi in DEFAULTS['sugar']:
__doc__ += PDBLIGSUM.format(resi)
for resi in DEFAULTS['heme']:
__doc__ += PDBLIGSUM.format(resi)
__doc__ += """
Elements
-------------------------------------------------------------------------------
Following elements found in proteins are recognized by applying regular
expressions to atom names:
.. glossary::
carbon
carbon atoms, same as ``'name "C.*" and not ion'``
nitrogen
nitrogen atoms, same as ``'name "N.*" and not ion'``
oxygen
oxygen atoms, same as ``'name "O.*" and not ion'``
sulfur
sulfur atoms, same as ``'name "S.*" and not ion'``
hydrogen
hydrogen atoms, same as ``'name "[1-9]?H.*" and not ion'``
noh
heavy
non hydrogen atoms, same as ``'not hydrogen``
``'not ion'`` is appended to above definitions to avoid conflicts with
:term:`ion` atoms.
Structure
-------------------------------------------------------------------------------
Following secondary structure flags are defined but before they can be used,
secondary structure assignments must be made.
.. glossary::
extended
extended conformation, same as ``'secondary E'``
helix
α-helix conformation, same as ``'secondary H'``
helix310
3_10-helix conformation, same as ``'secondary G'``
helixpi
π-helix conformation, same as ``'secondary I'``
turn
hydrogen bonded turn conformation, same as ``'secondary T'``
bridge
isolated beta-bridge conformation, same as ``'secondary B'``
bend
bend conformation, same as ``'secondary S'``
coil
not in one of above conformations, same as ``'secondary C'``
Others
-------------------------------------------------------------------------------
.. glossary::
all
indicates all atoms, returns a new view of the instance
none
indicates no atoms, returns **None**
dummy
indicates dummy atoms in an :class:`.AtomMap`
mapped
indicates mapped atoms in an :class:`.AtomMap`
Functions
===============================================================================
The following functions can be used to customize flag definitions:
* :func:`.flagDefinition`
* :func:`.addNonstdAminoacid`
* :func:`.delNonstdAminoacid`
* :func:`.listNonstdAAProps`
"""
# join split lists
for key in ['ion', 'lipid', 'sugar', 'heme']:
DEFAULTS[key].update(DEFAULTS.pop(key + '_other'))
DEFINITIONS = None
AMINOACIDS = None
BACKBONE = None
def updateDefinitions():
"""Update definitions and set some global variables. This function must be
called at the end of the module."""
global DEFINITIONS, AMINOACIDS, BACKBONE, TIMESTAMP
DEFINITIONS = {}
user = SETTINGS.get('flag_definitions', {})
# nucleics
nucleic = set()
for key in ['nucleobase', 'nucleoside', 'nucleotide']:
aset = set(user.get(key, DEFAULTS[key]))
nucleic.update(aset)
DEFINITIONS[key] = aset
DEFINITIONS['nucleic'] = nucleic
# heteros
for key in ['water', 'lipid', 'ion', 'sugar', 'heme',
'at', 'cg', 'purine', 'pyrimidine']:
DEFINITIONS[key] = set(user.get(key, DEFAULTS[key]))
DEFINITIONS['backbone'] = DEFINITIONS['bb'] = set(user.get(key,
DEFAULTS['bb']))
DEFINITIONS['backbonefull'] = DEFINITIONS['bbfull'] = set(user.get(key,
DEFAULTS['bbfull']))
# element regex
for key in ['hydrogen', 'carbon', 'nitrogen', 'oxygen', 'sulfur']:
DEFINITIONS[key] = recompile(user.get(key, DEFAULTS[key]))
try:
nonstd = SETTINGS[NONSTANDARD_KEY]
except KeyError:
nonstd = NONSTANDARD
DEFINITIONS.update(CATEGORIZED)
else:
for cat in CATEGORIES:
for key in CATEGORIES[cat]:
DEFINITIONS[key] = set(DEFAULTS[key])
DEFINITIONS['charged'] = set(DEFINITIONS['acidic'])
DEFINITIONS['charged'].update(DEFINITIONS['basic'])
for resi, props in nonstd.items():
for prop in props:
DEFINITIONS[prop].add(resi)
DEFINITIONS['stdaa'] = DEFAULTS['stdaa']
DEFINITIONS['nonstdaa'] = set(nonstd)
AMINOACIDS = set(DEFINITIONS['stdaa'])
AMINOACIDS.update(DEFINITIONS['nonstdaa'])
DEFINITIONS['protein'] = DEFINITIONS['aminoacid'] = AMINOACIDS
BACKBONE = DEFINITIONS['bb']
global TIMESTAMP
TIMESTAMP = SETTINGS.get(TIMESTAMP_KEY, 0)
#==============================================================================
# PLANTERS
#==============================================================================
# flag planters are functions that take an AtomGroup instance
# and set flags for a given label using the atomic data stored in the AtomGroup
# a planter can also set the indices of subset atoms for calculated flag
# planters return the flags after calculation is over
# protein
#==============================================================================
def setCalpha(ag, label):
flags = ag._getNames() == 'CA'
indices = flags.nonzero()[0]
if len(indices):
torf = array([rn in AMINOACIDS for rn in ag._getResnames()[indices]])
flags[indices] = torf
ag._setFlags(label, flags)
ag._setSubset(label, indices[torf])
else:
ag._setFlags(label, flags)
ag._setSubset(label, array([]))
return flags
addPlanter(setCalpha, 'ca', 'calpha')
def setProtein(ag, label):
calpha = ag._getSubset('calpha')
if len(calpha):
torf = zeros(ag.numResidues(), bool)
resindices = ag._getResindices()
torf[resindices[calpha]] = True
flags = torf[resindices]
else:
flags = zeros(ag.numAtoms(), bool)
ag._setFlags('protein', flags)
return flags
addPlanter(setProtein, 'protein', 'aminoacid')
# subsets
#==============================================================================
def setBackbone(ag, label):
protein = ag._getSubset('protein')
if len(protein):
bb = DEFINITIONS[label]
names = ag._getNames()[protein]
flags = zeros(ag.numAtoms(), bool)
flags[protein] = [nm in bb for nm in names]
else:
flags = zeros(ag.numAtoms(), bool)
ag._setFlags(label, flags)
return flags
addPlanter(setBackbone, 'bb', 'backbone', editor=changeBackbone)
addPlanter(setBackbone, 'bbfull', 'backbonefull', editor=changeBackbone)
def setSidechain(ag, label):
protein = ag._getSubset('protein')
if len(protein):
flags = zeros(ag.numAtoms(), bool)
flags[protein] = True
flags[ag._getSubset('bbfull')] = False
else:
flags = zeros(ag.numAtoms(), bool)
ag._setFlags(label, flags)
return flags
addPlanter(setSidechain, 'sc', 'sidechain')
def setCategories(ag, label):
calpha = ag._getSubset('ca')
if len(calpha):
resnames = DEFINITIONS[label]
residx = ag._getResindices()
torf = zeros(ag.numResidues(), bool)
torf[residx[calpha]] = [rn in resnames
for rn in ag._getResnames()[calpha]]
flags = torf[residx]
else:
flags = zeros(ag.numAtoms(), bool)
ag._setFlags(label, flags)
return flags
addPlanter(setCategories, 'stdaa', 'nonstdaa', *list(CATEGORIZED.keys()),
aliases=False)
def setAll(ag, label):
flags = ones(ag.numAtoms(), bool)
ag._setFlags('all', flags)
return flags
addPlanter(setAll, 'all')
def setNone(ag, label):
flags = zeros(ag.numAtoms(), bool)
ag._setFlags('none', flags)
return flags
addPlanter(setNone, 'none')
# hetero, nucleic, water, etc.
#==============================================================================
def setResiflag(ag, label):
resnames = DEFINITIONS[label]
flags = array([rn in resnames for rn in ag._getResnames()], bool)
ag._setFlags(label, flags)
return flags
addPlanter(setResiflag, 'nucleobase', 'nucleoside', 'nucleotide',
'water', 'ion', 'lipid', 'sugar', 'heme', 'at', 'cg', 'purine',
'pyrimidine', aliases=False, editor=changeResnames)
addPlanter(setResiflag, 'nucleic')
def setHetero(ag, label):
flags = ones(ag.numAtoms(), bool)
flags[ag._getSubset('protein')] = False
flags[ag._getSubset('nucleic')] = False
ag._setFlags('hetero', flags)
return flags
addPlanter(setHetero, 'hetero',)
# element
#==============================================================================
def setElement(ag, label):
match = DEFINITIONS[label].match
flags = array([match(nm) is not None for nm in ag._getNames()])
flags[ag._getSubset('ion')] = False
ag._setFlags(label, flags)
return flags
addPlanter(setElement, 'hydrogen', 'carbon', 'nitrogen', 'oxygen', 'sulfur',
aliases=False, editor=changeNameRegex)
def setNoh(ag, label):
flags = ones(ag.numAtoms(), bool)
flags[ag._getSubset('hydrogen')] = False
ag._setFlags(label, flags)
return flags
addPlanter(setNoh, 'noh', 'heavy')
# secondary
#==============================================================================
SECONDARY = {'extended': 'E', 'helix': 'H', 'helix310': 'G', 'helixpi': 'I',
'turn': 'T', 'bridge': 'B', 'bend': 'S', 'coil': 'C'}
def setSecondary(ag, label):
secstrs = ag._getSecstrs()
if secstrs is None:
raise ValueError('no secondary structure assignments')
flags = secstrs == SECONDARY[label]
ag._setFlags(label, flags)
return flags
addPlanter(setSecondary, *list(SECONDARY.keys()), aliases=False,
fields=['secondary'])
# secondary
#==============================================================================
def setDummy(ag, label):
if label == 'dummy':
flags = zeros(len(ag), bool)
else:
flags = ones(len(ag), bool)
ag._setFlags(label, flags)
return flags
addPlanter(setDummy, 'dummy', 'mapped', aliases=False, fields=[])
#==============================================================================
# User functions
#==============================================================================
[docs]def flagDefinition(*arg, **kwarg):
"""Learn, change, or reset :ref:`flags` definitions.
**Learn a definition**
Calling this function with no arguments will return list of flag names
whose definitions you can learn:
.. ipython:: python
flagDefinition()
Passing a flag name will return its definition:
.. ipython:: python
flagDefinition('backbone')
flagDefinition('hydrogen')
**Change a definition**
Calling the function with ``editable=True`` argument will return flag
names those definitions that can be edited:
.. ipython:: python
flagDefinition(editable=True)
Pass an editable flag name with its new definition:
.. ipython:: python
flagDefinition(nitrogen='N.*')
flagDefinition(backbone=['CA', 'C', 'O', 'N'])
flagDefinition(nucleobase=['ADE', 'CYT', 'GUN', 'THY', 'URA'])
Note that the type of the new definition must be the same as the type
of the old definition. Flags with editable definitions are:
{editable}
**Reset definitions**
Pass *reset* keyword as follows to restore all default definitions of
editable flags and also non-standard amino acids.
.. ipython:: python
flagDefinition(reset='all')
Or, pass a specific editable flag label to restore its definition:
.. ipython:: python
flagDefinition(reset='nitrogen')"""
if arg and kwarg:
raise ValueError('only a single argument or a single keyword '
'argument is accepted at a time')
elif arg:
if len(arg) > 1:
raise ValueError('specify only one flag label')
arg = arg[0]
try:
definition = DEFINITIONS[arg]
except KeyError:
raise ValueError('{0} is not a valid flag label'
.format(repr(arg)))
else:
try:
return definition.pattern
except AttributeError:
definition = list(definition)
definition.sort()
return definition
elif kwarg:
if len(kwarg) > 1:
raise ValueError('specify only one keyword argument')
key, val = list(kwarg.items())[0]
if key == 'editable' and val:
alist = list(EDITORS)
alist.sort()
return alist
if key == 'reset' and val:
resetDefinitions(val)
return
try:
editor = EDITORS[key]
except KeyError:
raise ValueError('{0} is not an editable flag or a valid '
'keyword'.format(repr(key)))
else:
type_ = type(flagDefinition(key))
if type(val) != type_:
raise TypeError('expected {0} as type of new definition, '
'found {1}'.format(repr(type_.__name__),
repr(type(val).__name__)))
editor(key, val)
else:
alist = list(DEFINITIONS)
alist.sort()
return alist
flagDefinition.__doc__ = flagDefinition.__doc__.format(
editable=wrapText(joinTerms(flagDefinition(editable=True), last=', and '),
subsequent_indent=' '*4))
[docs]def addNonstdAminoacid(resname, *properties):
"""Add non-standard amino acid *resname* with *properties* selected from:
* {props}
.. ipython:: python
addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
'polar', 'surface')
Default set of non-standard amino acids can be restored as follows:
.. ipython:: python
flagDefinition(reset='nonstdaa')"""
resname = str(resname)
if len(resname) > 4:
LOGGER.warn('Residue name {0} is unusually long.'
.format(repr(resname)))
propset = set(properties)
for cat, val in CATEGORIES.items():
intersection = val.intersection(propset)
if intersection:
if len(intersection) > 1:
raise ValueError('amino acid properties {0} cannot be '
'present together'
.format(', '.join([repr(prp)
for prp in intersection])))
for prop in intersection:
propset.remove(prop)
if propset:
raise ValueError('amino acid property {0} is not valid'
.format(repr(propset.pop())))
nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
nonstd[resname] = set(properties)
updateNonstandard(nonstd)
INDEPENDENT.sort()
addNonstdAminoacid.__doc__ = addNonstdAminoacid.__doc__.format(
props='\n * '.join([
'*{0}*: {1}'.format(cat, joinTerms(terms, last=', or ', sort=True))
for cat, terms in CATEGORIES.items()
])
)
[docs]def delNonstdAminoacid(resname):
"""Delete non-standard amino acid *resname*.
.. ipython:: python
delNonstdAminoacid('PTR')
flagDefinition('nonstdaa')
Default set of non-standard amino acids can be restored as follows:
.. ipython:: python
flagDefinition(reset='nonstdaa')"""
nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
try:
nonstd.pop(resname)
except KeyError:
raise ValueError('{0} is not a non-standard residue name'
.format(repr(resname)))
else:
updateNonstandard(nonstd)
[docs]def getNonstdProperties(resname):
"""Deprecated for removal in v1.4, use :func:`listNonstdAAProps` instead.
"""
from prody import deprecate
deprecate('getNonstdProperties', 'listNonstdAAProps')
return listNonstdAAProps(resname)
[docs]def listNonstdAAProps(resname):
"""Returns properties of non-standard amino acid *resname*.
.. ipython:: python
listNonstdAAProps('PTR')"""
try:
alist = list(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)[resname])
except KeyError:
raise ValueError('{0} is not a non-standard residue name'
.format(repr(resname)))
else:
alist.sort()
return alist
updateDefinitions()