# -*- coding: utf-8 -*-
"""This module defines functions for executing DSSP program and parsing
its output."""
import os.path
import numpy as np
from prody.atomic import ATOMIC_FIELDS
from prody.atomic import AtomGroup
from prody.utilities import gunzip, which, PLATFORM
from .pdbfile import parsePDB
from .localpdb import fetchPDB
__all__ = ['execDSSP', 'parseDSSP', 'performDSSP']
[docs]def execDSSP(pdb, outputname=None, outputdir=None, stderr=True):
"""Execute DSSP for given *pdb*. *pdb* can be a PDB identifier or a PDB
file path. If *pdb* is a compressed file, it will be decompressed using
Python :mod:`gzip` library. When no *outputname* is given, output name
will be :file:`pdb.dssp`. :file:`.dssp` extension will be appended
automatically to *outputname*. If :file:`outputdir` is given, DSSP
output and uncompressed PDB file will be written into this folder.
Upon successful execution of :command:`dssp pdb > out` command, output
filename is returned. On Linux platforms, when *stderr* is false,
standard error messages are suppressed, i.e.
``dssp pdb > outputname 2> /dev/null``.
For more information on DSSP see http://swift.cmbi.ru.nl/gv/dssp/.
If you benefited from DSSP, please consider citing [WK83]_.
.. [WK83] Kabsch W, Sander C. Dictionary of protein secondary structure:
pattern recognition of hydrogen-bonded and geometrical features.
*Biopolymers* **1983** 22:2577-2637."""
dssp = which('mkdssp')
if dssp is None:
dssp = which('dssp')
if dssp is None:
raise EnvironmentError('command not found: dssp executable is not '
'found in one of system paths')
assert outputname is None or isinstance(outputname, str),\
'outputname must be a string'
assert outputdir is None or isinstance(outputdir, str),\
'outputdir must be a string'
if not os.path.isfile(pdb):
pdb = fetchPDB(pdb, compressed=False)
if pdb is None:
raise ValueError('pdb is not a valid PDB identifier or filename')
if os.path.splitext(pdb)[1] == '.gz':
if outputdir is None:
pdb = gunzip(pdb, os.path.splitext(pdb)[0])
else:
pdb = gunzip(pdb, os.path.join(outputdir,
os.path.split(os.path.splitext(pdb)[0])[1]))
if outputdir is None:
outputdir = '.'
if outputname is None:
out = os.path.join(outputdir,
os.path.splitext(os.path.split(pdb)[1])[0] +
'.dssp')
else:
out = os.path.join(outputdir, outputname + '.dssp')
if not stderr and PLATFORM != 'Windows':
status = os.system('{0} {1} > {2} 2> /dev/null'.format(
dssp, pdb, out))
else:
status = os.system('{0} {1} > {2}'.format(dssp, pdb, out))
if status == 0:
return out
[docs]def parseDSSP(dssp, ag, parseall=False):
"""Parse DSSP data from file *dssp* into :class:`.AtomGroup` instance
*ag*. DSSP output file must be in the new format used from July 1995
and onwards. When *dssp* file is parsed, following attributes are added
to *ag*:
* *dssp_resnum*: DSSP's sequential residue number, starting at the first
residue actually in the data set and including chain breaks; this number
is used to refer to residues throughout.
* *dssp_acc*: number of water molecules in contact with this residue \*10.
or residue water exposed surface in Angstrom^2.
* *dssp_kappa*: virtual bond angle (bend angle) defined by the three Cα
atoms of residues I-2,I,I+2. Used to define bend (structure code 'S').
* *dssp_alpha*: virtual torsion angle (dihedral angle) defined by the four
Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality (structure
code '+' or '-').
* *dssp_phi* and *dssp_psi*: IUPAC peptide backbone torsion angles
The following attributes are parsed when ``parseall=True`` is passed:
* *dssp_bp1*, *dssp_bp2*, and *dssp_sheet_label*: residue number of first
and second bridge partner followed by one letter sheet label
* *dssp_tco*: cosine of angle between C=O of residue I and C=O of residue
I-1. For α-helices, TCO is near +1, for β-sheets TCO is near -1. Not
used for structure definition.
* *dssp_NH_O_1_index*, *dssp_NH_O_1_energy*, etc.: hydrogen bonds; e.g.
-3,-1.4 means: if this residue is residue i then N-H of I is h-bonded to
C=O of I-3 with an electrostatic H-bond energy of -1.4 kcal/mol. There
are two columns for each type of H-bond, to allow for bifurcated H-bonds.
See http://swift.cmbi.ru.nl/gv/dssp/DSSP_3.html for details."""
if not os.path.isfile(dssp):
raise IOError('{0} is not a valid file path'.format(dssp))
if not isinstance(ag, AtomGroup):
raise TypeError('ag argument must be an AtomGroup instance')
dssp = open(dssp)
n_atoms = ag.numAtoms()
NUMBER = np.zeros(n_atoms, int)
SHEETLABEL = np.zeros(n_atoms, np.array(['a']).dtype.char + '1')
ACC = np.zeros(n_atoms, float)
KAPPA = np.zeros(n_atoms, float)
ALPHA = np.zeros(n_atoms, float)
PHI = np.zeros(n_atoms, float)
PSI = np.zeros(n_atoms, float)
if parseall:
BP1 = np.zeros(n_atoms, int)
BP2 = np.zeros(n_atoms, int)
NH_O_1 = np.zeros(n_atoms, int)
NH_O_1_nrg = np.zeros(n_atoms, float)
O_HN_1 = np.zeros(n_atoms, int)
O_HN_1_nrg = np.zeros(n_atoms, float)
NH_O_2 = np.zeros(n_atoms, int)
NH_O_2_nrg = np.zeros(n_atoms, float)
O_HN_2 = np.zeros(n_atoms, int)
O_HN_2_nrg = np.zeros(n_atoms, float)
TCO = np.zeros(n_atoms, float)
ag.setSecstrs(np.zeros(n_atoms, dtype=ATOMIC_FIELDS['secondary'].dtype))
for line in dssp:
if line.startswith(' # RESIDUE'):
break
for line in dssp:
if line[13] == '!':
continue
res = ag[(line[11], int(line[5:10]), line[10].strip())]
if res is None:
continue
indices = res.getIndices()
res.setSecstrs(line[16].strip())
NUMBER[indices] = int(line[:5])
SHEETLABEL[indices] = line[33].strip()
ACC[indices] = int(line[35:38])
KAPPA[indices] = float(line[91:97])
ALPHA[indices] = float(line[97:103])
PHI[indices] = float(line[103:109])
PSI[indices] = float(line[109:115])
if parseall:
BP1[indices] = int(line[25:29])
BP2[indices] = int(line[29:33])
NH_O_1[indices] = int(line[38:45])
NH_O_1_nrg[indices] = float(line[46:50])
O_HN_1[indices] = int(line[50:56])
O_HN_1_nrg[indices] = float(line[57:61])
NH_O_2[indices] = int(line[61:67])
NH_O_2_nrg[indices] = float(line[68:72])
O_HN_2[indices] = int(line[72:78])
O_HN_2_nrg[indices] = float(line[79:83])
TCO[indices] = float(line[85:91])
ag.setData('dssp_resnum', NUMBER)
ag.setData('dssp_sheet_label', SHEETLABEL)
ag.setData('dssp_acc', ACC)
ag.setData('dssp_kappa', KAPPA)
ag.setData('dssp_alpha', ALPHA)
ag.setData('dssp_phi', PHI)
ag.setData('dssp_psi', PSI)
if parseall:
ag.setData('dssp_bp1', BP1)
ag.setData('dssp_bp2', BP2)
ag.setData('dssp_NH_O_1_index', NH_O_1)
ag.setData('dssp_NH_O_1_energy', NH_O_1_nrg)
ag.setData('dssp_O_NH_1_index', O_HN_1)
ag.setData('dssp_O_NH_1_energy', O_HN_1_nrg)
ag.setData('dssp_NH_O_2_index', NH_O_2)
ag.setData('dssp_NH_O_2_energy', NH_O_2_nrg)
ag.setData('dssp_O_NH_2_index', O_HN_2)
ag.setData('dssp_O_NH_2_energy', O_HN_2_nrg)
ag.setData('dssp_tco', TCO)
return ag