Release Notes
v2.0 series come with new and improved sequence, structure, and dynamics analysis features. See release notes for details.
How to Cite
Bakan A, Meireles LM, Bahar I ProDy: Protein Dynamics Inferred from Theory and Experiments
Bioinformatics 2011 27(11):1575-1577.
Bakan A, Dutta A, Mao W, Liu Y, Chennubhotla C, Lezon TR, Bahar I Evol and ProDy for Bridging Protein Sequence Evolution and Structural Dynamics
Bioinformatics 2014 30(18):2681-2683.
Source code for prody.proteins.dssp

# -*- coding: utf-8 -*-
"""This module defines functions for executing DSSP program and parsing
its output."""

import os.path

import numpy as np

from prody.atomic import ATOMIC_FIELDS
from prody.atomic import AtomGroup
from prody.utilities import gunzip, which, PLATFORM

from .pdbfile import parsePDB
from .localpdb import fetchPDB

__all__ = ['execDSSP', 'parseDSSP', 'performDSSP']


[docs]def execDSSP(pdb, outputname=None, outputdir=None, stderr=True):
    """Execute DSSP for given *pdb*.  *pdb* can be a PDB identifier or a PDB
    file path.  If *pdb* is a compressed file, it will be decompressed using
    Python :mod:`gzip` library.  When no *outputname* is given, output name
    will be :file:`pdb.dssp`.  :file:`.dssp` extension will be appended
    automatically to *outputname*.  If :file:`outputdir` is given, DSSP
    output and uncompressed PDB file will be written into this folder.
    Upon successful execution of :command:`dssp pdb > out` command, output
    filename is returned.  On Linux platforms, when *stderr* is false,
    standard error messages are suppressed, i.e.
    ``dssp pdb > outputname 2> /dev/null``.

    For more information on DSSP see http://swift.cmbi.ru.nl/gv/dssp/.
    If you benefited from DSSP, please consider citing [WK83]_.

    .. [WK83] Kabsch W, Sander C. Dictionary of protein secondary structure:
       pattern recognition of hydrogen-bonded and geometrical features.
       *Biopolymers* **1983** 22:2577-2637."""

    dssp = which('mkdssp')
    if dssp is None:
        dssp = which('dssp')
    if dssp is None:
        raise EnvironmentError('command not found: dssp executable is not '
                               'found in one of system paths')
    assert outputname is None or isinstance(outputname, str),\
        'outputname must be a string'
    assert outputdir is None or isinstance(outputdir, str),\
        'outputdir must be a string'
    if not os.path.isfile(pdb):
        pdb = fetchPDB(pdb, compressed=False)
    if pdb is None:
        raise ValueError('pdb is not a valid PDB identifier or filename')
    if os.path.splitext(pdb)[1] == '.gz':
        if outputdir is None:
            pdb = gunzip(pdb, os.path.splitext(pdb)[0])
        else:
            pdb = gunzip(pdb, os.path.join(outputdir,
                         os.path.split(os.path.splitext(pdb)[0])[1]))
    if outputdir is None:
        outputdir = '.'
    if outputname is None:
        out = os.path.join(outputdir,
                           os.path.splitext(os.path.split(pdb)[1])[0] +
                           '.dssp')
    else:
        out = os.path.join(outputdir, outputname + '.dssp')

    if not stderr and PLATFORM != 'Windows':
        status = os.system('{0} {1} > {2} 2> /dev/null'.format(
                           dssp, pdb, out))
    else:
        status = os.system('{0} {1} > {2}'.format(dssp, pdb, out))

    if status == 0:
        return out


[docs]def parseDSSP(dssp, ag, parseall=False):
    """Parse DSSP data from file *dssp* into :class:`.AtomGroup` instance
    *ag*.  DSSP output file must be in the new format used from July 1995
    and onwards.  When *dssp* file is parsed, following attributes are added
    to *ag*:

    * *dssp_resnum*: DSSP's sequential residue number, starting at the first
      residue actually in the data set and including chain breaks; this number
      is used to refer to residues throughout.

    * *dssp_acc*: number of water molecules in contact with this residue \*10.
      or residue water exposed surface in Angstrom^2.

    * *dssp_kappa*: virtual bond angle (bend angle) defined by the three Cα
      atoms of residues I-2,I,I+2.  Used to define bend (structure code 'S').

    * *dssp_alpha*: virtual torsion angle (dihedral angle) defined by the four
      Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality (structure
      code '+' or '-').

    * *dssp_phi* and *dssp_psi*: IUPAC peptide backbone torsion angles

    The following attributes are parsed when ``parseall=True`` is passed:

    * *dssp_bp1*, *dssp_bp2*, and *dssp_sheet_label*: residue number of first
      and second bridge partner followed by one letter sheet label

    * *dssp_tco*: cosine of angle between C=O of residue I and C=O of residue
      I-1.  For α-helices, TCO is near +1, for β-sheets TCO is near -1.  Not
      used for structure definition.

    * *dssp_NH_O_1_index*, *dssp_NH_O_1_energy*, etc.: hydrogen bonds; e.g.
      -3,-1.4 means: if this residue is residue i then N-H of I is h-bonded to
      C=O of I-3 with an electrostatic H-bond energy of -1.4 kcal/mol.  There
      are two columns for each type of H-bond, to allow for bifurcated H-bonds.

    See http://swift.cmbi.ru.nl/gv/dssp/DSSP_3.html for details."""

    if not os.path.isfile(dssp):
        raise IOError('{0} is not a valid file path'.format(dssp))
    if not isinstance(ag, AtomGroup):
        raise TypeError('ag argument must be an AtomGroup instance')

    dssp = open(dssp)

    n_atoms = ag.numAtoms()
    NUMBER = np.zeros(n_atoms, int)
    SHEETLABEL = np.zeros(n_atoms, np.array(['a']).dtype.char + '1')
    ACC = np.zeros(n_atoms, float)
    KAPPA = np.zeros(n_atoms, float)
    ALPHA = np.zeros(n_atoms, float)
    PHI = np.zeros(n_atoms, float)
    PSI = np.zeros(n_atoms, float)

    if parseall:
        BP1 = np.zeros(n_atoms, int)
        BP2 = np.zeros(n_atoms, int)
        NH_O_1 = np.zeros(n_atoms, int)
        NH_O_1_nrg = np.zeros(n_atoms, float)
        O_HN_1 = np.zeros(n_atoms, int)
        O_HN_1_nrg = np.zeros(n_atoms, float)
        NH_O_2 = np.zeros(n_atoms, int)
        NH_O_2_nrg = np.zeros(n_atoms, float)
        O_HN_2 = np.zeros(n_atoms, int)
        O_HN_2_nrg = np.zeros(n_atoms, float)
        TCO = np.zeros(n_atoms, float)

    ag.setSecstrs(np.zeros(n_atoms, dtype=ATOMIC_FIELDS['secondary'].dtype))
    for line in dssp:
        if line.startswith('  #  RESIDUE'):
            break
    for line in dssp:
        if line[13] == '!':
            continue
        res = ag[(line[11], int(line[5:10]), line[10].strip())]
        if res is None:
            continue
        indices = res.getIndices()
        res.setSecstrs(line[16].strip())
        NUMBER[indices] = int(line[:5])
        SHEETLABEL[indices] = line[33].strip()
        ACC[indices] = int(line[35:38])
        KAPPA[indices] = float(line[91:97])
        ALPHA[indices] = float(line[97:103])
        PHI[indices] = float(line[103:109])
        PSI[indices] = float(line[109:115])

        if parseall:
            BP1[indices] = int(line[25:29])
            BP2[indices] = int(line[29:33])
            NH_O_1[indices] = int(line[38:45])
            NH_O_1_nrg[indices] = float(line[46:50])
            O_HN_1[indices] = int(line[50:56])
            O_HN_1_nrg[indices] = float(line[57:61])
            NH_O_2[indices] = int(line[61:67])
            NH_O_2_nrg[indices] = float(line[68:72])
            O_HN_2[indices] = int(line[72:78])
            O_HN_2_nrg[indices] = float(line[79:83])
            TCO[indices] = float(line[85:91])

    ag.setData('dssp_resnum', NUMBER)
    ag.setData('dssp_sheet_label', SHEETLABEL)
    ag.setData('dssp_acc', ACC)
    ag.setData('dssp_kappa', KAPPA)
    ag.setData('dssp_alpha', ALPHA)
    ag.setData('dssp_phi', PHI)
    ag.setData('dssp_psi', PSI)

    if parseall:
        ag.setData('dssp_bp1', BP1)
        ag.setData('dssp_bp2', BP2)
        ag.setData('dssp_NH_O_1_index', NH_O_1)
        ag.setData('dssp_NH_O_1_energy', NH_O_1_nrg)
        ag.setData('dssp_O_NH_1_index', O_HN_1)
        ag.setData('dssp_O_NH_1_energy', O_HN_1_nrg)
        ag.setData('dssp_NH_O_2_index', NH_O_2)
        ag.setData('dssp_NH_O_2_energy', NH_O_2_nrg)
        ag.setData('dssp_O_NH_2_index', O_HN_2)
        ag.setData('dssp_O_NH_2_energy', O_HN_2_nrg)
        ag.setData('dssp_tco', TCO)
    return ag


[docs]def performDSSP(pdb, parseall=False, stderr=True):
    """Perform DSSP calculations and parse results.  DSSP data is returned
    in an :class:`~.AtomGroup` instance.  See also :func:`execDSSP`
    and :func:`parseDSSP`."""

    pdb = fetchPDB(pdb, compressed=False)
    return parseDSSP(execDSSP(pdb, stderr=stderr), parsePDB(pdb), parseall)