Source code for prody.proteins.stride
# -*- coding: utf-8 -*-
"""This module defines functions for executing STRIDE program and parsing
its output."""
import os.path
import numpy as np
from prody.atomic import ATOMIC_FIELDS
from prody.atomic import AtomGroup
from prody.utilities import gunzip, which
from .pdbfile import parsePDB
from .localpdb import fetchPDB
__all__ = ['execSTRIDE', 'parseSTRIDE', 'performSTRIDE']
def execSTRIDE(pdb, outputname=None, outputdir=None):
    """Execute STRIDE program for given *pdb*.  *pdb* can be an identifier or
    a PDB file path.  If *pdb* is a compressed file, it will be decompressed
    using Python :mod:`gzip` library.  When no *outputname* is given, output
    name will be :file:`pdb.stride`.  :file:`.stride` extension will be
    appended automatically to *outputname*.  If :file:`outputdir` is given,
    STRIDE output and uncompressed PDB file will be written into this folder.
    Upon successful execution of :command:`stride pdb > out` command, output
    filename is returned.  When STRIDE exits with a non-zero status, or the
    output file cannot be opened for writing, **None** is returned.

    The STRIDE executable is invoked directly with an argument list (no
    shell is involved), so file paths containing spaces or shell
    metacharacters are passed through unchanged.

    :arg pdb: PDB identifier or path to a (possibly gzipped) PDB file
    :arg outputname: output filename without the :file:`.stride` extension
    :arg outputdir: folder for the STRIDE output and the uncompressed PDB
        file, default is the current working directory

    :raises EnvironmentError: when the ``stride`` executable is not found
    :raises ValueError: when *pdb* is neither an existing file nor an
        identifier that can be fetched

    For more information on STRIDE see http://webclu.bio.wzw.tum.de/stride/.
    If you benefited from STRIDE, please consider citing [DF95]_.

    .. [DF95] Frishman D, Argos P. Knowledge-Based Protein Secondary Structure
       Assignment. *Proteins* **1995** 23:566-579."""

    # Imported locally so that this function does not depend on a change to
    # the module level import block.
    import subprocess

    stride = which('stride')
    if stride is None:
        raise EnvironmentError('command not found: stride executable is not '
                               'found in one of system paths')
    assert outputname is None or isinstance(outputname, str),\
        'outputname must be a string'
    assert outputdir is None or isinstance(outputdir, str),\
        'outputdir must be a string'

    # Anything that is not an existing file is treated as a PDB identifier.
    if not os.path.isfile(pdb):
        pdb = fetchPDB(pdb, compressed=False)
    if pdb is None:
        raise ValueError('pdb is not a valid PDB identifier or filename')

    # The gzipped input is decompressed next to the original file, or into
    # *outputdir* when one is given.
    if os.path.splitext(pdb)[1] == '.gz':
        unzipped = os.path.splitext(pdb)[0]
        if outputdir is not None:
            unzipped = os.path.join(outputdir, os.path.split(unzipped)[1])
        pdb = gunzip(pdb, unzipped)

    if outputdir is None:
        outputdir = '.'
    if outputname is None:
        outputname = os.path.splitext(os.path.split(pdb)[1])[0]
    out = os.path.join(outputdir, outputname + '.stride')

    # Run the executable with an argument list and redirect its standard
    # output to the file from Python, instead of building a shell command
    # string that would need quoting of every path.
    try:
        with open(out, 'w') as stream:
            status = subprocess.call([stride, pdb], stdout=stream)
    except (IOError, OSError):
        # A shell redirect to an unwritable target used to surface as a
        # failed command; keep reporting that case as a failed run.
        return None

    if status == 0:
        return out
    return None
def parseSTRIDE(stride, ag):
    """Parse STRIDE output from file *stride* into :class:`~.AtomGroup`
    instance *ag*.  STRIDE output file must be in the new format used
    from July 1995 and onwards.  Only lines starting with ``ASG`` are read.
    Secondary structure assignments of *ag* are reset and then set from
    these lines, and following attributes are added to *ag*:

        * *stride_resnum*: STRIDE's sequential residue number, starting at the
          first residue actually in the data set.
        * *stride_phi*, *stride_psi*: peptide backbone torsion angles phi and
          psi
        * *stride_area*: residue solvent accessible area

    Atoms of residues that do not appear in the STRIDE file keep zero values.

    :arg stride: path to the STRIDE output file
    :arg ag: atom group that receives the parsed data

    :returns: *ag*

    :raises IOError: when *stride* is not a path to an existing file
    :raises TypeError: when *ag* is not an :class:`~.AtomGroup` instance"""

    if not os.path.isfile(stride):
        raise IOError('{0} is not a valid file path'.format(stride))
    if not isinstance(ag, AtomGroup):
        raise TypeError('ag argument must be an AtomGroup instance')

    # Per-atom arrays; every atom of a residue gets that residue's values.
    n_atoms = ag.numAtoms()
    NUMBER = np.zeros(n_atoms, int)
    AREA = np.zeros(n_atoms, float)
    PHI = np.zeros(n_atoms, float)
    PSI = np.zeros(n_atoms, float)

    # Clear any existing secondary structure assignments before parsing.
    ag.setSecstrs(np.zeros(n_atoms, dtype=ATOMIC_FIELDS['secondary'].dtype))

    # The context manager closes the file even if a line fails to parse.
    with open(stride) as stream:
        for line in stream:
            if not line.startswith('ASG '):
                continue
            # Fixed-width fields: chain identifier, residue number and
            # insertion code together select the residue in *ag*.
            res = ag[(line[9], int(line[10:15]), line[15].strip())]
            if res is None:
                # Residue is not present in *ag*, nothing to annotate.
                continue
            indices = res.getIndices()
            res.setSecstrs(line[24].strip())
            NUMBER[indices] = int(line[16:20])
            PHI[indices] = float(line[42:49])
            PSI[indices] = float(line[52:59])
            AREA[indices] = float(line[64:69])

    ag.setData('stride_resnum', NUMBER)
    ag.setData('stride_phi', PHI)
    ag.setData('stride_psi', PSI)
    ag.setData('stride_area', AREA)
    return ag