# -*- coding: utf-8 -*-
"""This module defines a function for parsing protein structure files in
`PSF format`_.
.. _PSF format:
http://www.ks.uiuc.edu/Training/Tutorials/namd/
namd-tutorial-unix-html/node21.html"""
import os.path
from numpy import fromstring, zeros, ones, array, add
from prody import PY2K
from prody.atomic import ATOMIC_FIELDS, AtomGroup
from prody.utilities import openFile
if PY2K:
range = xrange
__all__ = ['parsePSF', 'writePSF']
[docs]def parsePSF(filename, title=None, ag=None):
"""Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
format PSF file *filename*. Atom and bond information is parsed from the
file. If *title* is not given, *filename* will be set as the title of the
:class:`.AtomGroup` instance. An :class:`.AtomGroup` instance may be
provided as *ag* argument. When provided, *ag* must have the same number
of atoms in the same order as the file. Data from PSF file will be added
to the *ag*. This may overwrite present data if it overlaps with PSF file
content.
This function now includes the angles, dihedrals, and impropers sections
as well as donors, acceptors and crossterms!"""
if ag is not None:
if not isinstance(ag, AtomGroup):
raise TypeError('ag must be an AtomGroup instance')
psf = openFile(filename, 'rb')
line = psf.readline()
while line:
line = line.strip()
if line.endswith(b'!NATOM'):
n_atoms = int(line.split(b'!')[0])
break
line = psf.readline()
if title is None:
title = os.path.splitext(os.path.split(filename)[1])[0]
else:
title = str(title)
if ag is None:
ag = AtomGroup(title)
else:
if n_atoms != ag.numAtoms():
raise ValueError('ag and PSF file must have same number of atoms')
serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)
n = 0
n_bonds = 0
for line in psf:
if line.strip() == b'':
continue
if b'!NBOND:' in line.upper():
items = line.split()
n_bonds = int(items[0])
break
if n + 1 > n_atoms:
continue
if len(line) <= 71:
serials[n] = line[:8]
segnames[n] = line[9:13].strip()
resnums[n] = line[14:19]
resnames[n] = line[19:23].strip()
atomnames[n] = line[24:28].strip()
atomtypes[n] = line[29:35].strip()
charges[n] = line[35:44]
masses[n] = line[50:60]
else:
items = line.split()
serials[n] = items[0]
segnames[n] = items[1]
resnums[n] = items[2]
resnames[n] = items[3]
atomnames[n] = items[4]
atomtypes[n] = items[5]
charges[n] = items[6]
masses[n] = items[7]
n += 1
if n < n_atoms:
raise IOError('number of lines in PSF atoms block is less than the number of '
'atoms')
n_angles = 0
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!NTHETA' in line:
items = line.split()
n_angles = int(items[0])
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
b_array = fromstring(lines, count=n_bonds*2, dtype=int, sep=' ')
if len(b_array) != n_bonds*2:
raise IOError('number of bonds expected and parsed do not match')
n_dihedrals = 0
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!NPHI' in line:
items = line.split()
n_dihedrals = int(items[0])
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
a_array = fromstring(lines, count=n_angles*3, dtype=int, sep=' ')
if len(a_array) != n_angles*3:
raise IOError('number of angles expected and parsed do not match')
n_impropers = 0
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!NIMPHI' in line:
items = line.split()
n_impropers = int(items[0])
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
d_array = fromstring(lines, count=n_dihedrals*4, dtype=int, sep=' ')
if len(d_array) != n_dihedrals*4:
raise IOError('number of dihedrals expected and parsed do not match')
n_donors = 0
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!NDON' in line:
items = line.split()
n_donors = int(items[0])
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
i_array = fromstring(lines, count=n_impropers*4, dtype=int, sep=' ')
if len(i_array) != n_impropers*4:
raise IOError('number of impropers expected and parsed do not match')
n_acceptors = 0
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!NACC' in line:
items = line.split()
n_acceptors = int(items[0])
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
do_array = fromstring(lines, count=n_donors*2, dtype=int, sep=' ')
if len(do_array) != n_donors*2:
raise IOError('number of donors expected and parsed do not match')
n_exclusions = 0
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!NNB' in line:
items = line.split()
n_exclusions = int(items[0])
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
ac_array = fromstring(lines, count=n_acceptors*2, dtype=int, sep=' ')
if len(ac_array) != n_acceptors*2:
raise IOError('number of acceptors expected and parsed do not match')
lines = []
for i, line in enumerate(psf):
if line.strip() == b'':
continue
if b'!' in line:
break
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
nbe_array = fromstring(lines, count=n_exclusions*2, dtype=int, sep=' ')
if len(nbe_array) != n_exclusions*2:
raise IOError('number of nonbonded exclusions expected and parsed do not match')
n_crossterms = 0
for i, line in enumerate(psf):
if b'!NCRTERM' in line:
items = line.split()
n_crossterms = int(items[0])
break
lines = []
for i, line in enumerate(psf):
lines.append(line.decode(encoding='UTF-8'))
lines = ''.join(lines)
c_array = fromstring(lines, count=n_crossterms*4, dtype=int, sep=' ')
if len(c_array) != n_crossterms*4:
raise IOError('number of crossterms expected and parsed do not match')
psf.close()
ag.setSerials(serials)
ag.setSegnames(segnames)
ag.setResnums(resnums)
ag.setResnames(resnames)
ag.setNames(atomnames)
ag.setTypes(atomtypes)
ag.setCharges(charges)
ag.setMasses(masses)
if n_bonds > 0:
b_array = add(b_array, -1, b_array)
ag.setBonds(b_array.reshape((n_bonds, 2)))
if n_angles > 0:
a_array = add(a_array, -1, a_array)
ag.setAngles(a_array.reshape((n_angles, 3)))
if n_dihedrals > 0:
d_array = add(d_array, -1, d_array)
ag.setDihedrals(d_array.reshape((n_dihedrals, 4)))
if n_impropers > 0:
i_array = add(i_array, -1, i_array)
ag.setImpropers(i_array.reshape((n_impropers, 4)))
if n_donors > 0:
do_array = add(do_array, -1, do_array)
ag.setDonors(do_array.reshape((n_donors, 2)))
if n_acceptors > 0:
ac_array = add(ac_array, -1, ac_array)
ag.setAcceptors(ac_array.reshape((n_acceptors, 2)))
if n_exclusions > 0:
nbe_array = add(nbe_array, -1, nbe_array)
ag.setNBExclusions(nbe_array.reshape((n_exclusions, 2)))
if n_crossterms > 0:
c_array = add(c_array, -1, c_array)
ag.setCrossterms(c_array.reshape((n_crossterms, 4)))
return ag
PSFLINE = ('%8d %-4s %-4d %-4s %-4s %-4s %10.6f %13.4f %11d\n')
[docs]def writePSF(filename, atoms):
"""Write atoms in X-PLOR format PSF file with name *filename* and return
*filename*. This function will write available atom and bond information
only."""
if not filename.endswith('.psf'):
filename = filename + '.psf'
try:
n_atoms, segments, rnums, rnames, names, types, charges, masses = (
atoms.numAtoms(), atoms._getSegnames(), atoms._getResnums(),
atoms._getResnames(), atoms._getNames(), atoms._getTypes(),
atoms._getCharges(), atoms._getMasses())
except AttributeError:
raise TypeError('atoms must be an Atomic instance')
if segments is None:
segments = atoms._getChids()
if segments is None:
segments = ['UNK'] * n_atoms
if rnums is None:
rnums = ones(n_atoms, int)
if rnames is None:
rnames = ['UNK'] * n_atoms
if names is None:
raise ValueError('atom names are not set')
if types is None:
types = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
if charges is None:
charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
if masses is None:
masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)
long_fields = array([len(tp) for tp in types]).max() > 4
out = openFile(filename, 'w')
write = out.write
write('PSF{0}\n'.format( ' NAMD' if long_fields else ''))
write('\n')
write('{0:8d} !NTITLE\n'.format(1))
write(' REMARKS {0}\n'.format(str(atoms)))
write('\n')
write('{0:8d} !NATOM\n'.format(n_atoms))
for i in range(n_atoms):
write(PSFLINE % (i + 1, segments[i], rnums[i], rnames[i], names[i],
types[i], charges[i], masses[i], 0))
bonds = list(atoms._iterBonds())
if len(bonds) > 0:
bonds = array(bonds, int) + 1
write('\n')
write('{0:8d} !NBOND: bonds\n'.format(len(bonds)))
for i, bond in enumerate(bonds):
write('%8s%8s' % (bond[0], bond[1]))
if i % 4 == 3:
write('\n')
if i % 4 != 3:
write('\n')
angles = list(atoms._iterAngles())
if len(angles) > 0:
angles = array(angles, int) + 1
write('\n')
write('{0:8d} !NTHETA: angles\n'.format(len(angles)))
for i, angle in enumerate(angles):
write('%8s%8s%8s' % (angle[0], angle[1], angle[2]))
if i % 3 == 2:
write('\n')
if i % 3 != 2:
write('\n')
dihedrals = list(atoms._iterDihedrals())
if len(dihedrals) > 0:
dihedrals = array(dihedrals, int) + 1
write('\n')
write('{0:8d} !NPHI: dihedrals\n'.format(len(dihedrals)))
for i, dihedral in enumerate(dihedrals):
write('%8s%8s%8s%8s' % (dihedral[0], dihedral[1], dihedral[2], dihedral[3]))
if i % 4 == 3:
write('\n')
if i % 4 != 3:
write('\n')
impropers = list(atoms._iterImpropers())
if len(impropers) > 0:
impropers = array(impropers, int) + 1
write('\n')
write('{0:8d} !NIMPHI: impropers\n'.format(len(impropers)))
for i, improper in enumerate(impropers):
write('%8s%8s%8s%8s' % (improper[0], improper[1], improper[2], improper[3]))
if i % 2 == 1:
write('\n')
if i % 2 != 1:
write('\n')
write('\n')
donors = list(atoms._iterDonors())
if len(donors) > 0:
donors = array(donors, int) + 1
write('\n')
write('{0:8d} !NDON: donors\n'.format(len(donors)))
for i, donor in enumerate(donors):
write('%8s%8s' % (donor[0], donor[1]))
if i % 4 == 3:
write('\n')
if i % 4 != 3:
write('\n')
else:
write('{0:8d} !NDON: donors\n'.format(0))
write('\n')
write('\n')
acceptors = list(atoms._iterAcceptors())
if len(acceptors) > 0:
acceptors = array(acceptors, int) + 1
write('\n')
write('{0:8d} !NACC: acceptors\n'.format(len(acceptors)))
for i, acceptor in enumerate(acceptors):
write('%8s%8s' % (acceptor[0], acceptor[1]))
if i % 4 == 3:
write('\n')
if i % 4 != 3:
write('\n')
else:
write('{0:8d} !NACC: acceptors\n'.format(0))
write('\n')
nbexclusions = list(atoms._iterNBExclusions())
if len(nbexclusions) > 0:
nbexclusions = array(nbexclusions, int) + 1
write('\n')
write('{0:8d} !NNB\n'.format(len(nbexclusions)))
for i, nbexclusion in enumerate(nbexclusions):
write('%8s%8s' % (nbexclusion[0], nbexclusion[1]))
if i % 4 == 3:
write('\n')
if i % 4 != 3:
write('\n')
else:
write('{0:8d} !NNB\n'.format(0))
write('\n')
crossterms = list(atoms._iterCrossterms())
if len(crossterms) > 0:
crossterms = array(crossterms, int) + 1
write('\n')
write('{0:8d} !NCRTERM: crossterms\n'.format(len(crossterms)))
for i, crossterm in enumerate(crossterms):
write('%8s%8s%8s%8s' % (crossterm[0], crossterm[1], crossterm[2], crossterm[3]))
if i % 2 == 1:
write('\n')
if i % 2 != 1:
write('\n')
write('\n')
out.close()
return filename