# -*- coding: utf-8 -*-
"""This module defines functions for analyzing normal modes obtained
for conformations in an ensemble."""
import time
from numbers import Integral
from numpy import ndarray
import numpy as np
import warnings
from prody import LOGGER, SETTINGS
from prody.utilities import showFigure, showMatrix, copy, checkWeights, openFile, DTYPE
from prody.utilities import getValue, importLA, wmean, div0
from prody.ensemble import Ensemble, Conformation
from prody.atomic import AtomGroup
from .nma import NMA
from .modeset import ModeSet
from .mode import Mode, Vector
from .functions import calcENM
from .compare import calcSpectralOverlap, matchModes, calcOverlap
from .analysis import calcSqFlucts, calcCrossCorr, calcFractVariance, calcCollectivity
from .plotting import showAtomicLines, showAtomicMatrix, showDomainBar
from .perturb import calcPerturbResponse
from .anm import ANM
from .gnm import GNM
__all__ = ['ModeEnsemble', 'sdarray', 'calcEnsembleENMs',
'showSignature1D', 'psplot', 'showSignatureAtomicLines',
'showSignatureMode', 'showSignatureDistribution', 'showSignatureCollectivity',
'showSignatureSqFlucts', 'calcEnsembleSpectralOverlaps', 'calcSignatureSqFlucts',
'calcSignatureCollectivity', 'calcSignatureFractVariance', 'calcSignatureModes',
'calcSignatureCrossCorr', 'showSignatureCrossCorr', 'showVarianceBar',
'showSignatureVariances', 'calcSignatureOverlaps', 'showSignatureOverlaps',
'saveModeEnsemble', 'loadModeEnsemble', 'saveSignature', 'loadSignature',
'calcSubfamilySpectralOverlaps','showSubfamilySpectralOverlaps', 'calcSignaturePerturbResponse']
[docs]class ModeEnsemble(object):
"""
A collection of ENMs calculated for conformations in an :class:`Ensemble`.
or :class:`PDBEnsemble`.
"""
__slots__ = ['_modesets', '_title', '_labels', '_atoms', '_weights', '_matched', '_reweighted']
def __init__(self, title=None):
self._modesets = []
self._title = 'Unknown' if title is None else title
self._labels = None
self._atoms = None
self._weights = None
self._matched = False
self._reweighted = False
def __len__(self):
"""Returns the number of modesets."""
return len(self._modesets)
def __iter__(self):
for modeset in self._modesets:
yield modeset
iterModeSets = __iter__
def iterModes(self):
for i in range(self.numModes()):
modesets = []
for modeset in self._modesets:
modesets.append(modeset[i])
ens = ModeEnsemble(self.getTitle())
ens._modesets = modesets
yield ens
def __repr__(self):
if self.numModeSets() == 1:
modesets_str = '1 modeset'
else:
modesets_str = '{0} modesets'.format(self.numModeSets())
if self.numModes() == 1:
modes_str = '1 mode'
else:
modes_str = '{0} modes'.format(self.numModes())
if self.numAtoms() == 1:
atoms_str = '1 atom'
else:
atoms_str = '{0} atoms'.format(self.numAtoms())
return '<ModeEnsemble: {0} ({1}, {2})>'\
.format(modesets_str, modes_str, atoms_str)
def __str__(self):
return self.getTitle()
def _getitembase(self, modeset_index, mode_index):
if isinstance(modeset_index, slice):
modesets = self._modesets[modeset_index]
modeset_indices = modeset_index
labels = None if self._labels is None else self._labels[modeset_index]
matched = self.getMatchingStatus()[modeset_index]
reweighted = self.getReweightingStatus()[modeset_index]
elif not np.isscalar(modeset_index):
modesets = []; modeset_indices = []; labels = []; matched = []; reweighted = []
for i in modeset_index:
if isinstance(i, Integral):
j = i
elif isinstance(i, str):
try:
j = self._labels.index(i)
except:
raise IndexError('invalid label: %s'%i)
else:
raise IndexError('all indices must be integers or strings (labels)')
modesets.append(self._modesets[j])
modeset_indices.append(j)
if self._labels is not None:
labels.append(self._labels[j])
allstatus = self.getMatchingStatus()
matched.append(allstatus[j])
allstatus = self.getReweightingStatus()
reweighted.append(allstatus[j])
else:
if isinstance(modeset_index, Integral):
pass
elif isinstance(modeset_index, str):
try:
modeset_index = self._labels.index(modeset_index)
except:
raise IndexError('invalid label: %s'%modeset_index)
else:
raise IndexError('indices must be int, slice, or array-like objects')
return self._modesets[modeset_index][mode_index]
if np.isscalar(mode_index):
mode_index = [mode_index]
for i in range(len(modesets)):
modesets[i] = modesets[i][mode_index]
if self._weights is None:
weights = None
else:
weights = self._weights[modeset_indices, :, :]
ens = ModeEnsemble(title=self.getTitle())
ens.addModeSet(modesets, weights=weights, label=labels, matched=matched, reweighted=reweighted)
ens.setAtoms(self.getAtoms())
return ens
def __getitem__(self, index):
"""A list or tuple of integers can be used for indexing."""
modeset_index = slice(None, None, None)
mode_index = slice(None, None, None)
if isinstance(index, tuple):
if len(index) >= 1:
modeset_index = index[0]
if len(index) >= 2:
mode_index = index[1]
if len(index) >= 3:
raise ValueError('ModeEnsemble indexing supports up to two arguments')
else:
modeset_index = index
ens = self._getitembase(modeset_index, mode_index)
return ens
def __add__(self, other):
"""Concatenate two mode ensembles. """
if not isinstance(other, ModeEnsemble):
raise TypeError('an ModeEnsemble instance cannot be added to an {0} '
'instance'.format(type(other)))
if self.numAtoms() != other.numAtoms():
raise ValueError('Ensembles must have same number of atoms.')
if self.numModes() != other.numModes():
raise ValueError('Ensembles must have same number of modes.')
ensemble = ModeEnsemble('{0} + {1}'.format(self.getTitle(),
other.getTitle()))
ensemble.addModeSet(self._modesets, weights=self._weights, label=self._labels,
matched=self.getMatchingStatus(), reweighted=self.getReweightingStatus())
ensemble.setAtoms(self.getAtoms())
ensemble.addModeSet(other.getModeSets(), weights=other.getWeights(),
label=other.getLabels(), matched=other.getMatchingStatus(),
reweighted=other.getReweightingStatus())
return ensemble
[docs] def is3d(self):
"""Returns **True** is model is 3-dimensional."""
if self._modesets:
return self._modesets[0].is3d()
return False
[docs] def numAtoms(self):
"""Returns number of atoms."""
if self._modesets:
return self._modesets[0].numAtoms()
return 0
[docs] def numModes(self):
"""Returns number of modes in the instance (not necessarily maximum
number of possible modes)."""
if self._modesets:
return self._modesets[0].numModes()
return 0
[docs] def numModeSets(self):
"""Returns number of modesets in the instance."""
return len(self)
[docs] def getTitle(self):
"""Returns title of the signature."""
return self._title
[docs] def getWeights(self):
"""Returns a copy of weights."""
return copy(self._weights)
[docs] def setWeights(self, weights):
"""Set atomic weights."""
n_atoms = self.numAtoms()
n_modesets = self.numModeSets()
if n_atoms > 0 and n_modesets > 0:
self._weights = checkWeights(weights, n_atoms, n_modesets)
else:
raise RuntimeError('modesets must be set before weights can be assigned')
[docs] def getModeSets(self, index=None):
"""
Returns the modeset of the given index. If index is **None** then all
modesets are returned.
"""
if index is None:
return self._modesets
subset = self[index]
if isinstance(subset, ModeSet):
return subset
else:
return subset._modesets
[docs] def getArray(self, mode_index=0):
"""Returns a sdarray of row arrays."""
if np.isscalar(mode_index):
sig = self.getEigvec(mode_index)
else:
sig = self.getEigvecs(mode_index)
return sig
_getArray = getArray
def _getModeData(self, name, mode_index=0, weights=None, sign_correction=False):
modesets = self._modesets
V = []
for i, modeset in enumerate(modesets):
mode = modeset[mode_index]
func = getattr(mode, name)
v = func()
if sign_correction:
if i == 0:
v0 = v
else:
c = np.dot(v, v0)
if c < 0:
v *= -1
V.append(v)
is3d = mode.is3d()
V = np.vstack(V)
mode_num = modesets[0][mode_index].getIndex()
title = 'mode %d'%(mode_num+1)
sig = sdarray(V, title=title, weights=weights, labels=self.getLabels(), is3d=is3d)
return sig
def _getData(self, name, mode_indices=None, weights=None, sign_correction=False):
modesets = self._modesets
V = []
for i, modeset in enumerate(modesets):
modes = modeset if mode_indices is None else modeset[mode_indices]
func = getattr(modes, name)
vecs = func()
if sign_correction:
if i == 0:
vecs0 = vecs
else:
c = np.sign(np.dot(vecs.T, vecs0))
c = np.diag(np.diag(c))
vecs = np.dot(vecs, c)
V.append(vecs)
is3d = modeset.is3d()
V = np.array(V)
title = '%d modes'%len(V)
sig = sdarray(V, title=title, weights=weights, labels=self.getLabels(), is3d=is3d)
return sig
[docs] def getEigvec(self, mode_index=0, sign_correction=True):
"""Returns a sdarray of eigenvector across modesets."""
weights = self.getWeights()
if weights is not None:
weights = weights[:, :, 0]
if self.is3d():
weights = np.repeat(weights, 3, axis=1)
return self._getModeData('getEigvec', mode_index, weights=weights, sign_correction=True)
[docs] def getEigvecs(self, mode_indices=None, sign_correction=True):
"""Returns a sdarray of eigenvectors across modesets."""
weights = self.getWeights()
if weights is not None:
weights = weights[:, :, 0]
if mode_indices is not None:
n_modes = len(mode_indices)
else:
n_modes = self.numModes()
W = [weights.copy() for _ in range(n_modes)] # almost equivalent to W = [weights]*n_modes
W = np.stack(W, axis=-1)
if self.is3d():
W = np.repeat(W, 3, axis=1)
return self._getData('getEigvecs', mode_indices, weights=W, sign_correction=True)
[docs] def getVariance(self, mode_index=0):
"""Returns variances of a given mode index with respect to the reference."""
return self._getModeData('getVariance', mode_index)
[docs] def getVariances(self, mode_indices=None):
"""Returns a sdarray of variances across modesets."""
return self._getData('getVariances', mode_indices)
[docs] def getEigval(self, mode_index=0):
"""Returns eigenvalue of a given mode index with respect to the reference."""
return self._getModeData('getEigval', mode_index)
[docs] def getEigvals(self, mode_indices=None):
"""Returns a sdarray of eigenvalues across modesets."""
return self._getData('getEigvals', mode_indices)
[docs] def getIndex(self, mode_index=0):
"""Returns indices of modes matched to the reference modeset."""
return self._getModeData('getIndex', mode_index)
[docs] def getIndices(self, mode_indices=None):
"""Returns indices of modes in the mode ensemble."""
return self._getData('getIndices', mode_indices)
[docs] def getAtoms(self):
"""Returns associated atoms of the mode ensemble."""
return self._atoms
[docs] def setAtoms(self, atoms):
"""Sets the atoms of the mode ensemble."""
if atoms is not None and self._atoms is not None:
if len(atoms) != self.numAtoms():
raise ValueError('atoms should have %d atoms'%self.numAtoms())
self._atoms = atoms
[docs] def getLabels(self):
"""Returns the labels of the mode ensemble."""
return self._labels
[docs] def setLabels(self, labels):
"""Returns the labels of the mode ensemble."""
if len(labels) != self.numModeSets():
raise ValueError('the number of labels and mode sets mismatch')
self._labels = labels
[docs] def getMatchingStatus(self):
"""Returns the matching status of each mode ensemble."""
if isinstance(self._matched, bool):
matched = [self._matched for _ in self]
else:
matched = self._matched
return matched
[docs] def setMatchingStatus(self, status):
"""Returns the matching status of each mode ensemble."""
if not isinstance(status, bool):
if len(status) != self.numModeSets():
raise ValueError('the number of status and mode sets mismatch')
self._matched = status
[docs] def getReweightingStatus(self):
"""Returns the reweighting status of each mode ensemble."""
if isinstance(self._reweighted, bool):
reweighted = [self._reweighted for _ in self]
else:
reweighted = self._reweighted
return reweighted
[docs] def setReweightingStatus(self, status):
"""Returns the reweighting status of each mode ensemble."""
if not isinstance(status, bool):
if len(status) != self.numModeSets():
raise ValueError('the number of status and mode sets mismatch')
self._reweighted = status
[docs] def match(self, turbo=False, method=None):
"""Matches the modes across mode sets according the mode overlaps.
:arg turbo: if **True** then the computation will be performed in parallel.
The number of threads is set to be the same as the number of
CPUs. Assigning a number to specify the number of threads to be
used. Default is **False**
:type turbo: bool, int
"""
if self._modesets:
#LOGGER.debug('Matching {0} modes across {1} modesets...'
# .format(self.numModes(), self.numModeSets()))
start = time.time()
matched = self.getMatchingStatus()
if np.any(matched):
matched[0] = False
indices = [i for i in range(len(matched)) if not matched[i]]
modesets = [self._modesets[i] for i in indices]
modesets = matchModes(*modesets, turbo=turbo, method=method)
for n, i in enumerate(indices):
if n > 0:
self._modesets[i] = modesets[n]
n_modesets = len(modesets)
else: # if all not matched, start from scratch
self._modesets = matchModes(*self._modesets, turbo=turbo, method=method)
n_modesets = len(self._modesets)
LOGGER.debug('{0} modes across {1} modesets were matched in {2:.2f}s.'
.format(self.numModes(), n_modesets, time.time()-start))
else:
LOGGER.warn('Mode ensemble has no modesets')
self._matched = True
return
[docs] def reweight(self):
"""Reweight the modes based on matched orders"""
self._reweighted = []
for i in range(self.numModeSets()):
modes = self._modesets[i]
model = modes._model
I = modes.getIndices() # matched order for the subset of modes
J = np.sort(I) # original order for the subset
V = model.getEigvecs()
W = model.getEigvals()
W[I] = W[J]
model.setEigens(V, W)
self._reweighted.append(True)
[docs] def undoMatching(self):
"""Restores the original orders of modes"""
n_modes = self.numModes()
matched = self.getMatchingStatus()
for i, enm in enumerate(self):
if matched[i]:
model = enm._model
self._modesets[i] = model[:n_modes]
self.setMatchingStatus(False)
[docs] def undoReweighting(self):
"""Restores the original weighting of modes"""
reweighted = self.getReweightingStatus()
for i, enm in enumerate(self):
model = enm._model
if reweighted[i]:
vars = model.getVariances()
I = np.argsort(vars)[::-1]
V = model.getEigvecs()
W = model.getEigvals()
W = W[I]
model.setEigens(V, W)
self.setReweightingStatus(False)
[docs] def reorder(self):
"""Reorders the modes across mode sets according to their collectivity"""
if not self.isMatched():
LOGGER.warn('Mode ensemble has not been all matched')
else:
vals = self.getEigvals()
mean_val = vals.mean()
order = np.argsort(mean_val)
ret = []
for modeset in self._modesets:
ret.append(ModeSet(modeset.getModel(), order))
self._modesets = ret
[docs] def addModeSet(self, modeset, weights=None, label=None, matched=False, reweighted=False):
"""Adds a modeset or modesets to the mode ensemble."""
if isinstance(modeset, (NMA, ModeSet, Mode)):
modesets = [modeset]
else:
modesets = modeset
matchingstatus = self.getMatchingStatus()
if isinstance(matched, bool):
matched = [matched for _ in range(len(modesets))]
matchingstatus.extend(matched)
self._matched = matchingstatus
reweightingstatus = self.getReweightingStatus()
if isinstance(reweighted, bool):
reweighted = [reweighted for _ in range(len(modesets))]
reweightingstatus.extend(reweighted)
self._reweighted = reweightingstatus
if self._labels is not None or label is not None:
if label is None:
labels = ['']*len(modesets)
elif np.isscalar(label):
labels = [label]
else:
labels = label
if len(labels) != len(modesets):
raise ValueError('labels should have the same length as modesets')
if self._labels is None:
self._labels = ['']*len(self._modesets)
self._labels.extend(labels)
for i in range(len(modesets)):
modeset = modesets[i]
if isinstance(modeset, NMA):
modeset = modeset[:]
elif isinstance(modeset, Mode):
modeset = ModeSet(modeset.getModel(), [modeset.getIndex()])
if not isinstance(modeset, ModeSet):
raise TypeError('modesets should be a list of Mode or ModeSet instances')
if self._modesets:
if modeset.numAtoms() != self.numAtoms():
raise ValueError('to be added, modesets should contain exactly %d atoms'%self.numAtoms())
if modeset.numModes() < self.numModes():
raise ValueError('to be added, modesets should contain at least %d modes'%self.numModes())
if modeset.numModes() > self.numModes():
modeset = modeset[:self.numModes()]
self._modesets.append(modeset)
else:
self._modesets = [modeset]
if weights is None:
weights = np.ones((len(modesets), self.numAtoms(), 1))
if self._weights is not None:
self._weights = np.concatenate((self._weights, weights), axis=0)
else:
self._weights = weights
else:
weights = checkWeights(weights, self.numAtoms(), len(modesets))
if self._weights is None:
self._weights = np.ones((self.numModeSets()-len(modesets), self.numAtoms(), 1))
self._weights = np.concatenate((self._weights, weights), axis=0)
[docs] def delModeSet(self, index):
"""Removes a modeset or modesets from the mode ensemble."""
if np.isscalar(index):
index = [index]
else:
index = list(index)
n_modesets = self.numModeSets()
for i in reversed(index):
self._modesets.pop(i)
if self._labels:
self._labels.pop(i)
if isinstance(self._matched, list):
self._matched.pop(i)
if isinstance(self._reweighted, list):
self._reweighted.pop(i)
if self._weights is not None:
torf = np.ones(n_modesets, dtype=bool)
torf[index] = False
self._weights = self._weights[torf, :, :]
[docs] def isMatched(self):
"""Returns whether the modes are matched across ALL modesets in the mode ensemble"""
return np.all(self._matched)
[docs] def isReweighted(self):
"""Returns whether the modes are matched across ALL modesets in the mode ensemble"""
return np.all(self._reweighted)
[docs]class sdarray(ndarray):
"""
A class for representing a collection of arrays. It is derived from
:class:`~numpy.ndarray`, and the first axis is reserved for indexing
the collection.
:class:`sdarray` functions exactly the same as :class:`~numpy.ndarray`,
except that :meth:`sdarray.mean`, :meth:`sdarray.std`,
:meth:`sdarray.max`, :meth:`sdarray.min` are overriden.
Average, standard deviation, minimum, maximum, etc. are weighted and
calculated over the first axis by default. "sdarray" stands for
"signature dynamics array".
Note for developers: please read the following article about subclassing
:class:`~numpy.ndarray` before modifying this class:
https://docs.scipy.org/doc/numpy-1.14.0/user/basics.subclassing.html
"""
__slots__ = ['_title', '_labels', '_is3d', '_weights', '_oneset']
def __new__(self, array, weights=None, labels=None, title=None, is3d=False,
oneset=False):
array = np.asarray(array)
obj = array.view(self)
shape = array.shape
if title is None:
if oneset:
obj._title = '1 array of size {0}'.format(shape)
else:
obj._title = '{0} array{2} of size {1}'.format(
shape[0], shape[1:], 's' if shape[0]>1 else '')
else:
obj._title = title
if labels is not None and np.isscalar(labels):
labels = [labels]
n_modesets = 1 if oneset else len(array)
if len(labels) != n_modesets:
raise ValueError('the number of labels does not match the size of the array')
obj._labels = labels
obj._is3d = is3d
if weights is not None:
weights = np.asarray(weights)
obj._weights = weights
obj._oneset = oneset
return obj
# def __getitem__(self, index):
# if isinstance(index, tuple):
# index0 = index[0]
# index1 = index[1:]
# else:
# index0 = index
# index1 = ()
# arr = np.asarray(self)[index0]
# w = self._weights
# if w is not None:
# w = w[index0]
# if arr.ndim != self.ndim:
# arr = np.expand_dims(arr, axis=0)
# if w is not None:
# w = np.expand_dims(w, axis=0)
# new_index = [slice(None, None, None)]
# new_index.extend(index1)
# arr = arr[new_index]
# if w is not None:
# w = w[new_index]
# labels = self._labels
# if labels is not None:
# labels = np.array(labels)[index0].tolist()
# return sdarray(arr, weights=w, labels=labels, title=self._title, is3d=self.is3d)
def __getitem__(self, index):
if self._oneset:
index0 = 0
else:
if isinstance(index, tuple):
index0 = index[0]
else:
index0 = index
arr = np.asarray(self)[index]
if np.ndim(arr) == 0:
return arr
oneset = np.isscalar(index0)
w = self._weights
if w is not None:
w = w[index]
labels = self._labels
if labels is not None:
labels = np.array(labels)[index0].tolist()
return sdarray(arr, weights=w, labels=labels, title=self._title,
is3d=self.is3d, oneset=oneset)
def __array_finalize__(self, obj):
if obj is None:
return
self._title = getattr(obj, '_title', None)
self._weights = getattr(obj, '_weights', None)
self._is3d = getattr(obj, '_is3d', None)
self._labels = getattr(obj, '_labels', None)
self._oneset = getattr(obj, '_oneset', None)
def __str__(self):
return self.getTitle()
[docs] def is3d(self):
"""Returns **True** is model is 3-dimensional."""
return self._is3d
def __repr__(self):
arr_repr = ndarray.__repr__(self)
weights = self._weights
if weights is not None:
weights_repr = repr(weights)
arr_repr += '\nweights=\n{0}'.format(weights_repr)
return arr_repr
[docs] def numAtoms(self):
"""Returns the number of atoms assuming it is represented by the second axis."""
try:
if self._oneset:
n_atoms = self.shape[0]
else:
n_atoms = self.shape[1]
if self.is3d():
n_atoms /= 3
return int(n_atoms)
except IndexError:
LOGGER.warn('{0} is not related to the number of atoms'.format(self.getTitle()))
return 0
[docs] def numModeSets(self):
"""Returns the number of modesets in the instance """
if self._oneset:
return 1
return self.shape[0]
[docs] def getTitle(self):
"""Returns the title of the signature."""
return self._title
[docs] def getLabels(self):
"""Returns the labels of the signature."""
return self._labels
[docs] def mean(self, axis=0, **kwargs):
"""Calculates the weighted average of the sdarray over modesets (`axis=0`)."""
arr = np.asarray(self)
weights = self._weights
return wmean(arr, weights, axis)
[docs] def std(self, axis=0, **kwargs):
"""Calculates the weighted standard deviations of the sdarray over modesets (`axis=0`)."""
arr = np.asarray(self)
mean = wmean(arr, self._weights, axis)
if axis is not None:
reps = list(arr.shape)
axes = []
if isinstance(axis, tuple):
axes.extend(axis)
else:
axes = [axis]
for ax in axes:
reps[ax] = 1
mean = np.reshape(mean, reps)
variance = wmean((arr - mean)**2, self._weights, axis)
return np.sqrt(variance)
[docs] def min(self, axis=0, **kwargs):
"""Calculates the minimum values of the sdarray over modesets (`axis=0`)."""
# np.array instead asarray is used to make sure a copy of the original data is created
arr = np.array(self)
weights = self._weights
if weights is not None:
weights = weights.astype(bool)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
rmin = np.nanmin(arr, axis=axis)
return rmin
[docs] def max(self, axis=0, **kwargs):
"""Calculates the maximum values of the sdarray over modesets (`axis=0`)."""
# np.array instead asarray is used to make sure a copy of the original data is created
arr = np.array(self)
weights = self._weights
if weights is not None:
weights = weights.astype(bool)
arr[~weights] = np.nan
with warnings.catch_warnings():
warnings.simplefilter("ignore")
rmax = np.nanmax(arr, axis=axis)
return rmax
[docs] def getWeights(self):
"""Returns the weights of the signature."""
return self._weights
[docs] def setWeights(self, weights):
"""Sets the weights of the signature."""
self._weights = weights
weights = property(getWeights, setWeights)
[docs] def getArray(self):
"""Returns the signature as an numpy array."""
return np.asarray(self)
def transpose(self, axes=None):
a = np.asarray(self)
return np.transpose(a, axes=axes)
[docs]def calcEnsembleENMs(ensemble, model='gnm', trim='reduce', n_modes=20, **kwargs):
"""Calculates normal modes for each member of *ensemble*.
:arg ensemble: normal modes of whose members to be computed
:type ensemble: :class:`.PDBEnsemble`
:arg model: type of ENM that will be performed. It can be either 'anm'
or 'gnm'
:type model: str
:arg trim: type of method that will be used to trim the model. It can
be either 'trim' , 'slice', or 'reduce'. If set to 'trim', the parts
that is not in the selection will simply be removed
:type trim: str
:arg n_modes: number of modes to be computed
:type trim: int
:arg turbo: if **True** then the computation will be performed in parallel.
The number of threads is set to be the same as the number of
CPUs. Assigning a number to specify the number of threads to be
used. Default is **False**
:type turbo: bool, int
:arg match: whether the modes should be matched using :func:`.matchModes`.
Default is **True**
:type match: bool
:arg method: the alternative function that is used to match the modes.
Default is **None**
:type method: function
:arg turbo: whether use :class:`~multiprocessing.Pool` to accelerate the computation.
Note that if writing a script, ``if __name__ == '__main__'`` is necessary
to protect your code when multi-tasking.
See https://docs.python.org/2/library/multiprocessing.html for details.
Default is **False**
:type turbo: bool
:returns: :class:`.ModeEnsemble`
"""
match = kwargs.pop('match', True)
method = kwargs.pop('method', None)
turbo = kwargs.pop('turbo', False)
if isinstance(ensemble, Conformation):
conformation = ensemble
ensemble = conformation.getEnsemble()
index = conformation.getIndex()
ensemble = ensemble[index:index+1]
if model is GNM:
model_type = 'GNM'
elif model is ANM:
model_type = 'ANM'
else:
model_type = str(model).strip().upper()
start = time.time()
select = ensemble.getIndices()
labels = ensemble.getLabels()
n_atoms = ensemble.numAtoms(selected=False)
if select is None:
torf_selected = np.ones(n_atoms, dtype=bool)
else:
torf_selected = np.zeros(n_atoms, dtype=bool)
torf_selected[select] = True
### ENMs ###
## ENM for every conf
enms = []
n_confs = ensemble.numConfs()
str_modes = 'all' if n_modes is None else str(n_modes)
LOGGER.progress('Calculating {0} {1} modes for {2} conformations...'
.format(str_modes, model_type, n_confs), n_confs, '_prody_calcEnsembleENMs')
coordsets = ensemble.getCoordsets(selected=False)
weights = ensemble.getWeights(selected=False)
for i in range(n_confs):
LOGGER.update(i, label='_prody_calcEnsembleENMs')
coords = coordsets[i]
if weights.ndim == 3:
weight = weights[i].flatten()
else:
weight = weights.flatten()
torf_mapped = weight != 0
coords = coords[torf_mapped, :]
system = torf_selected[torf_mapped]
mask = torf_mapped[torf_selected]
enm, _ = calcENM(coords, system, model=model, mask=mask, trim=trim,
n_modes=n_modes, title=labels[i], **kwargs)
enm.masked = False
enms.append(enm)
#lbl = labels[i] if labels[i] != '' else '%d-th conformation'%(i+1)
LOGGER.finish()
min_n_modes = ensemble.numAtoms() * 3
for enm in enms:
n_modes = enm.numModes()
if n_modes < min_n_modes:
min_n_modes = n_modes
for i in range(len(enms)):
n_modes = enms[i].numModes()
if n_modes > min_n_modes:
enms[i] = enms[i][:min_n_modes]
LOGGER.warn('last {0} modes for {1} has been discarded because at least one '
'conformation has only {2} modes'.format(n_modes-min_n_modes,
enms[i].getTitle(), min_n_modes))
if n_modes > 1:
LOGGER.info('{0} {1} modes were calculated for each of the {2} conformations in {3:.2f}s.'
.format(str_modes, model_type, n_confs, time.time()-start))
else:
LOGGER.info('{0} {1} mode was calculated for each of the {2} conformations in {3:.2f}s.'
.format(str_modes, model_type, n_confs, time.time()-start))
modeens = ModeEnsemble(title=ensemble.getTitle())
modeens.addModeSet(enms, weights=ensemble.getWeights(),
label=ensemble.getLabels())
modeens.setAtoms(ensemble.getAtoms())
if match:
modeens.match(turbo=turbo, method=method)
return modeens
def _getEnsembleENMs(ensemble, **kwargs):
if isinstance(ensemble, (Ensemble, Conformation)):
enms = calcEnsembleENMs(ensemble, **kwargs)
elif isinstance(ensemble, ModeEnsemble):
enms = ensemble
else:
try:
enms = ModeEnsemble()
enms.addModeSet(ensemble)
except TypeError:
raise TypeError('ensemble must be an Ensemble or a ModeEnsemble instance,'
'or a list of NMA, Mode, or ModeSet instances.')
return enms
[docs]def calcEnsembleSpectralOverlaps(ensemble, distance=False, turbo=False, **kwargs):
"""Calculate the spectral overlaps between each pair of conformations in the
*ensemble*.
:arg ensemble: an ensemble of structures or ENMs
:type ensemble: :class: `Ensemble`, :class: `ModeEnsemble`
:arg distance: if set to **True**, spectral overlap will be converted to spectral
distance via arccos.
:type distance: bool
:arg turbo: if **True**, extra memory will be used to remember previous calculation
results to accelerate the next calculation, so this option is particularly
useful if spectral overlaps of the same ensemble are calculated repeatedly,
e.g. using different number of modes. Note that for single calculation,
*turbo* will compromise the speed.
Default is **False**
:type turbo: bool
"""
enms = _getEnsembleENMs(ensemble, **kwargs)
overlaps = np.ones((len(enms), len(enms)))
for i in range(enms.numModeSets()):
for j in range(i+1, enms.numModeSets()):
covlap = calcSpectralOverlap(enms[i, :], enms[j, :], turbo=turbo)
overlaps[i, j] = overlaps[j, i] = covlap
if distance:
overlaps = np.arccos(overlaps)
return overlaps
[docs]def calcSignatureSqFlucts(mode_ensemble, **kwargs):
"""
Get the signature square fluctuations of *mode_ensemble*.
:arg mode_ensemble: an ensemble of ENMs
:type mode_ensemble: :class: `ModeEnsemble`
:keyword norm: whether to normalize the square fluctuations. Default is **True**
:type norm: bool
:keyword scale: whether to rescale the square fluctuations based on the reference.
Default is **False**
:type scale: bool
"""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
ifnorm = kwargs.pop('norm', True)
ifscale = kwargs.pop('scale', False)
norm = importLA().norm
V = []
for i, modes in enumerate(mode_ensemble):
sqfs = calcSqFlucts(modes)
if ifnorm:
sqfs = div0(sqfs, norm(sqfs))
elif ifscale:
if i == 0:
norm0 = norm(sqfs)
else:
sqfs = div0(sqfs, norm(sqfs) * norm0)
V.append(sqfs)
V = np.vstack(V)
title_str = '%d modes'%mode_ensemble.numModes()
weights = mode_ensemble.getWeights()
if weights is not None:
weights = weights[:, :, 0]
labels = mode_ensemble.getLabels()
# even the original model is 3d, sqfs are still 1d
sig = sdarray(V, title=title_str, weights=weights, labels=labels, is3d=False)
return sig
[docs]def showSignatureAtomicLines(y, std=None, min=None, max=None, atoms=None, **kwargs):
"""
Show the signature dynamics data using :func:`showAtomicLines`.
:arg y: the mean values of signature dynamics to be plotted
:type y: :class:`~numpy.ndarray`
:arg std: the standard deviations of signature dynamics to be plotted
:type std: :class:`~numpy.ndarray`
:arg min: the minimum values of signature dynamics to be plotted
:type min: :class:`~numpy.ndarray`
:arg max: the maximum values of signature dynamics to be plotted
:type max: :class:`~numpy.ndarray`
:arg linespec: line specifications that will be passed to :func:`showAtomicLines`
:type linespec: str
:arg atoms: an object with method :meth:`getResnums` for use
on the x-axis.
:type atoms: :class:`Atomic`
"""
from matplotlib.pyplot import figure, plot, fill_between, \
gca, xlabel, ylabel, title, ylim
linespec = kwargs.pop('linespec', '-')
zero_line = kwargs.pop('zero_line', False)
lines, polys, bars, texts = showAtomicLines(y, atoms=atoms, dy=std, lower=max, upper=min,
linespec=linespec, show_zero=zero_line, **kwargs)
return lines, polys, bars, texts
[docs]def showSignature1D(signature, linespec='-', **kwargs):
"""
Show *signature* using :func:`showAtomicLines`.
:arg signature: the signature dynamics to be plotted
:type signature: :class:`sdarray`
:arg linespec: line specifications that will be passed to :func:`showAtomicLines`
:type linespec: str
:arg atoms: an object with method :func:`getResnums` for use
on the x-axis.
:type atoms: :class:`Atomic`
:arg alpha: the transparency of the band(s).
:type alpha: float
:arg range: whether shows the minimum and maximum values.
Default is **True**
:type range: bool
"""
from matplotlib.pyplot import figure, plot, fill_between, \
gca, xlabel, ylabel, title, ylim
V = signature
meanV, stdV, minV, maxV = V.mean(), V.std(), V.min(), V.max()
atoms = kwargs.pop('atoms', None)
zero_line = kwargs.pop('show_zero', False)
zero_line = kwargs.pop('zero', zero_line)
show_range = kwargs.pop('range', True)
bars = []; polys = []; lines = []; texts = []
if V.is3d():
meanV = np.reshape(meanV, (V.numAtoms(), 3)).T
stdV = np.reshape(stdV, (V.numAtoms(), 3)).T
minV = np.reshape(minV, (V.numAtoms(), 3)).T
maxV = np.reshape(maxV, (V.numAtoms(), 3)).T
atoms_ = None; zero_line_ = False
for i in range(3):
if i == 2:
atoms_ = atoms
zero_line_ = zero_line
if not show_range:
minV[i] = maxV[i] = None
_lines, _polys, _bars, _texts = showSignatureAtomicLines(meanV[i], stdV[i], minV[i], maxV[i],
atoms=atoms_, zero_line=zero_line_,
linespec=linespec, **kwargs)
lines.extend(_lines)
bars.extend(_bars)
polys.extend(_polys)
texts.extend(_texts)
else:
if not show_range:
minV = maxV = None
_lines, _polys, _bars, _texts = showSignatureAtomicLines(meanV, stdV, minV, maxV,
atoms=atoms, zero_line=zero_line,
linespec=linespec, **kwargs)
lines.extend(_lines)
bars.extend(_bars)
polys.extend(_polys)
texts.extend(_texts)
xlabel('Residues')
title('Signature profile of ' + V.getTitle())
return lines, polys, bars, texts
psplot = showSignature1D
[docs]def showSignatureMode(mode_ensemble, **kwargs):
"""Show signature mode profile.
:arg mode_ensemble: mode ensemble from which to extract an eigenvector
If this is not indexed already then index 0 is used by default
:type mode_ensemble: :class:`ModeEnsemble`
:arg atoms: atoms for showing residues along the x-axis
Default option is to use mode_ensemble.getAtoms()
:type atoms: :class:`Atomic`
:arg scale: scaling factor. Default is 1.0
:type scale: float
"""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
atoms = kwargs.pop('atoms', mode_ensemble.getAtoms())
scale = kwargs.pop('scale', 1.0)
mode = mode_ensemble.getEigvec() * scale
show_zero = kwargs.pop('show_zero', True)
return showSignature1D(mode, atoms=atoms, show_zero=show_zero, **kwargs)
[docs]def showSignatureSqFlucts(mode_ensemble, **kwargs):
"""Show signature profile of square fluctations.
:arg mode_ensemble: mode ensemble from which to calculate square fluctutations
:type mode_ensemble: :class:`ModeEnsemble`
:arg atoms: atoms for showing residues along the x-axis
Default option is to use mode_ensemble.getAtoms()
:type atoms: :class:`Atomic`
:arg scale: scaling factor. Default is 1.0
:type scale: float
:arg show_zero: where to show a grey line at y=0
Default is False
:type show_zero: bool
"""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
atoms = kwargs.pop('atoms', mode_ensemble.getAtoms())
scale = kwargs.pop('scale', 1.0)
sqf = calcSignatureSqFlucts(mode_ensemble) * scale
show_zero = kwargs.pop('show_zero', False)
return showSignature1D(sqf, atoms=atoms, show_zero=show_zero, **kwargs)
[docs]def calcSignatureCrossCorr(mode_ensemble, norm=True):
"""Calculate the signature cross-correlations based on a :class:`ModeEnsemble` instance.
:arg mode_ensemble: an ensemble of ENMs
:type mode_ensemble: :class: `ModeEnsemble`
:keyword norm: whether to normalize the cross-correlations. Default is **True**
:type norm: bool
"""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
n_atoms = mode_ensemble.numAtoms()
n_sets = len(mode_ensemble)
C = np.zeros((n_sets, n_atoms, n_atoms))
for i in range(n_sets):
modes = mode_ensemble[i]
c = calcCrossCorr(modes, norm=norm)
C[i, :, :] = c
title_str = '%d modes'%mode_ensemble.numModes()
weights = mode_ensemble.getWeights()
if weights is not None:
W = np.zeros((mode_ensemble.numModeSets(),
mode_ensemble.numAtoms(),
mode_ensemble.numAtoms()))
for i, w in enumerate(weights):
w2 = np.outer(w, w)
W[i, :, :] = w2
labels = mode_ensemble.getLabels()
# even the original model is 3d, cross-correlations are still 1d
sig = sdarray(C, title=title_str, weights=W, labels=labels, is3d=False)
return sig
[docs]def calcSignaturePerturbResponse(mode_ensemble, **kwargs):
"""Calculate the signature perturbation response scanning based on a :class:`ModeEnsemble` instance.
:arg mode_ensemble: an ensemble of ENMs
:type mode_ensemble: :class: `ModeEnsemble`
"""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
n_atoms = mode_ensemble.numAtoms()
n_sets = len(mode_ensemble)
P = np.zeros((n_sets, n_atoms, n_atoms))
E = np.zeros((n_sets, n_atoms))
S = np.zeros((n_sets, n_atoms))
for i in range(n_sets):
modes = mode_ensemble[i]
prs_mat, eff, sen = calcPerturbResponse(modes, **kwargs)
P[i, :, :] = prs_mat
E[i, :] = eff
S[i, :] = sen
title_str = '%d modes'%mode_ensemble.numModes()
weights = mode_ensemble.getWeights()
if weights is not None:
W2 = np.zeros((mode_ensemble.numModeSets(),
mode_ensemble.numAtoms(),
mode_ensemble.numAtoms()))
for i, w in enumerate(weights):
w2 = np.outer(w, w)
W2[i, :, :] = w2
W = weights[:, :, 0]
labels = mode_ensemble.getLabels()
# even the original model is 3d, cross-correlations are still 1d
sig_prs_mat = sdarray(P, title=title_str, weights=W2, labels=labels, is3d=False)
sig_eff = sdarray(E, title=title_str, weights=W, labels=labels, is3d=False)
sig_sen = sdarray(S, title=title_str, weights=W, labels=labels, is3d=False)
return sig_prs_mat, sig_eff, sig_sen
[docs]def calcSignatureCollectivity(mode_ensemble, masses=None):
"""Calculate average collectivities for a ModeEnsemble."""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
n_modes = mode_ensemble.numModes()
n_sets = len(mode_ensemble)
C = np.zeros((n_sets, n_modes))
for i in range(n_sets):
m = mode_ensemble[i]
c = calcCollectivity(m, masses=masses)
C[i, :] = c
title_str = 'collectivities of %d modes'%mode_ensemble.numModes()
labels = mode_ensemble.getLabels()
# even the original model is 3d, cross-correlations are still 1d
sig = sdarray(C, title=title_str, weights=None, labels=labels, is3d=False)
return sig
[docs]def calcSignatureOverlaps(mode_ensemble, diag=True):
"""Calculate average mode-mode overlaps for a ModeEnsemble."""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
n_sets = mode_ensemble.numModeSets()
n_modes = mode_ensemble.numModes()
if diag:
overlaps = np.zeros((n_modes, n_sets, n_sets))
else:
overlaps = np.zeros((n_modes, n_modes, n_sets, n_sets))
for i, modeset_i in enumerate(mode_ensemble):
for j, modeset_j in enumerate(mode_ensemble):
if j >= i:
if diag:
overlaps[:,i,j] = overlaps[:,j,i] = abs(calcOverlap(modeset_i,
modeset_j,
diag=True))
else:
overlaps[:,:,i,j] = overlaps[:,:,j,i] = abs(calcOverlap(modeset_i,
modeset_j,
diag=False))
return overlaps
[docs]def calcSignatureModes(mode_ensemble):
"""Calculate mean eigenvalues and eigenvectors
and return a new GNM or ANM object containing them."""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be a ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
eigvecs = mode_ensemble.getEigvecs().mean()
eigvals = mode_ensemble.getEigvals().mean()
if isinstance(mode_ensemble[0].getModel(), GNM):
ret = GNM('mean of ' + mode_ensemble.getTitle())
elif isinstance(mode_ensemble[0].getModel(), ANM):
ret = ANM('mean of ' + mode_ensemble.getTitle())
ret.setEigens(eigvecs, eigvals)
return ret
[docs]def showSignatureOverlaps(mode_ensemble, **kwargs):
"""Show a curve of mode-mode overlaps against mode number
with shades for standard deviation and range
:arg diag: Whether to calculate the diagonal values only.
Default is **False** and :func:`showMatrix` is used.
If set to **True**, :func:`showSignatureAtomicLines` is used.
:type diag: bool
:arg std: Whether to show the standard deviation matrix
when **diag** is **False** (and whole matrix is shown).
Default is **False**, meaning the mean matrix is shown.
"""
diag = kwargs.get('diag', False)
std = kwargs.get('std', False)
from matplotlib.pyplot import xlabel, ylabel
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
overlaps = calcSignatureOverlaps(mode_ensemble, diag=diag)
if diag:
r, c = np.triu_indices(overlaps.shape[1], k=1)
overlap_triu = overlaps[:, r, c]
meanV = overlap_triu.mean(axis=1)
stdV = overlap_triu.std(axis=1)
show = showSignatureAtomicLines(meanV, stdV)
xlabel('Mode index')
ylabel('Overlap')
else:
r, c = np.triu_indices(overlaps.shape[2], k=1)
overlap_triu = overlaps[:, :, r, c]
if std:
stdV = overlap_triu.std(axis=-1)
show = showMatrix(stdV)
else:
meanV = overlap_triu.mean(axis=-1)
show = showMatrix(meanV)
return show
[docs]def calcSignatureFractVariance(mode_ensemble):
"""Calculate signature fractional variance for a ModeEnsemble."""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('mode_ensemble should be an instance of ModeEnsemble')
if not mode_ensemble.isMatched():
LOGGER.warn('modes in mode_ensemble did not match cross modesets. '
'Consider running mode_ensemble.match() prior to using this function')
n_sets = mode_ensemble.numModeSets()
n_modes = mode_ensemble.numModes()
W = []; is3d = None
for i in range(n_sets):
m = mode_ensemble[i]
if n_modes == 1:
var = np.array([calcFractVariance(m)])
else:
var = calcFractVariance(m)
W.append(var)
if is3d is None:
is3d = m.is3d()
title_str = '%d modes'%mode_ensemble.numModes()
labels = mode_ensemble.getLabels()
sig = sdarray(W, title=title_str, weights=None, labels=labels, is3d=is3d)
return sig
[docs]def showSignatureCrossCorr(mode_ensemble, std=False, **kwargs):
"""Show average cross-correlations using :func:`showAtomicMatrix`.
By default, *origin=lower* and *interpolation=bilinear* keyword arguments
are passed to this function, but user can overwrite these parameters.
See also :func:`.calcSignatureCrossCorr`.
:arg ensemble: an ensemble of structures or ENMs, or a signature profile
:type ensemble: :class: `Ensemble`, list, :class:`sdarray`
:arg atoms: an object with method :func:`getResnums` for use
on the x-axis.
:type atoms: :class:`Atomic`
"""
import matplotlib.pyplot as plt
norm = kwargs.pop('norm', True)
C = calcSignatureCrossCorr(mode_ensemble, norm=norm)
atoms = kwargs.pop('atoms', None)
if atoms is None:
try:
atoms = mode_ensemble.getAtoms()
except:
pass
if std:
matrixData = C.std()
else:
matrixData = C.mean()
if not 'interpolation' in kwargs:
kwargs['interpolation'] = 'bilinear'
show = showAtomicMatrix(matrixData, atoms=atoms, **kwargs)
indices = mode_ensemble.getIndices()[0]
if len(indices) == 1:
title_str = ', mode '+str(indices[0]+1)
else:
modeIndexStr = ','.join([str(x+1) for x in indices])
if len(modeIndexStr) > 8:
title_str = ', '+str(len(indices))+' modes '+modeIndexStr[:5]+'...'
else:
title_str = ', modes '+modeIndexStr
# title_str = ', '+str(len(modeIndex))+' modes'
if std:
plt.title('Cross-correlations (standard deviation)'+title_str)
else:
plt.title('Cross-correlations (average)'+title_str)
plt.xlabel('Residues')
plt.ylabel('Residues')
return show
[docs]def showSignatureDistribution(signature, **kwargs):
"""
Show the distribution of signature values using
:func:`~matplotlib.pyplot.hist`.
"""
from matplotlib.pyplot import figure, hist, annotate, legend, xlabel, ylabel
from matplotlib.figure import Figure
fig = kwargs.pop('figure', None)
if isinstance(fig, Figure):
fig_num = fig.number
elif fig is None or isinstance(fig, (int, str)):
fig_num = fig
else:
raise TypeError('figure can be either an instance of matplotlib.figure.Figure '
'or a figure number.')
if SETTINGS['auto_show']:
figure(fig_num)
elif fig_num is not None:
figure(fig_num)
W = signature.getArray()[:, ::-1] # reversed to accommodate with matplotlib.pyplot.hist
weights = np.ones_like(W)/float(len(W))
show_legend = kwargs.pop('legend', True)
bins = kwargs.pop('bins', 'auto')
if bins == 'auto':
_, bins = np.histogram(W.flatten(), bins='auto')
elif np.isscalar(bins) and isinstance(bins, (int, np.integer)):
step = (W.max() - W.min())/bins
bins = np.arange(W.min(), W.max(), step)
histtype = kwargs.pop('histtype', 'stepfilled')
label = kwargs.pop('label', None)
labels = kwargs.pop('labels', label)
weights = kwargs.pop('weights', weights)
n, bins, patches = hist(W, bins=bins, weights=weights,
histtype=histtype, label=labels, **kwargs)
colors = []
for patch_i in patches:
if isinstance(patch_i, list):
colors.append(patch_i[0].get_facecolor())
else:
colors.append(patch_i.get_facecolor())
for i, patch_i in enumerate(patches):
if isinstance(patch_i, list):
for j, patch_j in enumerate(patch_i):
patch_j.set_color(colors[-(i+1)])
if show_legend:
legend()
xlabel('Signature value')
ylabel('Probability')
if SETTINGS['auto_show']:
showFigure()
return n, bins, patches
[docs]def showSignatureVariances(mode_ensemble, **kwargs):
"""
Show the distribution of signature variances using
:func:`showSignatureDistribution`.
"""
from matplotlib.pyplot import xlabel
fract = kwargs.pop('fraction', True)
if fract:
sig = calcSignatureFractVariance(mode_ensemble)
else:
sig = mode_ensemble.getVariances()
indices = np.asarray(mode_ensemble.getIndices())[0]
labels = ['mode %d'%(i+1) for i in indices][::-1]
show = showSignatureDistribution(sig, label=labels, **kwargs)
xlabel('Variance')
return show
[docs]def showSignatureCollectivity(mode_ensemble, **kwargs):
"""
Show the distribution of signature variances using
:func:`showSignatureDistribution`.
"""
from matplotlib.pyplot import xlabel
sig = calcSignatureCollectivity(mode_ensemble)
indices = np.asarray(mode_ensemble.getIndices())[0]
labels = ['mode %d'%(i+1) for i in indices][::-1]
show = showSignatureDistribution(sig, label=labels, **kwargs)
xlabel('Collectivity')
return show
[docs]def showVarianceBar(mode_ensemble, highlights=None, **kwargs):
"""Show the distribution of variances (cumulative if multiple modes) using
:func:`~numpy.histogram`.
:arg mode_ensemble: an ensemble of modes whose variances are displayed
:type mode_ensemble: :class:`.ModeEnsemble`
:arg highlights: labels of conformations whose locations on the bar
will be highlighted by arrows and texts
:type highlights: list
:arg fraction: whether the variances should be weighted or not.
Default is **True**
:type fraction: bool
"""
from matplotlib.pyplot import figure, gca, annotate, subplots_adjust
from matplotlib.pyplot import fill_between, xlabel, yticks, xlim
from matplotlib.figure import Figure
from matplotlib.colors import Normalize, NoNorm
from matplotlib import cm, colors
fig = kwargs.pop('figure', None)
fract = kwargs.pop('fraction', True)
bins = kwargs.pop('bins', 50)
cmap = kwargs.pop('cmap', 'Reds')
if isinstance(fig, Figure):
fig_num = fig.number
elif fig is None or isinstance(fig, (int, str)):
fig_num = fig
else:
raise TypeError('figure can be either an instance of matplotlib.figure.Figure '
'or a figure number.')
if SETTINGS['auto_show']:
if fig_num is None:
figure(figsize=(6, 2))
else:
figure(fig_num)
elif fig_num is not None:
figure(fig_num)
ax = gca()
# adjust layouts
box = ax.get_position()
_, _, _, height = box.bounds
ratio = 2.5
box.y1 = box.y0 + height/ratio
#box.y0 += height/7.
ax.set_position(box)
#defarrow = {'width':1, 'headwidth':2,
# 'facecolor':'black',
# 'headlength': 4}
defarrow = {'arrowstyle': '->'}
arrowprops = kwargs.pop('arrowprops', defarrow)
if fract:
sig = calcSignatureFractVariance(mode_ensemble)
else:
sig = mode_ensemble.getVariances()
variances = sig.getArray().sum(axis=1)
hist, edges = np.histogram(variances, bins=bins)
color_norm = colors.Normalize(vmin=hist.min(), vmax=hist.max())
scalar_map = cm.ScalarMappable(norm=color_norm, cmap=cmap)
colors = scalar_map.to_rgba(hist)
areas = []
for i in range(len(hist)):
x = [edges[i], edges[i+1]]
area = fill_between(x, [0, 0], [1, 1], color=colors[i])
areas.append(area)
if not highlights:
highlights = []
indices = []; labels = []
ens_labels = mode_ensemble.getLabels()
for hl in highlights:
if isinstance(hl, str):
if not ens_labels:
raise TypeError('highlights should be a list of integers because '
'mode_ensemble has no label')
index = ens_labels.index(hl)
if isinstance(highlights, dict):
label = highlights[hl]
else:
label = hl
else:
try:
index = int(hl)
except:
raise TypeError('highlights should be a list of integers or strings')
if isinstance(highlights, dict):
label = highlights[hl]
else:
label = ens_labels[index] if ens_labels else str(index)
indices.append(index)
labels.append(label)
annotations = []
for i, label in zip(indices, labels):
x = variances[i]
an = annotate(label, xy=(x, 1), xytext=(x, ratio), arrowprops=arrowprops)
annotations.append(an)
# for i in range(len(variances)):
# x = variances[i]
# plot([x, x], [0, 1], 'w')
xlabel('Variances')
yticks([])
xlim([variances.min(), variances.max()])
if SETTINGS['auto_show']:
showFigure()
return areas, annotations
[docs]def saveModeEnsemble(mode_ensemble, filename=None, atoms=False, **kwargs):
"""Save *mode_ensemble* as :file:`filename.modeens.npz`. If *filename*
is **None**, title of the *mode_ensemble* will be used as the
filename, after ``" "`` (white spaces) in the title are replaced with
``"_"`` (underscores). Upon successful completion of saving, filename
is returned. This function makes use of :func:`~numpy.savez_compressed`
function."""
if not isinstance(mode_ensemble, ModeEnsemble):
raise TypeError('invalid type for mode_ensemble, {0}'
.format(type(mode_ensemble)))
if len(mode_ensemble) == 0:
raise ValueError('mode_ensemble instance does not contain data')
attr_list = ['_modesets', '_title', '_labels', '_weights', '_matched', '_reweighted']
attr_dict = {}
if atoms:
attr_list.append('_atoms')
for attr in attr_list:
value = getattr(mode_ensemble, attr)
if value is not None:
if attr == '_atoms':
value = [value, None]
if attr == '_modesets':
value = list(value)
value.append(None)
attr_dict[attr] = value
if filename is None:
filename = mode_ensemble.getTitle().replace(' ', '_')
suffix = '.modeens'
if not filename.lower().endswith('.npz'):
if not filename.lower().endswith(suffix):
filename += suffix + '.npz'
else:
filename += '.npz'
ostream = openFile(filename, 'wb', **kwargs)
np.savez_compressed(ostream, **attr_dict)
ostream.close()
return filename
[docs]def loadModeEnsemble(filename, **kwargs):
"""Returns ModeEnsemble instance after loading it from file (*filename*).
This function makes use of :func:`numpy.load` function. See
also :func:`saveModeEnsemble`."""
if not 'encoding' in kwargs:
kwargs['encoding'] = 'latin1'
if not 'allow_pickle' in kwargs:
kwargs['allow_pickle'] = True
data = np.load(filename, **kwargs)
weights = getValue(data, '_weights', None)
labels = getValue(data, '_labels', None)
matched = getValue(data, '_matched', False)
title = getValue(data, '_title', None)
modesets = getValue(data, '_modesets', [])
atoms = getValue(data, '_atoms', [None])[0]
reweighted = getValue(data, '_reweighted', False)
if isinstance(title, np.ndarray):
title = np.asarray(title, dtype=str)
title = str(title)
if isinstance(modesets, np.ndarray):
modesets = modesets.tolist()
while (None in modesets):
modesets.remove(None)
if labels is not None:
char = labels.dtype.char
if char in 'SU' and char != DTYPE:
labels = labels.astype(str)
labels = labels.tolist()
if isinstance(matched, np.ndarray):
matched = matched.tolist()
if isinstance(reweighted, np.ndarray):
reweighted = reweighted.tolist()
modeens = ModeEnsemble(title=title)
modeens._weights = weights
modeens._labels = labels
modeens._matched = matched
modeens._reweighted = reweighted
modeens._modesets = modesets
if atoms is not None:
if isinstance(atoms, AtomGroup):
data = atoms._data
else:
data = atoms._ag._data
for key in data:
arr = data[key]
char = arr.dtype.char
if char in 'SU' and char != DTYPE:
arr = arr.astype(str)
data[key] = arr
modeens._atoms = atoms
return modeens
[docs]def saveSignature(signature, filename=None, **kwargs):
"""Save *signature* as :file:`filename.sdarray.npz`. If *filename*
is **None**, title of the *signature* will be used as the
filename, after ``" "`` (white spaces) in the title are replaced with
``"_"`` (underscores). Upon successful completion of saving, filename
is returned. This function makes use of :func:`~numpy.savez_compressed`
function."""
if not isinstance(signature, sdarray):
raise TypeError('invalid type for signature, {0}'
.format(type(signature)))
attr_list = ['_title', '_labels', '_is3d', '_weights', '_oneset', '_array']
attr_dict = {}
for attr in attr_list:
if attr == '_array':
value = np.asarray(signature)
else:
value = getattr(signature, attr)
if value is not None:
attr_dict[attr] = value
if filename is None:
filename = signature.getTitle().replace(' ', '_')
suffix = '.sdarray'
if not filename.lower().endswith('.npz'):
if not filename.lower().endswith(suffix):
filename += suffix + '.npz'
else:
filename += '.npz'
ostream = openFile(filename, 'wb', **kwargs)
np.savez_compressed(ostream, **attr_dict)
ostream.close()
return filename
[docs]def loadSignature(filename, **kwargs):
"""Returns :class:`sdarray` instance after loading it from file (*filename*).
This function makes use of :func:`numpy.load` function. See
also :func:`saveSignature`."""
if not 'encoding' in kwargs:
kwargs['encoding'] = 'latin1'
data = np.load(filename, **kwargs)
weights = getValue(data, '_weights', None)
labels = getValue(data, '_labels', None)
title = getValue(data, '_title', None)
is3d = getValue(data, '_is3d', False)
oneset = getValue(data, '_oneset', False)
array = getValue(data, '_array', None)
if isinstance(title, np.ndarray):
title = np.asarray(title, dtype=str)
title = str(title)
if isinstance(is3d, np.ndarray):
is3d = bool(is3d)
if isinstance(oneset, np.ndarray):
oneset = bool(oneset)
if labels is not None:
labels = labels.tolist()
signature = sdarray(array, weights=weights, labels=labels, title=title,
is3d=is3d, oneset=oneset)
return signature
[docs]def calcSubfamilySpectralOverlaps(mode_ens, subfamily_dict, **kwargs):
"""Calculate average spectral overlaps (or distances) within and between
subfamilies in a mode ensemble defined using a dictionary where each key is an
ensemble member and the associate value is a subfamily name.
To use a range of modes, please index the mode ensemble e.g.
mode_ens=mode_ensemble[:,3:20] to use modes 4 to 20 inclusive.
Alternatively, there is the option to provide first and last
keyword arguments, which would be used as the 3 and 20 above.
:arg mode_ensemble: an ensemble of modes corresponding to a set of modes
for each family member
:type mode_ensemble: :class:`.ModeEnsemble`
:arg subfamily_dict: a dictionary providing a subfamily label for
each family member
:type subfamily_dict: dict
:keyword first: the first index for a range of modes
:type first: int
:keyword last: the last index for a range of modes
:type last: int
:keyword remove_small: whether to remove small subfamilies with
fewer than 4 members. Default is True
:type remove_small: bool
:keyword return_reordered_subfamilies: whether to return the reordered
subfamilies in addition to the matrix. Default is False
type return_reordered_subfamilies: bool
"""
if not isinstance(mode_ens, ModeEnsemble):
raise TypeError('mode_ens should be a mode ensemble')
if not isinstance(subfamily_dict, dict):
raise TypeError('subfamily_dict should be a dictionary')
if any([label not in list(subfamily_dict.keys()) for label in mode_ens.getLabels()]):
raise ValueError('The are member labels in mode_ens with no associated entry in subfamily_dict')
first = kwargs.get('first', 0)
if first is not None:
if not isinstance(first,int):
raise TypeError('first should be an integer')
last = kwargs.get('last', -1)
if last is not None:
if not isinstance(last,int):
raise TypeError('last should be an integer')
try:
mode_ens = mode_ens[:,first:last]
except:
try:
mode_ens = mode_ens[:,first:]
except:
raise ValueError('first is not a valid index for indexing mode_ens')
try:
mode_ens = mode_ens[:,:last]
except:
raise ValueError('last is not a valid index for indexing mode_ens')
first_mode_index = mode_ens.getIndices()[0,0]
last_mode_index = mode_ens.getIndices()[0,-1]
LOGGER.info('The mode range used for this analysis is {0} to {1}'
.format(first_mode_index+1,last_mode_index+1))
tree_labels = mode_ens.getLabels()
distance = kwargs.get('distance',True)
distm = calcEnsembleSpectralOverlaps(mode_ens, distance=distance)
subfamilies = np.unique(list(subfamily_dict.values()))
reverse_dict = dict()
for i in subfamilies:
reverse_dict[i] = []
for i in range(len(tree_labels)):
subfamily_i = subfamily_dict[tree_labels[i]]
if subfamily_i in subfamilies:
reverse_dict[subfamily_i].append(i)
remove_small = kwargs.get('remove_small',True)
if remove_small:
temp_dict = dict()
for key, value in reverse_dict.items():
if len(value) >= 4:
temp_dict[key] = value
reverse_dict = temp_dict
N_group = len(reverse_dict)
subfamily_overlap_matrix = []
for subfamily_i in subfamilies:
index_i = reverse_dict[subfamily_i]
for subfamily_j in subfamilies:
index_j = reverse_dict[subfamily_j]
temp_sub_matrix = distm[np.ix_(index_i, index_j)]
if subfamily_i == subfamily_j:
subfamily_overlap_matrix.append(temp_sub_matrix[np.triu_indices(
np.shape(temp_sub_matrix)[0])].mean())
else:
subfamily_overlap_matrix.append(temp_sub_matrix.mean())
subfamily_overlap_matrix = np.asarray(subfamily_overlap_matrix)
subfamily_overlap_matrix = subfamily_overlap_matrix.reshape([N_group, N_group])
return_reordered_subfamilies = kwargs.get('return_reordered_subfamilies',False)
if return_reordered_subfamilies:
return subfamily_overlap_matrix, subfamilies
return subfamily_overlap_matrix
[docs]def showSubfamilySpectralOverlaps(mode_ens, subfamily_dict, **kwargs):
"""Calculate and show the matrix of spectral overlaps or distances averaged
over subfamilies. Inputs are the same as calcSubfamilySpectralOverlaps
plus the following and those of showDomainBar if you wish.
:keyword show_subfamily_bar: whether to show the subfamilies as colored bars
using showDomainBar. Default is False
:type show_subfamily_bar: bool
"""
kwargs['return_reordered_subfamilies'] = True
subfamily_overlap_matrix, subfamilies = calcSubfamilySpectralOverlaps(mode_ens, subfamily_dict, **kwargs)
show = showMatrix(subfamily_overlap_matrix, origin='lower',
xticklabels=subfamilies, yticklabels=subfamilies,
allticks=True, vmin=0., vmax=1.6)
show_subfamily_bar = kwargs.get('show_subfamily_bar',False)
text = kwargs.pop('text',False)
if show_subfamily_bar:
showDomainBar(subfamilies, axis='x', text=text, **kwargs)
showDomainBar(subfamilies, axis='y', text=text, **kwargs)
if SETTINGS['auto_show']:
showFigure()
return show