Source code for prody.dynamics.signature

# -*- coding: utf-8 -*-
"""This module defines functions for analyzing normal modes obtained 
for conformations in an ensemble."""

import time
from numbers import Integral
from numpy import ndarray
import numpy as np
import warnings

from prody import LOGGER, SETTINGS
from prody.utilities import showFigure, showMatrix, copy, checkWeights, openFile, DTYPE
from prody.utilities import getValue, importLA, wmean, div0
from prody.ensemble import Ensemble, Conformation
from prody.atomic import AtomGroup

from .nma import NMA
from .modeset import ModeSet
from .mode import Mode, Vector
from .functions import calcENM
from .compare import calcSpectralOverlap, matchModes, calcOverlap

from .analysis import calcSqFlucts, calcCrossCorr, calcFractVariance, calcCollectivity
from .plotting import showAtomicLines, showAtomicMatrix, showDomainBar
from .perturb import calcPerturbResponse
from .anm import ANM
from .gnm import GNM

__all__ = ['ModeEnsemble', 'sdarray', 'calcEnsembleENMs', 
           'showSignature1D', 'psplot', 'showSignatureAtomicLines', 
           'showSignatureMode', 'showSignatureDistribution', 'showSignatureCollectivity',
           'showSignatureSqFlucts', 'calcEnsembleSpectralOverlaps', 'calcSignatureSqFlucts', 
           'calcSignatureCollectivity', 'calcSignatureFractVariance', 'calcSignatureModes', 
           'calcSignatureCrossCorr', 'showSignatureCrossCorr', 'showVarianceBar',
           'showSignatureVariances', 'calcSignatureOverlaps', 'showSignatureOverlaps',
           'saveModeEnsemble', 'loadModeEnsemble', 'saveSignature', 'loadSignature',
           'calcSubfamilySpectralOverlaps','showSubfamilySpectralOverlaps', 'calcSignaturePerturbResponse']

[docs]class ModeEnsemble(object): """ A collection of ENMs calculated for conformations in an :class:`Ensemble`. or :class:`PDBEnsemble`. """ __slots__ = ['_modesets', '_title', '_labels', '_atoms', '_weights', '_matched', '_reweighted'] def __init__(self, title=None): self._modesets = [] self._title = 'Unknown' if title is None else title self._labels = None self._atoms = None self._weights = None self._matched = False self._reweighted = False def __len__(self): """Returns the number of modesets.""" return len(self._modesets) def __iter__(self): for modeset in self._modesets: yield modeset iterModeSets = __iter__ def iterModes(self): for i in range(self.numModes()): modesets = [] for modeset in self._modesets: modesets.append(modeset[i]) ens = ModeEnsemble(self.getTitle()) ens._modesets = modesets yield ens def __repr__(self): if self.numModeSets() == 1: modesets_str = '1 modeset' else: modesets_str = '{0} modesets'.format(self.numModeSets()) if self.numModes() == 1: modes_str = '1 mode' else: modes_str = '{0} modes'.format(self.numModes()) if self.numAtoms() == 1: atoms_str = '1 atom' else: atoms_str = '{0} atoms'.format(self.numAtoms()) return '<ModeEnsemble: {0} ({1}, {2})>'\ .format(modesets_str, modes_str, atoms_str) def __str__(self): return self.getTitle() def _getitembase(self, modeset_index, mode_index): if isinstance(modeset_index, slice): modesets = self._modesets[modeset_index] modeset_indices = modeset_index labels = None if self._labels is None else self._labels[modeset_index] matched = self.getMatchingStatus()[modeset_index] reweighted = self.getReweightingStatus()[modeset_index] elif not np.isscalar(modeset_index): modesets = []; modeset_indices = []; labels = []; matched = []; reweighted = [] for i in modeset_index: if isinstance(i, Integral): j = i elif isinstance(i, str): try: j = self._labels.index(i) except: raise IndexError('invalid label: %s'%i) else: raise IndexError('all indices must be integers or strings (labels)') modesets.append(self._modesets[j]) modeset_indices.append(j) if self._labels is not None: labels.append(self._labels[j]) allstatus = self.getMatchingStatus() matched.append(allstatus[j]) allstatus = self.getReweightingStatus() reweighted.append(allstatus[j]) else: if isinstance(modeset_index, Integral): pass elif isinstance(modeset_index, str): try: modeset_index = self._labels.index(modeset_index) except: raise IndexError('invalid label: %s'%modeset_index) else: raise IndexError('indices must be int, slice, or array-like objects') return self._modesets[modeset_index][mode_index] if np.isscalar(mode_index): mode_index = [mode_index] for i in range(len(modesets)): modesets[i] = modesets[i][mode_index] if self._weights is None: weights = None else: weights = self._weights[modeset_indices, :, :] ens = ModeEnsemble(title=self.getTitle()) ens.addModeSet(modesets, weights=weights, label=labels, matched=matched, reweighted=reweighted) ens.setAtoms(self.getAtoms()) return ens def __getitem__(self, index): """A list or tuple of integers can be used for indexing.""" modeset_index = slice(None, None, None) mode_index = slice(None, None, None) if isinstance(index, tuple): if len(index) >= 1: modeset_index = index[0] if len(index) >= 2: mode_index = index[1] if len(index) >= 3: raise ValueError('ModeEnsemble indexing supports up to two arguments') else: modeset_index = index ens = self._getitembase(modeset_index, mode_index) return ens def __add__(self, other): """Concatenate two mode ensembles. """ if not isinstance(other, ModeEnsemble): raise TypeError('an ModeEnsemble instance cannot be added to an {0} ' 'instance'.format(type(other))) if self.numAtoms() != other.numAtoms(): raise ValueError('Ensembles must have same number of atoms.') if self.numModes() != other.numModes(): raise ValueError('Ensembles must have same number of modes.') ensemble = ModeEnsemble('{0} + {1}'.format(self.getTitle(), other.getTitle())) ensemble.addModeSet(self._modesets, weights=self._weights, label=self._labels, matched=self.getMatchingStatus(), reweighted=self.getReweightingStatus()) ensemble.setAtoms(self.getAtoms()) ensemble.addModeSet(other.getModeSets(), weights=other.getWeights(), label=other.getLabels(), matched=other.getMatchingStatus(), reweighted=other.getReweightingStatus()) return ensemble
[docs] def is3d(self): """Returns **True** is model is 3-dimensional.""" if self._modesets: return self._modesets[0].is3d() return False
[docs] def numAtoms(self): """Returns number of atoms.""" if self._modesets: return self._modesets[0].numAtoms() return 0
[docs] def numModes(self): """Returns number of modes in the instance (not necessarily maximum number of possible modes).""" if self._modesets: return self._modesets[0].numModes() return 0
[docs] def numModeSets(self): """Returns number of modesets in the instance.""" return len(self)
[docs] def getTitle(self): """Returns title of the signature.""" return self._title
[docs] def getWeights(self): """Returns a copy of weights.""" return copy(self._weights)
[docs] def setWeights(self, weights): """Set atomic weights.""" n_atoms = self.numAtoms() n_modesets = self.numModeSets() if n_atoms > 0 and n_modesets > 0: self._weights = checkWeights(weights, n_atoms, n_modesets) else: raise RuntimeError('modesets must be set before weights can be assigned')
[docs] def getModeSets(self, index=None): """ Returns the modeset of the given index. If index is **None** then all modesets are returned. """ if index is None: return self._modesets subset = self[index] if isinstance(subset, ModeSet): return subset else: return subset._modesets
[docs] def getArray(self, mode_index=0): """Returns a sdarray of row arrays.""" if np.isscalar(mode_index): sig = self.getEigvec(mode_index) else: sig = self.getEigvecs(mode_index) return sig
_getArray = getArray def _getModeData(self, name, mode_index=0, weights=None, sign_correction=False): modesets = self._modesets V = [] for i, modeset in enumerate(modesets): mode = modeset[mode_index] func = getattr(mode, name) v = func() if sign_correction: if i == 0: v0 = v else: c = np.dot(v, v0) if c < 0: v *= -1 V.append(v) is3d = mode.is3d() V = np.vstack(V) mode_num = modesets[0][mode_index].getIndex() title = 'mode %d'%(mode_num+1) sig = sdarray(V, title=title, weights=weights, labels=self.getLabels(), is3d=is3d) return sig def _getData(self, name, mode_indices=None, weights=None, sign_correction=False): modesets = self._modesets V = [] for i, modeset in enumerate(modesets): modes = modeset if mode_indices is None else modeset[mode_indices] func = getattr(modes, name) vecs = func() if sign_correction: if i == 0: vecs0 = vecs else: c = np.sign(np.dot(vecs.T, vecs0)) c = np.diag(np.diag(c)) vecs = np.dot(vecs, c) V.append(vecs) is3d = modeset.is3d() V = np.array(V) title = '%d modes'%len(V) sig = sdarray(V, title=title, weights=weights, labels=self.getLabels(), is3d=is3d) return sig
[docs] def getEigvec(self, mode_index=0, sign_correction=True): """Returns a sdarray of eigenvector across modesets.""" weights = self.getWeights() if weights is not None: weights = weights[:, :, 0] if self.is3d(): weights = np.repeat(weights, 3, axis=1) return self._getModeData('getEigvec', mode_index, weights=weights, sign_correction=True)
[docs] def getEigvecs(self, mode_indices=None, sign_correction=True): """Returns a sdarray of eigenvectors across modesets.""" weights = self.getWeights() if weights is not None: weights = weights[:, :, 0] if mode_indices is not None: n_modes = len(mode_indices) else: n_modes = self.numModes() W = [weights.copy() for _ in range(n_modes)] # almost equivalent to W = [weights]*n_modes W = np.stack(W, axis=-1) if self.is3d(): W = np.repeat(W, 3, axis=1) return self._getData('getEigvecs', mode_indices, weights=W, sign_correction=True)
[docs] def getVariance(self, mode_index=0): """Returns variances of a given mode index with respect to the reference.""" return self._getModeData('getVariance', mode_index)
[docs] def getVariances(self, mode_indices=None): """Returns a sdarray of variances across modesets.""" return self._getData('getVariances', mode_indices)
[docs] def getEigval(self, mode_index=0): """Returns eigenvalue of a given mode index with respect to the reference.""" return self._getModeData('getEigval', mode_index)
[docs] def getEigvals(self, mode_indices=None): """Returns a sdarray of eigenvalues across modesets.""" return self._getData('getEigvals', mode_indices)
[docs] def getIndex(self, mode_index=0): """Returns indices of modes matched to the reference modeset.""" return self._getModeData('getIndex', mode_index)
[docs] def getIndices(self, mode_indices=None): """Returns indices of modes in the mode ensemble.""" return self._getData('getIndices', mode_indices)
[docs] def getAtoms(self): """Returns associated atoms of the mode ensemble.""" return self._atoms
[docs] def setAtoms(self, atoms): """Sets the atoms of the mode ensemble.""" if atoms is not None and self._atoms is not None: if len(atoms) != self.numAtoms(): raise ValueError('atoms should have %d atoms'%self.numAtoms()) self._atoms = atoms
[docs] def getLabels(self): """Returns the labels of the mode ensemble.""" return self._labels
[docs] def setLabels(self, labels): """Returns the labels of the mode ensemble.""" if len(labels) != self.numModeSets(): raise ValueError('the number of labels and mode sets mismatch') self._labels = labels
[docs] def getMatchingStatus(self): """Returns the matching status of each mode ensemble.""" if isinstance(self._matched, bool): matched = [self._matched for _ in self] else: matched = self._matched return matched
[docs] def setMatchingStatus(self, status): """Returns the matching status of each mode ensemble.""" if not isinstance(status, bool): if len(status) != self.numModeSets(): raise ValueError('the number of status and mode sets mismatch') self._matched = status
[docs] def getReweightingStatus(self): """Returns the reweighting status of each mode ensemble.""" if isinstance(self._reweighted, bool): reweighted = [self._reweighted for _ in self] else: reweighted = self._reweighted return reweighted
[docs] def setReweightingStatus(self, status): """Returns the reweighting status of each mode ensemble.""" if not isinstance(status, bool): if len(status) != self.numModeSets(): raise ValueError('the number of status and mode sets mismatch') self._reweighted = status
[docs] def match(self, turbo=False, method=None): """Matches the modes across mode sets according the mode overlaps. :arg turbo: if **True** then the computation will be performed in parallel. The number of threads is set to be the same as the number of CPUs. Assigning a number to specify the number of threads to be used. Default is **False** :type turbo: bool, int """ if self._modesets: #LOGGER.debug('Matching {0} modes across {1} modesets...' # .format(self.numModes(), self.numModeSets())) start = time.time() matched = self.getMatchingStatus() if np.any(matched): matched[0] = False indices = [i for i in range(len(matched)) if not matched[i]] modesets = [self._modesets[i] for i in indices] modesets = matchModes(*modesets, turbo=turbo, method=method) for n, i in enumerate(indices): if n > 0: self._modesets[i] = modesets[n] n_modesets = len(modesets) else: # if all not matched, start from scratch self._modesets = matchModes(*self._modesets, turbo=turbo, method=method) n_modesets = len(self._modesets) LOGGER.debug('{0} modes across {1} modesets were matched in {2:.2f}s.' .format(self.numModes(), n_modesets, time.time()-start)) else: LOGGER.warn('Mode ensemble has no modesets') self._matched = True return
[docs] def reweight(self): """Reweight the modes based on matched orders""" self._reweighted = [] for i in range(self.numModeSets()): modes = self._modesets[i] model = modes._model I = modes.getIndices() # matched order for the subset of modes J = np.sort(I) # original order for the subset V = model.getEigvecs() W = model.getEigvals() W[I] = W[J] model.setEigens(V, W) self._reweighted.append(True)
[docs] def undoMatching(self): """Restores the original orders of modes""" n_modes = self.numModes() matched = self.getMatchingStatus() for i, enm in enumerate(self): if matched[i]: model = enm._model self._modesets[i] = model[:n_modes] self.setMatchingStatus(False)
[docs] def undoReweighting(self): """Restores the original weighting of modes""" reweighted = self.getReweightingStatus() for i, enm in enumerate(self): model = enm._model if reweighted[i]: vars = model.getVariances() I = np.argsort(vars)[::-1] V = model.getEigvecs() W = model.getEigvals() W = W[I] model.setEigens(V, W) self.setReweightingStatus(False)
[docs] def reorder(self): """Reorders the modes across mode sets according to their collectivity""" if not self.isMatched(): LOGGER.warn('Mode ensemble has not been all matched') else: vals = self.getEigvals() mean_val = vals.mean() order = np.argsort(mean_val) ret = [] for modeset in self._modesets: ret.append(ModeSet(modeset.getModel(), order)) self._modesets = ret
[docs] def addModeSet(self, modeset, weights=None, label=None, matched=False, reweighted=False): """Adds a modeset or modesets to the mode ensemble.""" if isinstance(modeset, (NMA, ModeSet, Mode)): modesets = [modeset] else: modesets = modeset matchingstatus = self.getMatchingStatus() if isinstance(matched, bool): matched = [matched for _ in range(len(modesets))] matchingstatus.extend(matched) self._matched = matchingstatus reweightingstatus = self.getReweightingStatus() if isinstance(reweighted, bool): reweighted = [reweighted for _ in range(len(modesets))] reweightingstatus.extend(reweighted) self._reweighted = reweightingstatus if self._labels is not None or label is not None: if label is None: labels = ['']*len(modesets) elif np.isscalar(label): labels = [label] else: labels = label if len(labels) != len(modesets): raise ValueError('labels should have the same length as modesets') if self._labels is None: self._labels = ['']*len(self._modesets) self._labels.extend(labels) for i in range(len(modesets)): modeset = modesets[i] if isinstance(modeset, NMA): modeset = modeset[:] elif isinstance(modeset, Mode): modeset = ModeSet(modeset.getModel(), [modeset.getIndex()]) if not isinstance(modeset, ModeSet): raise TypeError('modesets should be a list of Mode or ModeSet instances') if self._modesets: if modeset.numAtoms() != self.numAtoms(): raise ValueError('to be added, modesets should contain exactly %d atoms'%self.numAtoms()) if modeset.numModes() < self.numModes(): raise ValueError('to be added, modesets should contain at least %d modes'%self.numModes()) if modeset.numModes() > self.numModes(): modeset = modeset[:self.numModes()] self._modesets.append(modeset) else: self._modesets = [modeset] if weights is None: weights = np.ones((len(modesets), self.numAtoms(), 1)) if self._weights is not None: self._weights = np.concatenate((self._weights, weights), axis=0) else: self._weights = weights else: weights = checkWeights(weights, self.numAtoms(), len(modesets)) if self._weights is None: self._weights = np.ones((self.numModeSets()-len(modesets), self.numAtoms(), 1)) self._weights = np.concatenate((self._weights, weights), axis=0)
[docs] def delModeSet(self, index): """Removes a modeset or modesets from the mode ensemble.""" if np.isscalar(index): index = [index] else: index = list(index) n_modesets = self.numModeSets() for i in reversed(index): self._modesets.pop(i) if self._labels: self._labels.pop(i) if isinstance(self._matched, list): self._matched.pop(i) if isinstance(self._reweighted, list): self._reweighted.pop(i) if self._weights is not None: torf = np.ones(n_modesets, dtype=bool) torf[index] = False self._weights = self._weights[torf, :, :]
[docs] def isMatched(self): """Returns whether the modes are matched across ALL modesets in the mode ensemble""" return np.all(self._matched)
[docs] def isReweighted(self): """Returns whether the modes are matched across ALL modesets in the mode ensemble""" return np.all(self._reweighted)
[docs]class sdarray(ndarray): """ A class for representing a collection of arrays. It is derived from :class:`~numpy.ndarray`, and the first axis is reserved for indexing the collection. :class:`sdarray` functions exactly the same as :class:`~numpy.ndarray`, except that :meth:`sdarray.mean`, :meth:`sdarray.std`, :meth:`sdarray.max`, :meth:`sdarray.min` are overriden. Average, standard deviation, minimum, maximum, etc. are weighted and calculated over the first axis by default. "sdarray" stands for "signature dynamics array". Note for developers: please read the following article about subclassing :class:`~numpy.ndarray` before modifying this class: https://docs.scipy.org/doc/numpy-1.14.0/user/basics.subclassing.html """ __slots__ = ['_title', '_labels', '_is3d', '_weights', '_oneset'] def __new__(self, array, weights=None, labels=None, title=None, is3d=False, oneset=False): array = np.asarray(array) obj = array.view(self) shape = array.shape if title is None: if oneset: obj._title = '1 array of size {0}'.format(shape) else: obj._title = '{0} array{2} of size {1}'.format( shape[0], shape[1:], 's' if shape[0]>1 else '') else: obj._title = title if labels is not None and np.isscalar(labels): labels = [labels] n_modesets = 1 if oneset else len(array) if len(labels) != n_modesets: raise ValueError('the number of labels does not match the size of the array') obj._labels = labels obj._is3d = is3d if weights is not None: weights = np.asarray(weights) obj._weights = weights obj._oneset = oneset return obj # def __getitem__(self, index): # if isinstance(index, tuple): # index0 = index[0] # index1 = index[1:] # else: # index0 = index # index1 = () # arr = np.asarray(self)[index0] # w = self._weights # if w is not None: # w = w[index0] # if arr.ndim != self.ndim: # arr = np.expand_dims(arr, axis=0) # if w is not None: # w = np.expand_dims(w, axis=0) # new_index = [slice(None, None, None)] # new_index.extend(index1) # arr = arr[new_index] # if w is not None: # w = w[new_index] # labels = self._labels # if labels is not None: # labels = np.array(labels)[index0].tolist() # return sdarray(arr, weights=w, labels=labels, title=self._title, is3d=self.is3d) def __getitem__(self, index): if self._oneset: index0 = 0 else: if isinstance(index, tuple): index0 = index[0] else: index0 = index arr = np.asarray(self)[index] if np.ndim(arr) == 0: return arr oneset = np.isscalar(index0) w = self._weights if w is not None: w = w[index] labels = self._labels if labels is not None: labels = np.array(labels)[index0].tolist() return sdarray(arr, weights=w, labels=labels, title=self._title, is3d=self.is3d, oneset=oneset) def __array_finalize__(self, obj): if obj is None: return self._title = getattr(obj, '_title', None) self._weights = getattr(obj, '_weights', None) self._is3d = getattr(obj, '_is3d', None) self._labels = getattr(obj, '_labels', None) self._oneset = getattr(obj, '_oneset', None) def __str__(self): return self.getTitle()
[docs] def is3d(self): """Returns **True** is model is 3-dimensional.""" return self._is3d
def __repr__(self): arr_repr = ndarray.__repr__(self) weights = self._weights if weights is not None: weights_repr = repr(weights) arr_repr += '\nweights=\n{0}'.format(weights_repr) return arr_repr
[docs] def numAtoms(self): """Returns the number of atoms assuming it is represented by the second axis.""" try: if self._oneset: n_atoms = self.shape[0] else: n_atoms = self.shape[1] if self.is3d(): n_atoms /= 3 return int(n_atoms) except IndexError: LOGGER.warn('{0} is not related to the number of atoms'.format(self.getTitle())) return 0
[docs] def numModeSets(self): """Returns the number of modesets in the instance """ if self._oneset: return 1 return self.shape[0]
[docs] def getTitle(self): """Returns the title of the signature.""" return self._title
[docs] def getLabels(self): """Returns the labels of the signature.""" return self._labels
[docs] def mean(self, axis=0, **kwargs): """Calculates the weighted average of the sdarray over modesets (`axis=0`).""" arr = np.asarray(self) weights = self._weights return wmean(arr, weights, axis)
[docs] def std(self, axis=0, **kwargs): """Calculates the weighted standard deviations of the sdarray over modesets (`axis=0`).""" arr = np.asarray(self) mean = wmean(arr, self._weights, axis) if axis is not None: reps = list(arr.shape) axes = [] if isinstance(axis, tuple): axes.extend(axis) else: axes = [axis] for ax in axes: reps[ax] = 1 mean = np.reshape(mean, reps) variance = wmean((arr - mean)**2, self._weights, axis) return np.sqrt(variance)
[docs] def min(self, axis=0, **kwargs): """Calculates the minimum values of the sdarray over modesets (`axis=0`).""" # np.array instead asarray is used to make sure a copy of the original data is created arr = np.array(self) weights = self._weights if weights is not None: weights = weights.astype(bool) with warnings.catch_warnings(): warnings.simplefilter("ignore") rmin = np.nanmin(arr, axis=axis) return rmin
[docs] def max(self, axis=0, **kwargs): """Calculates the maximum values of the sdarray over modesets (`axis=0`).""" # np.array instead asarray is used to make sure a copy of the original data is created arr = np.array(self) weights = self._weights if weights is not None: weights = weights.astype(bool) arr[~weights] = np.nan with warnings.catch_warnings(): warnings.simplefilter("ignore") rmax = np.nanmax(arr, axis=axis) return rmax
[docs] def getWeights(self): """Returns the weights of the signature.""" return self._weights
[docs] def setWeights(self, weights): """Sets the weights of the signature.""" self._weights = weights
weights = property(getWeights, setWeights)
[docs] def getArray(self): """Returns the signature as an numpy array.""" return np.asarray(self)
def transpose(self, axes=None): a = np.asarray(self) return np.transpose(a, axes=axes)
[docs]def calcEnsembleENMs(ensemble, model='gnm', trim='reduce', n_modes=20, **kwargs): """Calculates normal modes for each member of *ensemble*. :arg ensemble: normal modes of whose members to be computed :type ensemble: :class:`.PDBEnsemble` :arg model: type of ENM that will be performed. It can be either 'anm' or 'gnm' :type model: str :arg trim: type of method that will be used to trim the model. It can be either 'trim' , 'slice', or 'reduce'. If set to 'trim', the parts that is not in the selection will simply be removed :type trim: str :arg n_modes: number of modes to be computed :type trim: int :arg turbo: if **True** then the computation will be performed in parallel. The number of threads is set to be the same as the number of CPUs. Assigning a number to specify the number of threads to be used. Default is **False** :type turbo: bool, int :arg match: whether the modes should be matched using :func:`.matchModes`. Default is **True** :type match: bool :arg method: the alternative function that is used to match the modes. Default is **None** :type method: function :arg turbo: whether use :class:`~multiprocessing.Pool` to accelerate the computation. Note that if writing a script, ``if __name__ == '__main__'`` is necessary to protect your code when multi-tasking. See https://docs.python.org/2/library/multiprocessing.html for details. Default is **False** :type turbo: bool :returns: :class:`.ModeEnsemble` """ match = kwargs.pop('match', True) method = kwargs.pop('method', None) turbo = kwargs.pop('turbo', False) if isinstance(ensemble, Conformation): conformation = ensemble ensemble = conformation.getEnsemble() index = conformation.getIndex() ensemble = ensemble[index:index+1] if model is GNM: model_type = 'GNM' elif model is ANM: model_type = 'ANM' else: model_type = str(model).strip().upper() start = time.time() select = ensemble.getIndices() labels = ensemble.getLabels() n_atoms = ensemble.numAtoms(selected=False) if select is None: torf_selected = np.ones(n_atoms, dtype=bool) else: torf_selected = np.zeros(n_atoms, dtype=bool) torf_selected[select] = True ### ENMs ### ## ENM for every conf enms = [] n_confs = ensemble.numConfs() str_modes = 'all' if n_modes is None else str(n_modes) LOGGER.progress('Calculating {0} {1} modes for {2} conformations...' .format(str_modes, model_type, n_confs), n_confs, '_prody_calcEnsembleENMs') coordsets = ensemble.getCoordsets(selected=False) weights = ensemble.getWeights(selected=False) for i in range(n_confs): LOGGER.update(i, label='_prody_calcEnsembleENMs') coords = coordsets[i] if weights.ndim == 3: weight = weights[i].flatten() else: weight = weights.flatten() torf_mapped = weight != 0 coords = coords[torf_mapped, :] system = torf_selected[torf_mapped] mask = torf_mapped[torf_selected] enm, _ = calcENM(coords, system, model=model, mask=mask, trim=trim, n_modes=n_modes, title=labels[i], **kwargs) enm.masked = False enms.append(enm) #lbl = labels[i] if labels[i] != '' else '%d-th conformation'%(i+1) LOGGER.finish() min_n_modes = ensemble.numAtoms() * 3 for enm in enms: n_modes = enm.numModes() if n_modes < min_n_modes: min_n_modes = n_modes for i in range(len(enms)): n_modes = enms[i].numModes() if n_modes > min_n_modes: enms[i] = enms[i][:min_n_modes] LOGGER.warn('last {0} modes for {1} has been discarded because at least one ' 'conformation has only {2} modes'.format(n_modes-min_n_modes, enms[i].getTitle(), min_n_modes)) if n_modes > 1: LOGGER.info('{0} {1} modes were calculated for each of the {2} conformations in {3:.2f}s.' .format(str_modes, model_type, n_confs, time.time()-start)) else: LOGGER.info('{0} {1} mode was calculated for each of the {2} conformations in {3:.2f}s.' .format(str_modes, model_type, n_confs, time.time()-start)) modeens = ModeEnsemble(title=ensemble.getTitle()) modeens.addModeSet(enms, weights=ensemble.getWeights(), label=ensemble.getLabels()) modeens.setAtoms(ensemble.getAtoms()) if match: modeens.match(turbo=turbo, method=method) return modeens
def _getEnsembleENMs(ensemble, **kwargs): if isinstance(ensemble, (Ensemble, Conformation)): enms = calcEnsembleENMs(ensemble, **kwargs) elif isinstance(ensemble, ModeEnsemble): enms = ensemble else: try: enms = ModeEnsemble() enms.addModeSet(ensemble) except TypeError: raise TypeError('ensemble must be an Ensemble or a ModeEnsemble instance,' 'or a list of NMA, Mode, or ModeSet instances.') return enms
[docs]def calcEnsembleSpectralOverlaps(ensemble, distance=False, turbo=False, **kwargs): """Calculate the spectral overlaps between each pair of conformations in the *ensemble*. :arg ensemble: an ensemble of structures or ENMs :type ensemble: :class: `Ensemble`, :class: `ModeEnsemble` :arg distance: if set to **True**, spectral overlap will be converted to spectral distance via arccos. :type distance: bool :arg turbo: if **True**, extra memory will be used to remember previous calculation results to accelerate the next calculation, so this option is particularly useful if spectral overlaps of the same ensemble are calculated repeatedly, e.g. using different number of modes. Note that for single calculation, *turbo* will compromise the speed. Default is **False** :type turbo: bool """ enms = _getEnsembleENMs(ensemble, **kwargs) overlaps = np.ones((len(enms), len(enms))) for i in range(enms.numModeSets()): for j in range(i+1, enms.numModeSets()): covlap = calcSpectralOverlap(enms[i, :], enms[j, :], turbo=turbo) overlaps[i, j] = overlaps[j, i] = covlap if distance: overlaps = np.arccos(overlaps) return overlaps
[docs]def calcSignatureSqFlucts(mode_ensemble, **kwargs): """ Get the signature square fluctuations of *mode_ensemble*. :arg mode_ensemble: an ensemble of ENMs :type mode_ensemble: :class: `ModeEnsemble` :keyword norm: whether to normalize the square fluctuations. Default is **True** :type norm: bool :keyword scale: whether to rescale the square fluctuations based on the reference. Default is **False** :type scale: bool """ if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') ifnorm = kwargs.pop('norm', True) ifscale = kwargs.pop('scale', False) norm = importLA().norm V = [] for i, modes in enumerate(mode_ensemble): sqfs = calcSqFlucts(modes) if ifnorm: sqfs = div0(sqfs, norm(sqfs)) elif ifscale: if i == 0: norm0 = norm(sqfs) else: sqfs = div0(sqfs, norm(sqfs) * norm0) V.append(sqfs) V = np.vstack(V) title_str = '%d modes'%mode_ensemble.numModes() weights = mode_ensemble.getWeights() if weights is not None: weights = weights[:, :, 0] labels = mode_ensemble.getLabels() # even the original model is 3d, sqfs are still 1d sig = sdarray(V, title=title_str, weights=weights, labels=labels, is3d=False) return sig
[docs]def showSignatureAtomicLines(y, std=None, min=None, max=None, atoms=None, **kwargs): """ Show the signature dynamics data using :func:`showAtomicLines`. :arg y: the mean values of signature dynamics to be plotted :type y: :class:`~numpy.ndarray` :arg std: the standard deviations of signature dynamics to be plotted :type std: :class:`~numpy.ndarray` :arg min: the minimum values of signature dynamics to be plotted :type min: :class:`~numpy.ndarray` :arg max: the maximum values of signature dynamics to be plotted :type max: :class:`~numpy.ndarray` :arg linespec: line specifications that will be passed to :func:`showAtomicLines` :type linespec: str :arg atoms: an object with method :meth:`getResnums` for use on the x-axis. :type atoms: :class:`Atomic` """ from matplotlib.pyplot import figure, plot, fill_between, \ gca, xlabel, ylabel, title, ylim linespec = kwargs.pop('linespec', '-') zero_line = kwargs.pop('zero_line', False) lines, polys, bars, texts = showAtomicLines(y, atoms=atoms, dy=std, lower=max, upper=min, linespec=linespec, show_zero=zero_line, **kwargs) return lines, polys, bars, texts
[docs]def showSignature1D(signature, linespec='-', **kwargs): """ Show *signature* using :func:`showAtomicLines`. :arg signature: the signature dynamics to be plotted :type signature: :class:`sdarray` :arg linespec: line specifications that will be passed to :func:`showAtomicLines` :type linespec: str :arg atoms: an object with method :func:`getResnums` for use on the x-axis. :type atoms: :class:`Atomic` :arg alpha: the transparency of the band(s). :type alpha: float :arg range: whether shows the minimum and maximum values. Default is **True** :type range: bool """ from matplotlib.pyplot import figure, plot, fill_between, \ gca, xlabel, ylabel, title, ylim V = signature meanV, stdV, minV, maxV = V.mean(), V.std(), V.min(), V.max() atoms = kwargs.pop('atoms', None) zero_line = kwargs.pop('show_zero', False) zero_line = kwargs.pop('zero', zero_line) show_range = kwargs.pop('range', True) bars = []; polys = []; lines = []; texts = [] if V.is3d(): meanV = np.reshape(meanV, (V.numAtoms(), 3)).T stdV = np.reshape(stdV, (V.numAtoms(), 3)).T minV = np.reshape(minV, (V.numAtoms(), 3)).T maxV = np.reshape(maxV, (V.numAtoms(), 3)).T atoms_ = None; zero_line_ = False for i in range(3): if i == 2: atoms_ = atoms zero_line_ = zero_line if not show_range: minV[i] = maxV[i] = None _lines, _polys, _bars, _texts = showSignatureAtomicLines(meanV[i], stdV[i], minV[i], maxV[i], atoms=atoms_, zero_line=zero_line_, linespec=linespec, **kwargs) lines.extend(_lines) bars.extend(_bars) polys.extend(_polys) texts.extend(_texts) else: if not show_range: minV = maxV = None _lines, _polys, _bars, _texts = showSignatureAtomicLines(meanV, stdV, minV, maxV, atoms=atoms, zero_line=zero_line, linespec=linespec, **kwargs) lines.extend(_lines) bars.extend(_bars) polys.extend(_polys) texts.extend(_texts) xlabel('Residues') title('Signature profile of ' + V.getTitle()) return lines, polys, bars, texts
psplot = showSignature1D
[docs]def showSignatureMode(mode_ensemble, **kwargs): """Show signature mode profile. :arg mode_ensemble: mode ensemble from which to extract an eigenvector If this is not indexed already then index 0 is used by default :type mode_ensemble: :class:`ModeEnsemble` :arg atoms: atoms for showing residues along the x-axis Default option is to use mode_ensemble.getAtoms() :type atoms: :class:`Atomic` :arg scale: scaling factor. Default is 1.0 :type scale: float """ if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') atoms = kwargs.pop('atoms', mode_ensemble.getAtoms()) scale = kwargs.pop('scale', 1.0) mode = mode_ensemble.getEigvec() * scale show_zero = kwargs.pop('show_zero', True) return showSignature1D(mode, atoms=atoms, show_zero=show_zero, **kwargs)
[docs]def showSignatureSqFlucts(mode_ensemble, **kwargs): """Show signature profile of square fluctations. :arg mode_ensemble: mode ensemble from which to calculate square fluctutations :type mode_ensemble: :class:`ModeEnsemble` :arg atoms: atoms for showing residues along the x-axis Default option is to use mode_ensemble.getAtoms() :type atoms: :class:`Atomic` :arg scale: scaling factor. Default is 1.0 :type scale: float :arg show_zero: where to show a grey line at y=0 Default is False :type show_zero: bool """ if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') atoms = kwargs.pop('atoms', mode_ensemble.getAtoms()) scale = kwargs.pop('scale', 1.0) sqf = calcSignatureSqFlucts(mode_ensemble) * scale show_zero = kwargs.pop('show_zero', False) return showSignature1D(sqf, atoms=atoms, show_zero=show_zero, **kwargs)
[docs]def calcSignatureCrossCorr(mode_ensemble, norm=True): """Calculate the signature cross-correlations based on a :class:`ModeEnsemble` instance. :arg mode_ensemble: an ensemble of ENMs :type mode_ensemble: :class: `ModeEnsemble` :keyword norm: whether to normalize the cross-correlations. Default is **True** :type norm: bool """ if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') n_atoms = mode_ensemble.numAtoms() n_sets = len(mode_ensemble) C = np.zeros((n_sets, n_atoms, n_atoms)) for i in range(n_sets): modes = mode_ensemble[i] c = calcCrossCorr(modes, norm=norm) C[i, :, :] = c title_str = '%d modes'%mode_ensemble.numModes() weights = mode_ensemble.getWeights() if weights is not None: W = np.zeros((mode_ensemble.numModeSets(), mode_ensemble.numAtoms(), mode_ensemble.numAtoms())) for i, w in enumerate(weights): w2 = np.outer(w, w) W[i, :, :] = w2 labels = mode_ensemble.getLabels() # even the original model is 3d, cross-correlations are still 1d sig = sdarray(C, title=title_str, weights=W, labels=labels, is3d=False) return sig
[docs]def calcSignaturePerturbResponse(mode_ensemble, **kwargs): """Calculate the signature perturbation response scanning based on a :class:`ModeEnsemble` instance. :arg mode_ensemble: an ensemble of ENMs :type mode_ensemble: :class: `ModeEnsemble` """ if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') n_atoms = mode_ensemble.numAtoms() n_sets = len(mode_ensemble) P = np.zeros((n_sets, n_atoms, n_atoms)) E = np.zeros((n_sets, n_atoms)) S = np.zeros((n_sets, n_atoms)) for i in range(n_sets): modes = mode_ensemble[i] prs_mat, eff, sen = calcPerturbResponse(modes, **kwargs) P[i, :, :] = prs_mat E[i, :] = eff S[i, :] = sen title_str = '%d modes'%mode_ensemble.numModes() weights = mode_ensemble.getWeights() if weights is not None: W2 = np.zeros((mode_ensemble.numModeSets(), mode_ensemble.numAtoms(), mode_ensemble.numAtoms())) for i, w in enumerate(weights): w2 = np.outer(w, w) W2[i, :, :] = w2 W = weights[:, :, 0] labels = mode_ensemble.getLabels() # even the original model is 3d, cross-correlations are still 1d sig_prs_mat = sdarray(P, title=title_str, weights=W2, labels=labels, is3d=False) sig_eff = sdarray(E, title=title_str, weights=W, labels=labels, is3d=False) sig_sen = sdarray(S, title=title_str, weights=W, labels=labels, is3d=False) return sig_prs_mat, sig_eff, sig_sen
[docs]def calcSignatureCollectivity(mode_ensemble, masses=None): """Calculate average collectivities for a ModeEnsemble.""" if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') n_modes = mode_ensemble.numModes() n_sets = len(mode_ensemble) C = np.zeros((n_sets, n_modes)) for i in range(n_sets): m = mode_ensemble[i] c = calcCollectivity(m, masses=masses) C[i, :] = c title_str = 'collectivities of %d modes'%mode_ensemble.numModes() labels = mode_ensemble.getLabels() # even the original model is 3d, cross-correlations are still 1d sig = sdarray(C, title=title_str, weights=None, labels=labels, is3d=False) return sig
[docs]def calcSignatureOverlaps(mode_ensemble, diag=True): """Calculate average mode-mode overlaps for a ModeEnsemble.""" if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') n_sets = mode_ensemble.numModeSets() n_modes = mode_ensemble.numModes() if diag: overlaps = np.zeros((n_modes, n_sets, n_sets)) else: overlaps = np.zeros((n_modes, n_modes, n_sets, n_sets)) for i, modeset_i in enumerate(mode_ensemble): for j, modeset_j in enumerate(mode_ensemble): if j >= i: if diag: overlaps[:,i,j] = overlaps[:,j,i] = abs(calcOverlap(modeset_i, modeset_j, diag=True)) else: overlaps[:,:,i,j] = overlaps[:,:,j,i] = abs(calcOverlap(modeset_i, modeset_j, diag=False)) return overlaps
[docs]def calcSignatureModes(mode_ensemble): """Calculate mean eigenvalues and eigenvectors and return a new GNM or ANM object containing them.""" if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be a ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') eigvecs = mode_ensemble.getEigvecs().mean() eigvals = mode_ensemble.getEigvals().mean() if isinstance(mode_ensemble[0].getModel(), GNM): ret = GNM('mean of ' + mode_ensemble.getTitle()) elif isinstance(mode_ensemble[0].getModel(), ANM): ret = ANM('mean of ' + mode_ensemble.getTitle()) ret.setEigens(eigvecs, eigvals) return ret
[docs]def showSignatureOverlaps(mode_ensemble, **kwargs): """Show a curve of mode-mode overlaps against mode number with shades for standard deviation and range :arg diag: Whether to calculate the diagonal values only. Default is **False** and :func:`showMatrix` is used. If set to **True**, :func:`showSignatureAtomicLines` is used. :type diag: bool :arg std: Whether to show the standard deviation matrix when **diag** is **False** (and whole matrix is shown). Default is **False**, meaning the mean matrix is shown. """ diag = kwargs.get('diag', False) std = kwargs.get('std', False) from matplotlib.pyplot import xlabel, ylabel if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') overlaps = calcSignatureOverlaps(mode_ensemble, diag=diag) if diag: r, c = np.triu_indices(overlaps.shape[1], k=1) overlap_triu = overlaps[:, r, c] meanV = overlap_triu.mean(axis=1) stdV = overlap_triu.std(axis=1) show = showSignatureAtomicLines(meanV, stdV) xlabel('Mode index') ylabel('Overlap') else: r, c = np.triu_indices(overlaps.shape[2], k=1) overlap_triu = overlaps[:, :, r, c] if std: stdV = overlap_triu.std(axis=-1) show = showMatrix(stdV) else: meanV = overlap_triu.mean(axis=-1) show = showMatrix(meanV) return show
[docs]def calcSignatureFractVariance(mode_ensemble): """Calculate signature fractional variance for a ModeEnsemble.""" if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('mode_ensemble should be an instance of ModeEnsemble') if not mode_ensemble.isMatched(): LOGGER.warn('modes in mode_ensemble did not match cross modesets. ' 'Consider running mode_ensemble.match() prior to using this function') n_sets = mode_ensemble.numModeSets() n_modes = mode_ensemble.numModes() W = []; is3d = None for i in range(n_sets): m = mode_ensemble[i] if n_modes == 1: var = np.array([calcFractVariance(m)]) else: var = calcFractVariance(m) W.append(var) if is3d is None: is3d = m.is3d() title_str = '%d modes'%mode_ensemble.numModes() labels = mode_ensemble.getLabels() sig = sdarray(W, title=title_str, weights=None, labels=labels, is3d=is3d) return sig
[docs]def showSignatureCrossCorr(mode_ensemble, std=False, **kwargs): """Show average cross-correlations using :func:`showAtomicMatrix`. By default, *origin=lower* and *interpolation=bilinear* keyword arguments are passed to this function, but user can overwrite these parameters. See also :func:`.calcSignatureCrossCorr`. :arg ensemble: an ensemble of structures or ENMs, or a signature profile :type ensemble: :class: `Ensemble`, list, :class:`sdarray` :arg atoms: an object with method :func:`getResnums` for use on the x-axis. :type atoms: :class:`Atomic` """ import matplotlib.pyplot as plt norm = kwargs.pop('norm', True) C = calcSignatureCrossCorr(mode_ensemble, norm=norm) atoms = kwargs.pop('atoms', None) if atoms is None: try: atoms = mode_ensemble.getAtoms() except: pass if std: matrixData = C.std() else: matrixData = C.mean() if not 'interpolation' in kwargs: kwargs['interpolation'] = 'bilinear' show = showAtomicMatrix(matrixData, atoms=atoms, **kwargs) indices = mode_ensemble.getIndices()[0] if len(indices) == 1: title_str = ', mode '+str(indices[0]+1) else: modeIndexStr = ','.join([str(x+1) for x in indices]) if len(modeIndexStr) > 8: title_str = ', '+str(len(indices))+' modes '+modeIndexStr[:5]+'...' else: title_str = ', modes '+modeIndexStr # title_str = ', '+str(len(modeIndex))+' modes' if std: plt.title('Cross-correlations (standard deviation)'+title_str) else: plt.title('Cross-correlations (average)'+title_str) plt.xlabel('Residues') plt.ylabel('Residues') return show
[docs]def showSignatureDistribution(signature, **kwargs): """ Show the distribution of signature values using :func:`~matplotlib.pyplot.hist`. """ from matplotlib.pyplot import figure, hist, annotate, legend, xlabel, ylabel from matplotlib.figure import Figure fig = kwargs.pop('figure', None) if isinstance(fig, Figure): fig_num = fig.number elif fig is None or isinstance(fig, (int, str)): fig_num = fig else: raise TypeError('figure can be either an instance of matplotlib.figure.Figure ' 'or a figure number.') if SETTINGS['auto_show']: figure(fig_num) elif fig_num is not None: figure(fig_num) W = signature.getArray()[:, ::-1] # reversed to accommodate with matplotlib.pyplot.hist weights = np.ones_like(W)/float(len(W)) show_legend = kwargs.pop('legend', True) bins = kwargs.pop('bins', 'auto') if bins == 'auto': _, bins = np.histogram(W.flatten(), bins='auto') elif np.isscalar(bins) and isinstance(bins, (int, np.integer)): step = (W.max() - W.min())/bins bins = np.arange(W.min(), W.max(), step) histtype = kwargs.pop('histtype', 'stepfilled') label = kwargs.pop('label', None) labels = kwargs.pop('labels', label) weights = kwargs.pop('weights', weights) n, bins, patches = hist(W, bins=bins, weights=weights, histtype=histtype, label=labels, **kwargs) colors = [] for patch_i in patches: if isinstance(patch_i, list): colors.append(patch_i[0].get_facecolor()) else: colors.append(patch_i.get_facecolor()) for i, patch_i in enumerate(patches): if isinstance(patch_i, list): for j, patch_j in enumerate(patch_i): patch_j.set_color(colors[-(i+1)]) if show_legend: legend() xlabel('Signature value') ylabel('Probability') if SETTINGS['auto_show']: showFigure() return n, bins, patches
[docs]def showSignatureVariances(mode_ensemble, **kwargs): """ Show the distribution of signature variances using :func:`showSignatureDistribution`. """ from matplotlib.pyplot import xlabel fract = kwargs.pop('fraction', True) if fract: sig = calcSignatureFractVariance(mode_ensemble) else: sig = mode_ensemble.getVariances() indices = np.asarray(mode_ensemble.getIndices())[0] labels = ['mode %d'%(i+1) for i in indices][::-1] show = showSignatureDistribution(sig, label=labels, **kwargs) xlabel('Variance') return show
[docs]def showSignatureCollectivity(mode_ensemble, **kwargs): """ Show the distribution of signature variances using :func:`showSignatureDistribution`. """ from matplotlib.pyplot import xlabel sig = calcSignatureCollectivity(mode_ensemble) indices = np.asarray(mode_ensemble.getIndices())[0] labels = ['mode %d'%(i+1) for i in indices][::-1] show = showSignatureDistribution(sig, label=labels, **kwargs) xlabel('Collectivity') return show
[docs]def showVarianceBar(mode_ensemble, highlights=None, **kwargs): """Show the distribution of variances (cumulative if multiple modes) using :func:`~numpy.histogram`. :arg mode_ensemble: an ensemble of modes whose variances are displayed :type mode_ensemble: :class:`.ModeEnsemble` :arg highlights: labels of conformations whose locations on the bar will be highlighted by arrows and texts :type highlights: list :arg fraction: whether the variances should be weighted or not. Default is **True** :type fraction: bool """ from matplotlib.pyplot import figure, gca, annotate, subplots_adjust from matplotlib.pyplot import fill_between, xlabel, yticks, xlim from matplotlib.figure import Figure from matplotlib.colors import Normalize, NoNorm from matplotlib import cm, colors fig = kwargs.pop('figure', None) fract = kwargs.pop('fraction', True) bins = kwargs.pop('bins', 50) cmap = kwargs.pop('cmap', 'Reds') if isinstance(fig, Figure): fig_num = fig.number elif fig is None or isinstance(fig, (int, str)): fig_num = fig else: raise TypeError('figure can be either an instance of matplotlib.figure.Figure ' 'or a figure number.') if SETTINGS['auto_show']: if fig_num is None: figure(figsize=(6, 2)) else: figure(fig_num) elif fig_num is not None: figure(fig_num) ax = gca() # adjust layouts box = ax.get_position() _, _, _, height = box.bounds ratio = 2.5 box.y1 = box.y0 + height/ratio #box.y0 += height/7. ax.set_position(box) #defarrow = {'width':1, 'headwidth':2, # 'facecolor':'black', # 'headlength': 4} defarrow = {'arrowstyle': '->'} arrowprops = kwargs.pop('arrowprops', defarrow) if fract: sig = calcSignatureFractVariance(mode_ensemble) else: sig = mode_ensemble.getVariances() variances = sig.getArray().sum(axis=1) hist, edges = np.histogram(variances, bins=bins) color_norm = colors.Normalize(vmin=hist.min(), vmax=hist.max()) scalar_map = cm.ScalarMappable(norm=color_norm, cmap=cmap) colors = scalar_map.to_rgba(hist) areas = [] for i in range(len(hist)): x = [edges[i], edges[i+1]] area = fill_between(x, [0, 0], [1, 1], color=colors[i]) areas.append(area) if not highlights: highlights = [] indices = []; labels = [] ens_labels = mode_ensemble.getLabels() for hl in highlights: if isinstance(hl, str): if not ens_labels: raise TypeError('highlights should be a list of integers because ' 'mode_ensemble has no label') index = ens_labels.index(hl) if isinstance(highlights, dict): label = highlights[hl] else: label = hl else: try: index = int(hl) except: raise TypeError('highlights should be a list of integers or strings') if isinstance(highlights, dict): label = highlights[hl] else: label = ens_labels[index] if ens_labels else str(index) indices.append(index) labels.append(label) annotations = [] for i, label in zip(indices, labels): x = variances[i] an = annotate(label, xy=(x, 1), xytext=(x, ratio), arrowprops=arrowprops) annotations.append(an) # for i in range(len(variances)): # x = variances[i] # plot([x, x], [0, 1], 'w') xlabel('Variances') yticks([]) xlim([variances.min(), variances.max()]) if SETTINGS['auto_show']: showFigure() return areas, annotations
[docs]def saveModeEnsemble(mode_ensemble, filename=None, atoms=False, **kwargs): """Save *mode_ensemble* as :file:`filename.modeens.npz`. If *filename* is **None**, title of the *mode_ensemble* will be used as the filename, after ``" "`` (white spaces) in the title are replaced with ``"_"`` (underscores). Upon successful completion of saving, filename is returned. This function makes use of :func:`~numpy.savez_compressed` function.""" if not isinstance(mode_ensemble, ModeEnsemble): raise TypeError('invalid type for mode_ensemble, {0}' .format(type(mode_ensemble))) if len(mode_ensemble) == 0: raise ValueError('mode_ensemble instance does not contain data') attr_list = ['_modesets', '_title', '_labels', '_weights', '_matched', '_reweighted'] attr_dict = {} if atoms: attr_list.append('_atoms') for attr in attr_list: value = getattr(mode_ensemble, attr) if value is not None: if attr == '_atoms': value = [value, None] if attr == '_modesets': value = list(value) value.append(None) attr_dict[attr] = value if filename is None: filename = mode_ensemble.getTitle().replace(' ', '_') suffix = '.modeens' if not filename.lower().endswith('.npz'): if not filename.lower().endswith(suffix): filename += suffix + '.npz' else: filename += '.npz' ostream = openFile(filename, 'wb', **kwargs) np.savez_compressed(ostream, **attr_dict) ostream.close() return filename
[docs]def loadModeEnsemble(filename, **kwargs): """Returns ModeEnsemble instance after loading it from file (*filename*). This function makes use of :func:`numpy.load` function. See also :func:`saveModeEnsemble`.""" if not 'encoding' in kwargs: kwargs['encoding'] = 'latin1' if not 'allow_pickle' in kwargs: kwargs['allow_pickle'] = True data = np.load(filename, **kwargs) weights = getValue(data, '_weights', None) labels = getValue(data, '_labels', None) matched = getValue(data, '_matched', False) title = getValue(data, '_title', None) modesets = getValue(data, '_modesets', []) atoms = getValue(data, '_atoms', [None])[0] reweighted = getValue(data, '_reweighted', False) if isinstance(title, np.ndarray): title = np.asarray(title, dtype=str) title = str(title) if isinstance(modesets, np.ndarray): modesets = modesets.tolist() while (None in modesets): modesets.remove(None) if labels is not None: char = labels.dtype.char if char in 'SU' and char != DTYPE: labels = labels.astype(str) labels = labels.tolist() if isinstance(matched, np.ndarray): matched = matched.tolist() if isinstance(reweighted, np.ndarray): reweighted = reweighted.tolist() modeens = ModeEnsemble(title=title) modeens._weights = weights modeens._labels = labels modeens._matched = matched modeens._reweighted = reweighted modeens._modesets = modesets if atoms is not None: if isinstance(atoms, AtomGroup): data = atoms._data else: data = atoms._ag._data for key in data: arr = data[key] char = arr.dtype.char if char in 'SU' and char != DTYPE: arr = arr.astype(str) data[key] = arr modeens._atoms = atoms return modeens
[docs]def saveSignature(signature, filename=None, **kwargs): """Save *signature* as :file:`filename.sdarray.npz`. If *filename* is **None**, title of the *signature* will be used as the filename, after ``" "`` (white spaces) in the title are replaced with ``"_"`` (underscores). Upon successful completion of saving, filename is returned. This function makes use of :func:`~numpy.savez_compressed` function.""" if not isinstance(signature, sdarray): raise TypeError('invalid type for signature, {0}' .format(type(signature))) attr_list = ['_title', '_labels', '_is3d', '_weights', '_oneset', '_array'] attr_dict = {} for attr in attr_list: if attr == '_array': value = np.asarray(signature) else: value = getattr(signature, attr) if value is not None: attr_dict[attr] = value if filename is None: filename = signature.getTitle().replace(' ', '_') suffix = '.sdarray' if not filename.lower().endswith('.npz'): if not filename.lower().endswith(suffix): filename += suffix + '.npz' else: filename += '.npz' ostream = openFile(filename, 'wb', **kwargs) np.savez_compressed(ostream, **attr_dict) ostream.close() return filename
[docs]def loadSignature(filename, **kwargs): """Returns :class:`sdarray` instance after loading it from file (*filename*). This function makes use of :func:`numpy.load` function. See also :func:`saveSignature`.""" if not 'encoding' in kwargs: kwargs['encoding'] = 'latin1' data = np.load(filename, **kwargs) weights = getValue(data, '_weights', None) labels = getValue(data, '_labels', None) title = getValue(data, '_title', None) is3d = getValue(data, '_is3d', False) oneset = getValue(data, '_oneset', False) array = getValue(data, '_array', None) if isinstance(title, np.ndarray): title = np.asarray(title, dtype=str) title = str(title) if isinstance(is3d, np.ndarray): is3d = bool(is3d) if isinstance(oneset, np.ndarray): oneset = bool(oneset) if labels is not None: labels = labels.tolist() signature = sdarray(array, weights=weights, labels=labels, title=title, is3d=is3d, oneset=oneset) return signature
[docs]def calcSubfamilySpectralOverlaps(mode_ens, subfamily_dict, **kwargs): """Calculate average spectral overlaps (or distances) within and between subfamilies in a mode ensemble defined using a dictionary where each key is an ensemble member and the associate value is a subfamily name. To use a range of modes, please index the mode ensemble e.g. mode_ens=mode_ensemble[:,3:20] to use modes 4 to 20 inclusive. Alternatively, there is the option to provide first and last keyword arguments, which would be used as the 3 and 20 above. :arg mode_ensemble: an ensemble of modes corresponding to a set of modes for each family member :type mode_ensemble: :class:`.ModeEnsemble` :arg subfamily_dict: a dictionary providing a subfamily label for each family member :type subfamily_dict: dict :keyword first: the first index for a range of modes :type first: int :keyword last: the last index for a range of modes :type last: int :keyword remove_small: whether to remove small subfamilies with fewer than 4 members. Default is True :type remove_small: bool :keyword return_reordered_subfamilies: whether to return the reordered subfamilies in addition to the matrix. Default is False type return_reordered_subfamilies: bool """ if not isinstance(mode_ens, ModeEnsemble): raise TypeError('mode_ens should be a mode ensemble') if not isinstance(subfamily_dict, dict): raise TypeError('subfamily_dict should be a dictionary') if any([label not in list(subfamily_dict.keys()) for label in mode_ens.getLabels()]): raise ValueError('The are member labels in mode_ens with no associated entry in subfamily_dict') first = kwargs.get('first', 0) if first is not None: if not isinstance(first,int): raise TypeError('first should be an integer') last = kwargs.get('last', -1) if last is not None: if not isinstance(last,int): raise TypeError('last should be an integer') try: mode_ens = mode_ens[:,first:last] except: try: mode_ens = mode_ens[:,first:] except: raise ValueError('first is not a valid index for indexing mode_ens') try: mode_ens = mode_ens[:,:last] except: raise ValueError('last is not a valid index for indexing mode_ens') first_mode_index = mode_ens.getIndices()[0,0] last_mode_index = mode_ens.getIndices()[0,-1] LOGGER.info('The mode range used for this analysis is {0} to {1}' .format(first_mode_index+1,last_mode_index+1)) tree_labels = mode_ens.getLabels() distance = kwargs.get('distance',True) distm = calcEnsembleSpectralOverlaps(mode_ens, distance=distance) subfamilies = np.unique(list(subfamily_dict.values())) reverse_dict = dict() for i in subfamilies: reverse_dict[i] = [] for i in range(len(tree_labels)): subfamily_i = subfamily_dict[tree_labels[i]] if subfamily_i in subfamilies: reverse_dict[subfamily_i].append(i) remove_small = kwargs.get('remove_small',True) if remove_small: temp_dict = dict() for key, value in reverse_dict.items(): if len(value) >= 4: temp_dict[key] = value reverse_dict = temp_dict N_group = len(reverse_dict) subfamily_overlap_matrix = [] for subfamily_i in subfamilies: index_i = reverse_dict[subfamily_i] for subfamily_j in subfamilies: index_j = reverse_dict[subfamily_j] temp_sub_matrix = distm[np.ix_(index_i, index_j)] if subfamily_i == subfamily_j: subfamily_overlap_matrix.append(temp_sub_matrix[np.triu_indices( np.shape(temp_sub_matrix)[0])].mean()) else: subfamily_overlap_matrix.append(temp_sub_matrix.mean()) subfamily_overlap_matrix = np.asarray(subfamily_overlap_matrix) subfamily_overlap_matrix = subfamily_overlap_matrix.reshape([N_group, N_group]) return_reordered_subfamilies = kwargs.get('return_reordered_subfamilies',False) if return_reordered_subfamilies: return subfamily_overlap_matrix, subfamilies return subfamily_overlap_matrix
[docs]def showSubfamilySpectralOverlaps(mode_ens, subfamily_dict, **kwargs): """Calculate and show the matrix of spectral overlaps or distances averaged over subfamilies. Inputs are the same as calcSubfamilySpectralOverlaps plus the following and those of showDomainBar if you wish. :keyword show_subfamily_bar: whether to show the subfamilies as colored bars using showDomainBar. Default is False :type show_subfamily_bar: bool """ kwargs['return_reordered_subfamilies'] = True subfamily_overlap_matrix, subfamilies = calcSubfamilySpectralOverlaps(mode_ens, subfamily_dict, **kwargs) show = showMatrix(subfamily_overlap_matrix, origin='lower', xticklabels=subfamilies, yticklabels=subfamilies, allticks=True, vmin=0., vmax=1.6) show_subfamily_bar = kwargs.get('show_subfamily_bar',False) text = kwargs.pop('text',False) if show_subfamily_bar: showDomainBar(subfamilies, axis='x', text=text, **kwargs) showDomainBar(subfamilies, axis='y', text=text, **kwargs) if SETTINGS['auto_show']: showFigure() return show