Source code for prody.chromatin.functions

import numpy as np

from prody.dynamics import NMA, MaskedGNM
from prody.dynamics.mode import Mode
from prody.dynamics.modeset import ModeSet

from prody.utilities import importLA, copy, showFigure, div0
from prody import LOGGER, SETTINGS

__all__ = ['showDomains', 'showEmbedding', 'getDomainList']

## normalization methods ##

[docs]def showDomains(domains, linespec='-', **kwargs): """A convenient function that can be used to visualize Hi-C structural domains. *kwargs* will be passed to :func:`matplotlib.pyplot.plot`. :arg domains: a 2D array of Hi-C domains, such as [[start1, end1], [start2, end2], ...]. :type domains: :class:`numpy.ndarray` """ fill_ends = kwargs.pop('fill_ends', 'close') domains = np.array(domains) shape = domains.shape if len(shape) == 1: # convert to domain list if labels are provided indicators = np.diff(domains) length = len(domains) if fill_ends in ['open', 'close']: indicators = np.append(1., indicators) indicators[-1] = 1 elif fill_ends == 'skip': indicators = np.append(0., indicators) else: raise ValueError('invalid fill_ends mode: %s'%str(fill_ends)) sites = np.where(indicators != 0)[0] starts = sites[:-1] ends = sites[1:] domains = np.array([starts, ends]).T consecutive = True elif len(shape) == 2: if domains.dtype == bool: length = domains.shape[1] domains_ = [] for h in domains: start = None for i, b in enumerate(h): if b: if start is None: # start start = i else: if start is not None: # end domains_.append([start, i-1]) start = None if start is not None: domains_.append([start, i]) domains = np.array(domains_) else: length = domains.max() consecutive = False else: raise ValueError('domains must be either one or two dimensions') from matplotlib.pyplot import figure, plot a = []; b = [] lwd = kwargs.pop('linewidth', 1) lwd = kwargs.pop('lw', lwd) linewidth = np.abs(lwd) if fill_ends == 'open' and len(domains) == 1: domains = [] for i in range(len(domains)): domain = domains[i] start = domain[0]; end = domain[1] if fill_ends == 'open' and start == 0: a.extend([end, end]) b.extend([start, end]) elif fill_ends == 'open' and end == length-1: a.extend([start, end]) b.extend([start, start]) else: a.extend([start, end, end]) b.extend([start, start, end]) if not consecutive: a.append(np.nan) b.append(np.nan) if lwd > 0: x = a; y = b else: x = b; y = a plt = plot(x, y, linespec, linewidth=linewidth, **kwargs) if SETTINGS['auto_show']: showFigure() return plt
def _getEigvecs(modes, row_norm=False, dummy_mode=False): la = importLA() if isinstance(modes, (Mode, ModeSet, NMA)): if hasattr(modes, '_model'): model = modes._model else: model = modes if isinstance(model, MaskedGNM): masked = model.masked model.masked = True V = modes.getArray() model.masked = masked else: V = modes.getArray() elif isinstance(modes, np.ndarray): V = modes else: try: mode0 = modes[0] if isinstance(mode0, Mode): V = np.empty((len(mode0),0)) for mode in modes: assert isinstance(mode, Mode), 'Modes should be a list of modes.' v = mode.getEigvec() v = np.expand_dims(v, axis=1) V = np.hstack((V, v)) else: V = np.array(modes) except TypeError: raise TypeError('Modes should be a list of modes.') if V.ndim == 1: V = np.expand_dims(V, axis=1) # add a dummy zero mode to the modeset if dummy_mode: v0 = V[:, 0] if np.allclose(v0, np.mean(v0)): dummy_mode = False LOGGER.warn('at least one zero mode is detected therefore dummy mode will NOT be added') if dummy_mode: n, _ = V.shape v0 = np.ones((n, 1), dtype=V.dtype) v0 /= la.norm(v0) V = np.hstack((v0, V)) LOGGER.debug('a dummy zero mode is added') # normalize the rows so that feature vectors are unit vectors if row_norm: norms = la.norm(V, axis=1) N = np.diag(div0(1., norms)) V = np.dot(N, V) return V
[docs]def showEmbedding(modes, labels=None, trace=True, headtail=True, cmap='prism'): """Visualizes Laplacian embedding of Hi-C data. :arg modes: modes in which loci are embedded. It can only have 2 or 3 modes for the purpose of visualization. :type modes: :class:`ModeSet` :arg labels: a list of integers indicating the segmentation of the sequence. :type labels: list :arg trace: if **True** then the trace of the sequence will be indicated by a grey dashed line. :type trace: bool :arg headtail: if **True** then a star and a closed circle will indicate the head and the tail of the sequence respectively. :type headtail: bool :arg cmap: the color map used to render the *labels*. :type cmap: str """ V, _ = _getEigvecs(modes, True) m, n = V.shape if labels is not None: if len(labels) != m: raise ValueError('Modes (%d) and the Hi-C map (%d) should have the same number' ' of atoms. Turn off "masked" if you intended to apply the' ' modes to the full map.' %(m, len(labels))) if n > 3: raise ValueError('This function can only visualize the embedding of 2 or 3 modes.') from matplotlib.pyplot import figure, plot, scatter from mpl_toolkits.mplot3d import Axes3D if n == 2: la = importLA() X, Y = V[:,:2].T R = np.array(range(len(X))) R = R / la.norm(R) X *= R; Y *= R f = figure() if trace: plot(X, Y, ':', color=[0.3, 0.3, 0.3]) if labels is None: C = 'b' else: C = labels scatter(X, Y, s=30, c=C, cmap=cmap) if headtail: plot(X[:1], Y[:1], 'k*', markersize=12) plot(X[-1:], Y[-1:], 'ko', markersize=12) elif n == 3: X, Y, Z = V[:,:3].T f = figure() ax = Axes3D(f) if trace: ax.plot(X, Y, Z, ':', color=[0.3, 0.3, 0.3]) if labels is None: C = 'b' else: C = labels ax.scatter(X, Y, Z, s=30, c=C, depthshade=True, cmap=cmap) if headtail: ax.plot(X[:1], Y[:1], Z[:1], 'k*', markersize=12) ax.plot(X[-1:], Y[-1:], Z[-1:], 'ko', markersize=12) if SETTINGS['auto_show']: showFigure() return f
[docs]def getDomainList(labels): """Returns a list of domain separations. The list has two columns: the first is for the domain starts and the second is for the domain ends.""" indicators = np.diff(labels) indicators = np.append(1., indicators) indicators[-1] = 1 sites = np.where(indicators != 0)[0] starts = sites[:-1] ends = sites[1:] domains = np.array([starts, ends]).T return domains