Release Notes
The v2.0 series comes with new and improved sequence, structure, and dynamics analysis features. See the release notes for details.
How to Cite
Bakan A, Meireles LM, Bahar I. ProDy: Protein Dynamics Inferred from Theory and Experiments.
Bioinformatics 2011 27(11):1575-1577.
Bakan A, Dutta A, Mao W, Liu Y, Chennubhotla C, Lezon TR, Bahar I. Evol and ProDy for Bridging Protein Sequence Evolution and Structural Dynamics.
Bioinformatics 2014 30(18):2681-2683.
Source code for prody.apps.evol_apps.evol_coevol

"""MSA residue coevolution calculation application."""

__author__ = 'Anindita Dutta, Ahmet Bakan'

from ..apptools import DevelApp

__all__ = ['evol_coevol']

# Application object for the 'coevol' command; every argument, group and
# figure option defined further down in this module is attached to it.
APP = DevelApp('coevol',
               help='analyze co-evolution using mutual information')

# Usage text with two example invocations.  The trailing backslashes inside
# the string are line continuations, so each backslash/newline pair is
# dropped and the wrapped lines are joined in the resulting text.  The second
# argument passed to setExample is an empty list.
APP.setExample(
"""Analyze coevolution by performing mutual information calculation between
MSA positions.  A refined MSA without gaps should be used.

Following example will save coevolution data and plot using default options:

  $ evol coevol piwi_refined.slx -S

Following example will save coevolution data and plot for all correction and \
normalizations:

  $ evol coevol piwi_refined.slx -S -c apc -c asc -m sument -m minent \
-m maxent -m mincon -m maxcon -m joint""", [])


# Positional argument 'msa': the refined MSA file.
APP.addArgument('msa', help='refined MSA file')

# ---- calculation options ----
APP.addGroup('calc', 'calculation options')

# Stored under 'ambiguity'; True unless -n/--no-ambiguity is given.
APP.addArgument(
    '-n', '--no-ambiguity',
    dest='ambiguity',
    help='treat amino acids characters B, Z, J, and X as non-ambiguous',
    default=True,
    action='store_false',
    group='calc')

# Repeatable option; every use appends one value to 'correction'.
APP.addArgument(
    '-c', '--correction',
    dest='correction',
    help='also save corrected mutual information matrix data and plot',
    choices=['apc', 'asc'],
    metavar='STR',
    type=str,
    action='append',
    group='calc')

# Repeatable option; every use appends one value to 'normalization'.
APP.addArgument(
    '-m', '--normalization',
    dest='normalization',
    help='also save normalized mutual information matrix data and plot',
    choices=['sument', 'minent', 'maxent', 'mincon', 'maxcon', 'joint'],
    metavar='STR',
    type=str,
    action='append',
    group='calc')

# ---- output options ----
APP.addGroup('output', 'output options')

# When set, a heatmap file is written for every matrix that is saved.
APP.addArgument('-t', '--heatmap',
    dest='heatmap',
    help='save heatmap files for all mutual information matrices',
    default=False,
    action='store_true',
    group='output')

# The help text names the suffix that evol_coevol actually appends when no
# prefix is given ('_mutinfo'); it previously advertised '_coevol'.
APP.addArgument('-p', '--prefix',
    dest='prefix',
    help='output filename prefix, default is '
         'msa filename with _mutinfo suffix',
    type=str,
    metavar='STR',
    group='output')

# printf-style format used when writing matrix values to text files.
APP.addArgument('-f', '--number-format',
    dest='numformat', type=str, default='%12g',
    metavar='STR', help='number output format', group='output')


# ---- figure options ----

# Lower and upper colour limits for the plot.
APP.addFigarg(
    '-L', '--cmin',
    dest='cmin',
    help='apply lower limits for figure plot',
    type=float,
    metavar='FLOAT')

APP.addFigarg(
    '-U', '--cmax',
    dest='cmax',
    help='apply upper limits for figure plot',
    type=float,
    metavar='FLOAT')

# Axis label and figure title; both default to None.
APP.addFigarg(
    '-X', '--xlabel',
    dest='xlabel',
    help='specify xlabel, by default will be applied on ylabel',
    type=str,
    metavar='STR',
    default=None)

APP.addFigarg(
    '-T', '--title',
    dest='title',
    help='figure title',
    type=str,
    metavar='STR',
    default=None)

# Switch stored under 'figcoevol'; evol_coevol saves plots only when it is set.
APP.addFigure(
    '-S', '--save-plot',
    dest='figcoevol',
    action='store_true',
    help='save coevolution plot')


def evol_coevol(msa, **kwargs):
    """Calculate mutual information between MSA positions and save results.

    The plain mutual information matrix is always written.  One additional
    matrix is written for every requested normalization and correction.
    For each matrix a text file is saved and, optionally, a heatmap file and
    a figure.

    :arg msa: MSA filename, passed to :func:`parseMSA`

    Keyword arguments read by this function:

    :arg prefix: output filename prefix; when **None**, the MSA filename
        without its extension (and without a trailing ``.gz``) plus
        ``'_mutinfo'`` is used
    :arg numformat: number format for text output, default ``'%12g'``
    :arg heatmap: when true, also write a ``.hm`` heatmap file per matrix
    :arg normalization: list of normalization names, or **None**
    :arg correction: list of correction names, or **None**
    :arg figcoevol: when true, save a figure per matrix
    :arg cmin, cmax: color limits; matrix minimum/maximum when missing or
        **None**
    :arg figwidth, figheight: figure size, defaults 8 and 6
    :arg xlabel, title: passed to :func:`showMutinfoMatrix`
    :arg figformat: figure file format, default ``'pdf'``
    :arg figdpi: figure resolution, default 300

    All keyword arguments are also forwarded to :func:`buildMutinfoMatrix`
    and :func:`calcShannonEntropy`.
    """

    from os.path import splitext

    from numpy import arange

    import prody
    from prody import parseMSA, buildMutinfoMatrix, showMutinfoMatrix
    from prody import applyMutinfoCorr, calcShannonEntropy
    from prody import writeArray, LOGGER, applyMutinfoNorm, writeHeatmap

    prefix = kwargs.get('prefix')
    if prefix is None:
        prefix, ext = splitext(msa)
        if ext.lower() == '.gz':
            # strip the extension hidden underneath the compression suffix
            prefix, ext = splitext(prefix)
        prefix += '_mutinfo'

    msa = parseMSA(msa)
    mutinfo = buildMutinfoMatrix(msa, **kwargs)
    numformat = kwargs.get('numformat', '%12g')
    heatmap = kwargs.get('heatmap', False)
    if heatmap:
        hmargs = {
                  'xlabel': 'Residue', 'ylabel': 'Residue',
                  'xorigin': 1, 'xstep': 1,
                  'residue': arange(msa.numResidues())}

    # Work list of (kind, name) pairs; (None, None) is the plain matrix.
    # Options that were not given may be present in kwargs with value None.
    todo = [(None, None)]
    norm = kwargs.get('normalization') or []
    corr = kwargs.get('correction') or []
    if 'joint' in norm:
        todo.append(('norm', 'joint'))
    # 'joint' is queued above, skip it here so it is not processed twice
    todo.extend(('norm', which) for which in norm if which != 'joint')
    todo.extend(('corr', which) for which in corr)

    # Import matplotlib once, and only when figures were requested.
    plt = None
    if kwargs.get('figcoevol'):
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warn('Matplotlib could not be imported, '
                        'figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False

    entropy = None  # computed lazily, shared by entropy-based normalizations

    for what, which in todo:
        if what is None:
            matrix = mutinfo
            suffix = ''
            tuffix = ' Mutual Information'
        elif which == 'joint':
            LOGGER.info('Applying {0} normalization.'.format(repr(which)))
            # joint normalization is obtained by rebuilding the matrix
            matrix = buildMutinfoMatrix(msa, norm=True, **kwargs)
            suffix = '_norm_joint'
            tuffix = ' MI - Normalization: ' + which
        elif what == 'norm':
            LOGGER.info('Applying {0} normalization.'.format(repr(which)))
            if entropy is None:
                entropy = calcShannonEntropy(msa, **kwargs)
            matrix = applyMutinfoNorm(mutinfo, entropy, norm=which)
            suffix = '_norm_' + which
            tuffix = ' MI - Normalization: ' + which
        else:
            LOGGER.info('Applying {0} correction.'.format(repr(which)))
            matrix = applyMutinfoCorr(mutinfo, which)
            suffix = '_corr_' + which
            tuffix = ' MI - Correction: ' + which

        writeArray(prefix + suffix + '.txt', matrix, format=numformat)

        if heatmap:
            writeHeatmap(prefix + suffix + '.hm', matrix,
                         title=msa.getTitle() + tuffix, **hmargs)

        if plt is not None:
            # fall back to the data range when a limit is missing or None
            cmin = kwargs.get('cmin')
            if cmin is None:
                cmin = matrix.min()
            cmax = kwargs.get('cmax')
            if cmax is None:
                cmax = matrix.max()
            width = kwargs.get('figwidth', 8)
            height = kwargs.get('figheight', 6)
            xlabel = kwargs.get('xlabel')
            title = kwargs.get('title')
            figformat = kwargs.get('figformat', 'pdf')
            figure = plt.figure(figsize=(width, height))
            try:
                showMutinfoMatrix(matrix, msa=msa, clim=(cmin, cmax),
                                  xlabel=xlabel, title=title)
                figure.savefig(prefix + suffix + '.' + figformat,
                               format=figformat,
                               dpi=kwargs.get('figdpi', 300))
            finally:
                # release the figure so repeated plots do not accumulate
                plt.close(figure)


# Attach evol_coevol to the 'coevol' application object.
APP.setFunction(evol_coevol)