# Source code for prody.apps.evol_apps.evol_refine

"""Refine MSA application."""

from ..apptools import DevelApp

__all__ = ['evol_refine']

# The 'refine' application object, created with its name and a short
# description text.
APP = DevelApp('refine', 'refine an MSA by removing gapped rows/columns')

# Attach the usage example text to the application.  The second argument
# is an empty list.
# NOTE(review): the meaning of that list is defined by DevelApp.setExample,
# which is not visible in this file -- confirm before changing it.
APP.setExample(
"""Refines MSA by removing gapped columns (residue positions)
and rows (sequences).

In the following, columns that are gaps in sequence with label
GTHB2_ONCKE will be removed from MSA:

  $ evol refine piwi.slx -l GTHB2_ONCKE""", [])


# Positional argument: the alignment file to work on.
APP.addArgument('msa', help='MSA filename to be refined')

# The options below are attached to the 'refine' group.
APP.addGroup('refine', 'refinement options')

# Reference sequence, given as a string.
APP.addArgument(
    '-l', '--label',
    group='refine',
    dest='label',
    metavar='STR',
    type=str,
    help='sequence label, UniProt ID code, or PDB and chain identifier')

# The three float-valued thresholds all default to None when not given.
APP.addArgument(
    '-s', '--seqid',
    group='refine',
    dest='seqid',
    metavar='FLOAT',
    type=float,
    default=None,
    help='identity threshold for selecting unique sequences')

APP.addArgument(
    '-c', '--colocc',
    group='refine',
    dest='colocc',
    metavar='FLOAT',
    type=float,
    default=None,
    help='column (residue position) occupancy')

APP.addArgument(
    '-r', '--rowocc',
    group='refine',
    dest='rowocc',
    metavar='FLOAT',
    type=float,
    default=None,
    help='row (sequence) occupancy')

# Boolean flag, off by default; its value is stored under dest 'pdbres'.
APP.addArgument('-k', '--keep',
    dest='pdbres',
    help='keep columns corresponding to residues not resolved in PDB '
         'structure, applies when label argument is a PDB identifier',
    default=False,
    action='store_true',
    group='refine')

# The options below are attached to the 'output' group.
APP.addGroup('output', 'output options')

# Explicit output filename, given as a string.
APP.addArgument(
    '-o', '--outname',
    group='output',
    dest='outname',
    metavar='STR',
    type=str,
    help='output filename, default is msa filename with _refined suffix')

# Output file format, given as a string.
APP.addArgument(
    '-f', '--format',
    group='output',
    dest='format',
    metavar='STR',
    type=str,
    help='output MSA file format, default is same as input')

# Boolean flag requesting gzip output.
APP.addArgument(
    '-z', '--compressed',
    group='output',
    dest='compressed',
    action='store_true',
    help='gzip refined MSA output')

def evol_refine(msa, **kwargs):
    """Refine the MSA stored in file *msa* and write the result to a file.

    The file is parsed with ``parseMSA``, the alignment is refined with
    ``refineMSA``, and the outcome is written with ``writeMSA``.  All
    keyword arguments are forwarded unchanged to both ``refineMSA`` and
    ``writeMSA``.

    The output filename is taken from the *outname* keyword argument.
    When it is missing or **None**, the name is derived from *msa* by
    inserting ``_refined`` in front of the file extension(s), e.g.
    ``piwi.slx`` gives ``piwi_refined.slx`` and ``piwi.slx.gz`` gives
    ``piwi_refined.slx.gz``.

    Nothing is returned; the output filename is reported via ``LOGGER``.
    """

    from prody import parseMSA, refineMSA, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # Also peel off the extension hidden under ".gz", so that the
            # suffix lands before it ("piwi_refined.slx.gz") instead of
            # inside it ("piwi.slx_refined.gz").  A bare "name.gz" has no
            # inner extension and still becomes "name_refined.gz".
            outname, inner_ext = splitext(outname)
            ext = inner_ext + ext
        outname += '_refined' + ext

    writeMSA(outname, refineMSA(parseMSA(msa), **kwargs), **kwargs)
    LOGGER.info('Refined MSA is written in file: ' + outname)
# Associate evol_refine with the 'refine' application object.
APP.setFunction(evol_refine)