Source code for prody.apps.evol_apps.evol_filter

"""Filter MSA application."""

from ..apptools import DevelApp

__all__ = ['evol_filter']

# Application object for the "filter" command, created with its name and a
# one-line description.
APP = DevelApp('filter', 'filter an MSA using sequence labels')

# Usage example shown for the application.  The text is assembled from one
# literal per line; the second argument is an empty list.
APP.setExample(
    'Filter sequences in an MSA based on label data.\n'
    '\n'
    'Following example will filter human sequences:\n'
    '\n'
    '  $ evol filter piwi_seed.slx HUMAN -e',
    [])


# Positional arguments: the MSA file first, followed by one or more words
# (nargs='+') that are compared to sequence labels.
APP.addArgument(
    'msa',
    help='MSA filename to be filtered')

APP.addArgument(
    'word',
    nargs='+',
    help='word to be compared to sequence label')

# Filtering method flags.  The group is created with two positional True
# values (presumably "required" and "mutually exclusive" -- TODO confirm
# against DevelApp.addGroup).
# NOTE(review): evol_filter also checks an 'equals' keyword, but no matching
# command-line flag is declared in this group -- confirm whether intentional.
APP.addGroup('filter', 'filtering method (required)', True, True)
APP.addArgument('-s', '--startswith',
    dest='startswith',
    help='sequence label starts with given words',
    action='store_true',
    group='filter')

APP.addArgument('-e', '--endswith',
    dest='endswith',
    help='sequence label ends with given words',
    action='store_true',
    group='filter')

# Help text fixed: it previously read "contains with given words".
APP.addArgument('-c', '--contains',
    dest='contains',
    help='sequence label contains given words',
    action='store_true',
    group='filter')

# Extra filter switch: stored as ``filter_full``, which evol_filter reads
# from its keyword arguments and forwards to MSAFile.
APP.addGroup('filter2', 'filter option')
APP.addArgument(
    '-F', '--full-label',
    group='filter2',
    action='store_true',
    dest='filter_full',
    help='compare full label with word(s)')


# Output options.  The help texts describe the *filtered* output: evol_filter
# appends the '_filtered' suffix to the default output name (the help text
# previously advertised '_refined').
APP.addGroup('output', 'output options')
APP.addArgument('-o', '--outname',
    dest='outname',
    help='output filename, default is msa filename with _filtered suffix',
    type=str,
    metavar='STR',
    group='output')

APP.addArgument('-f', '--format',
    dest='format',
    type=str,
    metavar='STR',
    help='output MSA file format, default is same as input',
    group='output')

APP.addArgument('-z', '--compressed',
    dest='compressed',
    action='store_true',
    help='gzip filtered MSA output',
    group='output')


def evol_filter(msa, *word, **kwargs):
    """Filter the sequences of an MSA file by label and write the result.

    :arg msa: filename of the MSA to be filtered
    :arg word: one or more words compared to sequence labels; when several
        words are given, a label is kept if it matches *any* of them

    One filtering method must be enabled with a true keyword argument.
    They are checked in this order and the first true one is used:
    *startswith*, *endswith*, *contains*, *equals*.

    Other keyword arguments read here:

    * *outname*: output filename; when missing or **None**, the input
      filename with a ``_filtered`` suffix inserted before its extension
      (and before a trailing ``.gz``, if present) is used
    * *filter_full*: forwarded to ``MSAFile`` as *filter_full*
      (default **False**)

    All keyword arguments are also forwarded unchanged to ``writeMSA``.

    :raises TypeError: when none of the filtering methods is enabled
    """

    from os.path import splitext

    from prody import MSAFile, writeMSA, LOGGER

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # Split the already-stripped name (not the original filename
            # again) so that 'x.slx.gz' yields 'x_filtered.slx.gz' and the
            # format extension stays directly in front of '.gz'.
            outname, inner_ext = splitext(outname)
            ext = inner_ext + ext
        outname += '_filtered' + ext

    # Handle one and many words uniformly.  Previously only *equals*
    # supported several words; the other methods left the filter unassigned
    # and failed with UnboundLocalError.
    words = tuple(word)

    if kwargs.get('startswith', False):
        def label_filter(label, seq):
            # str.startswith accepts a tuple and matches any of its items
            return label.startswith(words)

    elif kwargs.get('endswith', False):
        def label_filter(label, seq):
            # str.endswith accepts a tuple and matches any of its items
            return label.endswith(words)

    elif kwargs.get('contains', False):
        def label_filter(label, seq):
            return any(item in label for item in words)

    elif kwargs.get('equals', False):
        # set membership keeps the per-sequence check cheap for many words
        accepted = frozenset(words)

        def label_filter(label, seq):
            return label in accepted

    else:
        raise TypeError('one of startswith, endswith, contains, or equals '
                        'must be specified')

    filtered = MSAFile(msa, filter=label_filter,
                       filter_full=kwargs.get('filter_full', False))

    # writeMSA's return value is appended to the log message (presumably the
    # name of the written file -- confirm against writeMSA).
    LOGGER.info('Filtered MSA is written in file: ' +
                writeMSA(outname, filtered, **kwargs))
# Hand evol_filter to the application object through setFunction.
APP.setFunction(evol_filter)