Source code for prody.sequence.sequence

# -*- coding: utf-8 -*-
"""This module handles individual sequences."""

from numpy import char, frombuffer, fromstring

from prody import LOGGER, PY3K
from prody.atomic import Atomic

from prody.utilities import splitSeqLabel

__all__ = ['Sequence']

class Sequence(object):

    """Handle individual sequences of an :class:`.MSA` object.

    An instance either points to one row of an :class:`.MSA` (it then keeps
    the MSA and the row index) or owns its data (an array of single
    characters and a label)."""

    __slots__ = ['_msa', '_seq', '_index', '_label']

    def __init__(self, *args):
        """Depending on input arguments, instances may point to an
        :class:`.MSA` object or store its own data.

        Accepted call forms are ``Sequence(msa, index)``,
        ``Sequence(seq, label)`` and ``Sequence(seq)``.

        :raises ValueError: when the number of arguments is not 1 or 2"""

        if len(args) == 2:
            one, two = args
            try:
                # Duck-type check: when both arguments look like strings
                # they are (seq, label), otherwise they are (msa, index).
                one.lower, two.lower
            except AttributeError:
                self._msa = one
                self._index = two
                self._seq = self._label = None
            else:
                self._seq = self._toArray(one)
                self._label = two
                self._msa = self._index = None
        elif len(args) == 1:
            self._seq = self._toArray(args[0])
            self._msa = self._index = None
            self._label = ''
        else:
            raise ValueError('msa and index, or seq [and label] must be '
                             'specified')

    @staticmethod
    def _toArray(seq):
        """Returns *seq* as a writable array of single byte characters."""

        # frombuffer needs a bytes-like object, so text is encoded first
        # (on Python 2, str already is bytes and is used as it is).
        if not isinstance(seq, bytes) and hasattr(seq, 'encode'):
            seq = seq.encode()
        # Binary-mode fromstring is deprecated in favor of frombuffer.
        # frombuffer returns a read-only view on immutable input, hence the
        # copy: the instance owns a writable array as it did before.
        return frombuffer(seq, '|S1').copy()

    @property
    def _array(self):
        """Sequence data array."""

        if self._msa is None:
            return self._seq
        return self._msa._msa[self._index]

    def __str__(self):

        # tobytes() is the supported name of the deprecated tostring().
        raw = self._array.tobytes()
        # On Python 2 bytes is str and is returned directly, on Python 3
        # the raw bytes are decoded to text.
        return raw if isinstance(raw, str) else raw.decode()

    def __len__(self):

        return len(self._array)

    def __repr__(self):

        msa = ''
        if self._msa is not None:
            msa = '{0}[{1}]; '.format(self._msa.getTitle(), self._index)
        return ('<Sequence: {0} ({1}length {2}; {3} residues and '
                '{4} gaps)>').format(self.getLabel(), msa, len(self),
                                     self.numResidues(), self.numGaps())

    def __eq__(self, other):

        try:
            this = self._array
            that = other._array
            return this.shape == that.shape and bool((this == that).all())
        except AttributeError:
            # *other* does not provide sequence data, so it cannot be equal
            return False

    def __ne__(self, other):

        # Python 2 does not derive != from __eq__, define it explicitly so
        # that both operators always agree.
        return not self.__eq__(other)

    def getMSA(self):
        """Returns :class:`.MSA` instance or **None**."""

        return self._msa

    def getArray(self):
        """Returns the sequence data array (the array itself, not a copy)."""

        return self._array

    def getIndex(self):
        """Returns sequence index or **None**."""

        return self._index

    # This function should be able to update MSA._mapping and MSA._labels
    #def setLabel(self, label):
    #    """Set the label to be associated with object"""
    #
    #    self._label = str(label)

    def getLabel(self, full=False):
        """Returns label of the sequence.  When *full* is **True**, the
        label is returned without being split by :func:`splitSeqLabel`."""

        label = self._label
        if label is None:
            # no own label, the instance points to a row of an MSA
            label = self._msa._labels[self._index]
        return (label if full else splitSeqLabel(label)[0]).strip()

    def numGaps(self):
        """Returns number of gap characters."""

        array = self._array
        return len(array) - int(char.isalpha(array).sum())

    def numResidues(self):
        """Returns the number of alphabet characters."""

        return int(char.isalpha(self._array).sum())

    def getResnums(self, gaps=False, report_match=False):
        """Returns list of residue numbers associated with non-gapped *seq*.
        When *gaps* is **True**, return a list containing the residue numbers
        with gaps appearing as **None**.

        Residue numbers are inferred from the full label if possible.
        When the label does not contain residue number information,
        a range of numbers starting from 1 is returned.

        When *report_match* is **True**, a tuple ``(match, resnums)`` is
        returned, where *match* tells whether the start-end entry of the
        label was used."""

        title, start, end = splitSeqLabel(self.getLabel(True))
        n_res = self.numResidues()
        match = False
        try:
            start, end = int(start), int(end)
        except (TypeError, ValueError):
            # start/end are missing or are not numbers
            LOGGER.info('Cannot parse start and end values from sequence label {0}. Setting '
                        'resnums 1 to {1:d}'.format(title, n_res))
            start, end = 1, n_res
        else:
            if (end - start + 1) != n_res:
                LOGGER.info('Label {0} start-end entry does not match '
                            'length of ungapped sequence. Setting '
                            'resnums 1 to {1:d}'.format(title, n_res))
                start, end = 1, n_res
            else:
                LOGGER.info('Label {0} start-end entry matches '
                            'length of ungapped sequence. Setting '
                            'resnums {1:d} to {2:d}'.format(title, start, end))
                match = True

        # At this point the range holds exactly one number per residue, so
        # next() below cannot run out of numbers.
        resnums = iter(range(start, end + 1))
        if gaps:
            result = [next(resnums) if torf else None
                      for torf in char.isalpha(self._array)]
        else:
            result = list(resnums)

        if report_match:
            return match, result
        return result

    def copy(self):
        """Returns a copy of the instance that owns its sequence data."""

        # NOTE(review): the copy gets the label returned by getLabel(), i.e.
        # the first item of splitSeqLabel(); whatever else the full label
        # holds is not carried over -- confirm that this is intended.
        return Sequence(str(self), self.getLabel())