Source code for prody.proteins.compare

# -*- coding: utf-8 -*-
"""This module defines functions for comparing and mapping polypeptide chains.
"""

from numbers import Integral

import numpy as np
from numpy import arange

from prody.atomic import AtomMap as AM
from prody.atomic import AtomGroup, Chain, AtomSubset, Selection
from prody.atomic import AAMAP
from prody.atomic import flags
from prody.measure import calcTransformation, printRMSD, calcDistance, calcRMSD, superpose
from prody import LOGGER, SELECT, PY2K, PY3K
from prody.sequence import MSA
from prody.utilities import cmp, pystr, isListLike, multilap, SolutionDepletionException, index
from prody.utilities import MATCH_SCORE, MISMATCH_SCORE, GAP_PENALTY, GAP_EXT_PENALTY, ALIGNMENT_METHOD

from Bio import pairwise2

if PY2K:
    range = xrange

if PY3K:
    basestring = str

__all__ = ['matchChains', 'matchAlign', 'mapChainOntoChain', 'mapOntoChain', 'alignChains',
           'mapOntoChains', 'bestMatch', 'sameChid', 'userDefined', 
           'mapOntoChainByAlignment', 'getMatchScore', 'setMatchScore',
           'getMismatchScore', 'setMismatchScore', 'getGapPenalty', 
           'setGapPenalty', 'getGapExtPenalty', 'setGapExtPenalty',
           'getGoodSeqId', 'setGoodSeqId', 'getGoodCoverage', 'combineAtomMaps',
           'setGoodCoverage', 'getAlignmentMethod', 'setAlignmentMethod']

GOOD_SEQID = 90.
GOOD_COVERAGE = 90.

GAPCHARS = ['-', '.']
NONE_A = '_'

_a2aaa = {
    'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS', 'Q': 'GLN',
    'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE', 'L': 'LEU', 'K': 'LYS',
    'M': 'MET', 'F': 'PHE', 'P': 'PRO', 'S': 'SER', 'T': 'THR', 'W': 'TRP',
    'Y': 'TYR', 'V': 'VAL'
}

def calcScores(n_match, n_mapped, n_total):
    if n_mapped > 0:
        seqid = n_match * 100 / n_mapped
        cover = n_mapped * 100 / n_total
    else:
        seqid = cover = 0
    return seqid, cover


[docs]def getGoodSeqId(): """Returns good sequence identity.""" return GOOD_SEQID
[docs]def setGoodSeqId(seqid): """Set good sequence identity.""" if isinstance(seqid, (float, int)) and seqid >= 0: global GOOD_SEQID GOOD_SEQID = seqid else: raise TypeError('seqid must be a positive number or zero')
[docs]def getGoodCoverage(): """Returns good sequence coverage.""" return GOOD_COVERAGE
[docs]def setGoodCoverage(coverage): """Set good sequence coverage.""" if isinstance(coverage, (float, int)) and coverage >= 0: global GOOD_COVERAGE GOOD_COVERAGE = coverage else: raise TypeError('coverage must be a positive number or zero')
[docs]def getMatchScore(): """Returns match score used to align sequences.""" return MATCH_SCORE
[docs]def setMatchScore(match_score): """Set match score used to align sequences.""" if isinstance(match_score, (float, int)) and match_score >= 0: global MATCH_SCORE MATCH_SCORE = match_score else: raise TypeError('match_score must be a positive number or zero')
[docs]def getMismatchScore(): """Returns mismatch score used to align sequences.""" return MISMATCH_SCORE
[docs]def setMismatchScore(mismatch_score): """Set mismatch score used to align sequences.""" if isinstance(mismatch_score, (float, int)) and mismatch_score >= 0: global MISMATCH_SCORE MISMATCH_SCORE = mismatch_score else: raise TypeError('mismatch_score must be a positive number or zero')
[docs]def getGapPenalty(): """Returns gap opening penalty used for pairwise alignment.""" return GAP_PENALTY
[docs]def setGapPenalty(gap_penalty): """Set gap opening penalty used for pairwise alignment.""" if isinstance(gap_penalty, (float, int)) and gap_penalty <= 0: global GAP_PENALTY GAP_PENALTY = gap_penalty else: raise TypeError('gap_penalty must be a negative number')
[docs]def getGapExtPenalty(): """Returns gap extension penalty used for pairwise alignment.""" return GAP_EXT_PENALTY
[docs]def setGapExtPenalty(gap_ext_penalty): """Set gap extension penalty used for pairwise alignment.""" if isinstance(gap_ext_penalty, (float, int)) and gap_ext_penalty <= 0: global GAP_EXT_PENALTY GAP_EXT_PENALTY = gap_ext_penalty else: raise TypeError('gap_ext_penalty must be a negative number or zero')
[docs]def getAlignmentMethod(): """Returns pairwise alignment method.""" return ALIGNMENT_METHOD
[docs]def setAlignmentMethod(method): """Set pairwise alignment method (global or local).""" if method in ('local', 'global'): global ALIGNMENT_METHOD ALIGNMENT_METHOD = method else: raise ValueError('method must be "local" or "global"')
class SimpleResidue(object): __slots__ = ['_chain', '_index', '_res', '_name', '_num', '_inc'] def __init__(self, chain, index, number, name, insertioncode='', residue=None): self._num = number self._name = name self._inc = insertioncode self._res = residue self._chain = chain self._index = index def __repr__(self): return '<SimpleResidue: {0}{1}>'.format(self._name, self._num) def __iter__(self): res = self.getResidue() if res is self: # iterate the residue itself as atoms in it, assuming the # residue contains a single alpha carbon for atom in [self]: yield atom else: for atom in res: yield atom def getResidue(self): if self._res is not None: return self._res else: return self def getChain(self): return self._chain def getResnum(self): return self._num def getIndex(self): return self._index def getIcode(self): return self._inc def getResname(self): return self._name def getCoords(self): if self._res: return self._res._getCoords() return None def getName(self): "A dummy method that returns the atom name of alpha carbon of this residue." return 'CA' class SimpleChain(object): """An internal class used to compare two polypeptide chains. SimpleChain instances can be indexed using residue numbers. If a residue with given number is not found in the chain, **None** is returned.""" __slots__ = ['_list', '_seq', '_title', '_dict', '_gaps', '_chain'] def __init__(self, chain=None, allow_gaps=False): """Initialize SimpleChain with a chain id and a sequence (available). :arg chain: chain instance or single-letter amino acid sequence :type chain: str, :class:`.Chain` :arg allow_gaps: allow gaps in the sequence of simple chain instance, default is False :type allow_gaps: bool""" self._dict = dict() self._list = list() self._seq = '' self._title = '' self._gaps = allow_gaps self._chain = None if isinstance(chain, Chain): self.buildFromChain(chain) elif isinstance(chain, str): self.buildFromSequence(chain) def __len__(self): return len(self._list) def __iter__(self): return self._list.__iter__() def __repr__(self): return '<SimpleChain: {0} with {1} residues>'.format( self._title, len(self._list)) def __str__(self): return '{0} with {1} residues'.format(self._title, len(self._list)) def __getitem__(self, index): if isinstance(index, Integral): return self._dict.get((index, '')) return self._dict.get(index) def getSequence(self): return self._seq def getTitle(self): return self._title def getCoords(self, calpha=False): if self._chain is None: return None if calpha: return self._chain.ca._getCoords() else: return self._chain._getCoords() def buildFromSequence(self, sequence, resnums=None): """Build from amino acid sequence. "-" or "." are acceptable amino acid types and are treated as gaps. :arg sequence: sequence of single letter amino acid codes :type sequence: str :arg resnums: residue numbers corresponding the sequence :type resnums: a list of numbers, or a string representation of numbers Examples of *resnums* are: * 1:200 250:300""" assert isinstance(sequence, str), 'sequence must be string' assert sequence.isalpha(), 'sequence must be all alpha' if resnums is None: resnums = arange(1, len(sequence)+1) resid = 0 gaps = self._gaps for i, aa in enumerate(sequence): resid = resnums[i] if gaps and aa in GAPCHARS: aa = NONE_A simpres = None else: simpres = SimpleResidue(self, i, resid, aa) self._list.append(simpres) self._dict[resid] = simpres self._seq += aa self._title = 'built from sequence %s...'%sequence[:5] def buildFromChain(self, chain): """Build from a :class:`.Chain`.""" assert isinstance(chain, Chain), 'chain must be a Chain instance' self._chain = chain gaps = self._gaps residues = list(chain.iterResidues()) temp = residues[0].getResnum()-1 protein_resnames = flags.AMINOACIDS for i, res in enumerate(chain): if not res.getResname() in protein_resnames: continue resid = res.getResnum() incod = res.getIcode() aa = AAMAP.get(res.getResname(), 'X') simpres = SimpleResidue(self, i, resid, aa, incod, res) if gaps: diff = resid - temp - 1 if diff > 0: self._seq += NONE_A * diff temp = resid self._seq += aa self._list.append(simpres) self._dict[(resid, incod)] = simpres self._title = 'Chain {0} from {1}'.format(chain.getChid(), chain.getAtomGroup() .getTitle()) def countUnpairedBreaks(chone, chtwo, resnum=True): """This function is under development. Return number of unpaired breaks in aligned chains *chone* and *chtwo*, which are expected to be :class:`.AtomMap` instances obtained from one of :func:`.matchChains` or :func:`.mapOntoChain` functions. Pairwise global or local alignment of chains with missing residues may be problematic, as in the following illustrations for example. This function helps identifying some of these problems. Breaks in a chain are determined using Cα-Cα distances between consecutive residues, i.e. Cα to Cα distance larger than 4 Å corresponds to a break or gap of 1 or more residues. This function counts such breaks in *chone* or *chtwo* that is not paired with a break in the other. The following example illustrates a problem that may occur when aligning two structures of the same protein chain where one of the chains have a few missing residues:: Correct alignment: A.L.R.S - - V.W.Y.K.L -> no unpaired breaks Target chain : A.L.R.S.V.T.V.W.Y.K.L Wrong alignment : A.L.R.S_V - - W.Y.K.L | --> 1 unpaired break, counted Key: - (dash) is a gap in the alignment . (dot) is a peptide bond _ (underscore) is a break In this case, one unpaired break is an indicator of the problem in the alignment. The following example illustrates a case where an unpaired break is due to an insertion in the homologous chain:: Target chain : 1A.2L.3R.4S.5V.6T.7V Homologous chain : 1A.2L.3K.4S.6V_9S.10L | --> 1 unpaired break, not counted In this case, residue numbers are used to determine whether the unpaired break is due to an insertion/deletion in the chain or misalignment.""" try: if chone.numAtoms() != chtwo.numAtoms(): raise ValueError('number of atoms do not match') except AttributeError: raise TypeError('one and two must be Atomic instances') mapped = chone.getFlags('mapped') * chtwo.getFlags('mapped') if mapped.sum() == 0: raise ValueError('chains do not have common mapped atoms') chone = chone[mapped] chtwo = chtwo[mapped] rnone = chone.getResnums() rntwo = chtwo.getResnums() brone = calcDistance(chone[1:], chone[:-1]) > 4. brtwo = calcDistance(chtwo[1:], chtwo[:-1]) > 4. brone[(rnone[1:] - rnone[:-1]) > 1] = False brtwo[(rntwo[1:] - rntwo[:-1]) > 1] = False brone = set(brone.nonzero()[0]) brtwo = set(brtwo.nonzero()[0]) return len(brone.union(brtwo)) - len(brone.intersection(brtwo)) _SUBSETS = { 'ca': 'ca', 'calpha': 'ca', 'bb': 'bb', 'backbone': 'bb', 'heavy': 'noh', 'noh': 'noh', 'all': 'all' }
[docs]def matchAlign(mobile, target, **kwargs): """Superpose *mobile* onto *target* based on best matching pair of chains. This function uses :func:`matchChains` for matching chains and returns a tuple that contains the following items: * *mobile* after it is superposed, * matching chain from *mobile* as a :class:`.AtomMap` instance, * matching chain from *target* as a :class:`.AtomMap` instance, * percent sequence identity of the match, * percent sequence overlap of the match. :arg mobile: atoms that contain a protein chain :type mobile: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection` :arg target: atoms that contain a protein chain :type target: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection` :arg tarsel: *target* atoms that will be used for alignment, default is ``'calpha'`` :type tarsel: str :arg allcsets: align all coordinate sets of *mobile*, default is **True** :type allcsets: bool :keyword seqid: percent sequence identity, default is 90 :type seqid: float :keyword overlap: percent overlap, default is 90 :type overlap: float :keyword pwalign: perform pairwise sequence alignment :type pwalign: bool""" selstr = kwargs.pop('tarsel', 'calpha') if selstr == 'calpha': selstr = None subset = 'calpha' if selstr: if selstr in _SUBSETS: subset = selstr else: subset = 'all' sel = target.select(selstr) if sel is None: raise ValueError('selection {0} did not match any atoms' .format(repr(selstr))) chid = set(sel.getChids()) if len(chid) == 1: chid = chid.pop() target = target.select('chain ' + chid) match = matchChains(mobile, target, subset=subset, **kwargs) if not match: return match = match[0] mob = match[0] tar = match[1] if selstr: which = SELECT.getIndices(tar, selstr) n_atoms = len(which) else: which = slice(None) n_atoms = len(tar) selstr = 'calpha' if kwargs.get('allcets', True): csets = range(mobile.numCoordsets()) # PY3K: OK else: csets = [mobile.getACSIndex()] LOGGER.info('Alignment is based on {0} atoms matching {1}.' .format(n_atoms, repr(selstr))) printRMSD(tar._getCoords()[which], mob._getCoordsets()[:, which], msg='Before alignment ') for acsi in csets: mob.setACSIndex(acsi) mobile.setACSIndex(acsi) calcTransformation(mob._getCoords()[which], tar._getCoords()[which]).apply(mobile) printRMSD(tar._getCoords()[which], mob._getCoordsets()[:, which], msg='After alignment ') return (mobile,) + match
[docs]def matchChains(atoms1, atoms2, **kwargs): """Returns pairs of chains matched based on sequence similarity. Makes an all-to-all comparison of chains in *atoms1* and *atoms2*. Chains are obtained from hierarchical views (:class:`.HierView`) of atom groups. This function returns a list of matching chains in a tuple that contain 4 items: * matching chain from *atoms1* as a :class:`.AtomMap` instance, * matching chain from *atoms2* as a :class:`.AtomMap` instance, * percent sequence identity of the match, * percent sequence overlap of the match. List of matches are sorted in decreasing percent sequence identity order. :class:`.AtomMap` instances can be used to calculate RMSD values and superpose atom groups. :arg atoms1: atoms that contain a chain :type atoms1: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection` :arg atoms2: atoms that contain a chain :type atoms2: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection` :keyword subset: one of the following well-defined subsets of atoms: ``"calpha"`` (or ``"ca"``), ``"backbone"`` (or ``"bb"``), ``"heavy"`` (or ``"noh"``), or ``"all"``, default is ``"calpha"`` :type subset: string :keyword seqid: percent sequence identity, default is 90 :type seqid: float :keyword overlap: percent overlap, default is 90 :type overlap: float :keyword pwalign: perform pairwise sequence alignment :type pwalign: bool If *subset* is set to *calpha* or *backbone*, only alpha carbon atoms or backbone atoms will be paired. If set to *all*, all atoms common to matched residues will be returned. This function tries to match chains based on residue numbers and names. All chains in *atoms1* is compared to all chains in *atoms2*. This works well for different structures of the same protein. When it fails, :mod:`Bio.pairwise2` is used for pairwise sequence alignment, and matching is performed based on the sequence alignment. User can control, whether sequence alignment is performed or not with *pwalign* keyword. If ``pwalign=True`` is passed, pairwise alignment is enforced.""" if not isinstance(atoms1, (AtomGroup, Chain, Selection)): raise TypeError('atoms1 must be an AtomGroup, Chain, or Selection') if not isinstance(atoms2, (AtomGroup, Chain, Selection)): raise TypeError('atoms2 must be an AtomGroup, Chain, or Selection') subset = kwargs.get('subset', 'calpha') if subset not in _SUBSETS: raise ValueError('{0} is not a valid subset argument' .format(str(subset))) seqid = kwargs.get('seqid', 90.) assert isinstance(seqid, (float, int)), 'seqid must be float' assert 0 < seqid <= 100, 'seqid must be in the range from 0 to 100' coverage = kwargs.get('overlap') if coverage is None: coverage = kwargs.get('coverage', 90.) assert isinstance(coverage, (float, int)), 'overlap must be float' assert 0 < coverage <= 100, 'overlap must be in the range from 0 to 100' pwalign = kwargs.get('pwalign', None) if isinstance(atoms1, Chain): chains1 = [atoms1] atoms1 = atoms1.getAtomGroup() else: chains1 = list(atoms1.getHierView().iterChains()) if not isinstance(atoms1, AtomGroup): atoms1 = atoms1.getAtomGroup() chains = list() for ch in chains1: simpch = SimpleChain(ch) if len(simpch) > 0: chains.append(simpch) chains1 = chains if not isinstance(atoms1, Chain): LOGGER.debug('Checking {0}: {1} chains are identified' .format(str(atoms1), len(chains1))) if isinstance(atoms2, Chain): chains2 = [atoms2] atoms2 = atoms2.getAtomGroup() else: chains2 = list(atoms2.getHierView().iterChains()) if not isinstance(atoms2, AtomGroup): atoms2 = atoms2.getAtomGroup() chains = list() for ch in chains2: simpch = SimpleChain(ch) if len(simpch) > 0: chains.append(simpch) chains2 = chains if not isinstance(atoms2, Chain): LOGGER.debug('Checking {0}: {1} chains are identified' .format(str(atoms2), len(chains2))) matches = [] unmatched = [] LOGGER.debug('Trying to match chains based on residue numbers and names:') for simpch1 in chains1: for simpch2 in chains2: LOGGER.debug(' Comparing {0} (len={1}) and {2} (len={3}):' .format(simpch1.getTitle(), len(simpch1), simpch2.getTitle(), len(simpch2))) match1, match2, nmatches = getTrivialMatch(simpch1, simpch2) _seqid = nmatches * 100 / min(len(simpch1), len(simpch2)) _cover = len(match2) * 100 / max(len(simpch1), len(simpch2)) if _seqid >= seqid and _cover >= coverage: LOGGER.debug('\tMatch: {0} residues match with {1:.0f}% ' 'sequence identity and {2:.0f}% overlap.' .format(len(match1), _seqid, _cover)) matches.append((match1, match2, _seqid, _cover, simpch1, simpch2)) else: LOGGER.debug('\tFailed to match chains (seqid={0:.0f}%, ' 'overlap={1:.0f}%).'.format(_seqid, _cover)) unmatched.append((simpch1, simpch2)) if pwalign or (not matches and (pwalign is None or pwalign)): if pairwise2: LOGGER.debug('Trying to match chains based on {0} sequence ' 'alignment:'.format(ALIGNMENT_METHOD)) for simpch1, simpch2 in unmatched: LOGGER.debug(' Comparing {0} (len={1}) and {2} ' '(len={3}):' .format(simpch1.getTitle(), len(simpch1), simpch2.getTitle(), len(simpch2))) match1, match2, nmatches = getAlignedMatch(simpch1, simpch2) _seqid = nmatches * 100 / min(len(simpch1), len(simpch2)) _cover = len(match2) * 100 / max(len(simpch1), len(simpch2)) if _seqid >= seqid and _cover >= coverage: LOGGER.debug('\tMatch: {0} residues match with {1:.0f}% ' 'sequence identity and {2:.0f}% overlap.' .format(len(match1), _seqid, _cover)) matches.append((match1, match2, _seqid, _cover, simpch1, simpch2)) else: LOGGER.debug('\tFailed to match chains (seqid={0:.0f}%, ' 'overlap={1:.0f}%).' .format(_seqid, _cover)) else: LOGGER.warning('Pairwise alignment could not be performed.') if not matches: return None subset = _SUBSETS[subset] for mi, result in enumerate(matches): match1, match2, _seqid, _cover, simpch1, simpch2 = result indices1 = [] indices2 = [] for i in range(len(match1)): ares = match1[i] bres = match2[i] if subset == 'ca': try: aid = ares.getNames().tolist().index('CA') except ValueError: aid = None try: bid = bres.getNames().tolist().index('CA') if aid is not None: indices1.append(ares._indices[aid]) indices2.append(bres._indices[bid]) except ValueError: pass elif subset == 'bb': for bban in ('N', 'CA', 'C', 'O'): try: aid = ares.getNames().tolist().index(bban) except ValueError: continue try: bid = bres.getNames().tolist().index(bban) except ValueError: continue else: indices1.append(ares._indices[aid]) indices2.append(bres._indices[bid]) elif subset == 'noh': for han, aid, noh in zip(ares.getNames(), ares._indices, ares.getFlags('noh')): if not noh: continue try: bid = bres.getNames().tolist().index(han) except ValueError: continue else: indices1.append(aid) indices2.append(bres._indices[bid]) elif subset is None or subset == 'all': aans = ares.getNames() bans = bres.getNames().tolist() aids = ares.getIndices() #bids = bres.getIndices() for j in range(len(aans)): try: bid = bres._indices[bans.index(aans[j])] indices1.append(aids[j]) indices2.append(bid) except ValueError: pass indices1 = np.array(indices1, int) indices2 = np.array(indices2, int) match1 = AM(atoms1, indices1, atoms1.getACSIndex(), title=simpch1.getTitle() + ' -> ' + simpch2.getTitle(), intarrays=True) match2 = AM(atoms2, indices2, atoms2.getACSIndex(), title=simpch2.getTitle() + ' -> ' + simpch1.getTitle(), intarrays=True) matches[mi] = (match1, match2, _seqid, _cover) if len(matches) > 1: matches.sort(key=lambda m: m[-2:], reverse=True) return matches
def getTrivialMatch(ach, bch): """Returns lists of matching residues (match based on residue number). """ #if not isinstance(ach, SimpleChain): # raise TypeError('ach must be a SimpleChain instance') #if not isinstance(bch, SimpleChain): # raise TypeError('bch must be a SimpleChain instance') amatch = [] bmatch = [] match = 0.0 for ares in ach: bres = bch[(ares.getResnum(), ares.getIcode())] if bres is not None: if ares.getResname() == bres.getResname(): match += 1 amatch.append(ares.getResidue()) bmatch.append(bres.getResidue()) return amatch, bmatch, match def getAlignedMatch(ach, bch): """Returns list of matching residues (match is based on sequence alignment). """ if ALIGNMENT_METHOD == 'local': alignment = pairwise2.align.localms(ach.getSequence(), bch.getSequence(), MATCH_SCORE, MISMATCH_SCORE, GAP_PENALTY, GAP_EXT_PENALTY, one_alignment_only=1) else: alignment = pairwise2.align.globalms(ach.getSequence(), bch.getSequence(), MATCH_SCORE, MISMATCH_SCORE, GAP_PENALTY, GAP_EXT_PENALTY, one_alignment_only=1) this = alignment[0][0] that = alignment[0][1] amatch = [] bmatch = [] aiter = ach.__iter__() biter = bch.__iter__() match = 0.0 for i in range(len(this)): a = this[i] b = that[i] if a not in GAPCHARS: ares = next(aiter) if b not in GAPCHARS: bres = next(biter) if a not in GAPCHARS: amatch.append(ares.getResidue()) bmatch.append(bres.getResidue()) if a == b: match += 1 return amatch, bmatch, match
[docs]def mapOntoChain(atoms, chain, **kwargs): """Map *atoms* onto *chain*. This function is a wrapper of :func:`.mapChainOntoChain` that manages to map chains onto target *chain*. The function returns a list of mappings. Each mapping is a tuple that contains 4 items: * Mapped chain as an :class:`.AtomMap` instance, * *chain* as an :class:`.AtomMap` instance, * Percent sequence identitity, * Percent sequence overlap Mappings are returned in decreasing percent sequence identity order. :class:`.AtomMap` that keeps mapped atom indices contains dummy atoms in place of unmapped atoms. :arg atoms: atoms that will be mapped to the target *chain* :type atoms: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection` :arg chain: chain to which atoms will be mapped :type chain: :class:`.Chain` :keyword subset: one of the following well-defined subsets of atoms: ``"calpha"`` (or ``"ca"``), ``"backbone"`` (or ``"bb"``), ``"heavy"`` (or ``"noh"``), or ``"all"``, default is ``"calpha"`` :type subset: str See :func:`.mapChainOntoChain` for other keyword arguments. This function tries to map *atoms* to *chain* based on residue numbers and types. Each individual chain in *atoms* is compared to target *chain*. .. [IS98] Shindyalov IN, Bourne PE. Protein structure alignment by incremental combinatorial extension (CE) of the optimal path. *Protein engineering* **1998** 11(9):739-47. """ if not isinstance(atoms, (AtomGroup, AtomSubset)): raise TypeError('atoms must be an AtomGroup or a AtomSubset (Chain, ' 'Segment, etc.) instance') if not isinstance(chain, Chain): raise TypeError('chain must be Chain instance') subset = str(kwargs.get('subset', 'all')).lower() if subset not in _SUBSETS: raise ValueError('{0} is not a valid subset argument' .format(str(subset))) if subset != 'all': chid = chain.getChid() segname = chain.getSegname() chain_subset = chain.select(subset) target_chain = chain_subset.getHierView()[segname, chid] mobile = atoms.select(subset) else: target_chain = chain mobile = atoms if isinstance(mobile, Chain): chains = [mobile] else: chains = list(mobile.getHierView().iterChains()) LOGGER.debug('Evaluating {0}: {1} chains are identified' .format(str(atoms), len(chains))) mappings = [] simple_target = SimpleChain(target_chain, False) LOGGER.debug('Trying to map atoms based on residue numbers and ' 'identities:') for chain in chains: simple_chain = SimpleChain(chain, False) mapping = mapChainOntoChain(simple_chain, simple_target, **kwargs) if mapping is not None: mappings.append(mapping) if len(mappings) > 1: mappings.sort(key=lambda m: m[-2]*m[-1], reverse=True) return mappings
[docs]def mapChainOntoChain(mobile, target, **kwargs): """Map *mobile* chain onto *target* chain. This function returns a mapping that contains 4 items: * Mapped chain as an :class:`.AtomMap` instance, * *chain* as an :class:`.AtomMap` instance, * Percent sequence identitity, * Percent sequence overlap Mappings are returned in decreasing percent sequence identity order. :class:`.AtomMap` that keeps mapped atom indices contains dummy atoms in place of unmapped atoms. :arg mobile: mobile that will be mapped to the *target* chain :type mobile: :class:`.Chain` :arg target: chain to which atoms will be mapped :type target: :class:`.Chain` :keyword seqid: percent sequence identity, default is **90**. Note that This parameter is only effective for sequence alignment :type seqid: float :keyword overlap: percent overlap with *target*, default is **70** :type overlap: float :keyword mapping: what method will be used if the trivial mapping based on residue numbers fails. If ``"ce"`` or ``"cealign"``, then the CE algorithm [IS98]_ will be performed. It can also be a list of prealigned sequences, a :class:`.MSA` instance, or a dict of indices such as that derived from a :class:`.DaliRecord`. If set to **True** then the sequence alignment from :mod:`~Bio.pairwise2` will be used. If set to **False**, only the trivial mapping will be performed. Default is **"auto"** :type mapping: list, str, bool :keyword pwalign: if **True**, then pairwise sequence alignment will be performed. If **False** then a simple mapping will be performed based on residue numbers (as well as insertion codes). This will be overridden by the *mapping* keyword's value. :type pwalign: bool .. [IS98] Shindyalov IN, Bourne PE. Protein structure alignment by incremental combinatorial extension (CE) of the optimal path. *Protein engineering* **1998** 11(9):739-47. """ if isinstance(mobile, Chain): simple_mobile = SimpleChain(mobile, False) elif isinstance(mobile, SimpleChain): simple_mobile = mobile mobile = mobile._chain else: raise TypeError('mobile must be a Chain instance') if len(simple_mobile) == 0: LOGGER.debug('\tCannot process {0}, which does not contain any amino ' 'acid residues.'.format(simple_mobile)) return None if isinstance(target, Chain): simple_target = SimpleChain(target, False) elif isinstance(target, SimpleChain): simple_target = target target = target._chain else: raise TypeError('target must be a Chain instance') seqid = kwargs.get('seqid', 90.) coverage = kwargs.get('overlap', 70.) coverage = kwargs.get('coverage', coverage) pwalign = kwargs.get('pwalign', 'auto') pwalign = kwargs.get('mapping', pwalign) alignment = None if isinstance(pwalign, basestring): pwalign = pystr(pwalign).strip().lower() elif not isinstance(pwalign, bool): alignment = pwalign pwalign = True map_ag = mobile.getAtomGroup() if mobile is not None else None target_ag = target.getAtomGroup() if target is not None else None if map_ag is None and target_ag is None: raise ValueError('At least one of mobile and target should be a Chain object ' 'or a SimpleChain object associated with a Chain object.') mapping = None LOGGER.debug('Trying to map atoms based on residue numbers and ' 'identities:') LOGGER.debug(' Comparing {0} (len={1}) with {2}:' .format(simple_mobile.getTitle(), len(simple_mobile), simple_target.getTitle())) # trivial mapping serves as a first simple trial of alignment the two # sequences based on residue number, therefore the sequence identity # (GOOD_SEQID) criterion is strict. target_list, chain_list, n_match, n_mapped = getTrivialMapping( simple_target, simple_mobile) _seqid, _cover = calcScores(n_match, n_mapped, len(simple_target)) trivial_seqid = GOOD_SEQID if pwalign else seqid trivial_cover = GOOD_COVERAGE if pwalign else coverage if _seqid >= trivial_seqid and _cover >= trivial_cover: LOGGER.debug('\tMapped: {0} residues match with {1:.0f}% ' 'sequence identity and {2:.0f}% overlap.' .format(n_mapped, _seqid, _cover)) mapping = (target_list, chain_list, _seqid, _cover) else: if not pwalign: LOGGER.debug('\tFailed to match chains based on residue numbers ' '(seqid={0:.0f}%, overlap={1:.0f}%).' .format(_seqid, _cover)) if pwalign and mapping is None: SEQ_ALIGNMENT = ('seq', ALIGNMENT_METHOD + ' sequence alignment', seqid, coverage) CE_ALIGNMENT = ('ce', 'CEalign', 0., coverage) if not 'seqid' in kwargs: tar_seqid = 0. else: tar_seqid = seqid PREDEF_ALIGNMENT = ('predef', 'predefined alignment', tar_seqid, coverage) if alignment is None: if pwalign in ['ce', 'cealign']: methods = [CE_ALIGNMENT] elif pwalign == 'auto': methods = [SEQ_ALIGNMENT, CE_ALIGNMENT] else: methods = [SEQ_ALIGNMENT] else: methods = [PREDEF_ALIGNMENT] for method, desc, seqid, coverage in methods: LOGGER.debug('Trying to map atoms based on {0}:' .format(desc)) LOGGER.debug(' Comparing {0} (len={1}) with {2}:' .format(simple_mobile.getTitle(), len(simple_mobile), simple_target.getTitle())) if method == 'ce': result = getCEAlignMapping(simple_target, simple_mobile) elif method == 'seq': result = getAlignedMapping(simple_target, simple_mobile) else: if isinstance(alignment, dict): result = getDictMapping(simple_target, simple_mobile, alignment) else: result = getAlignedMapping(simple_target, simple_mobile, alignment) if result is not None: target_list, chain_list, n_match, n_mapped = result _seqid, _cover = calcScores(n_match, n_mapped, max(len(simple_target), len(simple_mobile))) if _seqid >= seqid and _cover >= coverage: LOGGER.debug('\tMapped: {0} residues match with {1:.0f}%' ' sequence identity and {2:.0f}% overlap.' .format(n_mapped, _seqid, _cover)) mapping = (target_list, chain_list, _seqid, _cover) break else: LOGGER.debug('\tFailed to match chains (seqid={0:.0f}%, ' 'overlap={1:.0f}%).' .format(_seqid, _cover)) if mapping is not None: residues_target, residues_chain, _seqid, _cover = mapping indices_target = [] indices_chain = [] indices_mapping = [] indices_dummies = [] counter = 0 for i in range(len(residues_target)): res_tar = residues_target[i] res_chn = residues_chain[i] for atom_tar in res_tar: indices_target.append(atom_tar.getIndex()) if res_chn is not None: atom_chn = res_chn.getAtom(atom_tar.getName()) if atom_chn is not None: indices_chain.append(atom_chn.getIndex()) indices_mapping.append(counter) else: indices_dummies.append(counter) else: indices_dummies.append(counter) counter += 1 ch_tar = next((r for r in residues_target if r is not None)).getChain() ch_chn = next((r for r in residues_chain if r is not None)).getChain() if isinstance(ch_tar, Chain): title_tar = 'Chain {0} from {1}'.format(ch_tar.getChid(), ch_tar.getAtomGroup().getTitle()) else: title_tar = 'SimpleChain {0}'.format(ch_tar.getTitle()) if isinstance(ch_chn, Chain): title_chn = 'Chain {0} from {1}'.format(ch_chn.getChid(), ch_chn.getAtomGroup().getTitle()) else: title_chn = 'SimpleChain {0}'.format(ch_tar.getTitle()) # note that chain here is from atoms if map_ag is not None: atommap = AM(map_ag, indices_chain, mobile.getACSIndex(), mapping=indices_mapping, dummies=indices_dummies, title=title_chn + ' -> ' + title_tar) else: atommap = None if target_ag is not None: selection = AM(target_ag, indices_target, target.getACSIndex(), title=title_tar + ' -> ' + title_chn, intarrays=True) else: selection = None mapping = (atommap, selection, _seqid, _cover) return mapping
[docs]def userDefined(chain1, chain2, correspondence): id1 = chain1.getTitle() id2 = chain2.getTitle() if not isinstance(correspondence, dict): chmap = {} try: chmap[id1] = correspondence[0] chmap[id2] = correspondence[1] except (IndexError, TypeError): raise TypeError('correspondence should be a dict with keys being titles of atoms and ref, ' 'and values are str indicating chID correspondences') corr1 = correspondence[id1] corr2 = correspondence[id2] if len(corr1) != len(corr2): raise ValueError('%s and %s have different number of chain identifiers ' 'in the correspondence'%(id1, id2)) try: i = corr1.index(chain1.getChid()) chid = corr2[i] except: return False return chain2.getChid() == chid
[docs]def sameChid(chain1, chain2): return chain1.getChid() == chain2.getChid()
[docs]def bestMatch(chain1, chain2): return True
[docs]def mapOntoChains(atoms, ref, match_func=bestMatch, **kwargs): """This function is a generalization and wrapper of :func:`.mapOntoChain` that manages to map chains onto chains (instead of a single chain). :arg atoms: atoms to map onto the reference :type atoms: :class:`.Atomic` :arg ref: reference structure for mapping :type ref: :class:`.Atomic` :arg match_func: function determines which chains from ``ref`` and ``atoms`` are matched. Default is to use the best match. :type match_func: func """ if not isinstance(atoms, (SimpleChain, AtomGroup, AtomSubset)): raise TypeError('atoms must be an AtomGroup or a AtomSubset (Chain, ' 'Segment, etc.) instance') if not isinstance(ref, (SimpleChain, AtomGroup, AtomSubset)): raise TypeError('ref must be an AtomGroup or a AtomSubset (Chain, ' 'Segment, etc.) instance') subset = str(kwargs.get('subset', 'all')).lower() if subset not in _SUBSETS: raise ValueError('{0} is not a valid subset argument' .format(str(subset))) if subset != 'all': if not isinstance(ref, SimpleChain): target = ref.select(subset) else: target = ref if not isinstance(atoms, SimpleChain): mobile = atoms.select(subset) else: mobile = atoms else: target = ref mobile = atoms if isinstance(mobile, (SimpleChain, Chain)): chs_atm = [mobile] else: chs_atm = [chain for chain in mobile.getHierView().iterChains()] if isinstance(target, (SimpleChain, Chain)): chs_ref = [target] else: chs_ref = [chain for chain in target.getHierView().iterChains()] # iterate through chains of both target and mobile mappings = np.empty((len(chs_ref), len(chs_atm)), dtype='O') for i, chain in enumerate(chs_ref): simple_chain = chain if isinstance(chain, SimpleChain) else SimpleChain(chain, False) for j, target_chain in enumerate(chs_atm): if not match_func(chain, target_chain): continue simple_target = target_chain if isinstance(target_chain, SimpleChain) else SimpleChain(target_chain, False) mappings[i, j] = mapChainOntoChain(simple_target, simple_chain, **kwargs) return mappings
[docs]def mapOntoChainByAlignment(atoms, chain, **kwargs): """This function is similar to :func:`.mapOntoChain` but correspondence of chains is found by alignment provided. :arg alignments: A list of predefined alignments. It can be also a dictionary or :class:`MSA` instance where the keys or labels are the title of *atoms* or *chains*. :type alignments: list, dict, :class:`MSA` """ alignments = kwargs.pop('alignments', None) if alignments is None: return mapOntoChain(atoms, chain, **kwargs) else: if isinstance(alignments, (MSA, dict)): refseq = str(alignments[chain.getTitle()]) tarseq = str(alignments[atoms.getTitle()]) alignment = [refseq, tarseq] else: index = kwargs.pop('index', 0) alignment = alignments[index] tar_aligned_seq = alignment[-1] for char in GAPCHARS: tar_aligned_seq = tar_aligned_seq.replace(char, '').upper() hv = atoms.getHierView() for target_chain in hv.iterChains(): tar_seq = target_chain.getSequence().upper() if tar_seq == tar_aligned_seq: mappings = mapOntoChain(target_chain, chain, pwalign=alignment, **kwargs) return mappings LOGGER.warn('The sequence of chain does not match that in alignment (%s).'%atoms.getTitle()) return []
def getTrivialMapping(target, chain): """Returns lists of matching residues (map based on residue number).""" target_list = [] chain_list = [] n_match = 0 n_mapped = 0 chain_dict_get = chain._dict.get append = target_list.append for target_residue in target: append(target_residue.getResidue()) chain_residue = chain_dict_get((target_residue.getResnum(), target_residue.getIcode())) if chain_residue is None: chain_list.append(chain_residue) else: if target_residue.getResname() == chain_residue.getResname(): n_match += 1 chain_list.append(chain_residue.getResidue()) n_mapped += 1 return target_list, chain_list, n_match, n_mapped def getDictMapping(target, chain, map_dict): """Returns lists of matching residues (based on *map_dict*).""" pdbid = chain._chain.getTitle()[:4].lower() chid = chain._chain.getChid().upper() key = pdbid + chid mapping = map_dict.get(key) if mapping is None: LOGGER.warn('map_dict does not have the mapping for {0}'.format(key)) return None tar_indices = mapping[0] chn_indices = mapping[1] chain_res_list = [res for res in chain] amatch = [] bmatch = [] n_match = 0 n_mapped = 0 for i, a in enumerate(target): ares = a.getResidue() amatch.append(ares) if i in tar_indices: try: n = index(tar_indices, i) except IndexError: LOGGER.warn('\nthe number of residues in the map_dict ({0} residues) is inconsistent with {2} ({1} residues)' .format(max(tar_indices)+1, len(chain_res_list), target.getTitle())) return None try: b = chain_res_list[chn_indices[n]] except IndexError: LOGGER.warn('\nthe number of residues in the map_dict ({0} residues) is inconsistent with {2} ({1} residues)' .format(max(chn_indices)+1, len(chain_res_list), chain.getTitle())) return None bres = b.getResidue() bmatch.append(bres) if a.getResname() == b.getResname(): n_match += 1 n_mapped += 1 else: bmatch.append(None) return amatch, bmatch, n_match, n_mapped def getAlignedMapping(target, chain, alignment=None): """Returns lists of matching residues (map based on pairwise alignment or predefined alignment).""" if alignment is None: if ALIGNMENT_METHOD == 'local': alignments = pairwise2.align.localms(target.getSequence(), chain.getSequence(), MATCH_SCORE, MISMATCH_SCORE, GAP_PENALTY, GAP_EXT_PENALTY, one_alignment_only=1) else: alignments = pairwise2.align.globalms(target.getSequence(), chain.getSequence(), MATCH_SCORE, MISMATCH_SCORE, GAP_PENALTY, GAP_EXT_PENALTY, one_alignment_only=1) alignment = alignments[0] this, that = alignment[:2] else: def _findAlignment(sequence, alignment): for seq in alignment: strseq = str(seq).upper() for gap in GAPCHARS: strseq = strseq.replace(gap, '') if sequence.upper() == strseq: return str(seq) return None this = _findAlignment(target.getSequence(), alignment) if this is None: LOGGER.warn('alignment does not contain the target ({0}) sequence' .format(target.getTitle())) return None that = _findAlignment(chain.getSequence(), alignment) if that is None: LOGGER.warn('alignment does not contain the chain ({0}) sequence' .format(chain.getTitle())) return None amatch = [] bmatch = [] aiter = target.__iter__() biter = chain.__iter__() n_match = 0 n_mapped = 0 gap_chars = list(GAPCHARS) gap_chars.append(NONE_A) for i in range(len(this)): a = this[i] b = that[i] if a not in gap_chars: ares = next(aiter) amatch.append(ares.getResidue()) if b not in gap_chars: bres = next(biter) bmatch.append(bres.getResidue()) if a == b: n_match += 1 n_mapped += 1 else: bmatch.append(None) elif b not in gap_chars: bres = next(biter) return amatch, bmatch, n_match, n_mapped def getCEAlignMapping(target, chain): try: from .ccealign import ccealign except ImportError: LOGGER.warn('Could not import ccealign C/C++ extension.' 'It may not be installed properly.') return None tar_coords = target.getCoords().tolist() mob_coords = chain.getCoords().tolist() if len(tar_coords) < 8: LOGGER.warn('target ({1}) is too small to be aligned ' 'by CE algorithm (at least {0} residues)' .format(8, repr(target))) return None if len(mob_coords) < 8: LOGGER.warn('chain ({1}) is too small to be aligned ' 'by CE algorithm (at least {0} residues)' .format(8, repr(chain))) return None try: aln_info = ccealign((tar_coords, mob_coords)) except: LOGGER.warn('cealign could not align {0} and {1}'.format(repr(target), repr(chain))) return None paths, bestIdx, nres, rmsd = aln_info[:4] path = paths[bestIdx] tar_indices = [] chn_indices = [] for i, j in path: # if i not in tar_dummies: # if j not in mob_dummies: tar_indices.append(i) chn_indices.append(j) chain_res_list = [res for res in chain] amatch = [] bmatch = [] n_match = 0 n_mapped = 0 for i, a in enumerate(target): ares = a.getResidue() amatch.append(ares) if i in tar_indices: n = tar_indices.index(i) try: b = chain_res_list[chn_indices[n]] except IndexError: bmatch.append(None) continue bres = b.getResidue() bmatch.append(bres) if a.getResname() == b.getResname(): n_match += 1 n_mapped += 1 else: bmatch.append(None) return amatch, bmatch, n_match, n_mapped
[docs]def combineAtomMaps(mappings, target=None, **kwargs): """Builds a grand :class:`.AtomMap` instance based on *mappings* obtained from :func:`.mapOntoChains`. The function also accepts the output :func:`.mapOntoChain` but will trivially return all the :class:`.AtomMap` in *mappings*. *mappings* should be a list or an array of matching chains in a tuple that contain 4 items: * matching chain from *atoms1* as a :class:`.AtomMap` instance, * matching chain from *atoms2* as a :class:`.AtomMap` instance, * percent sequence identity of the match, * percent sequence overlap of the match. :arg mappings: a list or an array of matching chains in a tuple, or just the tuple :type mappings: tuple, list, :class:`~numpy.ndarray` :arg target: reference structure for superposition and checking RMSD :type target: :class:`.Atomic` :arg drmsd: amount deviation of the RMSD with respect to the top ranking atommap. This is to allow multiple matches when *mobile* has more chains than *target*. Default is 3.0 :type drmsd: float :arg rmsd_reject: upper RMSD cutoff that rejects an atommap. Default is 15.0 :type rmsd_reject: float :arg least: the least number of atommaps requested. If **None**, it will be automatically determined by the number of chains present in *target* and *mobile*. Default is **None** :type least: int :arg debug: a container (dict) that saves the following information for debugging purposes: * coverage: original coverage matrix, rows and columns correspond to the reference and the mobile, respectively, * solutions: matched index groups that obtained by modeling the coverage matrix as a linear assignment problem, * rmsd: a list of ranked RMSDs of identified atommaps. :type debug: dict """ BIG_NUMBER = 1e6 drmsd = kwargs.pop('drmsd', 3.) debug = kwargs.pop('debug', {}) reject_rmsd = kwargs.pop('rmsd_reject', 15.) least_n_atommaps = kwargs.pop('least', None) def _build(mappings, nodes=[]): m, n = mappings.shape cov_matrix = np.zeros((m, n), dtype=float) cost_matrix = np.zeros((m, n), dtype=float) for i in range(m): for j in range(n): mapping = mappings[i, j] if mapping is None: cov_matrix[i, j] = 0 cost_matrix[i, j] = BIG_NUMBER else: cov_matrix[i, j] = mapping[3] / 100. cost_matrix[i, j] = 1 - cov_matrix[i, j] # uses LAP to find the optimal mappings of chains atommaps = [] (R, C), crrpds = multilap(cost_matrix, nodes, BIG_NUMBER) for row_ind, col_ind in crrpds: if len(row_ind) != m: continue atommap = None title = '' for r, c in zip(row_ind, col_ind): # if one of the chains failed to match then discard the entire atommap if mappings[r, c] is None: atommap = None break atommap_ = mappings[r, c][0] title_ = '(' + atommap_.getTitle() + ')' if atommap is None: atommap = atommap_ title = title_ else: atommap += atommap_ title = title_ + ' + ' + title if atommap is not None: atommap.setTitle(title) atommaps.append(atommap) return atommaps, cov_matrix, (R, C) def _optimize(atommaps): # extract nonoverlaping mappings if len(atommaps): atommaps, rmsds = rankAtomMaps(atommaps, target) if rmsds is not None: debug['rmsd'] = list(rmsds) # if rmsd_cutoff is not None: # for i in reversed(range(len(atommaps))): # if rmsds[i] > rmsd_cutoff: # atommaps.pop(i) # rmsds.pop(i) # pre-store chain IDs of atommaps atommap_segchids = [] for atommap in atommaps: nodummies = atommap.select('not dummy') chids = nodummies.getChids() segids = nodummies.getSegnames() segchids = [] for segid, chid in zip(segids, chids): if (segid, chid) not in segchids: segchids.append((segid, chid)) atommap_segchids.append(segchids) atommaps_ = [] rmsd_standard = rmsds[0] while len(atommaps): atommap = atommaps.pop(0) rmsd = rmsds.pop(0) segchids = atommap_segchids.pop(0) if reject_rmsd is not None: if rmsd > reject_rmsd: break if rmsd > rmsd_standard + drmsd: break atommaps_.append(atommap) # remove atommaps that share chains with the popped atommap for i in reversed(range(len(atommap_segchids))): amsegchids = atommap_segchids[i] for segchid in amsegchids: if segchid in segchids: atommaps.pop(i) atommap_segchids.pop(i) break atommaps = atommaps_ else: debug['rmsd'] = None return atommaps # checkers if not isListLike(mappings): raise TypeError('mappings should be list-like') if len(mappings) == 0: raise ValueError('mappings cannot be empty') if isinstance(mappings, tuple): am, am_r, s, c = mappings return am mappings = np.atleast_2d(mappings) if mappings.ndim != 2: raise ValueError('mappings can only be either an 1-D or 2-D array') # build atommaps LOGGER.debug('Finding the atommaps based on their coverages...') nodes = [] atommaps, cov_matrix, (R, C) = _build(mappings, nodes) if least_n_atommaps is None: n_mapped = 0 for r, c in zip(R, C): if cov_matrix[r, c] > 0: n_mapped += 1 least_n_atommaps = int(np.floor(float(n_mapped) / mappings.shape[0])) LOGGER.debug('Identified that there exists %d atommap(s) potentially.'%least_n_atommaps) debug['coverage'] = cov_matrix debug['solution'] = [1] # optimize atommaps based on superposition if target is given if target is not None and len(nodes): atommaps = _optimize(atommaps) i = 2 if len(atommaps) < least_n_atommaps: LOGGER.debug('At least %d atommaps requested. ' 'Finding alternative solutions.'%least_n_atommaps) LOGGER.progress('Solving for %d-best solution...', None, label='_atommap_lap') while len(atommaps) < least_n_atommaps: LOGGER.update(i, label='_atommap_lap') try: more_atommaps, _, _ = _build(mappings, nodes) except SolutionDepletionException: break more_atommaps = _optimize(more_atommaps) for j in reversed(range(len(more_atommaps))): if more_atommaps[j] in atommaps: more_atommaps.pop(j) if len(more_atommaps): debug['solution'].append(i) atommaps.extend(more_atommaps) i += 1 LOGGER.finish() LOGGER.report('%d atommaps were found in %%.2fs. %d requested'%(len(atommaps), least_n_atommaps), label='_atommap_lap') if len(atommaps) == 0: if np.count_nonzero(cov_matrix) == 0: LOGGER.warn('no atommaps were available. Consider adjusting accepting criteria') else: LOGGER.warn('no atommaps were found. Consider inceasing rmsd_reject or drmsd') return atommaps
def rankAtomMaps(atommaps, target): """Ranks :class:`.AtomMap` instances from *atommaps* based on its RMSD with *target*. """ rmsds = [] coords0 = target.getCoords() if coords0 is None: rmsds = None else: for atommap in atommaps: weights = atommap.getFlags('mapped') coords = atommap.getCoords() rcoords, t = superpose(coords, coords0, weights) rmsd = calcRMSD(rcoords, coords0, weights) rmsds.append(rmsd) I = np.argsort(rmsds) atommaps = [atommaps[i] for i in I] rmsds = [rmsds[i] for i in I] return atommaps, rmsds
[docs]def alignChains(atoms, target, match_func=bestMatch, **kwargs): """Aligns chains of *atoms* to those of *target* using :func:`.mapOntoChains` and :func:`.combineAtomMaps`. Please check out those two functions for details about the parameters. """ mappings = mapOntoChains(atoms, target, match_func, **kwargs) m, n = mappings.shape if m > n: LOGGER.warn('%s has fewer chains than %s'%(atoms.getTitle(), target.getTitle())) return [] atommaps = combineAtomMaps(mappings, target, **kwargs) return atommaps
if __name__ == '__main__': from prody import * p = parsePDB('1p38') r = parsePDB('1r39') chtwo, chone = mapOntoChain(r, p['A'])[0][:2]