# -*- coding: utf-8 -*-
"""This module defines functions for comparing and mapping polypeptide chains.
"""
from numbers import Integral
import numpy as np
from numpy import arange
from prody.atomic import AtomMap as AM
from prody.atomic import AtomGroup, Chain, AtomSubset, Selection
from prody.atomic import AAMAP
from prody.atomic import flags
from prody.measure import calcTransformation, printRMSD, calcDistance, calcRMSD, superpose
from prody import LOGGER, SELECT, PY2K, PY3K
from prody.sequence import MSA
from prody.utilities import cmp, pystr, isListLike, multilap, SolutionDepletionException, index
from prody.utilities import MATCH_SCORE, MISMATCH_SCORE, GAP_PENALTY, GAP_EXT_PENALTY, ALIGNMENT_METHOD
from Bio import pairwise2
if PY2K:
range = xrange
if PY3K:
basestring = str
__all__ = ['matchChains', 'matchAlign', 'mapChainOntoChain', 'mapOntoChain', 'alignChains',
'mapOntoChains', 'bestMatch', 'sameChid', 'userDefined',
'mapOntoChainByAlignment', 'getMatchScore', 'setMatchScore',
'getMismatchScore', 'setMismatchScore', 'getGapPenalty',
'setGapPenalty', 'getGapExtPenalty', 'setGapExtPenalty',
'getGoodSeqId', 'setGoodSeqId', 'getGoodCoverage', 'combineAtomMaps',
'setGoodCoverage', 'getAlignmentMethod', 'setAlignmentMethod']
GOOD_SEQID = 90.
GOOD_COVERAGE = 90.
GAPCHARS = ['-', '.']
NONE_A = '_'
_a2aaa = {
'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS', 'Q': 'GLN',
'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE', 'L': 'LEU', 'K': 'LYS',
'M': 'MET', 'F': 'PHE', 'P': 'PRO', 'S': 'SER', 'T': 'THR', 'W': 'TRP',
'Y': 'TYR', 'V': 'VAL'
}
def calcScores(n_match, n_mapped, n_total):
if n_mapped > 0:
seqid = n_match * 100 / n_mapped
cover = n_mapped * 100 / n_total
else:
seqid = cover = 0
return seqid, cover
[docs]def getGoodSeqId():
"""Returns good sequence identity."""
return GOOD_SEQID
[docs]def setGoodSeqId(seqid):
"""Set good sequence identity."""
if isinstance(seqid, (float, int)) and seqid >= 0:
global GOOD_SEQID
GOOD_SEQID = seqid
else:
raise TypeError('seqid must be a positive number or zero')
[docs]def getGoodCoverage():
"""Returns good sequence coverage."""
return GOOD_COVERAGE
[docs]def setGoodCoverage(coverage):
"""Set good sequence coverage."""
if isinstance(coverage, (float, int)) and coverage >= 0:
global GOOD_COVERAGE
GOOD_COVERAGE = coverage
else:
raise TypeError('coverage must be a positive number or zero')
[docs]def getMatchScore():
"""Returns match score used to align sequences."""
return MATCH_SCORE
[docs]def setMatchScore(match_score):
"""Set match score used to align sequences."""
if isinstance(match_score, (float, int)) and match_score >= 0:
global MATCH_SCORE
MATCH_SCORE = match_score
else:
raise TypeError('match_score must be a positive number or zero')
[docs]def getMismatchScore():
"""Returns mismatch score used to align sequences."""
return MISMATCH_SCORE
[docs]def setMismatchScore(mismatch_score):
"""Set mismatch score used to align sequences."""
if isinstance(mismatch_score, (float, int)) and mismatch_score >= 0:
global MISMATCH_SCORE
MISMATCH_SCORE = mismatch_score
else:
raise TypeError('mismatch_score must be a positive number or zero')
[docs]def getGapPenalty():
"""Returns gap opening penalty used for pairwise alignment."""
return GAP_PENALTY
[docs]def setGapPenalty(gap_penalty):
"""Set gap opening penalty used for pairwise alignment."""
if isinstance(gap_penalty, (float, int)) and gap_penalty <= 0:
global GAP_PENALTY
GAP_PENALTY = gap_penalty
else:
raise TypeError('gap_penalty must be a negative number')
[docs]def getGapExtPenalty():
"""Returns gap extension penalty used for pairwise alignment."""
return GAP_EXT_PENALTY
[docs]def setGapExtPenalty(gap_ext_penalty):
"""Set gap extension penalty used for pairwise alignment."""
if isinstance(gap_ext_penalty, (float, int)) and gap_ext_penalty <= 0:
global GAP_EXT_PENALTY
GAP_EXT_PENALTY = gap_ext_penalty
else:
raise TypeError('gap_ext_penalty must be a negative number or zero')
[docs]def getAlignmentMethod():
"""Returns pairwise alignment method."""
return ALIGNMENT_METHOD
[docs]def setAlignmentMethod(method):
"""Set pairwise alignment method (global or local)."""
if method in ('local', 'global'):
global ALIGNMENT_METHOD
ALIGNMENT_METHOD = method
else:
raise ValueError('method must be "local" or "global"')
class SimpleResidue(object):
__slots__ = ['_chain', '_index', '_res', '_name', '_num', '_inc']
def __init__(self, chain, index, number, name, insertioncode='', residue=None):
self._num = number
self._name = name
self._inc = insertioncode
self._res = residue
self._chain = chain
self._index = index
def __repr__(self):
return '<SimpleResidue: {0}{1}>'.format(self._name, self._num)
def __iter__(self):
res = self.getResidue()
if res is self:
# iterate the residue itself as atoms in it, assuming the
# residue contains a single alpha carbon
for atom in [self]:
yield atom
else:
for atom in res:
yield atom
def getResidue(self):
if self._res is not None:
return self._res
else:
return self
def getChain(self):
return self._chain
def getResnum(self):
return self._num
def getIndex(self):
return self._index
def getIcode(self):
return self._inc
def getResname(self):
return self._name
def getCoords(self):
if self._res:
return self._res._getCoords()
return None
def getName(self):
"A dummy method that returns the atom name of alpha carbon of this residue."
return 'CA'
class SimpleChain(object):
"""An internal class used to compare two polypeptide chains.
SimpleChain instances can be indexed using residue numbers. If a residue
with given number is not found in the chain, **None** is returned."""
__slots__ = ['_list', '_seq', '_title', '_dict', '_gaps', '_chain']
def __init__(self, chain=None, allow_gaps=False):
"""Initialize SimpleChain with a chain id and a sequence (available).
:arg chain: chain instance or single-letter amino acid sequence
:type chain: str, :class:`.Chain`
:arg allow_gaps: allow gaps in the sequence of simple chain instance,
default is False
:type allow_gaps: bool"""
self._dict = dict()
self._list = list()
self._seq = ''
self._title = ''
self._gaps = allow_gaps
self._chain = None
if isinstance(chain, Chain):
self.buildFromChain(chain)
elif isinstance(chain, str):
self.buildFromSequence(chain)
def __len__(self):
return len(self._list)
def __iter__(self):
return self._list.__iter__()
def __repr__(self):
return '<SimpleChain: {0} with {1} residues>'.format(
self._title, len(self._list))
def __str__(self):
return '{0} with {1} residues'.format(self._title, len(self._list))
def __getitem__(self, index):
if isinstance(index, Integral):
return self._dict.get((index, ''))
return self._dict.get(index)
def getSequence(self):
return self._seq
def getTitle(self):
return self._title
def getCoords(self, calpha=False):
if self._chain is None:
return None
if calpha:
return self._chain.ca._getCoords()
else:
return self._chain._getCoords()
def buildFromSequence(self, sequence, resnums=None):
"""Build from amino acid sequence.
"-" or "." are acceptable amino acid types and are treated as gaps.
:arg sequence: sequence of single letter amino acid codes
:type sequence: str
:arg resnums: residue numbers corresponding the sequence
:type resnums: a list of numbers, or a string representation of numbers
Examples of *resnums* are:
* 1:200 250:300"""
assert isinstance(sequence, str), 'sequence must be string'
assert sequence.isalpha(), 'sequence must be all alpha'
if resnums is None:
resnums = arange(1, len(sequence)+1)
resid = 0
gaps = self._gaps
for i, aa in enumerate(sequence):
resid = resnums[i]
if gaps and aa in GAPCHARS:
aa = NONE_A
simpres = None
else:
simpres = SimpleResidue(self, i, resid, aa)
self._list.append(simpres)
self._dict[resid] = simpres
self._seq += aa
self._title = 'built from sequence %s...'%sequence[:5]
def buildFromChain(self, chain):
"""Build from a :class:`.Chain`."""
assert isinstance(chain, Chain), 'chain must be a Chain instance'
self._chain = chain
gaps = self._gaps
residues = list(chain.iterResidues())
temp = residues[0].getResnum()-1
protein_resnames = flags.AMINOACIDS
for i, res in enumerate(chain):
if not res.getResname() in protein_resnames:
continue
resid = res.getResnum()
incod = res.getIcode()
aa = AAMAP.get(res.getResname(), 'X')
simpres = SimpleResidue(self, i, resid, aa, incod, res)
if gaps:
diff = resid - temp - 1
if diff > 0:
self._seq += NONE_A * diff
temp = resid
self._seq += aa
self._list.append(simpres)
self._dict[(resid, incod)] = simpres
self._title = 'Chain {0} from {1}'.format(chain.getChid(),
chain.getAtomGroup()
.getTitle())
def countUnpairedBreaks(chone, chtwo, resnum=True):
"""This function is under development.
Return number of unpaired breaks in aligned chains *chone* and *chtwo*,
which are expected to be :class:`.AtomMap` instances obtained from one of
:func:`.matchChains` or :func:`.mapOntoChain` functions.
Pairwise global or local alignment of chains with missing residues may be
problematic, as in the following illustrations for example. This function
helps identifying some of these problems.
Breaks in a chain are determined using Cα-Cα distances between consecutive
residues, i.e. Cα to Cα distance larger than 4 Å corresponds to a break or
gap of 1 or more residues. This function counts such breaks in *chone* or
*chtwo* that is not paired with a break in the other.
The following example illustrates a problem that may occur when aligning
two structures of the same protein chain where one of the chains have
a few missing residues::
Correct alignment: A.L.R.S - - V.W.Y.K.L -> no unpaired breaks
Target chain : A.L.R.S.V.T.V.W.Y.K.L
Wrong alignment : A.L.R.S_V - - W.Y.K.L
|
--> 1 unpaired break, counted
Key:
- (dash) is a gap in the alignment
. (dot) is a peptide bond
_ (underscore) is a break
In this case, one unpaired break is an indicator of the problem in the
alignment.
The following example illustrates a case where an unpaired break is due to
an insertion in the homologous chain::
Target chain : 1A.2L.3R.4S.5V.6T.7V
Homologous chain : 1A.2L.3K.4S.6V_9S.10L
|
--> 1 unpaired break, not counted
In this case, residue numbers are used to determine whether the unpaired
break is due to an insertion/deletion in the chain or misalignment."""
try:
if chone.numAtoms() != chtwo.numAtoms():
raise ValueError('number of atoms do not match')
except AttributeError:
raise TypeError('one and two must be Atomic instances')
mapped = chone.getFlags('mapped') * chtwo.getFlags('mapped')
if mapped.sum() == 0:
raise ValueError('chains do not have common mapped atoms')
chone = chone[mapped]
chtwo = chtwo[mapped]
rnone = chone.getResnums()
rntwo = chtwo.getResnums()
brone = calcDistance(chone[1:], chone[:-1]) > 4.
brtwo = calcDistance(chtwo[1:], chtwo[:-1]) > 4.
brone[(rnone[1:] - rnone[:-1]) > 1] = False
brtwo[(rntwo[1:] - rntwo[:-1]) > 1] = False
brone = set(brone.nonzero()[0])
brtwo = set(brtwo.nonzero()[0])
return len(brone.union(brtwo)) - len(brone.intersection(brtwo))
_SUBSETS = {
'ca': 'ca',
'calpha': 'ca',
'bb': 'bb',
'backbone': 'bb',
'heavy': 'noh',
'noh': 'noh',
'all': 'all'
}
[docs]def matchAlign(mobile, target, **kwargs):
"""Superpose *mobile* onto *target* based on best matching pair of chains.
This function uses :func:`matchChains` for matching chains and returns a
tuple that contains the following items:
* *mobile* after it is superposed,
* matching chain from *mobile* as a :class:`.AtomMap` instance,
* matching chain from *target* as a :class:`.AtomMap` instance,
* percent sequence identity of the match,
* percent sequence overlap of the match.
:arg mobile: atoms that contain a protein chain
:type mobile: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection`
:arg target: atoms that contain a protein chain
:type target: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection`
:arg tarsel: *target* atoms that will be used for alignment,
default is ``'calpha'``
:type tarsel: str
:arg allcsets: align all coordinate sets of *mobile*, default is **True**
:type allcsets: bool
:keyword seqid: percent sequence identity, default is 90
:type seqid: float
:keyword overlap: percent overlap, default is 90
:type overlap: float
:keyword pwalign: perform pairwise sequence alignment
:type pwalign: bool"""
selstr = kwargs.pop('tarsel', 'calpha')
if selstr == 'calpha':
selstr = None
subset = 'calpha'
if selstr:
if selstr in _SUBSETS:
subset = selstr
else:
subset = 'all'
sel = target.select(selstr)
if sel is None:
raise ValueError('selection {0} did not match any atoms'
.format(repr(selstr)))
chid = set(sel.getChids())
if len(chid) == 1:
chid = chid.pop()
target = target.select('chain ' + chid)
match = matchChains(mobile, target, subset=subset, **kwargs)
if not match:
return
match = match[0]
mob = match[0]
tar = match[1]
if selstr:
which = SELECT.getIndices(tar, selstr)
n_atoms = len(which)
else:
which = slice(None)
n_atoms = len(tar)
selstr = 'calpha'
if kwargs.get('allcets', True):
csets = range(mobile.numCoordsets()) # PY3K: OK
else:
csets = [mobile.getACSIndex()]
LOGGER.info('Alignment is based on {0} atoms matching {1}.'
.format(n_atoms, repr(selstr)))
printRMSD(tar._getCoords()[which], mob._getCoordsets()[:, which],
msg='Before alignment ')
for acsi in csets:
mob.setACSIndex(acsi)
mobile.setACSIndex(acsi)
calcTransformation(mob._getCoords()[which],
tar._getCoords()[which]).apply(mobile)
printRMSD(tar._getCoords()[which], mob._getCoordsets()[:, which],
msg='After alignment ')
return (mobile,) + match
[docs]def matchChains(atoms1, atoms2, **kwargs):
"""Returns pairs of chains matched based on sequence similarity. Makes an
all-to-all comparison of chains in *atoms1* and *atoms2*. Chains are
obtained from hierarchical views (:class:`.HierView`) of atom groups.
This function returns a list of matching chains in a tuple that contain
4 items:
* matching chain from *atoms1* as a :class:`.AtomMap`
instance,
* matching chain from *atoms2* as a :class:`.AtomMap`
instance,
* percent sequence identity of the match,
* percent sequence overlap of the match.
List of matches are sorted in decreasing percent sequence identity order.
:class:`.AtomMap` instances can be used to calculate RMSD values and
superpose atom groups.
:arg atoms1: atoms that contain a chain
:type atoms1: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection`
:arg atoms2: atoms that contain a chain
:type atoms2: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection`
:keyword subset: one of the following well-defined subsets of atoms:
``"calpha"`` (or ``"ca"``), ``"backbone"`` (or ``"bb"``),
``"heavy"`` (or ``"noh"``), or ``"all"``, default is ``"calpha"``
:type subset: string
:keyword seqid: percent sequence identity, default is 90
:type seqid: float
:keyword overlap: percent overlap, default is 90
:type overlap: float
:keyword pwalign: perform pairwise sequence alignment
:type pwalign: bool
If *subset* is set to *calpha* or *backbone*, only alpha carbon
atoms or backbone atoms will be paired. If set to *all*, all atoms
common to matched residues will be returned.
This function tries to match chains based on residue numbers and names.
All chains in *atoms1* is compared to all chains in *atoms2*. This works
well for different structures of the same protein. When it fails,
:mod:`Bio.pairwise2` is used for pairwise sequence alignment, and matching
is performed based on the sequence alignment. User can control, whether
sequence alignment is performed or not with *pwalign* keyword. If
``pwalign=True`` is passed, pairwise alignment is enforced."""
if not isinstance(atoms1, (AtomGroup, Chain, Selection)):
raise TypeError('atoms1 must be an AtomGroup, Chain, or Selection')
if not isinstance(atoms2, (AtomGroup, Chain, Selection)):
raise TypeError('atoms2 must be an AtomGroup, Chain, or Selection')
subset = kwargs.get('subset', 'calpha')
if subset not in _SUBSETS:
raise ValueError('{0} is not a valid subset argument'
.format(str(subset)))
seqid = kwargs.get('seqid', 90.)
assert isinstance(seqid, (float, int)), 'seqid must be float'
assert 0 < seqid <= 100, 'seqid must be in the range from 0 to 100'
coverage = kwargs.get('overlap')
if coverage is None:
coverage = kwargs.get('coverage', 90.)
assert isinstance(coverage, (float, int)), 'overlap must be float'
assert 0 < coverage <= 100, 'overlap must be in the range from 0 to 100'
pwalign = kwargs.get('pwalign', None)
if isinstance(atoms1, Chain):
chains1 = [atoms1]
atoms1 = atoms1.getAtomGroup()
else:
chains1 = list(atoms1.getHierView().iterChains())
if not isinstance(atoms1, AtomGroup):
atoms1 = atoms1.getAtomGroup()
chains = list()
for ch in chains1:
simpch = SimpleChain(ch)
if len(simpch) > 0:
chains.append(simpch)
chains1 = chains
if not isinstance(atoms1, Chain):
LOGGER.debug('Checking {0}: {1} chains are identified'
.format(str(atoms1), len(chains1)))
if isinstance(atoms2, Chain):
chains2 = [atoms2]
atoms2 = atoms2.getAtomGroup()
else:
chains2 = list(atoms2.getHierView().iterChains())
if not isinstance(atoms2, AtomGroup):
atoms2 = atoms2.getAtomGroup()
chains = list()
for ch in chains2:
simpch = SimpleChain(ch)
if len(simpch) > 0:
chains.append(simpch)
chains2 = chains
if not isinstance(atoms2, Chain):
LOGGER.debug('Checking {0}: {1} chains are identified'
.format(str(atoms2), len(chains2)))
matches = []
unmatched = []
LOGGER.debug('Trying to match chains based on residue numbers and names:')
for simpch1 in chains1:
for simpch2 in chains2:
LOGGER.debug(' Comparing {0} (len={1}) and {2} (len={3}):'
.format(simpch1.getTitle(), len(simpch1),
simpch2.getTitle(), len(simpch2)))
match1, match2, nmatches = getTrivialMatch(simpch1, simpch2)
_seqid = nmatches * 100 / min(len(simpch1), len(simpch2))
_cover = len(match2) * 100 / max(len(simpch1), len(simpch2))
if _seqid >= seqid and _cover >= coverage:
LOGGER.debug('\tMatch: {0} residues match with {1:.0f}% '
'sequence identity and {2:.0f}% overlap.'
.format(len(match1), _seqid, _cover))
matches.append((match1, match2, _seqid, _cover, simpch1, simpch2))
else:
LOGGER.debug('\tFailed to match chains (seqid={0:.0f}%, '
'overlap={1:.0f}%).'.format(_seqid, _cover))
unmatched.append((simpch1, simpch2))
if pwalign or (not matches and (pwalign is None or pwalign)):
if pairwise2:
LOGGER.debug('Trying to match chains based on {0} sequence '
'alignment:'.format(ALIGNMENT_METHOD))
for simpch1, simpch2 in unmatched:
LOGGER.debug(' Comparing {0} (len={1}) and {2} '
'(len={3}):'
.format(simpch1.getTitle(), len(simpch1),
simpch2.getTitle(), len(simpch2)))
match1, match2, nmatches = getAlignedMatch(simpch1, simpch2)
_seqid = nmatches * 100 / min(len(simpch1), len(simpch2))
_cover = len(match2) * 100 / max(len(simpch1), len(simpch2))
if _seqid >= seqid and _cover >= coverage:
LOGGER.debug('\tMatch: {0} residues match with {1:.0f}% '
'sequence identity and {2:.0f}% overlap.'
.format(len(match1), _seqid, _cover))
matches.append((match1, match2, _seqid, _cover,
simpch1, simpch2))
else:
LOGGER.debug('\tFailed to match chains (seqid={0:.0f}%, '
'overlap={1:.0f}%).'
.format(_seqid, _cover))
else:
LOGGER.warning('Pairwise alignment could not be performed.')
if not matches:
return None
subset = _SUBSETS[subset]
for mi, result in enumerate(matches):
match1, match2, _seqid, _cover, simpch1, simpch2 = result
indices1 = []
indices2 = []
for i in range(len(match1)):
ares = match1[i]
bres = match2[i]
if subset == 'ca':
try:
aid = ares.getNames().tolist().index('CA')
except ValueError:
aid = None
try:
bid = bres.getNames().tolist().index('CA')
if aid is not None:
indices1.append(ares._indices[aid])
indices2.append(bres._indices[bid])
except ValueError:
pass
elif subset == 'bb':
for bban in ('N', 'CA', 'C', 'O'):
try:
aid = ares.getNames().tolist().index(bban)
except ValueError:
continue
try:
bid = bres.getNames().tolist().index(bban)
except ValueError:
continue
else:
indices1.append(ares._indices[aid])
indices2.append(bres._indices[bid])
elif subset == 'noh':
for han, aid, noh in zip(ares.getNames(), ares._indices,
ares.getFlags('noh')):
if not noh:
continue
try:
bid = bres.getNames().tolist().index(han)
except ValueError:
continue
else:
indices1.append(aid)
indices2.append(bres._indices[bid])
elif subset is None or subset == 'all':
aans = ares.getNames()
bans = bres.getNames().tolist()
aids = ares.getIndices()
#bids = bres.getIndices()
for j in range(len(aans)):
try:
bid = bres._indices[bans.index(aans[j])]
indices1.append(aids[j])
indices2.append(bid)
except ValueError:
pass
indices1 = np.array(indices1, int)
indices2 = np.array(indices2, int)
match1 = AM(atoms1, indices1, atoms1.getACSIndex(),
title=simpch1.getTitle() + ' -> ' + simpch2.getTitle(),
intarrays=True)
match2 = AM(atoms2, indices2, atoms2.getACSIndex(),
title=simpch2.getTitle() + ' -> ' + simpch1.getTitle(),
intarrays=True)
matches[mi] = (match1, match2, _seqid, _cover)
if len(matches) > 1:
matches.sort(key=lambda m: m[-2:], reverse=True)
return matches
def getTrivialMatch(ach, bch):
"""Returns lists of matching residues (match based on residue number).
"""
#if not isinstance(ach, SimpleChain):
# raise TypeError('ach must be a SimpleChain instance')
#if not isinstance(bch, SimpleChain):
# raise TypeError('bch must be a SimpleChain instance')
amatch = []
bmatch = []
match = 0.0
for ares in ach:
bres = bch[(ares.getResnum(), ares.getIcode())]
if bres is not None:
if ares.getResname() == bres.getResname():
match += 1
amatch.append(ares.getResidue())
bmatch.append(bres.getResidue())
return amatch, bmatch, match
def getAlignedMatch(ach, bch):
"""Returns list of matching residues (match is based on sequence alignment).
"""
if ALIGNMENT_METHOD == 'local':
alignment = pairwise2.align.localms(ach.getSequence(),
bch.getSequence(),
MATCH_SCORE, MISMATCH_SCORE,
GAP_PENALTY, GAP_EXT_PENALTY,
one_alignment_only=1)
else:
alignment = pairwise2.align.globalms(ach.getSequence(),
bch.getSequence(),
MATCH_SCORE, MISMATCH_SCORE,
GAP_PENALTY, GAP_EXT_PENALTY,
one_alignment_only=1)
this = alignment[0][0]
that = alignment[0][1]
amatch = []
bmatch = []
aiter = ach.__iter__()
biter = bch.__iter__()
match = 0.0
for i in range(len(this)):
a = this[i]
b = that[i]
if a not in GAPCHARS:
ares = next(aiter)
if b not in GAPCHARS:
bres = next(biter)
if a not in GAPCHARS:
amatch.append(ares.getResidue())
bmatch.append(bres.getResidue())
if a == b:
match += 1
return amatch, bmatch, match
[docs]def mapOntoChain(atoms, chain, **kwargs):
"""Map *atoms* onto *chain*. This function is a wrapper of
:func:`.mapChainOntoChain` that manages to map chains onto target *chain*.
The function returns a list of mappings. Each mapping is a tuple that
contains 4 items:
* Mapped chain as an :class:`.AtomMap` instance,
* *chain* as an :class:`.AtomMap` instance,
* Percent sequence identitity,
* Percent sequence overlap
Mappings are returned in decreasing percent sequence identity order.
:class:`.AtomMap` that keeps mapped atom indices contains dummy atoms
in place of unmapped atoms.
:arg atoms: atoms that will be mapped to the target *chain*
:type atoms: :class:`.Chain`, :class:`.AtomGroup`, :class:`.Selection`
:arg chain: chain to which atoms will be mapped
:type chain: :class:`.Chain`
:keyword subset: one of the following well-defined subsets of atoms:
``"calpha"`` (or ``"ca"``), ``"backbone"`` (or ``"bb"``),
``"heavy"`` (or ``"noh"``), or ``"all"``, default is ``"calpha"``
:type subset: str
See :func:`.mapChainOntoChain` for other keyword arguments.
This function tries to map *atoms* to *chain* based on residue
numbers and types. Each individual chain in *atoms* is compared to
target *chain*.
.. [IS98] Shindyalov IN, Bourne PE. Protein structure alignment by
incremental combinatorial extension (CE) of the optimal path.
*Protein engineering* **1998** 11(9):739-47.
"""
if not isinstance(atoms, (AtomGroup, AtomSubset)):
raise TypeError('atoms must be an AtomGroup or a AtomSubset (Chain, '
'Segment, etc.) instance')
if not isinstance(chain, Chain):
raise TypeError('chain must be Chain instance')
subset = str(kwargs.get('subset', 'all')).lower()
if subset not in _SUBSETS:
raise ValueError('{0} is not a valid subset argument'
.format(str(subset)))
if subset != 'all':
chid = chain.getChid()
segname = chain.getSegname()
chain_subset = chain.select(subset)
target_chain = chain_subset.getHierView()[segname, chid]
mobile = atoms.select(subset)
else:
target_chain = chain
mobile = atoms
if isinstance(mobile, Chain):
chains = [mobile]
else:
chains = list(mobile.getHierView().iterChains())
LOGGER.debug('Evaluating {0}: {1} chains are identified'
.format(str(atoms), len(chains)))
mappings = []
simple_target = SimpleChain(target_chain, False)
LOGGER.debug('Trying to map atoms based on residue numbers and '
'identities:')
for chain in chains:
simple_chain = SimpleChain(chain, False)
mapping = mapChainOntoChain(simple_chain, simple_target, **kwargs)
if mapping is not None:
mappings.append(mapping)
if len(mappings) > 1:
mappings.sort(key=lambda m: m[-2]*m[-1], reverse=True)
return mappings
[docs]def mapChainOntoChain(mobile, target, **kwargs):
"""Map *mobile* chain onto *target* chain. This function returns a mapping that
contains 4 items:
* Mapped chain as an :class:`.AtomMap` instance,
* *chain* as an :class:`.AtomMap` instance,
* Percent sequence identitity,
* Percent sequence overlap
Mappings are returned in decreasing percent sequence identity order.
:class:`.AtomMap` that keeps mapped atom indices contains dummy atoms
in place of unmapped atoms.
:arg mobile: mobile that will be mapped to the *target* chain
:type mobile: :class:`.Chain`
:arg target: chain to which atoms will be mapped
:type target: :class:`.Chain`
:keyword seqid: percent sequence identity, default is **90**. Note that This parameter is
only effective for sequence alignment
:type seqid: float
:keyword overlap: percent overlap with *target*, default is **70**
:type overlap: float
:keyword mapping: what method will be used if the trivial mapping based on residue numbers
fails. If ``"ce"`` or ``"cealign"``, then the CE algorithm [IS98]_ will be
performed. It can also be a list of prealigned sequences, a :class:`.MSA` instance,
or a dict of indices such as that derived from a :class:`.DaliRecord`.
If set to **True** then the sequence alignment from :mod:`~Bio.pairwise2`
will be used. If set to **False**, only the trivial mapping will be performed.
Default is **"auto"**
:type mapping: list, str, bool
:keyword pwalign: if **True**, then pairwise sequence alignment will
be performed. If **False** then a simple mapping will be performed
based on residue numbers (as well as insertion codes). This will be
overridden by the *mapping* keyword's value.
:type pwalign: bool
.. [IS98] Shindyalov IN, Bourne PE. Protein structure alignment by
incremental combinatorial extension (CE) of the optimal path.
*Protein engineering* **1998** 11(9):739-47.
"""
if isinstance(mobile, Chain):
simple_mobile = SimpleChain(mobile, False)
elif isinstance(mobile, SimpleChain):
simple_mobile = mobile
mobile = mobile._chain
else:
raise TypeError('mobile must be a Chain instance')
if len(simple_mobile) == 0:
LOGGER.debug('\tCannot process {0}, which does not contain any amino '
'acid residues.'.format(simple_mobile))
return None
if isinstance(target, Chain):
simple_target = SimpleChain(target, False)
elif isinstance(target, SimpleChain):
simple_target = target
target = target._chain
else:
raise TypeError('target must be a Chain instance')
seqid = kwargs.get('seqid', 90.)
coverage = kwargs.get('overlap', 70.)
coverage = kwargs.get('coverage', coverage)
pwalign = kwargs.get('pwalign', 'auto')
pwalign = kwargs.get('mapping', pwalign)
alignment = None
if isinstance(pwalign, basestring):
pwalign = pystr(pwalign).strip().lower()
elif not isinstance(pwalign, bool):
alignment = pwalign
pwalign = True
map_ag = mobile.getAtomGroup() if mobile is not None else None
target_ag = target.getAtomGroup() if target is not None else None
if map_ag is None and target_ag is None:
raise ValueError('At least one of mobile and target should be a Chain object '
'or a SimpleChain object associated with a Chain object.')
mapping = None
LOGGER.debug('Trying to map atoms based on residue numbers and '
'identities:')
LOGGER.debug(' Comparing {0} (len={1}) with {2}:'
.format(simple_mobile.getTitle(), len(simple_mobile),
simple_target.getTitle()))
# trivial mapping serves as a first simple trial of alignment the two
# sequences based on residue number, therefore the sequence identity
# (GOOD_SEQID) criterion is strict.
target_list, chain_list, n_match, n_mapped = getTrivialMapping(
simple_target, simple_mobile)
_seqid, _cover = calcScores(n_match, n_mapped, len(simple_target))
trivial_seqid = GOOD_SEQID if pwalign else seqid
trivial_cover = GOOD_COVERAGE if pwalign else coverage
if _seqid >= trivial_seqid and _cover >= trivial_cover:
LOGGER.debug('\tMapped: {0} residues match with {1:.0f}% '
'sequence identity and {2:.0f}% overlap.'
.format(n_mapped, _seqid, _cover))
mapping = (target_list, chain_list, _seqid, _cover)
else:
if not pwalign:
LOGGER.debug('\tFailed to match chains based on residue numbers '
'(seqid={0:.0f}%, overlap={1:.0f}%).'
.format(_seqid, _cover))
if pwalign and mapping is None:
SEQ_ALIGNMENT = ('seq', ALIGNMENT_METHOD + ' sequence alignment', seqid, coverage)
CE_ALIGNMENT = ('ce', 'CEalign', 0., coverage)
if not 'seqid' in kwargs:
tar_seqid = 0.
else:
tar_seqid = seqid
PREDEF_ALIGNMENT = ('predef', 'predefined alignment', tar_seqid, coverage)
if alignment is None:
if pwalign in ['ce', 'cealign']:
methods = [CE_ALIGNMENT]
elif pwalign == 'auto':
methods = [SEQ_ALIGNMENT,
CE_ALIGNMENT]
else:
methods = [SEQ_ALIGNMENT]
else:
methods = [PREDEF_ALIGNMENT]
for method, desc, seqid, coverage in methods:
LOGGER.debug('Trying to map atoms based on {0}:'
.format(desc))
LOGGER.debug(' Comparing {0} (len={1}) with {2}:'
.format(simple_mobile.getTitle(), len(simple_mobile),
simple_target.getTitle()))
if method == 'ce':
result = getCEAlignMapping(simple_target, simple_mobile)
elif method == 'seq':
result = getAlignedMapping(simple_target, simple_mobile)
else:
if isinstance(alignment, dict):
result = getDictMapping(simple_target, simple_mobile, alignment)
else:
result = getAlignedMapping(simple_target, simple_mobile, alignment)
if result is not None:
target_list, chain_list, n_match, n_mapped = result
_seqid, _cover = calcScores(n_match, n_mapped, max(len(simple_target),
len(simple_mobile)))
if _seqid >= seqid and _cover >= coverage:
LOGGER.debug('\tMapped: {0} residues match with {1:.0f}%'
' sequence identity and {2:.0f}% overlap.'
.format(n_mapped, _seqid, _cover))
mapping = (target_list, chain_list, _seqid, _cover)
break
else:
LOGGER.debug('\tFailed to match chains (seqid={0:.0f}%, '
'overlap={1:.0f}%).'
.format(_seqid, _cover))
if mapping is not None:
residues_target, residues_chain, _seqid, _cover = mapping
indices_target = []
indices_chain = []
indices_mapping = []
indices_dummies = []
counter = 0
for i in range(len(residues_target)):
res_tar = residues_target[i]
res_chn = residues_chain[i]
for atom_tar in res_tar:
indices_target.append(atom_tar.getIndex())
if res_chn is not None:
atom_chn = res_chn.getAtom(atom_tar.getName())
if atom_chn is not None:
indices_chain.append(atom_chn.getIndex())
indices_mapping.append(counter)
else:
indices_dummies.append(counter)
else:
indices_dummies.append(counter)
counter += 1
ch_tar = next((r for r in residues_target if r is not None)).getChain()
ch_chn = next((r for r in residues_chain if r is not None)).getChain()
if isinstance(ch_tar, Chain):
title_tar = 'Chain {0} from {1}'.format(ch_tar.getChid(), ch_tar.getAtomGroup().getTitle())
else:
title_tar = 'SimpleChain {0}'.format(ch_tar.getTitle())
if isinstance(ch_chn, Chain):
title_chn = 'Chain {0} from {1}'.format(ch_chn.getChid(), ch_chn.getAtomGroup().getTitle())
else:
title_chn = 'SimpleChain {0}'.format(ch_tar.getTitle())
# note that chain here is from atoms
if map_ag is not None:
atommap = AM(map_ag, indices_chain, mobile.getACSIndex(),
mapping=indices_mapping, dummies=indices_dummies,
title=title_chn + ' -> ' + title_tar)
else:
atommap = None
if target_ag is not None:
selection = AM(target_ag, indices_target, target.getACSIndex(),
title=title_tar + ' -> ' + title_chn, intarrays=True)
else:
selection = None
mapping = (atommap, selection, _seqid, _cover)
return mapping
[docs]def userDefined(chain1, chain2, correspondence):
id1 = chain1.getTitle()
id2 = chain2.getTitle()
if not isinstance(correspondence, dict):
chmap = {}
try:
chmap[id1] = correspondence[0]
chmap[id2] = correspondence[1]
except (IndexError, TypeError):
raise TypeError('correspondence should be a dict with keys being titles of atoms and ref, '
'and values are str indicating chID correspondences')
corr1 = correspondence[id1]
corr2 = correspondence[id2]
if len(corr1) != len(corr2):
raise ValueError('%s and %s have different number of chain identifiers '
'in the correspondence'%(id1, id2))
try:
i = corr1.index(chain1.getChid())
chid = corr2[i]
except:
return False
return chain2.getChid() == chid
[docs]def sameChid(chain1, chain2):
return chain1.getChid() == chain2.getChid()
[docs]def bestMatch(chain1, chain2):
return True
[docs]def mapOntoChains(atoms, ref, match_func=bestMatch, **kwargs):
"""This function is a generalization and wrapper of :func:`.mapOntoChain` that
manages to map chains onto chains (instead of a single chain).
:arg atoms: atoms to map onto the reference
:type atoms: :class:`.Atomic`
:arg ref: reference structure for mapping
:type ref: :class:`.Atomic`
:arg match_func: function determines which chains from ``ref`` and ``atoms`` are matched.
Default is to use the best match.
:type match_func: func
"""
if not isinstance(atoms, (SimpleChain, AtomGroup, AtomSubset)):
raise TypeError('atoms must be an AtomGroup or a AtomSubset (Chain, '
'Segment, etc.) instance')
if not isinstance(ref, (SimpleChain, AtomGroup, AtomSubset)):
raise TypeError('ref must be an AtomGroup or a AtomSubset (Chain, '
'Segment, etc.) instance')
subset = str(kwargs.get('subset', 'all')).lower()
if subset not in _SUBSETS:
raise ValueError('{0} is not a valid subset argument'
.format(str(subset)))
if subset != 'all':
if not isinstance(ref, SimpleChain):
target = ref.select(subset)
else:
target = ref
if not isinstance(atoms, SimpleChain):
mobile = atoms.select(subset)
else:
mobile = atoms
else:
target = ref
mobile = atoms
if isinstance(mobile, (SimpleChain, Chain)):
chs_atm = [mobile]
else:
chs_atm = [chain for chain in mobile.getHierView().iterChains()]
if isinstance(target, (SimpleChain, Chain)):
chs_ref = [target]
else:
chs_ref = [chain for chain in target.getHierView().iterChains()]
# iterate through chains of both target and mobile
mappings = np.empty((len(chs_ref), len(chs_atm)), dtype='O')
for i, chain in enumerate(chs_ref):
simple_chain = chain if isinstance(chain, SimpleChain) else SimpleChain(chain, False)
for j, target_chain in enumerate(chs_atm):
if not match_func(chain, target_chain):
continue
simple_target = target_chain if isinstance(target_chain, SimpleChain) else SimpleChain(target_chain, False)
mappings[i, j] = mapChainOntoChain(simple_target, simple_chain, **kwargs)
return mappings
[docs]def mapOntoChainByAlignment(atoms, chain, **kwargs):
"""This function is similar to :func:`.mapOntoChain` but correspondence
of chains is found by alignment provided.
:arg alignments: A list of predefined alignments. It can be also a
dictionary or :class:`MSA` instance where the keys or
labels are the title of *atoms* or *chains*.
:type alignments: list, dict, :class:`MSA`
"""
alignments = kwargs.pop('alignments', None)
if alignments is None:
return mapOntoChain(atoms, chain, **kwargs)
else:
if isinstance(alignments, (MSA, dict)):
refseq = str(alignments[chain.getTitle()])
tarseq = str(alignments[atoms.getTitle()])
alignment = [refseq, tarseq]
else:
index = kwargs.pop('index', 0)
alignment = alignments[index]
tar_aligned_seq = alignment[-1]
for char in GAPCHARS:
tar_aligned_seq = tar_aligned_seq.replace(char, '').upper()
hv = atoms.getHierView()
for target_chain in hv.iterChains():
tar_seq = target_chain.getSequence().upper()
if tar_seq == tar_aligned_seq:
mappings = mapOntoChain(target_chain, chain, pwalign=alignment, **kwargs)
return mappings
LOGGER.warn('The sequence of chain does not match that in alignment (%s).'%atoms.getTitle())
return []
def getTrivialMapping(target, chain):
"""Returns lists of matching residues (map based on residue number)."""
target_list = []
chain_list = []
n_match = 0
n_mapped = 0
chain_dict_get = chain._dict.get
append = target_list.append
for target_residue in target:
append(target_residue.getResidue())
chain_residue = chain_dict_get((target_residue.getResnum(),
target_residue.getIcode()))
if chain_residue is None:
chain_list.append(chain_residue)
else:
if target_residue.getResname() == chain_residue.getResname():
n_match += 1
chain_list.append(chain_residue.getResidue())
n_mapped += 1
return target_list, chain_list, n_match, n_mapped
def getDictMapping(target, chain, map_dict):
"""Returns lists of matching residues (based on *map_dict*)."""
pdbid = chain._chain.getTitle()[:4].lower()
chid = chain._chain.getChid().upper()
key = pdbid + chid
mapping = map_dict.get(key)
if mapping is None:
LOGGER.warn('map_dict does not have the mapping for {0}'.format(key))
return None
tar_indices = mapping[0]
chn_indices = mapping[1]
chain_res_list = [res for res in chain]
amatch = []
bmatch = []
n_match = 0
n_mapped = 0
for i, a in enumerate(target):
ares = a.getResidue()
amatch.append(ares)
if i in tar_indices:
try:
n = index(tar_indices, i)
except IndexError:
LOGGER.warn('\nthe number of residues in the map_dict ({0} residues) is inconsistent with {2} ({1} residues)'
.format(max(tar_indices)+1, len(chain_res_list), target.getTitle()))
return None
try:
b = chain_res_list[chn_indices[n]]
except IndexError:
LOGGER.warn('\nthe number of residues in the map_dict ({0} residues) is inconsistent with {2} ({1} residues)'
.format(max(chn_indices)+1, len(chain_res_list), chain.getTitle()))
return None
bres = b.getResidue()
bmatch.append(bres)
if a.getResname() == b.getResname():
n_match += 1
n_mapped += 1
else:
bmatch.append(None)
return amatch, bmatch, n_match, n_mapped
def getAlignedMapping(target, chain, alignment=None):
"""Returns lists of matching residues (map based on pairwise
alignment or predefined alignment)."""
if alignment is None:
if ALIGNMENT_METHOD == 'local':
alignments = pairwise2.align.localms(target.getSequence(),
chain.getSequence(),
MATCH_SCORE, MISMATCH_SCORE,
GAP_PENALTY, GAP_EXT_PENALTY,
one_alignment_only=1)
else:
alignments = pairwise2.align.globalms(target.getSequence(),
chain.getSequence(),
MATCH_SCORE, MISMATCH_SCORE,
GAP_PENALTY, GAP_EXT_PENALTY,
one_alignment_only=1)
alignment = alignments[0]
this, that = alignment[:2]
else:
def _findAlignment(sequence, alignment):
for seq in alignment:
strseq = str(seq).upper()
for gap in GAPCHARS:
strseq = strseq.replace(gap, '')
if sequence.upper() == strseq:
return str(seq)
return None
this = _findAlignment(target.getSequence(), alignment)
if this is None:
LOGGER.warn('alignment does not contain the target ({0}) sequence'
.format(target.getTitle()))
return None
that = _findAlignment(chain.getSequence(), alignment)
if that is None:
LOGGER.warn('alignment does not contain the chain ({0}) sequence'
.format(chain.getTitle()))
return None
amatch = []
bmatch = []
aiter = target.__iter__()
biter = chain.__iter__()
n_match = 0
n_mapped = 0
gap_chars = list(GAPCHARS)
gap_chars.append(NONE_A)
for i in range(len(this)):
a = this[i]
b = that[i]
if a not in gap_chars:
ares = next(aiter)
amatch.append(ares.getResidue())
if b not in gap_chars:
bres = next(biter)
bmatch.append(bres.getResidue())
if a == b:
n_match += 1
n_mapped += 1
else:
bmatch.append(None)
elif b not in gap_chars:
bres = next(biter)
return amatch, bmatch, n_match, n_mapped
def getCEAlignMapping(target, chain):
try:
from .ccealign import ccealign
except ImportError:
LOGGER.warn('Could not import ccealign C/C++ extension.'
'It may not be installed properly.')
return None
tar_coords = target.getCoords().tolist()
mob_coords = chain.getCoords().tolist()
if len(tar_coords) < 8:
LOGGER.warn('target ({1}) is too small to be aligned '
'by CE algorithm (at least {0} residues)'
.format(8, repr(target)))
return None
if len(mob_coords) < 8:
LOGGER.warn('chain ({1}) is too small to be aligned '
'by CE algorithm (at least {0} residues)'
.format(8, repr(chain)))
return None
try:
aln_info = ccealign((tar_coords, mob_coords))
except:
LOGGER.warn('cealign could not align {0} and {1}'.format(repr(target), repr(chain)))
return None
paths, bestIdx, nres, rmsd = aln_info[:4]
path = paths[bestIdx]
tar_indices = []
chn_indices = []
for i, j in path:
# if i not in tar_dummies:
# if j not in mob_dummies:
tar_indices.append(i)
chn_indices.append(j)
chain_res_list = [res for res in chain]
amatch = []
bmatch = []
n_match = 0
n_mapped = 0
for i, a in enumerate(target):
ares = a.getResidue()
amatch.append(ares)
if i in tar_indices:
n = tar_indices.index(i)
try:
b = chain_res_list[chn_indices[n]]
except IndexError:
bmatch.append(None)
continue
bres = b.getResidue()
bmatch.append(bres)
if a.getResname() == b.getResname():
n_match += 1
n_mapped += 1
else:
bmatch.append(None)
return amatch, bmatch, n_match, n_mapped
[docs]def combineAtomMaps(mappings, target=None, **kwargs):
"""Builds a grand :class:`.AtomMap` instance based on *mappings* obtained from
:func:`.mapOntoChains`. The function also accepts the output :func:`.mapOntoChain`
but will trivially return all the :class:`.AtomMap` in *mappings*.
*mappings* should be a list or an array of matching chains in a tuple that contain
4 items:
* matching chain from *atoms1* as a :class:`.AtomMap` instance,
* matching chain from *atoms2* as a :class:`.AtomMap` instance,
* percent sequence identity of the match,
* percent sequence overlap of the match.
:arg mappings: a list or an array of matching chains in a tuple, or just the tuple
:type mappings: tuple, list, :class:`~numpy.ndarray`
:arg target: reference structure for superposition and checking RMSD
:type target: :class:`.Atomic`
:arg drmsd: amount deviation of the RMSD with respect to the top ranking atommap.
This is to allow multiple matches when *mobile* has more chains than *target*.
Default is 3.0
:type drmsd: float
:arg rmsd_reject: upper RMSD cutoff that rejects an atommap. Default is 15.0
:type rmsd_reject: float
:arg least: the least number of atommaps requested. If **None**, it will be automatically
determined by the number of chains present in *target* and *mobile*.
Default is **None**
:type least: int
:arg debug: a container (dict) that saves the following information for debugging purposes:
* coverage: original coverage matrix, rows and columns correspond to the reference and the
mobile, respectively,
* solutions: matched index groups that obtained by modeling the coverage matrix as a linear
assignment problem,
* rmsd: a list of ranked RMSDs of identified atommaps.
:type debug: dict
"""
BIG_NUMBER = 1e6
drmsd = kwargs.pop('drmsd', 3.)
debug = kwargs.pop('debug', {})
reject_rmsd = kwargs.pop('rmsd_reject', 15.)
least_n_atommaps = kwargs.pop('least', None)
def _build(mappings, nodes=[]):
m, n = mappings.shape
cov_matrix = np.zeros((m, n), dtype=float)
cost_matrix = np.zeros((m, n), dtype=float)
for i in range(m):
for j in range(n):
mapping = mappings[i, j]
if mapping is None:
cov_matrix[i, j] = 0
cost_matrix[i, j] = BIG_NUMBER
else:
cov_matrix[i, j] = mapping[3] / 100.
cost_matrix[i, j] = 1 - cov_matrix[i, j]
# uses LAP to find the optimal mappings of chains
atommaps = []
(R, C), crrpds = multilap(cost_matrix, nodes, BIG_NUMBER)
for row_ind, col_ind in crrpds:
if len(row_ind) != m:
continue
atommap = None
title = ''
for r, c in zip(row_ind, col_ind):
# if one of the chains failed to match then discard the entire atommap
if mappings[r, c] is None:
atommap = None
break
atommap_ = mappings[r, c][0]
title_ = '(' + atommap_.getTitle() + ')'
if atommap is None:
atommap = atommap_
title = title_
else:
atommap += atommap_
title = title_ + ' + ' + title
if atommap is not None:
atommap.setTitle(title)
atommaps.append(atommap)
return atommaps, cov_matrix, (R, C)
def _optimize(atommaps):
# extract nonoverlaping mappings
if len(atommaps):
atommaps, rmsds = rankAtomMaps(atommaps, target)
if rmsds is not None:
debug['rmsd'] = list(rmsds)
# if rmsd_cutoff is not None:
# for i in reversed(range(len(atommaps))):
# if rmsds[i] > rmsd_cutoff:
# atommaps.pop(i)
# rmsds.pop(i)
# pre-store chain IDs of atommaps
atommap_segchids = []
for atommap in atommaps:
nodummies = atommap.select('not dummy')
chids = nodummies.getChids()
segids = nodummies.getSegnames()
segchids = []
for segid, chid in zip(segids, chids):
if (segid, chid) not in segchids:
segchids.append((segid, chid))
atommap_segchids.append(segchids)
atommaps_ = []
rmsd_standard = rmsds[0]
while len(atommaps):
atommap = atommaps.pop(0)
rmsd = rmsds.pop(0)
segchids = atommap_segchids.pop(0)
if reject_rmsd is not None:
if rmsd > reject_rmsd:
break
if rmsd > rmsd_standard + drmsd:
break
atommaps_.append(atommap)
# remove atommaps that share chains with the popped atommap
for i in reversed(range(len(atommap_segchids))):
amsegchids = atommap_segchids[i]
for segchid in amsegchids:
if segchid in segchids:
atommaps.pop(i)
atommap_segchids.pop(i)
break
atommaps = atommaps_
else:
debug['rmsd'] = None
return atommaps
# checkers
if not isListLike(mappings):
raise TypeError('mappings should be list-like')
if len(mappings) == 0:
raise ValueError('mappings cannot be empty')
if isinstance(mappings, tuple):
am, am_r, s, c = mappings
return am
mappings = np.atleast_2d(mappings)
if mappings.ndim != 2:
raise ValueError('mappings can only be either an 1-D or 2-D array')
# build atommaps
LOGGER.debug('Finding the atommaps based on their coverages...')
nodes = []
atommaps, cov_matrix, (R, C) = _build(mappings, nodes)
if least_n_atommaps is None:
n_mapped = 0
for r, c in zip(R, C):
if cov_matrix[r, c] > 0:
n_mapped += 1
least_n_atommaps = int(np.floor(float(n_mapped) / mappings.shape[0]))
LOGGER.debug('Identified that there exists %d atommap(s) potentially.'%least_n_atommaps)
debug['coverage'] = cov_matrix
debug['solution'] = [1]
# optimize atommaps based on superposition if target is given
if target is not None and len(nodes):
atommaps = _optimize(atommaps)
i = 2
if len(atommaps) < least_n_atommaps:
LOGGER.debug('At least %d atommaps requested. '
'Finding alternative solutions.'%least_n_atommaps)
LOGGER.progress('Solving for %d-best solution...', None, label='_atommap_lap')
while len(atommaps) < least_n_atommaps:
LOGGER.update(i, label='_atommap_lap')
try:
more_atommaps, _, _ = _build(mappings, nodes)
except SolutionDepletionException:
break
more_atommaps = _optimize(more_atommaps)
for j in reversed(range(len(more_atommaps))):
if more_atommaps[j] in atommaps:
more_atommaps.pop(j)
if len(more_atommaps):
debug['solution'].append(i)
atommaps.extend(more_atommaps)
i += 1
LOGGER.finish()
LOGGER.report('%d atommaps were found in %%.2fs. %d requested'%(len(atommaps), least_n_atommaps),
label='_atommap_lap')
if len(atommaps) == 0:
if np.count_nonzero(cov_matrix) == 0:
LOGGER.warn('no atommaps were available. Consider adjusting accepting criteria')
else:
LOGGER.warn('no atommaps were found. Consider inceasing rmsd_reject or drmsd')
return atommaps
def rankAtomMaps(atommaps, target):
"""Ranks :class:`.AtomMap` instances from *atommaps* based on its RMSD
with *target*.
"""
rmsds = []
coords0 = target.getCoords()
if coords0 is None:
rmsds = None
else:
for atommap in atommaps:
weights = atommap.getFlags('mapped')
coords = atommap.getCoords()
rcoords, t = superpose(coords, coords0, weights)
rmsd = calcRMSD(rcoords, coords0, weights)
rmsds.append(rmsd)
I = np.argsort(rmsds)
atommaps = [atommaps[i] for i in I]
rmsds = [rmsds[i] for i in I]
return atommaps, rmsds
[docs]def alignChains(atoms, target, match_func=bestMatch, **kwargs):
"""Aligns chains of *atoms* to those of *target* using :func:`.mapOntoChains`
and :func:`.combineAtomMaps`. Please check out those two functions for details
about the parameters.
"""
mappings = mapOntoChains(atoms, target, match_func, **kwargs)
m, n = mappings.shape
if m > n:
LOGGER.warn('%s has fewer chains than %s'%(atoms.getTitle(), target.getTitle()))
return []
atommaps = combineAtomMaps(mappings, target, **kwargs)
return atommaps
if __name__ == '__main__':
from prody import *
p = parsePDB('1p38')
r = parsePDB('1r39')
chtwo, chone = mapOntoChain(r, p['A'])[0][:2]