from prody import *
from pylab import *
%matplotlib inline
confProDy(auto_show=False)

@> ProDy is configured: auto_show=False


pathPDBFolder('./pdbs/')

@> Local PDB folder is set: '/Users/bentley/Dropbox/Pitt/Bahar/MMBioS/2021/notebooks/pdbs'
@> A plain folder structure will be assumed.


filename = fetchPfamMSA('PF00074')
filename

@> Pfam MSA for PF00074 is written as PF00074_full.sth.

'PF00074_full.sth'


msa = parseMSA(filename)
msa

@> 1494 sequence(s) with 348 residues were parsed in 0.01s.

<MSA: PF00074_full (1494 sequences, 348 residues)>


msa[:10,:10]

<MSA: PF00074_full (10 sequences, 10 residues)>


seq0 = msa[0]
seq0

<Sequence: G1ST62_RABIT (PF00074_full[0]; length 348; 119 residues and 229 gaps)>


str(seq0)

'..................................TKARWFEIQHIQP.NL.L.Q.---....--C...NR.AM..RG.V.NN......YT.........Q........HC..KP..FNTFL.H.D.........S.F......QD...V............AAV.....C...DF........P.N...V.TC....R........NG..RHNC....HQS....PK..PINMTNCRLT......-AGK..YP....D..CS..Y..S.D.A..T........Q.Y..K.F..FIV..A.CDpp.qkSDPP..YHLVPVHLD..........................'


# Refine the raw Pfam alignment in three passes (each is reported in the log):
#   1. label refinement against the 'RNAS1_BOVIN' row, which reduces the columns,
#   2. row-occupancy refinement with a 0.8 threshold, which drops rows,
#   3. sequence-identity refinement with a 0.98 threshold, which drops
#      further rows.
msa_refined = refineMSA(msa, label='RNAS1_BOVIN', rowocc=0.8, seqid=0.98)
msa_refined

@> Label refinement reduced number of columns from 348 to 119 in 0.00s.
@> Row occupancy refinement reduced number of rows from 1494 to 1314 in 0.00s.
@> Sequence identity refinement reduced number of rows from 1314 to 1054 in 0.28s.

<MSA: PF00074_full refined (label=RNAS1_BOVIN, rowocc>=0.8, seqid>=0.98) (1054 sequences, 119 residues)>


entropy = calcShannonEntropy(msa_refined)


showShannonEntropy(msa_refined);

@> Label L5K5X3_PTEAL start-end entry matches length of ungapped sequence. Setting resnums 31 to 149


ag = parsePDB('2W5I', chain='B')
ag

@> Connecting wwPDB FTP server RCSB PDB (USA).
@> 2w5i downloaded (2w5i.pdb.gz)
@> PDB download via FTP completed (1 downloaded, 0 failed).
@> 973 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 75 residues.

<AtomGroup: 2W5IB (973 atoms)>


aln, idx_1, idx_2 = alignSequenceToMSA(ag.ca, msa_refined, label='RNAS1_BOVIN')
showAlignment(aln, indices=[idx_1, idx_2])

               	                  20        30        40        50        60
2W5IB          	KETAAAKFERQHMDSSTSAASSSNYCNQMMKSRNLTKDRCKPVNTFVHESLADVQAVCSQ

               	                  18        28        38        48        58
RNAS1_BOVIN    	--TAAAKFERQHMDSSTSAASSSNYCNQMMKSRNLTKDRCKPVNTFVHESLADVQAVCSQ


               	        70        80        90       100       110       120
2W5IB          	KNVACKNGQTNCYQSYSTMSITDCRETGSSKYPNCAYKTTQANKHIIVACEGNPYVPVHF

               	        68        78        88        98       108       118
RNAS1_BOVIN    	KNVACKNGQTNCYQSYSTMSITDCRETGSSKYPNCAYKTTQANKHIIVACEGNPYVPVHF


2W5IB          	DASV

               	                                                            
RNAS1_BOVIN    	D---


print(ag.ca.getResnums())

[  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124]


chB = ag.select('resid 3 to 121')
chB

<Selection: 'resid 3 to 121' from 2W5IB (879 atoms)>


print(msa_refined['RNAS1_BOVIN'])
print(chB.ca.getSequence())

tAAAKFERQHMDSSTSAASSsNYCNQMMKSRNLTKDRCKPVNTFVHESLADVQAVCSQKNVACKNGQTNCYQSYSTMSITDCRETGSSKYPNCAYKTTQANKHIIVACEGNPYVPVHFD
TAAAKFERQHMDSSTSAASSSNYCNQMMKSRNLTKDRCKPVNTFVHESLADVQAVCSQKNVACKNGQTNCYQSYSTMSITDCRETGSSKYPNCAYKTTQANKHIIVACEGNPYVPVHFD


# Build a GNM on the C-alpha atoms of the residue 3-121 selection, i.e. the
# same residues that line up with the refined MSA columns, so that mobility
# and entropy can be compared residue by residue.
gnm = GNM('2W5I')
gnm.buildKirchhoff(chB.ca)
gnm.calcModes(n_modes=None)  # calculate all modes

@> Kirchhoff was built in 0.01s.
@> 118 modes were calculated in 0.00s.


mobility = calcSqFlucts(gnm)


# Overlay sequence conservation (entropy) and GNM mobility on one axis,
# both indexed by the residue numbers of the selected chain.
figure(figsize=(13, 6))

# Grey bars: Shannon entropy of the refined MSA.
residue_numbers = chB.ca.getResnums()
bar(residue_numbers, entropy, color='grey', width=1.2, label='entropy');

# Bring mobility onto the entropy scale so that both curves share the same maximum.
scale_factor = max(entropy) / max(mobility)
mobility = mobility * scale_factor

# Blue line: rescaled mobility.
showAtomicLines(mobility, atoms=chB.ca, label='mobility', linewidth=2, color='b');

legend()

<matplotlib.legend.Legend at 0x7f42fc6c1940>


mutinfo = buildMutinfoMatrix(msa_refined)

@> Mutual information matrix was calculated in 0.04s.


showMutinfoMatrix(msa_refined, cmap='inferno');
title(None);

@> Mutual information matrix was calculated in 0.04s.
@> Label L5K5X3_PTEAL start-end entry matches length of ungapped sequence. Setting resnums 31 to 149


mi_apc = applyMutinfoCorr(mutinfo)


showMatrix(mi_apc, cmap='inferno');


showMatrix(mi_apc, cmap='inferno', norm=Normalize(0, 0.5));


di = buildDirectInfoMatrix(msa_refined)

@> DI matrix was calculated in 0.73s.


showDirectInfoMatrix(msa_refined, cmap='inferno');
title(None);

@> DI matrix was calculated in 0.70s.
@> Label L5K5X3_PTEAL start-end entry matches length of ungapped sequence. Setting resnums 31 to 149


showContactMap(gnm, origin='lower', cmap='Greys');


di_rank_row, di_rank_col, di_zscore_sort = calcRankorder(di, zscore=True)
print('row:   ', di_rank_row[:5])
print('column:', di_rank_col[:5])

@> Zscore normalization has been applied.
@> Matrix is symmetric, only lower triangle indices will be returned.

row:    [ 79  92  64  69 110]
column: [ 45  37  63  62 109]


mi_rank_row, mi_rank_col, mi_zscore_sort = calcRankorder(mi_apc, zscore=True)
print('row:   ', mi_rank_row[:5])
print('column:', mi_rank_col[:5])

@> Zscore normalization has been applied.
@> Matrix is symmetric, only lower triangle indices will be returned.

row:    [ 79 115  69 111  92]
column: [ 45 114  62 110  37]


import time


dali_rec = searchDali('3H5V','A')
dali_rec

@> Submitted Dali search for PDB "3H5VA".
@> http://ekhidna2.biocenter.helsinki.fi/barcosel/tmp//3H5VA/
@> Dali results were fetched in 0.3s.   
@> Obtained 3692 PDB chains from Dali for 3H5VA.

<prody.database.dali.DaliRecord at 0x7f819fd2bac0>


# Poll the Dali server until the search results have been retrieved.
# Wait between attempts only when the latest fetch did not succeed, so that
# a successful fetch is not followed by a needless two-minute sleep.
while not dali_rec.isSuccess:
    dali_rec.fetch()
    if not dali_rec.isSuccess:
        time.sleep(120)
    
dali_rec

<prody.database.dali.DaliRecord at 0x7f819fd2bac0>


pdb_ids = dali_rec.filter(cutoff_len=0.7, cutoff_rmsd=1.0, cutoff_Z=30)

@> 3526 PDBs have been filtered out from 3692 Dali hits (remaining: 166).


mappings = dali_rec.getMappings()


ags = parsePDB(pdb_ids, subset='ca')
len(ags)

@> 166 PDBs were parsed in 22.18s.

166


# Build a structural ensemble from the parsed C-alpha atom groups, using the
# Dali mappings to map each chain onto the reference. Chains for which no
# atom map is available are left out; the log reports a warning for each of
# them and the total number that could not be mapped.
dali_ens = buildPDBEnsemble(ags, mapping=mappings, seqid=20, labels=pdb_ids)
dali_ens

@> Mapping 5l1fB_ca to the reference... [  0%]@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u5cB_ca to the reference... [  1%]@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5l1eB_ca to the reference... [  1%]@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5l1hB_ca to the reference... [  2%]@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5l1gB_ca to the reference... [  3%] 22s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u1wD_ca to the reference... [  4%] 22s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6ruqD_ca to the reference... [  4%] 22s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6xsrB_ca to the reference... [  5%] 21s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5welD_ca to the reference... [  6%] 20s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5welB_ca to the reference... [  6%] 20s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5welC_ca to the reference... [  7%] 20s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5welA_ca to the reference... [  7%] 22s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5wenB_ca to the reference... [  9%] 20s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5wenC_ca to the reference... [ 10%] 19s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5wenD_ca to the reference... [ 10%] 19s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5wenA_ca to the reference... [ 11%] 19s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u2pA_ca to the reference... [ 14%] 19s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u1xA_ca to the reference... [ 15%] 19s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u1yA_ca to the reference... [ 19%] 16s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dlzD_ca to the reference... [ 22%] 15s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dlzC_ca to the reference... [ 22%] 15s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6o9gA_ca to the reference... [ 23%] 15s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm2A_ca to the reference... [ 24%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm2C_ca to the reference... [ 24%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm0B_ca to the reference... [ 25%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6o9gD_ca to the reference... [ 25%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6o9gC_ca to the reference... [ 26%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm1A_ca to the reference... [ 27%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm0C_ca to the reference... [ 27%] 14s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm0D_ca to the reference... [ 28%] 13s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u1xD_ca to the reference... [ 28%] 13s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dlzA_ca to the reference... [ 32%] 13s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4u1xB_ca to the reference... [ 37%] 12s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6njmA_ca to the reference... [ 38%] 12s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6njnC_ca to the reference... [ 39%] 12s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6njmC_ca to the reference... [ 39%] 12s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ocaA_ca to the reference... [ 43%] 11s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7lddC_ca to the reference... [ 45%] 11s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7lddA_ca to the reference... [ 46%] 11s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm1B_ca to the reference... [ 47%] 11s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm1D_ca to the reference... [ 48%] 11s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ldeC_ca to the reference... [ 48%] 10s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6njlC_ca to the reference... [ 50%] 10s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6njlA_ca to the reference... [ 50%] 10s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ldeA_ca to the reference... [ 51%] 10s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6njnA_ca to the reference... [ 51%] 10s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dlzB_ca to the reference... [ 52%] 10s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ocaC_ca to the reference... [ 53%] 9s @> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm2D_ca to the reference... [ 53%] 9s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5weoC_ca to the reference... [ 54%] 9s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks0B_ca to the reference... [ 54%] 9s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks0D_ca to the reference... [ 55%] 9s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks3B_ca to the reference... [ 56%] 9s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks3D_ca to the reference... [ 56%] 9s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4uqqD_ca to the reference... [ 59%] 8s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 4uqqB_ca to the reference... [ 59%] 8s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5weoA_ca to the reference... [ 66%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5ideD_ca to the reference... [ 67%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5kufC_ca to the reference... [ 68%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5kufA_ca to the reference... [ 69%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6qkzC_ca to the reference... [ 69%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks3C_ca to the reference... [ 70%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks0C_ca to the reference... [ 71%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks0A_ca to the reference... [ 72%] 6s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 7ks3A_ca to the reference... [ 73%] 5s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6qkzA_ca to the reference... [ 75%] 5s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5ideB_ca to the reference... [ 76%] 5s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6jfyD_ca to the reference... [ 80%] 4s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm1C_ca to the reference... [ 81%] 4s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5idfB_ca to the reference... [ 83%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5idfD_ca to the reference... [ 83%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6jfyA_ca to the reference... [ 84%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5weoB_ca to the reference... [ 84%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5weoD_ca to the reference... [ 85%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm0A_ca to the reference... [ 86%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6l6fA_ca to the reference... [ 87%] 3s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6l6fC_ca to the reference... [ 88%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5kuhD_ca to the reference... [ 89%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6dm2B_ca to the reference... [ 89%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6o9gB_ca to the reference... [ 90%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5l2eA_ca to the reference... [ 90%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6lu9A_ca to the reference... [ 91%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6l6fB_ca to the reference... [ 92%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5l2eB_ca to the reference... [ 93%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6kzmA_ca to the reference... [ 93%] 2s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6jfzD_ca to the reference... [ 94%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6kzmB_ca to the reference... [ 95%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5kc9A_ca to the reference... [ 95%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6jfzA_ca to the reference... [ 96%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6kzmC_ca to the reference... [ 96%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 3td9A_ca to the reference... [ 97%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6jfzB_ca to the reference... [ 98%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 6lu9D_ca to the reference... [ 98%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Mapping 5kc9B_ca to the reference... [ 99%] 1s@> WARNING no atommaps were available. Consider adjusting accepting criteria
@> Starting iterative superposition:             
@> Step #1: RMSD difference = 1.2288e+00
@> Step #2: RMSD difference = 1.0616e-02
@> Step #3: RMSD difference = 2.5476e-04
@> Step #4: RMSD difference = 7.5375e-06
@> Iterative superposition completed in 0.15s.
@> Final superposition to calculate transformations.
@> Superposition completed in 0.03 seconds.
@> Ensemble (72 conformations) were built in 16.62s.
@> WARNING 94 structures cannot be mapped.

<PDBEnsemble: Unknown (72 conformations; 376 atoms)>


saveEnsemble(dali_ens, 'PBP-I')

'PBP-I.ens.npz'


dali_ens = loadEnsemble('PBP-I.ens.npz')


# Calculate GNM modes for every conformation of the ensemble and match the
# modes across the resulting mode sets (both steps are reported in the log).
# NOTE(review): trim='reduce' presumably selects how atoms outside the common
# core are treated when building each model - confirm against the
# calcEnsembleENMs documentation.
gnms = calcEnsembleENMs(dali_ens, model='GNM', trim='reduce')
gnms

@> 20 GNM modes were calculated for each of the 72 conformations in 6.14s.
@> 20 modes across 72 modesets were matched in 0.51s.

<ModeEnsemble: 72 modesets (20 modes, 376 atoms)>


saveModeEnsemble(gnms, 'PBP-I')

/Users/bentley/opt/anaconda3/envs/workshop21/lib/python3.8/site-packages/numpy/core/_asarray.py:171: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  return array(a, dtype, copy=False, order=order, subok=True)

'PBP-I.modeens.npz'


gnms = loadModeEnsemble('PBP-I.modeens.npz')


gnms[0]

<ModeSet: 20 modes from MaskedGNM 3h5vA reduced>


gnms[:,0]

<ModeEnsemble: 72 modesets (1 mode, 376 atoms)>


gnms[5:10,2:4]

<ModeEnsemble: 5 modesets (2 modes, 376 atoms)>


gnms[5,2]

<Mode: 3 from MaskedGNM 2wjxB reduced>


showSignatureMode(gnms[:, 0]);


showSignatureSqFlucts(gnms[:, :5]);


showSignatureCrossCorr(gnms[:, :20]);


highlights = {'3h5vA': 'GluA2','3o21C': 'GluA3',
              '3h6gA': 'GluK2', '3olzA': 'GluK3', 
              '5kc8A': 'GluD2'}


# Two stacked panels sharing one column: a thin variance bar on top of a
# ten-times taller panel showing the variance distributions.
gs = GridSpec(nrows=2, ncols=1, hspace=0.15, height_ratios=[1, 10])
top_panel, bottom_panel = gs[0], gs[1]
first_five_modes = gnms[:, :5]

# Upper panel: variance bar with the selected family members highlighted.
subplot(top_panel);
showVarianceBar(first_five_modes, highlights=highlights, fraction=True);
xlabel('');

# Lower panel: variance distributions of the same five modes.
subplot(bottom_panel);
showSignatureVariances(first_five_modes, alpha=0.7, bins=80, fraction=True);
xlabel('Fraction of inverse eigenvalue');


eigvals = gnms.getEigvals()
eigvals

sdarray([[0.40332629, 1.09698756, 1.4466639 , ..., 5.30924955,
          5.5792069 , 5.77873354],
         [0.41578566, 1.12906523, 1.4632766 , ..., 5.77565607,
          5.03060667, 5.65945268],
         [0.41366158, 1.07931586, 1.40302256, ..., 5.60784667,
          4.76928731, 5.0126084 ],
         ...,
         [0.39875231, 1.14861099, 1.58945168, ..., 5.73761851,
          5.68069539, 4.8536725 ],
         [0.39875231, 1.14861099, 1.58945168, ..., 5.73761851,
          5.68069539, 4.8536725 ],
         [0.2452986 , 1.20065343, 1.38619425, ..., 5.01755708,
          4.66115699, 4.26878977]])


eigvecs = gnms.getEigvecs()
eigvecs

sdarray([[[ 6.32823556e-02, -2.50192899e-02,  2.24757152e-02, ...,
           -2.75806462e-02, -2.91342858e-02,  2.35751313e-02],
          [ 6.14399172e-02, -2.55671762e-02,  1.82215962e-02, ...,
            2.37048870e-02,  1.30948891e-02,  8.00770496e-03],
          [ 6.03893156e-02, -2.75289674e-02,  1.47023932e-02, ...,
            6.27873120e-02,  4.50426791e-02,  7.48137751e-04],
          ...,
          [-3.45674867e-02,  5.36219952e-02, -8.18341574e-02, ...,
            3.73431044e-03, -1.56226528e-02,  1.48147066e-02],
          [-3.58459460e-02,  5.83491556e-02, -8.68632869e-02, ...,
            2.19044923e-04, -9.12061694e-03,  1.77561547e-02],
          [-3.86893737e-02,  6.78185627e-02, -1.09283894e-01, ...,
           -1.24293912e-01,  1.19842480e-01, -9.65913188e-02]],

         [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
            0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
          [ 6.26195354e-02, -1.86306909e-02,  3.54198419e-02, ...,
            1.59074752e-02, -8.08406848e-02,  9.26191303e-02],
          [ 6.11619603e-02, -2.55164251e-02,  2.48636956e-02, ...,
            9.21467324e-03, -1.63478462e-02,  2.82806029e-02],
          ...,
          [-3.31550817e-02,  6.14823652e-02, -7.99846405e-02, ...,
            1.21072250e-02,  6.91858700e-03,  1.36501602e-02],
          [-3.57119564e-02,  7.24766520e-02, -8.70849570e-02, ...,
            1.18748882e-02,  2.68064801e-03,  1.22261092e-02],
          [-3.81538579e-02,  7.88324088e-02, -9.40577127e-02, ...,
            3.94138736e-02, -1.95224061e-02, -1.18914659e-01]],

         [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
            0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
          [ 6.15555711e-02, -2.36075312e-02,  1.92856007e-02, ...,
           -2.63711302e-01,  1.22835729e-01,  2.75585842e-02],
          [ 6.04578725e-02, -2.51940333e-02,  1.60829014e-02, ...,
           -1.71820640e-01,  1.07845661e-01,  2.16192703e-02],
          ...,
          [-3.58438460e-02,  5.72745098e-02, -8.14492120e-02, ...,
           -1.02361513e-01, -5.60937869e-02,  2.25927650e-02],
          [-3.55754360e-02,  6.12920488e-02, -8.20429364e-02, ...,
           -1.58671436e-01, -8.33516387e-02,  4.64514413e-02],
          [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
            0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

         ...,

         [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
            0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
          [ 6.26386283e-02, -2.51539903e-02,  4.06441675e-02, ...,
            1.24556612e-02,  9.82507745e-03,  1.48376078e-02],
          [ 6.10332991e-02, -2.65226588e-02,  3.23235560e-02, ...,
            5.70342641e-02,  9.77598491e-03, -7.11702459e-03],
          ...,
          [-3.48534987e-02,  4.07979825e-02, -1.02628475e-01, ...,
            6.61577028e-02, -4.15615160e-02,  8.79296801e-03],
          [-3.75181948e-02,  4.30152469e-02, -1.02249201e-01, ...,
            1.17856437e-01, -8.80188732e-02,  1.60250778e-02],
          [-3.79952007e-02,  4.69015048e-02, -1.00454973e-01, ...,
            2.30500578e-01, -1.89173790e-01,  2.17667557e-02]],

         [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
            0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
          [ 6.26386283e-02, -2.51539903e-02,  4.06441675e-02, ...,
            1.24556612e-02,  9.82507745e-03,  1.48376078e-02],
          [ 6.10332991e-02, -2.65226588e-02,  3.23235560e-02, ...,
            5.70342641e-02,  9.77598491e-03, -7.11702459e-03],
          ...,
          [-3.48534987e-02,  4.07979825e-02, -1.02628475e-01, ...,
            6.61577028e-02, -4.15615160e-02,  8.79296801e-03],
          [-3.75181948e-02,  4.30152469e-02, -1.02249201e-01, ...,
            1.17856437e-01, -8.80188732e-02,  1.60250778e-02],
          [-3.79952007e-02,  4.69015048e-02, -1.00454973e-01, ...,
            2.30500578e-01, -1.89173790e-01,  2.17667557e-02]],

         [[ 5.92625164e-02, -3.80092028e-02,  1.93696992e-02, ...,
           -7.64038869e-02, -1.55296361e-01,  1.32080734e-02],
          [ 5.72439285e-02, -3.23813691e-02,  1.48363404e-02, ...,
           -7.49500731e-03, -3.14121577e-02,  6.55554064e-03],
          [ 5.69656645e-02, -3.84836647e-02,  9.37921915e-03, ...,
            6.39603068e-03, -1.44156127e-02,  1.06056943e-02],
          ...,
          [-4.53609827e-02,  5.74638427e-02, -6.49376830e-02, ...,
            4.50234609e-02,  1.17368081e-01,  1.22103313e-01],
          [-4.74569814e-02,  5.74899067e-02, -8.26159133e-02, ...,
            4.63149439e-02,  1.29226536e-01,  1.18899247e-01],
          [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
            0.00000000e+00,  0.00000000e+00,  0.00000000e+00]]])
weights=
array([[[1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]],

       [[1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [0., 0., 0., ..., 0., 0., 0.]]])


eigvals.shape

(72, 20)


eigvals[0:5,0:5]

sdarray([[0.40332629, 1.09698756, 1.4466639 , 1.73630206, 1.99349677],
         [0.41578566, 1.12906523, 1.4632766 , 1.93509855, 1.91409476],
         [0.41366158, 1.07931586, 1.40302256, 1.77505326, 1.83721084],
         [0.38353231, 1.09438353, 1.43541322, 1.86532372, 1.838075  ],
         [0.42414171, 1.08373853, 1.42553784, 1.79322051, 1.84047078]])


eigvecs.shape

(72, 376, 20)


so_matrix = calcEnsembleSpectralOverlaps(gnms[:, :1])


figure(figsize=(8,8))
showMatrix(so_matrix);


# Same pairwise comparison over the first mode, this time requested as a
# distance (distance=True) instead of an overlap, then shown as a matrix.
sd_matrix = calcEnsembleSpectralOverlaps(gnms[:, :1], distance=True)
figure(figsize=(8, 8))
showMatrix(sd_matrix);


# Build a UPGMA tree from the spectral distance matrix. The leaves are named
# with the ensemble labels, which are in the same order as the rows of
# sd_matrix (both derive from dali_ens / gnms in their original order).
labels = dali_ens.getLabels()
so_tree = calcTree(names=labels, distance_matrix=sd_matrix, method='upgma')


showTree(so_tree);


reordered_so, new_so_indices = reorderMatrix(names=labels, matrix=so_matrix, tree=so_tree)


figure(figsize=(8,8))
showMatrix(reordered_so, ticklabels=new_so_indices);


figure(figsize=(8,8))
showMatrix(reordered_so, ticklabels=new_so_indices, origin='upper');


figure(figsize=(11,8))
showMatrix(reordered_so, ticklabels=new_so_indices, origin='upper', 
           y_array=so_tree);


# Apply the index order returned by reorderMatrix for so_tree to the
# structural ensemble and to the mode ensemble, so both follow the tree order.
so_reordered_ens = dali_ens[new_so_indices]
so_reordered_gnms = gnms[new_so_indices, :]


# Put the labels in the same order; converting to an array allows selecting
# with a list of indices.
so_reordered_labels = np.array(labels)[new_so_indices]


# Pairwise sequence identity from the MSA of the reordered ensemble, turned
# into a distance as (1 - identity). Rows therefore follow so_reordered_labels.
seqid_matrix = buildSeqidMatrix(so_reordered_ens.getMSA())
seqdist_matrix = 1. - seqid_matrix

@> Sequence identity matrix was calculated in 0.01s.


figure(figsize=(8,8));
showMatrix(seqdist_matrix);


seqdist_tree = calcTree(names=so_reordered_labels, distance_matrix=seqdist_matrix, method='upgma')
showTree(seqdist_tree);


# Sort the sequence-distance matrix into the leaf order of its own tree and
# display it, using the returned indices as tick labels.
reordered_seqdist_seqdist, new_seqdist_indices = reorderMatrix(
    tree=seqdist_tree,
    matrix=seqdist_matrix,
    names=so_reordered_labels,
)
figure(figsize=(8, 8));
showMatrix(reordered_seqdist_seqdist, ticklabels=new_seqdist_indices);


rmsd_matrix = so_reordered_ens.getRMSDs(pairwise=True)
figure(figsize=(8,8)); showMatrix(rmsd_matrix);


prody.__version__

'2.0'


rmsd_tree = calcTree(names=so_reordered_labels, 
                     distance_matrix=rmsd_matrix, 
                     method='upgma')


# One row of three trees, left to right: sequence, structure and dynamics.
figure(figsize=(20, 8));
_tree_panels = [
    ('Sequence', seqdist_tree),
    ('Structure', rmsd_tree),
    ('Dynamics', so_tree),
]
for _position, (_heading, _tree) in enumerate(_tree_panels, start=1):
    subplot(1, 3, _position)
    showTree(_tree, format='plt')
    title(_heading)


# Reorder the structural (RMSD) and dynamical (spectral distance) matrices
# into the leaf order of the sequence-distance tree, so that all three
# matrices can be compared side by side.

# rmsd_matrix was computed from so_reordered_ens, so its rows follow
# so_reordered_labels.
reordered_rmsd_seqdist, new_seqdist_indices = reorderMatrix(names=so_reordered_labels,
                                                            matrix=rmsd_matrix, tree=seqdist_tree)

# sd_matrix was computed from gnms in the original ensemble order, so its rows
# follow `labels`, not so_reordered_labels. Passing the reordered names here
# would pair each row with the wrong structure. The indices returned by this
# call refer to the original order, so they are kept separate and
# new_seqdist_indices is left as returned above.
reordered_sd_seqdist, _sd_original_indices = reorderMatrix(names=labels,
                                                           matrix=sd_matrix, tree=seqdist_tree)


# One row of three matrices, all in the sequence-tree order, left to right:
# sequence distance, structural distance (RMSD) and dynamical distance.
figure(figsize=(20, 8));
_matrix_panels = [
    ('Sequence', reordered_seqdist_seqdist),
    ('Structure', reordered_rmsd_seqdist),
    ('Dynamics', reordered_sd_seqdist),
]
for _position, (_heading, _matrix) in enumerate(_matrix_panels, start=1):
    subplot(1, 3, _position)
    showMatrix(_matrix, ticklabels=new_seqdist_indices, origin='upper')
    title(_heading)


pathPDBFolder('')

@> PDB folder '/Users/bentley/Dropbox/Pitt/Bahar/MMBioS/2021/notebooks/pdbs' is released.


pathPDBFolder?


pathPDBFolder()

Evolution of sequence, structure and dynamics with Evol and SignDy

1. Sequence evolution with Evol

Fetching, parsing and refining MSAs from Pfam

Measuring sequence conservation with Shannon entropy

Comparisons of sequence evolution and structural dynamics

Coevolution Calculation

2. Signature Dynamics analysis with SignDy

Overview

Step 1: Prepare Ensemble (using Dali)

Step 2: Mode ensemble

Slicing and Indexing Mode Ensembles

Step 3: Signature dynamics

Step 4: Spectral overlap and distance

Comparing with sequence and structural distances