from prody import *
from numpy import *
from matplotlib.pyplot import *


# Display the installed ProDy version (the recorded run below reports 2.0).
prody.__version__

'2.0'


# Parse PDB entry 1p38; the log output below shows it being read from a
# 1p38.pdb file in the working directory.
p38 = parsePDB('1p38', compressed=False) # MAP KINASE

@> PDB file is found in working directory (1p38.pdb).
@> 2962 atoms and 1 coordinate set(s) were parsed in 0.12s.
@> Secondary structures were assigned to 188 residues.


# Extract the sequence of chain A and echo it as the cell result.
p38_sequence = p38['A'].getSequence()
p38_sequence

'ERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGHRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLAQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLD'


import pickle

# Load a previously saved BLAST record from disk.
# NOTE(security): unpickling can execute arbitrary code, so only load
# 'blast_record3.pkl' when it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes,
# instead of leaving it open until garbage collection.
with open('blast_record3.pkl', 'rb') as pkl_file:
    blast_record = pickle.load(pkl_file)


blast_record

<prody.proteins.blastpdb.PDBBlastRecord at 0x7fb8fbc71100>


# Filter the BLAST hits using the 90 (identity) and 70 (overlap) percent
# thresholds.
hits = blast_record.getHits(percent_identity=90, percent_overlap=70)


# Iterating the hits mapping yields its keys (the PDB identifiers shown in
# the next cell); echo how many there are.
pdbids = list(hits)
len(pdbids)

253


# Preview the first ten identifiers.
pdbids[:10]

['1lew',
 '1lez',
 '4ka3',
 '4loo',
 '4lop',
 '4loq',
 '5lar',
 '3tg1',
 '1bmk',
 '5uoj']


from os import makedirs, mkdir
from os.path import isdir

# Make sure the 'pdbs' folder exists. exist_ok=True makes this a no-op when
# the folder is already there and avoids the check-then-create race of a
# separate isdir()/mkdir() pair. It still raises FileExistsError if 'pdbs'
# exists but is not a directory.
makedirs('pdbs', exist_ok=True)


# Register 'pdbs' as the local PDB folder (confirmed by the log line below).
pathPDBFolder('pdbs')

@> Local PDB folder is set: '/Users/bentley/Dropbox/Pitt/Bahar/MMBioS/2021/notebooks/pdbs'
@> A plain folder structure will be assumed.


# Parse every structure in pdbids in one call (253 in the recorded run).
# NOTE(review): presumably the files are fetched into / read from the local
# PDB folder configured earlier; confirm against the ProDy documentation.
pdbs = parsePDB(pdbids)

@> 253 PDBs were parsed in 34.10s.


# Passing an empty string releases the local PDB folder (see the log below).
pathPDBFolder('')

@> PDB folder '/Users/bentley/Dropbox/Pitt/Bahar/MMBioS/2021/notebooks/pdbs' is released.


# Use the 1p38 structure parsed earlier as the reference.
ref_structure = p38
# Keep only C-alpha atoms within the listed residue-number ranges; residue
# numbers 32-35, 115-121 and 170-184 fall outside the ranges and are left out.
ref_selection = ref_structure.select('resnum 5 to 31 36 to 114 122 to '
                                     '169 185 to 351 and calpha')


# Take chain A of the selection; the repr below reports 321 residues/atoms.
ref_chain = ref_selection['A']
repr(ref_chain)

'<Chain: A from 1p38 (321 residues, 321 atoms)>'


# Put the reference chain at the front of the structure list so that it is
# passed to the ensemble builder along with the parsed hits.
pdbs.insert(0, ref_chain)


# Build the ensemble with ref_chain as the reference.
# NOTE(review): mapping='seq' presumably selects sequence-based mapping of
# each structure onto the reference; confirm against the ProDy docs.
ensemble = buildPDBEnsemble(pdbs, mapping='seq', ref=ref_chain, title='p38')

@> Mapping 4geo to the reference... [ 99%] 1s


# Echo the ensemble summary (number of conformations and atoms).
ensemble

<PDBEnsemble: p38 (278 conformations; 321 atoms)>


pca = PCA('p38 xray')           # Instantiate a PCA instance
pca.buildCovariance(ensemble)   # Build covariance for the ensemble
pca.calcModes()                 # Calculate modes (20 of them by default)


# Print the percentage of variance explained by each of the top five PCs.
for mode in pca[:5]:
    # Deliberately not named `var`: that would shadow numpy.var, which the
    # star import from numpy at the top of the file brings into scope.
    pct_variance = calcFractVariance(mode) * 100
    print('{0:s}  % variance = {1:.2f}'.format(repr(mode), pct_variance))

<Mode: 1 from PCA p38 xray>  % variance = 23.32
<Mode: 2 from PCA p38 xray>  % variance = 15.79
<Mode: 3 from PCA p38 xray>  % variance = 14.17
<Mode: 4 from PCA p38 xray>  % variance = 10.19
<Mode: 5 from PCA p38 xray>  % variance = 6.50


anm = ANM('1p38')             # Instantiate an ANM instance
anm.buildHessian(ref_chain)   # Build Hessian for the reference chain
anm.calcModes()               # Calculate slowest non-trivial 20 modes


# Report the collectivity of the three leading PCA modes.
for mode in pca[:3]:
    label, coll = repr(mode), calcCollectivity(mode)
    print('{0:s}  collectivity = {1:.2f}'.format(label, coll))

<Mode: 1 from PCA p38 xray>  collectivity = 0.53
<Mode: 2 from PCA p38 xray>  collectivity = 0.18
<Mode: 3 from PCA p38 xray>  collectivity = 0.63


# Report the collectivity of the first three ANM modes.
for mode in anm[:3]:
    label, coll = repr(mode), calcCollectivity(mode)
    print('{0:s}  collectivity = {1:.2f}'.format(label, coll))

<Mode: 1 from ANM 1p38>  collectivity = 0.65
<Mode: 2 from ANM 1p38>  collectivity = 0.55
<Mode: 3 from ANM 1p38>  collectivity = 0.68


# Print the pairwise overlaps between the first three modes of each model.
printOverlapTable(pca[:3], anm[:3]) # Top 3 PCs vs slowest 3 ANM modes

Overlap Table
                        ANM 1p38
                    #1     #2     #3
PCA p38 xray #1   -0.78  -0.07  -0.47
PCA p38 xray #2   -0.26  +0.09  +0.46
PCA p38 xray #3   +0.38  +0.43  -0.37


# Plot the overlap table for the first six modes of each model and title it.
# The trailing semicolons suppress the notebook's echo of the return value.
showOverlapTable(pca[:6], anm[:6]);
title('PCA - ANM Overlap Table');


# Overlap of the first PC with the ANM modes, with the cumulative overlap
# drawn in red.
showOverlap(pca[0], anm);
showCumulOverlap(pca[0], anm, color='r');


# Compare scaled square fluctuations of the second PC and second ANM mode.
showScaledSqFlucts(pca[1], anm[1]);
legend();


# NOTE(review): pca[3] is the fourth PC, which is outside the 3x3 overlap
# table printed earlier; confirm that pairing it with anm[1] is intended.
showScaledSqFlucts(pca[3], anm[1]);
legend();


# Cross-correlation map for the first PC.
showCrossCorr(pca[0]);


# Cross-correlation map for the first ANM mode.
showCrossCorr(anm[0]);


# Persist the reference chain, the ensemble and both models to disk.
# The value echoed below ('1p38.anm.npz') is the result of the last call.
writePDB('p38_ref_chain.pdb', ref_chain)
saveEnsemble(ensemble)
saveModel(pca)
saveModel(anm)

'1p38.anm.npz'


# Write the PCA modes in NMD format; the file is named after the model it
# holds.
writeNMD('p38_pca.nmd', pca, ref_chain)

'p38_pca.nmd'


# Write the ANM modes in NMD format, again matching file name and model.
writeNMD('p38_anm.nmd', anm, ref_chain)

'p38_anm.nmd'

Ensemble Analysis

Retrieve dataset

Set reference chain

Ensemble Preparation

Ensemble Dynamics

1. Principal Component Analysis (PCA)

2. Anisotropic Network Model (ANM) Normal Mode Analysis (NMA)

Analysis of PCA and ANM modes

Collectivity of modes

PCA - ANM overlap

Square Fluctuations

Cross Correlations

Saving your work