%%html
<script src="https://bits.csb.pitt.edu/preamble.js"></script>
<style>:root {--jp-cell-prompt-width: 32px}</style>

from pdbfixer.pdbfixer import PDBFixer
fixer = PDBFixer('data/1qg8.cif')
fixer.removeHeterogens(False) # remove water and ions
fixer.findMissingResidues(); fixer.findMissingAtoms()
fixer.addMissingAtoms(); fixer.addMissingHydrogens(7.0)
with open('fixed.pdb', 'w') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)

v = py3Dmol.view(data=open('data/1qg8.cif').read()); v.addModel(open('fixed.pdb').read())
v.setStyle('cartoon');v.setStyle({'model':1,'resi':'216-231'},{'cartoon':{'colorscheme':'greenCarbon'},'stick':{'colorscheme':'greenCarbon'}})
v.zoomTo({'resi':'216-231'}).show()

ff = ForceField('amber14-all.xml')
system = ff.createSystem(fixer.topology,  nonbondedMethod=PME,constraints=HBonds)
simulation = Simulation(fixer.topology, system, VerletIntegrator(2*femtosecond))
simulation.context.setPositions(fixer.positions)
state = simulation.context.getState(getEnergy=True)
print("Original Energy:",state.getPotentialEnergy())
simulation.minimizeEnergy()
state = simulation.context.getState(getEnergy=True,getPositions=True)
print("Minimized Energy:",state.getPotentialEnergy())

Original Energy: 17062182.334642887 kJ/mol
Minimized Energy: -32475.109364987977 kJ/mol

with open('minimized.pdb', 'w') as outfile:
    PDBFile.writeFile(fixer.topology, state.getPositions(), outfile)

v = py3Dmol.view(data=open('fixed.pdb').read()); v.addModel(open('minimized.pdb').read())
v.setStyle({'model':0,'resi':'216-231'},{'stick':{'colorscheme':'greenCarbon','radius':.15}})
v.setStyle({'model':1,'resi':'216-231'},{'stick':{'colorscheme':'yellowCarbon'}})
v.zoomTo({'resi':'216-231'}).show()

import py3Dmol
import openmm
from openmm.app import *
from openmm.unit import *
from openmm.openmm import *

modeller = Modeller(Topology(),[])
forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')
modeller.addSolvent(forcefield,boxSize=(5,5,5),ionicStrength=.01*molar)
system = forcefield.createSystem(modeller.topology,  nonbondedMethod=PME,constraints=HBonds)
simulation = Simulation(modeller.topology, system, VerletIntegrator(2*femtosecond))
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()

show(modeller.topology, modeller.positions).show()

simulation.reporters.append(StateDataReporter('output.txt', 1, step=True, volume=True,
        potentialEnergy=True, kineticEnergy=True, totalEnergy=True, temperature=True))
simulation.step(10000)

import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv('output.txt')

data['Total Energy (kJ/mole)'].plot(); data['Potential Energy (kJ/mole)'].plot()
plt.legend(); plt.xlabel('Frame'); plt.ylabel('Energy');

data['Temperature (K)'].plot()
plt.legend(); plt.xlabel('Frame'); plt.ylabel('Temperature')

Text(0, 0.5, 'Temperature')

data['Box Volume (nm^3)'].plot()
plt.legend(); plt.xlabel('Frame');

system = forcefield.createSystem(modeller.topology,  nonbondedMethod=PME,constraints=HBonds)
system.addForce(AndersenThermostat(300*kelvin,1/picosecond))
simulation = Simulation(modeller.topology, system, VerletIntegrator(2*femtosecond))
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()
simulation.reporters.append(StateDataReporter('outputa.txt', 1, step=True, volume=True,
        potentialEnergy=True, kineticEnergy=True, totalEnergy=True, temperature=True))
simulation.step(10000)

dataa = pd.read_csv('outputa.txt')

data['Temperature (K)'].plot(label='no Thermostat')
dataa['Temperature (K)'].plot(label='w/Thermostat')
plt.legend(); plt.xlabel('Frame');

system = forcefield.createSystem(modeller.topology,  nonbondedMethod=PME,constraints=HBonds)
simulation = Simulation(modeller.topology, system, LangevinIntegrator(300*kelvin,1/picosecond,2*femtosecond))
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()
simulation.reporters.append(StateDataReporter('outputl.txt', 1, step=True, volume=True,
        potentialEnergy=True, kineticEnergy=True, totalEnergy=True, temperature=True))
simulation.step(10000)

datal = pd.read_csv('outputl.txt')

data['Temperature (K)'].plot(label='no Thermostat')
dataa['Temperature (K)'].plot(label='w/Thermostat')
datal['Temperature (K)'].plot(label='Langevin')

plt.legend(); plt.ylabel('Temperature (K)'); plt.xlabel('Frame');

%%html
<div id="energyplots" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#energyplots';
	jQuery(divid).asker({
	    id: divid,
	    question: "What will the plots of the energy look like?",
		answers: ['KE↑,PE↑','KE↑,PE↓','KE↓,PE↑','KE↓,PE↓','Const'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

data['Total Energy (kJ/mole)'].plot(label='no Thermostat')
dataa['Total Energy (kJ/mole)'].plot(label='w/Thermostat')
datal['Total Energy (kJ/mole)'].plot(label='Langevin')

plt.legend(); plt.ylabel('Total Energy (kJ/mol)'); plt.xlabel('Frame');

data['Potential Energy (kJ/mole)'].plot(label='no Thermostat')
dataa['Potential Energy (kJ/mole)'].plot(label='w/Thermostat')
datal['Potential Energy (kJ/mole)'].plot(label='Langevin')

plt.legend();  plt.ylabel('Potential Energy (kJ/mol)'); plt.xlabel('Frame');

data['Kinetic Energy (kJ/mole)'].plot(label='no Thermostat')
dataa['Kinetic Energy (kJ/mole)'].plot(label='w/Thermostat')
datal['Kinetic Energy (kJ/mole)'].plot(label='Langevin')

plt.legend();  plt.ylabel('Kinetic Energy (kJ/mol)'); plt.xlabel('Frame');

system = forcefield.createSystem(modeller.topology,  nonbondedMethod=PME,constraints=HBonds)
simulation = Simulation(modeller.topology, system, LangevinIntegrator(300*kelvin,1/picosecond,2*femtosecond))
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()
simulation.context.setVelocitiesToTemperature(300*kelvin) # Start at desired temperature
simulation.reporters.append(StateDataReporter('outputv.txt', 1, step=True, volume=True,
        potentialEnergy=True, kineticEnergy=True, totalEnergy=True, temperature=True))
simulation.step(10000)
datav = pd.read_csv('outputv.txt')

datal['Temperature (K)'].plot(label='Langevin')
datav['Temperature (K)'].plot(label='Langevin + Velocity Initialization')
plt.legend(); plt.ylabel('Temperature (K)'); plt.xlabel('Frame');

datal['Potential Energy (kJ/mole)'].plot(label='Langevin')
datav['Potential Energy (kJ/mole)'].plot(label='Langevin + Velocity Initialization')
plt.legend(); plt.ylabel('Potential Energy (kJ/mole)'); plt.xlabel('Frame');

import openmm
from openmm.app import *
from openmm import *
from openmm.unit import *
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy, os
import argparse
import scipy.signal

def dihedral(p1, p2, p3, p4):
    '''Return dihedral angle in radians between provided points.
    This is the same calculation used by OpenMM for force calculations. '''
    v12 = np.array(p1-p2)
    v32 = np.array(p3-p2)
    v34 = np.array(p3-p4)
    
    #compute cross products 
    cp0 = np.cross(v12,v32)
    cp1 = np.cross(v32,v34)
    
    #get angle between cross products
    dot = np.dot(cp0,cp1)
    if dot != 0:
        norm1 = np.dot(cp0,cp0)
        norm2 = np.dot(cp1,cp1)
        dot /= np.sqrt(norm1*norm2)
        
    if dot > 1.0:
        dot = 1.0
    elif dot < -1.0:
        dot = -1.0

    if dot > 0.99 or dot < -0.99:
        #close to acos singularity, so use asin isntead
        cross = np.cross(cp0,cp1)
        scale = np.dot(cp0,cp0)*np.dot(cp1,cp1)
        angle = np.arcsin(np.sqrt(np.dot(cross,cross)/scale))
        if dot < 0.0:
            angle = np.pi - angle
    else:
        angle = np.arccos(dot)
    
    #figure out sign
    sdot = np.dot(v12,cp1)
    angle *= np.sign(sdot)
    return angle
    
    
def make_rotation_matrix(p1, p2, angle):
    '''Make a rotation matrix for rotating angle radians about the p2-p1 axis'''
    # https://en.wikipedia.org/wiki/Rotation_matrix#Rotation_matrix_from_axis_and_angle
    vec = np.array((p2-p1))
    x,y,z = vec/np.linalg.norm(vec)
    cos = np.cos(angle)
    sin = np.sin(angle)
    R = np.array([[cos+x*x*(1-cos), x*y*(1-cos)-z*sin, x*z*(1-cos)+y*sin],
                  [y*x*(1-cos)+z*sin, cos+y*y*(1-cos), y*z*(1-cos)-x*sin],
                  [z*x*(1-cos)-y*sin, z*y*(1-cos)+x*sin, cos+z*z*(1-cos)]])
    return R
    
    
def moving_atoms(pdb,a=1,b=2):
    '''Identify the atoms on the b side of a dihedral with the central
    atoms a and b.  This is not the most efficient algorithm, but 
    for small molecules it really does not matter.
    A boolean mask of these atoms is returned.'''
    moving = np.zeros(pdb.topology.getNumAtoms())
    moving[b] = 1
    moving[a] = -1
    changed = True
    while changed:
        changed = False
        for b in pdb.topology.bonds():
            if (moving[b.atom1.index] + moving[b.atom2.index]) == 1:
                moving[b.atom1.index] = moving[b.atom2.index] = 1
                changed = True
    moving[1] = 0    
    return moving.astype(bool)

# read file
pdb = PDBFile('data/gly.pdb')

# setup openmm system using amber 14 forcefield which defines the potential energy function
ff = ForceField('amber14-all.xml')
system = ff.createSystem(pdb.topology,ignoreExternalBonds=True)
# we aren't actually simulating, but need a simulation object to calculate energies
integrator = VerletIntegrator(1*femtosecond)
simulation = Simulation(pdb.topology, system, integrator)

# store atom positions
# note that more code is necessary for this code to be a general solution
# as opposed to only working with our carefully prepared inputs
origpos = np.array(pdb.getPositions()._value)
newpos = origpos.copy()

aindex, bindex = 1,2
capos = origpos[aindex]
cpos = origpos[bindex]

# a boolean mask of the atoms that should be rotated around the dihedral
mask = moving_atoms(pdb, aindex, bindex)

def get_energy(d):
    '''Return energy for amino acid at given dihedral value d (in degrees)'''
    R = make_rotation_matrix(capos,cpos,np.deg2rad(d))    
    newpos[mask] = np.matmul(R,origpos[mask].T).T     
    # setPositions to newpos in simulation 
    simulation.context.setPositions(newpos)
    # get simulation.context state, fetching energy
    state = simulation.context.getState(getEnergy=True)
    # record the energy
    return state.getPotentialEnergy()
    
energies = []
angles = []
for d in range(0,360,1):
    # record the energy
    energies.append(get_energy(d))
    # record the dihedral
    d = dihedral(*newpos[:4])
    if d < 0: d += 2*np.pi
    angles.append(np.rad2deg(d))

E = [e._value for e in energies] # unitless for numpy manipulation
T = 300*kelvin
weights = np.array([np.exp(-e/(MOLAR_GAS_CONSTANT_R*T)) for e in energies])
Z = weights.sum()
probs = weights/Z

plt.figure(figsize=(8,4),dpi=300); plt.plot(angles,probs); plt.xlabel("Dihedral (Degrees)"); plt.ylabel("Probability (at 300K)"); plt.xlim(0,360); plt.title('GLY');

def run_mcmc(scale=360,steps=100000,T=300*kelvin,initd=0):    
    '''Return unnormalized distribution of angles from MCMC'''
    anglecnts = np.zeros(360)
    d = initd
    lastE = get_energy(d)
    lastP = np.exp(-lastE/(MOLAR_GAS_CONSTANT_R*T))
    # d is last evaluated dihedral
    for t in range(steps):    
        #propose a move
        dprime = (d + (2*np.random.random()-1)*scale)%360

        R = make_rotation_matrix(capos,cpos,np.deg2rad(dprime))    
        newpos[mask] = np.matmul(R,origpos[mask].T).T

        # setPositions to newpos in simulation 
        simulation.context.setPositions(newpos)
        state = simulation.context.getState(getEnergy=True)
        Eprime = state.getPotentialEnergy()
        Pprime = np.exp(-Eprime/(MOLAR_GAS_CONSTANT_R*T))
        # Metropolis
        if Pprime >= lastP or (np.random.random() < Pprime/lastP): # make move
            d = dprime
            anglecnts[int(np.trunc(d))] += 1
            lastP = Pprime
            lastE = Eprime
        else:
            anglecnts[int(np.trunc(d))] += 1
    return anglecnts,d

cnts, d = run_mcmc(steps=100000)
P = cnts/cnts.sum()

plt.figure(figsize=(8,4),dpi=300); plt.plot(angles,probs); plt.xlabel("Dihedral (Degrees)"); plt.ylabel("Probability (at 300K)"); plt.xlim(0,360)
plt.bar(angles,P,width=1.0,color='gold');

dprime = (d + np.random.random()*scale)%360

%%html
<div id="mcmcscale" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#mcmcscale';
	jQuery(divid).asker({
	    id: divid,
	    question: "A smaller scale will make the convergence to the target probability distribution...",
		answers: ['Slower','Faster','Inaccurate','Nonergodic'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

cnts = np.zeros(360)
fig = plt.figure(figsize=(8,4)); plt.xlabel("Dihedral (Degrees)"); plt.ylabel("Probability (at 300K)"); plt.xlim(0,360); plt.plot(angles,probs); 
ax = plt.gca()
N = 20
lastd = 0
def make_fig(i):
    global cnts, lastd #the horror!
    C, lastd = run_mcmc(steps=500,initd=lastd)
    cnts += C
    P = cnts/cnts.sum()
    ax.bar(angles,P,width=1.0,color=cm.hot(i/N),alpha=.8);

anim = FuncAnimation(fig,make_fig,frames=range(N),interval=500)
video = anim.to_html5_video()
plt.close()

html = display.HTML(video)
display.display(html)

cnts = np.zeros(360)
fig = plt.figure(figsize=(8,4)); plt.xlabel("Dihedral (Degrees)"); plt.ylabel("Probability (at 300K)"); plt.xlim(0,360); plt.plot(angles,probs); 
ax = plt.gca()
N = 20
lastd = 0
def make_fig(i):
    global cnts, lastd #the horror!
    C, lastd = run_mcmc(scale=5,steps=500,initd=lastd)
    cnts += C
    P = cnts/cnts.sum()
    ax.bar(angles,P,width=1.0,color=cm.hot(i/N),alpha=.8);

anim = FuncAnimation(fig,make_fig,frames=range(N),interval=500)
video = anim.to_html5_video()
plt.close()

html = display.HTML(video)
display.display(html)

system = forcefield.createSystem(modeller.topology,  nonbondedMethod=PME,constraints=HBonds)
barostat = MonteCarloBarostat(1*atmospheres, 300*kelvin, 25)
system.addForce(barostat)

simulation = Simulation(modeller.topology, system, LangevinIntegrator(300*kelvin,1/picosecond,2*femtosecond))
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()
simulation.reporters.append(StateDataReporter('outputp.txt', 1, step=True, volume=True,
        potentialEnergy=True, kineticEnergy=True, totalEnergy=True, temperature=True))


simulation.step(25000)

datap = pd.read_csv('outputp.txt')
datap['Box Volume (nm^3)'].plot()
plt.legend(); plt.xlabel('Frame');

import MDAnalysis

universe = MDAnalysis.Universe('shmtex.prmtop', 'shmtex.dcd')

universe.atoms

<AtomGroup with 85695 atoms>

 universe.select_atoms("protein")

<AtomGroup with 14432 atoms>

 universe.select_atoms("resname PRO")

<AtomGroup with 644 atoms>

 universe.select_atoms("byres around 5 resid 370")

<AtomGroup with 245 atoms>

prot = universe.select_atoms("protein")
prot.select_atoms("byres around 5 resid 370") #select whole residues within 5 of residue 100

<AtomGroup with 209 atoms>

prot.write('frame.pdb')
py3Dmol.view(data=open('frame.pdb').read(),style='cartoon:color~spectrum').show()

universe.trajectory

<DCDReader shmtex.dcd with 100 frames of 85695 atoms>

for ts in universe.trajectory[:5]:
    print(ts.frame, universe.trajectory.frame, ts.time, prot.center_of_mass())

0 0 4.8888212322065376e-05 [63.06938232 61.67971211 47.8073606 ]
1 1 9.777642464413075e-05 [62.23701511 62.61718365 47.92329793]
2 2 0.00014666463696619611 [62.30295647 62.41173235 47.51776376]
3 3 0.0001955528492882615 [62.32219661 61.82873976 46.42322351]
4 4 0.0002444410616103269 [61.41691018 62.50910413 45.66813246]

import MDAnalysis.analysis

from MDAnalysis.analysis.rms import *  #this pulls in an rmsd function

universe.trajectory[0] #sets the current frame to the start
refcoord = prot.positions # once stored, _coordinates_ do NOT change with trajectory
refcoord

array([[67.44726 , 79.259   , 22.918747],
       [68.181496, 78.82586 , 22.377106],
       [67.914246, 80.0178  , 23.394417],
       ...,
       [54.802658, 77.382744, 36.064526],
       [55.336662, 76.558395, 36.86686 ],
       [55.43933 , 77.794464, 35.015507]], dtype=float32)

n = universe.trajectory.n_frames
universe.trajectory[-1] #last frame
print(rmsd(refcoord,prot.positions))

38.09799719167415

protrmsd = []
carmsd = []
protref = prot.positions
caref = prot.select_atoms('name CA').positions
for ts in universe.trajectory:
    protrmsd.append(rmsd(protref,prot.positions))
    carmsd.append(rmsd(caref,prot.select_atoms('name CA').positions))

plt.plot(range(n),protrmsd,range(n),carmsd)
plt.xlabel("Frame #"); plt.ylabel('RMSD'); plt.legend(['Protein','CA'],loc='lower right');

from MDAnalysis.analysis.align import *

universe.trajectory[0]
#if we align to ourselves, will fit to current frame
alignment = AlignTraj(universe, universe, select='protein',filename='rmsfit.dcd')
alignment.run()

<MDAnalysis.analysis.align.AlignTraj at 0x72f53d9aadb0>

universe = MDAnalysis.Universe('shmtex.prmtop', 'rmsfit.dcd')
prot = universe.select_atoms('protein')

universe.trajectory[0]
protref = prot.positions
caref = prot.select_atoms('name CA').positions

protrmsd = []
carmsd = []
for ts in universe.trajectory:
    protrmsd.append(rmsd(protref,prot.positions))
    carmsd.append(rmsd(caref,prot.select_atoms('name CA').positions))

n = universe.trajectory.n_frames
plt.plot(range(n),protrmsd,range(n),carmsd)
plt.xlabel("Frame #"); plt.ylabel('RMSD'); plt.legend(['Protein','CA'],loc='lower right');

%%html
<div id="mdtri" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#mdtri';
	jQuery(divid).asker({
	    id: divid,
	    question: "If frame 40 is ~2 RMSD from the start and frame 80 is ~2 RMSD from the start. What can be said about the RMSD between frames 40 and 80?",
		answers: ['It is ~0', 'It is < ~2','It is < ~4','Nothing'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

startref = prot.positions
universe.trajectory[-1]
endref = prot.positions  

startrmsd = []
endrmsd = []
for ts in universe.trajectory:
    startrmsd.append(rmsd(startref,prot.positions))
    endrmsd.append(rmsd(endref,prot.positions))

n = universe.trajectory.n_frames
plt.plot(range(n),startrmsd,range(n),endrmsd)
plt.xlabel("Frame #"); plt.ylabel('RMSD'); plt.legend(['Start RMSD','End RMSD'],loc='lower right');

rmsf = RMSF(prot.select_atoms('name CA')).run()

plt.plot(rmsf.results.rmsf); plt.xlabel('Residue'); plt.ylabel(r'RMSF ($\AA$)');

universe.add_TopologyAttr('tempfactors')
prot.select_atoms('name CA').tempfactors = rmsf.results.rmsf; prot.write('rmsf.pdb'); v = py3Dmol.view(data=open('rmsf.pdb').read())
v.setStyle({'cartoon':{'colorscheme':{'prop':'b','gradient':'linear','min':0,'max':5,'colors':['white','yellow','orange','red']}}}); v.show();

Practical MD continued + MCMC¶

9/23/2025¶

Getting atoms to wiggle and jiggle correctly¶

Recall¶

System Preparation Summary¶

Energy Minimization¶

Recall our fixed up protein...¶

Ensembles¶

NVE: Microcanonical¶

NVT: Canonical¶

NPT: Isothermal-isobaric¶

Equilibration¶

Thermostats¶

Andersen Thermostat¶

Reminder: Langevin¶

Initializing Velocities¶

Barostats¶

Metropolis-Hastings¶

Metropolis-Hastings (continued)¶

Why does this work?¶

Detailed Balance¶

Balance¶

MCMC: Markov chain Monte Carlo¶

Assign2/3 Revisited¶

Implementing MCMC¶

Effect of move set choice?¶

Metropolis algorithm step $t$ for `MonteCarloBarostat`¶

Anisotropic Barostats¶

MCMC Aside¶

Analyzing MD Trajectories¶

Atom Groups¶

Trajectories¶

Analysis¶

MDAnalysis.rms¶

Alignment¶

RMSF¶

Mapping to Structure¶

Comparative MD¶

Practical MD continued + MCMC¶

9/23/2025¶

Getting atoms to wiggle and jiggle correctly¶

Recall¶

System Preparation Summary¶

Energy Minimization¶

Recall our fixed up protein...¶

Ensembles¶

NVE: Microcanonical¶

NVT: Canonical¶

NPT: Isothermal-isobaric¶

Equilibration¶

Thermostats¶

Andersen Thermostat¶

Reminder: Langevin¶

Initializing Velocities¶

Barostats¶

Metropolis-Hastings¶

Metropolis-Hastings (continued)¶

Why does this work?¶

Detailed Balance¶

Balance¶

MCMC: Markov chain Monte Carlo¶

Assign2/3 Revisited¶

Implementing MCMC¶

Effect of move set choice?¶

Metropolis algorithm step $t$ for MonteCarloBarostat¶

Anisotropic Barostats¶

MCMC Aside¶

Analyzing MD Trajectories¶

Atom Groups¶

Trajectories¶

Analysis¶

MDAnalysis.rms¶

Alignment¶

RMSF¶

Mapping to Structure¶

Comparative MD¶

Metropolis algorithm step $t$ for `MonteCarloBarostat`¶