diff --git a/ConfPrune.pyx b/ConfPrune.pyx new file mode 100644 index 0000000..76f60fe --- /dev/null +++ b/ConfPrune.pyx @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jan 30 12:33:18 2015 + +@author: ke291 + +Cython file for conformer alignment and RMSD pruning. Called by Gaussian.py +and NWChem.py +""" + +from math import sqrt +from libc.stdlib cimport malloc, free +""" +Main function of this file. Takes an array of [x, y, z] for each molecule, +as well as a list of atom symbols in the same order as their coordinates + +Translates both molecules to origin, gets the rotation matrix from quatfit +and rotates the second molecule to align with the first +""" +def RMSDPrune(conformers, atoms, cutoff): + + ToDel = [] + cdef long int c1, c2, c + cdef long int l = len(conformers) + cdef double res + cdef double *RMSDMatrix = malloc(l * l * sizeof(double)) + + for c1 in range(0, l): + for c2 in range(c1, l): + if c1==c2: + RMSDMatrix[c2 + c1*l]=0.0 + else: + res = AlignedRMS(conformers[c1], conformers[c2], atoms) + RMSDMatrix[c2 + c1*l] = res + RMSDMatrix[c1 + c2*l] = res + #Check for similar conformations + for c1 in range(0, l): + for c2 in range(0, l): + if c1!=c2 and (not c1 in ToDel) and (not c2 in ToDel): + #if Align.AlignedRMS(conformers[c1], conformers[c2], atoms) < cutoff: + # ToDel.append(c2) + if RMSDMatrix[c2 + c1*l]malloc(l * l * sizeof(double)) + + for c1 in range(0, l): + for c2 in range(c1, l): + if c1==c2: + RMSDMatrix[c2 + c1*l]=0.0 + else: + res = AlignedRMS(conformers[c1], conformers[c2], atoms) + RMSDMatrix[c2 + c1*l] = res + RMSDMatrix[c1 + c2*l] = res + #Check for similar conformations + for c1 in range(0, l): + for c2 in range(0, l): + if c1!=c2 and (not c1 in ToDel) and (not c2 in ToDel): + if RMSDMatrix[c2 + c1*l]ConfLimit: + AdjCutoff +=0.2 + ToDel = [] + for c1 in range(0, l): + for c2 in range(0, l): + if c1!=c2 and (not c1 in ToDel) and (not c2 in ToDel): + if RMSDMatrix[c2 + c1*l]0.0): + dma = d[j] - d[i] + if (abs(dma)+abs(b) < abs(dma)): + t = b/dma + else: + q = 0.5 * dma / b + t = fsign(1.0 / (abs(q) + sqrt(1.0 + q*q)), q) + + c = 1.0 / sqrt(t*t + 1.0) + s = t * c + a[i][j] = 0.0 + + for k in range(0, i-1): + atemp = c * a[k][i] - s * a[k][j] + a[k][j] = s * a[k][i] + c * a[k][j] + a[k][i] = atemp + + for k in range(i+1, j): + atemp = c * a[i][k] - s * a[k][j] + a[k][j] = s * a[i][k] + c * a[k][j] + a[i][k] = atemp + + for k in range(j, n): + atemp = c * a[i][k] - s * a[j][k] + a[j][k] = s * a[i][k] + c * a[j][k] + a[i][k] = atemp + + for k in range(0, n): + vtemp = c * v[k][i] - s * v[k][j] + v[k][j] = s * v[k][i] + c * v[k][j] + v[k][i] = vtemp + + dtemp = c * c * d[i] + s * s * d[j] - 2.0 + c * s * b + d[j] = s * s * d[i] + c * c * d[j] + 2.0 * c * s * b + d[i] = dtemp + nrot = l + + for j in range(0, n-1): + k = j + dtemp = d[k] + for i in range(j, n): + if d[i]j: + d[k] = d[j] + d[j] = dtemp + for i in range(0, n): + dtemp = v[i][k] + v[i][k] = v[i][j] + v[i][j] = dtemp + + cdef double ret[5][4] + for i in range(0,4): + for j in range(0,4): + ret[i][j]=v[i][j] + for i in range(0,4): + ret[4][i] = d[i] + + +cdef q2mat(double q[4]): + + u = [[0.0, 0.0, 0.0],[0.0, 0.0, 0.0],[0.0, 0.0, 0.0]] + u[0][0] = q[0]*q[0] + q[1]*q[1] - q[2]*q[2] - q[3]*q[3] + u[1][0] = 2.0 *(q[1] * q[2] - q[0] * q[3]) + u[2][0] = 2.0 *(q[1] * q[3] + q[0] * q[2]) + u[0][1] = 2.0 *(q[2] * q[1] + q[0] * q[3]) + u[1][1] = q[0]*q[0] - q[1]*q[1] + q[2]*q[2] - q[3]*q[3] + u[2][1] = 2.0 *(q[2] * q[3] - q[0] * q[1]) + u[0][2] = 2.0 *(q[3] * q[1] - q[0] * q[2]) + u[1][2] = 2.0 
*(q[3] * q[2] + q[0] * q[1]) + u[2][2] = q[0]*q[0] - q[1]*q[1] - q[2]*q[2] + q[3]*q[3] + + return u + +def fsign(a, b): + if (b>=0): + return a + else: + return -a \ No newline at end of file diff --git a/DP4.jar b/DP4.jar new file mode 100644 index 0000000..0e22309 Binary files /dev/null and b/DP4.jar differ diff --git a/DP4.py b/DP4.py new file mode 100644 index 0000000..79d1ca6 --- /dev/null +++ b/DP4.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed May 27 14:18:37 2015 +Updated on July 30 14:18:37 2015 +@author: ke291 + +Equivalent and compact port of DP4.jar to python. The results +produced are essentially equivalent, but not identical due to different +floating point precision used in the Python (53 bits) and Java (32 bits) +implementation. +""" +from scipy import stats +import pickle +import re +import bisect + +meanC = 0.0 +meanH = 0.0 +stdevC = 2.269372270818724 +stdevH = 0.18731058105269952 +output = [] + + +def main(Clabels, Cvalues, Hlabels, Hvalues, Cexp, Hexp, settings): + + Print(str(Cexp)) + Print(str(Hexp)) + + C_cdp4 = [] + H_cdp4 = [] + Comb_cdp4 = [] + + for isomer in range(0, len(Cvalues)): + + sortedClabels, sortedCvalues, sortedCexp =\ + AssignExpNMR(Clabels, Cvalues[isomer], Cexp) + scaledC = ScaleNMR(sortedCvalues, sortedCexp) + + sortedHlabels, sortedHvalues, sortedHexp = \ + AssignExpNMR(Hlabels, Hvalues[isomer], Hexp) + scaledH = ScaleNMR(sortedHvalues, sortedHexp) + + ScaledErrorsC = [sortedCexp[i] - scaledC[i] + for i in range(0, len(scaledC))] + ScaledErrorsH = [sortedHexp[i] - scaledH[i] + for i in range(0, len(scaledH))] + + Print("\nAssigned shifts for isomer " + str(isomer+1) + ": ") + PrintNMR('C', sortedClabels, sortedCvalues, scaledC, sortedCexp) + Print("Max C error: " + format(max(ScaledErrorsC, key=abs), "6.2f")) + PrintNMR('H', sortedHlabels, sortedHvalues, scaledH, sortedHexp) + Print("Max H error: " + format(max(ScaledErrorsH, key=abs), "6.2f")) + + if settings.PDP4: + C_cdp4.append(CalculateCDP4(ScaledErrorsC, meanC, stdevC)) + H_cdp4.append(CalculateCDP4(ScaledErrorsH, meanH, stdevH)) + elif settings.EP5: + C_cdp4.append(CalculatePDP4(ScaledErrorsC, meanC, stdevC)) + #C_cdp4.append(CalculateKDE(ScaledErrorsC, settings.ScriptDir + '/NucCErr.pkl')) + H_cdp4.append(CalculatePDP4(ScaledErrorsH, meanH, stdevH)) + #H_cdp4.append(CalculateKDE(ScaledErrorsH, settings.ScriptDir + '/NucHErr.pkl')) + Comb_cdp4.append(C_cdp4[-1]*H_cdp4[-1]) + Print("\nDP4 based on C: " + format(C_cdp4[-1], "6.2e")) + Print("DP4 based on H: " + format(H_cdp4[-1], "6.2e")) + + relCDP4 = [(100*x)/sum(C_cdp4) for x in C_cdp4] + relHDP4 = [(100*x)/sum(H_cdp4) for x in H_cdp4] + relCombDP4 = [(100*x)/sum(Comb_cdp4) for x in Comb_cdp4] + + PrintRelDP4('both carbon and proton data', relCombDP4) + PrintRelDP4('carbon data only', relCDP4) + PrintRelDP4('proton data only', relHDP4) + + return output + + +def AssignExpNMR(labels, calcShifts, exp): + + #Replace all 'or' and 'OR' with ',', remove all spaces and 'any' + exp = re.sub(r"or|OR", ',', exp, flags=re.DOTALL) + exp = re.sub(r" |any", '', exp, flags=re.DOTALL) + + #Get all assignments, split mulitassignments + ExpLabels = re.findall(r"(?<=\().*?(?=\))", exp, flags=re.DOTALL) + ExpLabels = [x.split(',') for x in ExpLabels] + + #Remove assignments and get shifts + ShiftData = (re.sub(r"\(.*?\)", "", exp, flags=re.DOTALL)).split(',') + expShifts = [float(x) for x in ShiftData] + + #Prepare sorted calculated data with labels and sorted exp data + sortedCalc = sorted(calcShifts) + sortedExp = sorted(expShifts) + 
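    #The assignment labels are kept in the same ascending-shift order as the
    #experimental values; the three passes further down then (1) place the
    #unambiguous single-label assignments, (2) resolve multi-label
    #assignments against calculated labels that are still free, and
    #(3) fill any remaining shifts with the first free calculated label.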
sortedExpLabels = SortExpAssignments(expShifts, ExpLabels) + sortedCalcLabels = [] + for v in sortedCalc: + index = calcShifts.index(v) + sortedCalcLabels.append(labels[index]) + + assignedExpLabels = ['' for i in range(0, len(sortedExp))] + + #First pass - assign the unambiguous shifts + for v in range(0, len(sortedExp)): + if len(sortedExpLabels[v]) == 1 and sortedExpLabels[v][0] != '': + #Check that assignment exists in computational data + if sortedExpLabels[v][0] in labels: + assignedExpLabels[v] = sortedExpLabels[v][0] + else: + Print("Label " + sortedExpLabels[v][0] + + " not found in among computed shifts, please check NMR assignment.") + quit() + #Second pass - assign shifts from a limited set + for v in range(0, len(sortedExp)): + if len(sortedExpLabels[v]) != 1 and sortedExpLabels[v][0] != '': + for l in sortedCalcLabels: + if l in sortedExpLabels[v] and l not in assignedExpLabels: + assignedExpLabels[v] = l + break + #Final pass - assign unassigned shifts in order + for v in range(0, len(sortedExp)): + if sortedExpLabels[v][0] == '': + for l in sortedCalcLabels: # Take the first free label + if l not in assignedExpLabels: + assignedExpLabels[v] = l + break + sortedCalc = [] + #Rearrange calc values to match the assigned labels + for l in assignedExpLabels: + index = labels.index(l) + sortedCalc.append(calcShifts[index]) + + return assignedExpLabels, sortedCalc, sortedExp + + +def SortExpAssignments(shifts, assignments): + tempshifts = list(shifts) + tempassignments = list(assignments) + sortedassignments = [] + while len(tempassignments) > 0: + index = tempshifts.index(min(tempshifts)) + sortedassignments.append(tempassignments[index]) + tempshifts.pop(index) + tempassignments.pop(index) + return sortedassignments + + +#Scale the NMR shifts +def ScaleNMR(calcShifts, expShifts): + slope, intercept, r_value, p_value, std_err = stats.linregress(expShifts, + calcShifts) + scaled = [(x-intercept)/slope for x in calcShifts] + return scaled + + +def PrintNMR(nucleus, labels, values, scaled, exp): + Print("\nAssigned " + nucleus + + " shifts: (label, calc, corrected, exp, error)") + for i in range(0, len(labels)): + Print(format(labels[i], "6s") + ' ' + format(values[i], "6.2f") + ' ' + + format(scaled[i], "6.2f") + ' ' + format(exp[i], "6.2f") + ' ' + + format(exp[i]-scaled[i], "6.2f")) + + +def PrintRelDP4(title, RelDP4): + Print("\nResults of DP4 using " + title + ":") + for i in range(0, len(RelDP4)): + Print("Isomer " + str(i+1) + ": " + format(RelDP4[i], "4.1f") + "%") + + +def Print(s): + print s + output.append(s) + + +def CalculateCDP4(errors, expect, stdev): + cdp4 = 1.0 + for e in errors: + z = abs((e-expect)/stdev) + cdp4 = cdp4*2*stats.norm.cdf(-z) + return cdp4 + + +#Alternative function using probability density function instead of cdf +def CalculatePDP4(errors, expect, stdev): + pdp4 = 1.0 + for e in errors: + #z = (e-expect)/stdev + #pdp4 = pdp4*stats.norm.pdf(z) + pdp4 = pdp4*stats.norm(expect, stdev).pdf(e) + return pdp4 + + +#use as CalculateKDE(errors, 'NucCErr.pkl') for C or +#CalculateKDE(errors, 'NucHErr.pkl') for H +#load empirical error data from file and use KDE to construct pdf +def CalculateKDE(errors, PickleFile): + + pkl_file = open(PickleFile, 'rb') + ErrorData = pickle.load(pkl_file) + kde = stats.gaussian_kde(ErrorData) + + ep5 = 1.0 + for e in errors: + #z = (e-expect)/stdev + #pdp4 = pdp4*stats.norm.pdf(z) + ep5 = ep5*float(kde(e)[0]) + return ep5 + + +#use as CalculateRKDE(errors, 'RKDEC.pkl') for C or +#CalculateKDE(errors, 'RKDEH.pkl') for H 
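#(the shifts argument gives the chemical shift of each nucleus; bisect places
#it into one of the stored shift regions and the matching per-region KDE then
#supplies the likelihood of that nucleus' error)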
+#load empirical error data from file and use KDE to construct several pdfs, +#one for each chemical shift region +def CalculateRKDE(errors, shifts, PickleFile): + + #Load the data + pkl_file = open(PickleFile, 'rb') + regions, RErrors = pickle.load(pkl_file) + + #Reconstruct the distributions for each region + kdes = [] + for es in RErrors: + kdes.append(stats.gaussian_kde(es)) + + ep5 = 1.0 + for i, e in enumerate(errors): + region = bisect.bisect_left(regions, shifts[i]) + ep5 = ep5*float((kdes[region])(e)[0]) + return ep5 diff --git a/FiveConf.py b/FiveConf.py new file mode 100644 index 0000000..672fddc --- /dev/null +++ b/FiveConf.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 17 15:20:18 2014 + +@author: ke291 + +Gets called by PyDP4.py if automatic 5-membered cycle corner-flipping is used. +""" + +import numpy as np +from math import sqrt, pi, cos, sin, acos +import scipy.optimize as sciopt +import sys +sys.path.append( + '/home/ke291/Tools/openbabel-install/lib/python2.7/site-packages/') +from openbabel import * + + +def main(f, settings): + + """ + Find the axis atoms + Find all the atoms to be rotated + + Rotate it and the substiuents to the other side of the plane + """ + obconversion = OBConversion() + obconversion.SetInFormat("sdf") + obmol = OBMol() + + obconversion.ReadFile(obmol, f) + + obmol.ConnectTheDots() + + #Find the atoms composing furan ring + Rings = obmol.GetSSSR() + furan = [] + for ring in Rings: + if len(settings.RingAtoms) == 5: + if all(x in ring._path for x in settings.RingAtoms): + furan = ring + break + else: + if ring.Size() == 5 and not ring.IsAromatic(): + furan = ring + break + + if furan == []: + "No five membered rings to rotate. Quitting..." + quit() + #Find the plane of the 5-membered ring and the outlying atom + norm, d, outAtom = FindFuranPlane(obmol, furan) + + #Find the atoms connected to the outlying atom and sort them + #as either part of the ring(axis atoms) or as atoms to be rotated + AxisAtoms = [] + RotAtoms = [] + + for NbrAtom in OBAtomAtomIter(outAtom): + #if NbrAtom.IsInRingSize(5): + if furan.IsInRing(NbrAtom.GetIdx()): + AxisAtoms.append(NbrAtom) + else: + RotAtoms.append(NbrAtom) + FindSubstAtoms(NbrAtom, outAtom, RotAtoms) + + #Simple switch to help detect if the atoms are rotated the right way + WasAbove90 = False + angle = FindRotAngle(AxisAtoms[0], AxisAtoms[1], outAtom, norm) + if angle > 0.5*pi: + WasAbove90 = True + rotangle = 2*(angle-0.5*pi) + else: + WasAbove90 = False + rotangle = 2*(0.5*pi-angle) + OldAtomCoords = outAtom.GetVector() + print "Atom " + str(outAtom.GetAtomicNum()) + " will be rotated by " +\ + str(rotangle*57.3) + ' degrees' + RotateAtom(outAtom, AxisAtoms[0], AxisAtoms[1], rotangle) + angle2 = FindRotAngle(AxisAtoms[0], AxisAtoms[1], outAtom, norm) + + #if the atom is on the same side of the plane as it was, + # it has been rotated in the wrong direction + if ((angle2 > 0.5*pi) and WasAbove90) or ((angle2 < 0.5*pi) and not WasAbove90): + #Flip the sign of the rotation angle, restore the coords + #and rotate the atom in the opposite direction + print "Atom was rotated the wrong way, switching the direction" + rotangle = -rotangle + outAtom.SetVector(OldAtomCoords) + RotateAtom(outAtom, AxisAtoms[0], AxisAtoms[1], rotangle) + + RotatedAtoms = [] # Index to make sure that atoms are not rotated twice + for atom in RotAtoms: + if atom not in RotatedAtoms: + RotateAtom(atom, AxisAtoms[0], AxisAtoms[1], rotangle) + RotatedAtoms.append(atom) + else: + print "Atom already rotated, 
skipping" + + obconversion.SetOutFormat("sdf") + obconversion.WriteFile(obmol, f[:-4] + 'rot.sdf') + + +#Recursively finds all the atoms connected to the input +def FindSubstAtoms(atom, outAtom, al): + + indexes = [a.GetIdx() for a in al] + for NbrAtom in OBAtomAtomIter(atom): + + if (NbrAtom.GetIdx() not in indexes) and\ + (NbrAtom.GetIdx() != outAtom.GetIdx()): + al.append(NbrAtom) + FindSubstAtoms(NbrAtom, outAtom, al) + + +#Rotate atom around and axis by an angle +def RotateAtom(atom, AxisAtom1, AxisAtom2, angle): + + [u, v, w] = GetUnitVector(AxisAtom1, AxisAtom2) + [x, y, z] = [atom.x(), atom.y(), atom.z()] + [a, b, c] = [(AxisAtom1.x()+AxisAtom1.x())/2, (AxisAtom1.y()+AxisAtom1.y())/2,\ + (AxisAtom1.z()+AxisAtom1.z())/2] + + X = (a*(v**2 + w**2) - u*(b*v+c*w-u*x-v*y-w*z))*(1-cos(angle))+x*cos(angle)\ + +(-1*c*v+b*w-w*y+v*z)*sin(angle) + Y = (b*(u**2 + w**2) - v*(a*u+c*w-u*x-v*y-w*z))*(1-cos(angle))+y*cos(angle)\ + +(c*u-a*w+w*x-u*z)*sin(angle) #was _+_u*z)*sin(angle) + Z = (c*(u**2 + v**2) - w*(a*u+b*v-u*x-v*y-w*z))*(1-cos(angle))+z*cos(angle)\ + +(-1*b*u+a*v-v*x+u*y)*sin(angle) + + atom.SetVector(X, Y, Z) + + +def GetUnitVector(Atom1, Atom2): + vector = [] + vector.append(Atom2.x() - Atom1.x()) + vector.append(Atom2.y() - Atom1.y()) + vector.append(Atom2.z() - Atom1.z()) + + length = np.linalg.norm(vector) + return [x/length for x in vector] + + +#Finds the angle by which atoms need to be rotated by taking the angle +#the atom is out of the plane (the 2 neighbor atoms being the axis) +#and doubling it +def FindRotAngle(AxisAtom1, AxisAtom2, OutAtom, Normal): + + start = [] + start.append((AxisAtom1.x() + AxisAtom2.x())/2) + start.append((AxisAtom1.y() + AxisAtom2.y())/2) + start.append((AxisAtom1.z() + AxisAtom2.z())/2) + + vector = [] + vector.append(OutAtom.x() - start[0]) + vector.append(OutAtom.y() - start[1]) + vector.append(OutAtom.z() - start[2]) + + #Angle between plane normal and OOP atom + vangle = angle(vector, Normal) + + #print "Measured angle: " + str(vangle*57.3) + return vangle + + +def crossproduct(v1, v2): + product = [0, 0, 0] + product[0] = v1[1]*v2[2]-v1[2]*v2[1] + product[1] = v1[2]*v2[0]-v1[0]*v2[2] + product[2] = v1[0]*v2[1]-v1[1]*v2[0] + return product + + +def dotproduct(v1, v2): + return sum((a*b) for a, b in zip(v1, v2)) + + +def length(v): + return sqrt(dotproduct(v, v)) + + +def angle(v1, v2): + return acos(dotproduct(v1, v2) / (length(v1) * length(v2))) + + +"""Finds planes for every 3 atoms, calculates distances to the plane +for the other 2 atoms andchoose the plane with the smallest smallest distance +""" +def FindFuranPlane(mol, furan): + + atomIds = furan._path + atoms = [] + + for i in atomIds: + atoms.append(mol.GetAtom(i)) + + MinError = 100.0 + + for atom in atoms: + pats = [a for a in atoms if a != atom] + norm, d, error = LstSqPlane(pats[0], pats[1], pats[2], pats[3]) + if error < MinError: + MinError = error + MaxNorm = norm + MaxD = d + OutAtom = atom + + return MaxNorm, MaxD, OutAtom + + +#Given 3 atoms, finds a plane defined by a normal vector and d +def FindPlane(atom1, atom2, atom3): + + vector1 = [atom2.x() - atom1.x(), atom2.y() - atom1.y(), + atom2.z() - atom1.z()] + vector2 = [atom3.x() - atom1.x(), atom3.y() - atom1.y(), + atom3.z() - atom1.z()] + cross_product = [vector1[1] * vector2[2] - vector1[2] * vector2[1], + -1 * vector1[0] * vector2[2] - vector1[2] * vector2[0], + vector1[0] * vector2[1] - vector1[1] * vector2[0]] + + d = cross_product[0] * atom1.x() - cross_product[1] * atom1.y() + \ + cross_product[2] * atom1.z() + + 
return cross_product, d + + +def LstSqPlane(atom1, atom2, atom3, atom4): + + # Inital guess of the plane + [a0, b0, c0], d0 = FindPlane(atom1, atom2, atom3) + + f = lambda (a, b, c, d): PlaneError([atom1, atom2, atom3, atom4], a, b, c, d) + res = sciopt.minimize(f, (a0, b0, c0, d0), method='nelder-mead') + plane = list(res.x) + + return plane[:3], plane[3], f(plane) + + +def PlaneError(atoms, a, b, c, d): + dists = [] + for atom in atoms: + dists.append(abs(PointPlaneDist([a, b, c], d, atom))) + return sum(dists)/len(dists) + + +#Calculates distance from an atom to a plane +def PointPlaneDist(norm, d, atom): + + point = [] + + point.append(atom.x()) + point.append(atom.y()) + point.append(atom.z()) + + a = norm[0]*point[0] + norm[1]*point[1] + norm[2]*point[2] + d + b = sqrt(norm[0]**2 + norm[1]**2 + norm[2]**2) + + return a/b diff --git a/Gaussian.py b/Gaussian.py new file mode 100644 index 0000000..28200e9 --- /dev/null +++ b/Gaussian.py @@ -0,0 +1,437 @@ +#!/usr/bin/env python +from __future__ import division +# -*- coding: utf-8 -*- +""" +Created on Wed Nov 19 15:56:54 2014 + +@author: ke291 + +Contains all of the Gaussian specific code for input generation and calculation +execution. Called by PyDP4.py. +""" + +import Tinker +import MacroModel + +import subprocess +import socket +import os +import time +import sys +import glob +import pyximport +pyximport.install() +import ConfPrune + + +def SetupGaussian(MMoutp, Gausinp, numDigits, settings, adjRMSDcutoff): + + if settings.MMTinker: + #Reads conformer geometry, energies and atom labels from Tinker output + (atoms, conformers, charge) = Tinker.ReadTinker(MMoutp, settings) + else: + (atoms, conformers, charge) = MacroModel.ReadMacromodel(MMoutp, + settings) + + #Prune similar conformations, if the number exceeds the limit + if len(conformers) > settings.PerStructConfLimit: + pruned = ConfPrune.RMSDPrune(conformers, atoms, adjRMSDcutoff) + else: + pruned = conformers + + print str(len(conformers) - len(pruned)) +\ + " or " + "{:.1f}".format(100*(len(conformers) - len(pruned)) / + len(conformers))+"% of conformations have been pruned based on " +\ + str(adjRMSDcutoff) + " angstrom cutoff" + + for num in range(0, len(pruned)): + filename = Gausinp+str(num+1).zfill(3) + if not settings.DFTOpt: + WriteGausFile(filename, pruned[num], atoms, charge, settings) + else: + WriteGausFileOpt(filename, pruned[num], atoms, charge, settings) + + print str(len(pruned)) + " .com files written" + + +#Adjust the RMSD cutoff to keep the conformation numbers reasonable +def AdaptiveRMSD(MMoutp, settings): + + if settings.MMTinker: + #Reads conformer geometry, energies and atom labels from Tinker output + (atoms, conformers, charge) = Tinker.ReadTinker(MMoutp, settings) + else: + (atoms, conformers, charge) = MacroModel.ReadMacromodel(MMoutp, + settings) + + return ConfPrune.AdaptRMSDPrune(conformers, atoms, + settings.InitialRMSDcutoff, + settings.PerStructConfLimit) + + +def WriteGausFile(Gausinp, conformer, atoms, charge, settings): + + f = file(Gausinp + '.com', 'w') + f.write('%mem=2800MB\n%chk='+Gausinp + '.chk\n') + #f.write('# b3lyp/6-31g(d,p) Opt nmr=giao\n') + if settings.Solvent != '': + f.write('# b3lyp/6-31g(d,p) nmr=giao scrf=(solvent=' + + settings.Solvent+')\n') + else: + f.write('# b3lyp/6-31g(d,p) nmr=giao\n') + f.write('\n'+Gausinp+'\n\n') + f.write(str(charge) + ' 1\n') + + natom = 0 + + for atom in conformer: + f.write(atoms[natom] + ' ' + atom[1] + ' ' + atom[2] + ' ' + + atom[3] + '\n') + natom = natom + 1 + f.write('\n') + f.close() + 
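#Illustrative sketch only, not part of the original file: BuildGausInput is a
#hypothetical helper showing the .com layout that WriteGausFile above
#produces - the Link 0 section, a B3LYP/6-31G(d,p) GIAO route with an
#optional PCM solvent keyword, the title, the charge/multiplicity line and
#the Cartesian coordinates.
def BuildGausInput(name, conformer, atoms, charge, solvent=''):
    route = '# b3lyp/6-31g(d,p) nmr=giao'
    if solvent != '':
        route = route + ' scrf=(solvent=' + solvent + ')'
    lines = ['%mem=2800MB', '%chk=' + name + '.chk', route, '', name, '',
             str(charge) + ' 1']
    for i in range(0, len(conformer)):
        #conformer[i] holds [atom number, x, y, z] as strings, as read from
        #the Tinker/MacroModel output
        lines.append(atoms[i] + ' ' + conformer[i][1] + ' ' +
                     conformer[i][2] + ' ' + conformer[i][3])
    return '\n'.join(lines) + '\n\n'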
+ +def WriteGausFileOpt(Gausinp, conformer, atoms, charge, settings): + + #write the initial DFT geometry optimisation input file first + f1 = file(Gausinp + 'a.com', 'w') + f1.write('%mem=2800MB\n%chk='+Gausinp + '.chk\n') + + if settings.Solvent != '': + f1.write('# b3lyp/6-31g(d,p) Opt=(maxcycles=30) scrf=(solvent=' + + settings.Solvent+')\n') + else: + f1.write('# b3lyp/6-31g(d,p) Opt=(maxcycles=30)\n') + + f1.write('\n'+Gausinp+'\n\n') + f1.write(str(charge) + ' 1\n') + + natom = 0 + + for atom in conformer: + f1.write(atoms[natom] + ' ' + atom[1] + ' ' + atom[2] + ' ' + + atom[3] + '\n') + natom = natom + 1 + f1.write('\n') + f1.close() + + #Write the nmr prediction input file, + #using the geometry from checkpoint file + f2 = file(Gausinp + 'b.com', 'w') + f2.write('%mem=2800MB\n%chk='+Gausinp + '.chk\n') + if settings.Solvent != '': + f2.write('# b3lyp/6-31g(d,p) nmr=giao geom=checkpoint scrf=(solvent=' + + settings.Solvent+')\n') + else: + f2.write('# b3lyp/6-31g(d,p) nmr=giao geom=checkpoint\n') + f2.write('\n'+Gausinp+'\n\n') + f2.write(str(charge) + ' 1\n') + f2.write('\n') + f2.close() + + +def GetFiles2Run(inpfiles, settings): + #Get the names of all relevant input files + GinpFiles = [] + for filename in inpfiles: + if not settings.DFTOpt: + GinpFiles = GinpFiles + glob.glob(filename + 'ginp???.com') + else: + GinpFiles = GinpFiles + glob.glob(filename + 'ginp???a.com') + Files2Run = [] + + #for every input file check that there is a completed output file, + #delete the incomplete outputs and add the inputs to be done to Files2Run + for filename in GinpFiles: + if not settings.DFTOpt: + if not os.path.exists(filename[:-3]+'out'): + Files2Run.append(filename) + else: + if IsGausCompleted(filename[:-3] + 'out'): + #print filename[:-3]+'out already exists' + continue + else: + os.remove(filename[:-3] + 'out') + Files2Run.append(filename) + else: + if not os.path.exists(filename[:-5]+'.out'): + Files2Run.append(filename) + else: + if IsGausCompleted(filename[:-5] + '.out'): + #print filename[:-3]+'out already exists' + continue + else: + os.remove(filename[:-5] + '.out') + Files2Run.append(filename) + + return Files2Run + + +def IsGausCompleted(f): + Gfile = open(f, 'r') + outp = Gfile.readlines() + Gfile.close() + if len(outp) < 10: + return False + if "Normal termination" in outp[-1]: + return True + else: + return False + + +#Still need addition of support for geometry optimisation +def RunOnZiggy(folder, queue, GausFiles, settings): + + print "ziggy GAUSSIAN job submission script\n" + + #Check that folder does not exist, create job folder on ziggy + outp = subprocess.check_output('ssh ziggy ls', shell=True) + if folder in outp: + print "Folder exists on ziggy, choose another folder name." + return + + outp = subprocess.check_output('ssh ziggy mkdir ' + folder, shell=True) + + #Write the qsub scripts + for f in GausFiles: + if not settings.DFTOpt: + WriteSubScript(f[:-4], queue, folder, settings) + else: + WriteSubScriptOpt(f[:-4], queue, folder, settings) + print str(len(GausFiles)) + ' .qsub scripts generated' + + #Upload .com files and .qsub files to directory + print "Uploading files to ziggy..." 
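    #scp each Gaussian input to the job folder on ziggy together with its
    #.qsub submission script; when DFT optimisation is on, two inputs per
    #conformer are sent - the 'a.com' geometry optimisation and the 'b.com'
    #NMR prediction that reads the optimised geometry from the checkpoint.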
+ for f in GausFiles: + if not settings.DFTOpt: + outp = subprocess.check_output('scp ' + f +' ziggy:~/' + folder, + shell=True) + else: + outp = subprocess.check_output('scp ' + f[:-4] +'a.com ziggy:~/' + + folder, shell=True) + outp = subprocess.check_output('scp ' + f[:-4] +'b.com ziggy:~/' + + folder, shell=True) + outp = subprocess.check_output('scp ' + f[:-4] +'.qsub ziggy:~/' + + folder, shell=True) + + print str(len(GausFiles)) + ' .com and .qsub files uploaded to ziggy' + + #Launch the calculations + for f in GausFiles: + job = '~/' + folder + '/' + f[:-4] + outp = subprocess.check_output('ssh ziggy qsub -q ' + queue + ' -o ' + + job + '.log -e ' + job + '.err ' + job + '.qsub', shell=True) + + print str(len(GausFiles)) + ' jobs submitted to the queue on ziggy' + + outp = subprocess.check_output('ssh ziggy showq', shell=True) + if settings.user in outp: + print "Jobs are running on ziggy" + + Jobs2Complete = list(GausFiles) + n2complete = len(Jobs2Complete) + + #Check and report on the progress of calculations + while len(Jobs2Complete) > 0: + JustCompleted = [job for job in Jobs2Complete if + IsZiggyGComplete(job[:-3] + 'out', folder, settings)] + Jobs2Complete[:] = [job for job in Jobs2Complete if + not IsZiggyGComplete(job[:-3] + 'out', folder, settings)] + if n2complete != len(Jobs2Complete): + n2complete = len(Jobs2Complete) + print str(n2complete) + " remaining." + + time.sleep(60) + + #When done, copy the results back + print "\nCopying the output files back to localhost..." + print 'ssh ziggy scp /home/' + settings.user + '/' + folder + '/*.out ' +\ + socket.getfqdn() + ':' + os.getcwd() + outp = subprocess.check_output('ssh ziggy scp /home/' + settings.user + + '/' + folder + '/*.out ' + socket.getfqdn() + + ':' + os.getcwd(), shell=True) + + #Delete the *.chk files from /sharedscratch/ + print "\nCleaning up - deleting the *.chk files form /sharedscratch/" + for f in GausFiles: + outp = subprocess.check_output('ssh ziggy rm -r /sharedscratch/' + + settings.user + '/' + f[:-4], shell=True) + + +def WriteSubScript(GausJob, queue, ZiggyJobFolder, settings): + + if not (os.path.exists(GausJob+'.com')): + print "The input file " + GausJob + ".com does not exist. Exiting..." 
+ return + + #Create the submission script + QSub = open(GausJob + ".qsub", 'w') + + #Choose the queue + QSub.write('#PBS -q ' + queue + '\n#PBS -l nodes=1:ppn=1\n#\n') + + #define input files and output files + QSub.write('file=' + GausJob + '\n\n') + QSub.write('inpfile=${file}.com\noutfile=${file}.out\n') + + #define cwd and scratch folder and ask the machine + #to make it before running the job + QSub.write('HERE=/home/' + settings.user +'/' + ZiggyJobFolder + '\n') + QSub.write('SCRATCH=/sharedscratch/' + settings.user + '/' + + GausJob + '\n') + QSub.write('mkdir ${SCRATCH}\n') + + #Setup GAUSSIAN environment variables + QSub.write('set OMP_NUM_THREADS=1\n') + QSub.write('export GAUSS_EXEDIR=/usr/local/shared/gaussian/em64t/09-D01/g09\n') + QSub.write('export g09root=/usr/local/shared/gaussian/em64t/09-D01\n') + QSub.write('export PATH=/usr/local/shared/gaussian/em64t/09-D01/g09:$PATH\n') + QSub.write('export GAUSS_SCRDIR=$SCRATCH\n') + + #copy the input file to scratch + QSub.write('cp ${HERE}/${inpfile} $SCRATCH\ncd $SCRATCH\n') + + #write useful info to the job output file (not the gaussian) + QSub.write('echo "Starting job $PBS_JOBID"\necho\n') + QSub.write('echo "PBS assigned me this node:"\ncat $PBS_NODEFILE\necho\n') + + QSub.write('ln -s $HERE/$outfile $SCRATCH/$outfile\n') + QSub.write('$GAUSS_EXEDIR/g09 < $inpfile > $outfile\n') + + #Cleanup + QSub.write('mkdir ${HERE}/${file}\n') + QSub.write('cp ${SCRATCH}/*.chk $HERE/${file}/\n') + QSub.write('rm -f ${SCRATCH}/*\n') + QSub.write('cp $HERE/${file}/*.chk ${SCRATCH}/\n') + QSub.write('rm -r ${HERE}/${file}\n') + QSub.write('qstat -f $PBS_JOBID\n') + + QSub.close() + +#Function to write ziggy script when dft optimisation is used +def WriteSubScriptOpt(GausJob, queue, ZiggyJobFolder, settings): + + if not (os.path.exists(GausJob+'a.com')): + print "The input file " + GausJob + "a.com does not exist. Exiting..." + return + if not (os.path.exists(GausJob+'b.com')): + print "The input file " + GausJob + "b.com does not exist. Exiting..." 
+ return + + #Create the submission script + QSub = open(GausJob + ".qsub", 'w') + + #Choose the queue + QSub.write('#PBS -q ' + queue + '\n#PBS -l nodes=1:ppn=1\n#\n') + + #define input files and output files + QSub.write('file=' + GausJob + '\n\n') + QSub.write('inpfile1=${file}a.com\ninpfile2=${file}b.com\n') + QSub.write('outfile1=${file}temp.out\noutfile2=${file}.out\n') + + #define cwd and scratch folder and ask the machine + #to make it before running the job + QSub.write('HERE=/home/' + settings.user +'/' + ZiggyJobFolder + '\n') + QSub.write('SCRATCH=/sharedscratch/' + settings.user + '/' + GausJob + '\n') + QSub.write('mkdir ${SCRATCH}\n') + + #Setup GAUSSIAN environment variables + QSub.write('set OMP_NUM_THREADS=1\n') + QSub.write('export GAUSS_EXEDIR=/usr/local/shared/gaussian/em64t/09-D01/g09\n') + QSub.write('export g09root=/usr/local/shared/gaussian/em64t/09-D01\n') + QSub.write('export PATH=/usr/local/shared/gaussian/em64t/09-D01/g09:$PATH\n') + QSub.write('export GAUSS_SCRDIR=$SCRATCH\n') + + #copy the input files to scratch + QSub.write('cp ${HERE}/${inpfile1} $SCRATCH\n') + QSub.write('cp ${HERE}/${inpfile2} $SCRATCH\ncd $SCRATCH\n') + + #write useful info to the job output file (not the gaussian) + QSub.write('echo "Starting job $PBS_JOBID"\necho\n') + QSub.write('echo "PBS assigned me this node:"\ncat $PBS_NODEFILE\necho\n') + + QSub.write('ln -s $HERE/$outfile2 $SCRATCH/$outfile2\n') + QSub.write('$GAUSS_EXEDIR/g09 < $inpfile1 > $outfile1\n') + QSub.write('$GAUSS_EXEDIR/g09 < $inpfile2 > $outfile2\n') + + #Cleanup + QSub.write('mkdir ${HERE}/${file}\n') + QSub.write('cp ${SCRATCH}/*.chk $HERE/${file}/\n') + QSub.write('rm -f ${SCRATCH}/*\n') + QSub.write('cp $HERE/${file}/*.chk ${SCRATCH}/\n') + QSub.write('rm -r ${HERE}/${file}\n') + QSub.write('qstat -f $PBS_JOBID\n') + + QSub.close() + + +def IsZiggyGComplete(f, folder, settings): + + path = '/home/' + settings.user + '/' + folder + '/' + try: + outp1 = subprocess.check_output('ssh ziggy ls ' + path, shell=True) + except subprocess.CalledProcessError, e: + print "ssh ziggy ls failed: " + str(e.output) + return False + if f in outp1: + try: + outp2 = subprocess.check_output('ssh ziggy cat ' + path + f, + shell=True) + except subprocess.CalledProcessError, e: + print "ssh ziggy cat failed: " + str(e.output) + return False + if "Normal termination" in outp2: + return True + return False + + +""" +Change to support tautomers - treat a tautomer as a few extra conformers with +different file names +Correction: Treat them as diastereomers and submit to nmrpredict, but remember +that they are tautomers, so that their populations can be optimized +""" +def RunNMRPredict(numDS, *args): + + TautInputs = [] + Ntaut = [] + NumFiles = [] + arg = 0 + + #Pick all tautomer counts and filenames + for ds in range(0, numDS): + Ntaut.append(int(args[arg])) + arg = arg+1 + TautInputs.append([]) + for taut in range(0, Ntaut[ds]): + TautInputs[ds].append('') + NumFiles.append(0) + for f in glob.glob(args[arg] + 'ginp*.out'): + TautInputs[ds][taut] = TautInputs[ds][taut] + f[:-4] + ' ' + NumFiles[-1] = NumFiles[-1] + 1 + arg = arg+1 + + outputs = [] + + #This loop runs nmrPredict for each diastereomer and collects + #the outputs + """ To change: run nmrpredict for each tautomer seperately""" + for ds in range(0, numDS): + for taut in range(0, Ntaut[ds]): + + #Prepares input for nmrPredict + javafolder = getScriptPath() + jinp = 'CLASSPATH=' + javafolder + \ + ' java nmrPredictGaussian ' + TautInputs[ds][taut] + print jinp + + #Runs java 
nmrPredict JagName001, ... and collects output + outputs.append(subprocess.check_output(jinp, shell=True)) + #print '\n\n' + outputs[isomer] + + return (outputs, NumFiles, Ntaut) + + +def getScriptPath(): + return os.path.dirname(os.path.realpath(sys.argv[0])) diff --git a/InchiGen.py b/InchiGen.py new file mode 100644 index 0000000..fd8b7b6 --- /dev/null +++ b/InchiGen.py @@ -0,0 +1,666 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 17 15:20:18 2014 + +@author: ke291 + +Code for diastereomer, tautomer and protomer generation via InChI strings. +This file gets called by PyDP4.py if diastereomer and/or tautomer and/or +protomer generation is used. +""" +import sys +sys.path.append('/home/ke291/Tools/openbabel-install/lib/python2.7/site-packages/') +from openbabel import * +import subprocess +import itertools + +MolConPath = '/home/ke291/ChemAxon/MarvinBeans/bin/molconvert' + + +def main(f): + + inchi, aux = GetInchi(f) + + print inchi + + ds_inchis = GenDiastereomers(inchi) + ds_inchis = [FixTautProtons(f, i, aux) for i in ds_inchis] + + for ds in range(0, len(ds_inchis)): + Inchi2Struct(ds_inchis[ds], f[:-4] + str(ds+1), aux) + RestoreNumsSDF(f[:-4] + str(ds+1) + '.sdf', f, aux) + + """taut_inchis = GenTautomers(inchi) + + for taut in range(0, len(taut_inchis)): + Inchi2Struct(taut_inchis[taut], 'taut' + str(taut), aux)""" + + +def GetInchiRenumMap(AuxInfo): + + for l in AuxInfo.split('/'): + if 'N:' in l: + RenumLayer = l + break + amap = [int(x) for x in RenumLayer[2:].split(',')] + return amap + + +def FixTautProtons(f, inchi, AuxInfo): + + #Get tautomeric protons and atoms they are connected to from Inchi + TautProts = GetTautProtons(inchi) + amap = GetInchiRenumMap(AuxInfo) + + #get the correspondence of the Inchi numbers to the source numbers + hmap = [] + for taut in TautProts: + for heavyatom in range(1, len(taut)): + hmap.append([int(taut[heavyatom]), amap[int(taut[heavyatom])-1]]) + + #Read molecule from file + obconversion = OBConversion() + obconversion.SetInFormat("sdf") + obmol = OBMol() + obconversion.ReadFile(obmol, f) + + Fixprotpos = [] + for heavyatom in hmap: + atom = obmol.GetAtom(heavyatom[1]) + for nbratom in OBAtomAtomIter(atom): + if nbratom.GetAtomicNum() == 1: + Fixprotpos.append(heavyatom[0]) + draftFH = [] + for i in range(0, len(Fixprotpos)): + if Fixprotpos[i] not in [a[0] for a in draftFH]: + draftFH.append([Fixprotpos[i], Fixprotpos.count(Fixprotpos[i])]) + + fixedlayer = '/f/h' + for h in draftFH: + if h[1] == 1: + fixedlayer = fixedlayer + str(h[0])+'H,' + else: + fixedlayer = fixedlayer + str(h[0])+'H' + str(h[1]) + ',' + + resinchi = inchi + fixedlayer[:-1] + + return resinchi + + +#Get H connections from sdf file +def GetHcons(f): + obconversion = OBConversion() + obconversion.SetInFormat("sdf") + obmol = OBMol() + obconversion.ReadFile(obmol, f) + Hcons = [] + for atom in OBMolAtomIter(obmol): + idx = atom.GetIdx() + anum = atom.GetAtomicNum() + if anum == 1: + for NbrAtom in OBAtomAtomIter(atom): + Hcons.append([idx, NbrAtom.GetIdx()]) + return Hcons + + +def RestoreNumsSDF(f, fold, AuxInfo): + + #Read molecule from file + obconversion = OBConversion() + obconversion.SetInFormat("sdf") + obmol = OBMol() + obconversion.ReadFile(obmol, f) + #Get the atoms Hs are connected to + oldHcons = GetHcons(fold) + #translate the H connected atoms to the new numbering system + amap = GetInchiRenumMap(AuxInfo) + for i in range(0, len(oldHcons)): + oldHcons[i][1] = amap.index(oldHcons[i][1])+1 + + newHcons = [] + temp = [] + i = 0 + for atom in 
OBMolAtomIter(obmol): + idx = atom.GetIdx() + anum = atom.GetAtomicNum() + #If atom is hydrogen, check what it is connected to + if anum == 1: + for NbrAtom in OBAtomAtomIter(atom): + newHcons.append([idx, NbrAtom.GetIdx()]) + #Pick the temporary atom + temp.append(atom) + + for i in range(0, len(newHcons)): + conatom = newHcons[i][1] + for b in range(0, len(oldHcons)): + if conatom == oldHcons[b][1]: + amap.append(oldHcons[b][0]) + #remove the number, so that it doesn't get added twice + oldHcons[b][1] = 0 + + newmol = OBMol() + added = [] + + for i in range(1, len(amap)+1): + newn = amap.index(i) + newmol.AddAtom(temp[newn]) + added.append(newn) + + #Final runthrough to check that all atoms have been added, + #tautomeric protons can be missed. If tautomeric proton tracking + #is implemented this can be removed + for i in range(0, len(temp)): + if not i in added: + newmol.AddAtom(temp[i]) + + #Restore the bonds + newmol.ConnectTheDots() + newmol.PerceiveBondOrders() + #Write renumbered molecule to file + obconversion.SetOutFormat("sdf") + obconversion.WriteFile(newmol, f) + + +def GetInchi(f): + + outp = subprocess.check_output(MolConPath + ' inchi ' + f, shell=True) + idata = outp.split('\n') + + aux = idata[1][:] + return idata[0], aux + + +def Inchi2Struct(inchi, f, aux): + + infile = open(f + '.inchi', 'w') + infile.write(inchi) + infile.close() + + outp = subprocess.check_output(MolConPath + ' sdf ' + f + + '.inchi -3:S{fine}[prehydrogenize] -o ' + f + '.sdf', shell=True) + + +def GenProtomers(structf, atoms): + + f = structf + ".sdf" + inchi, aux = GetInchi(f) + print inchi + amap = GetInchiRenumMap(aux) + + prot_atoms = [] + for atom in atoms: + prot_atoms.append(amap.index(atom)) + + finchi = FixTautProtons(f, inchi, aux) + prot_inchis = GenProtInchis(finchi, prot_atoms) + + print prot_inchis + filenames = [] + for prot in range(0, len(prot_inchis)): + Inchi2Struct(prot_inchis[prot], f[:-4] + 'p' + str(prot+1), aux) + RestoreNumsSDF(f[:-4] + 'p' + str(prot+1) + '.sdf', f, aux) + filenames.append(f[:-4] + 'p' + str(prot+1)) + return len(filenames), filenames + + +def GenProtInchis(inchi, atoms): + + #Read and interpret proton counts on heavy atoms from inchi, + # including tautomeric layers + protons, formula, tautomerics, fprotons = ReadProtonCounts(inchi) + + #Construct list of heavy atoms with tautomeric protons and which system + #they belong to + tlist = [[], []] + i = 0 + for tlayer in tautomerics: + temp = tlayer[1:-1].split(',') + tlist[0].extend([int(x) for x in temp[1:]]) + tlist[1].extend([i for x in temp[1:]]) + print tlist + #Increase H count and regenerate the formula + for i in range(0, len(formula)): + if formula[i][0] == 'H': + formula[i][1] += 1 + formula[i][1] = str(formula[i][1]) + formula = [''.join(x) for x in formula] + formula = ''.join(formula) + + #For each basic atom in atoms, generate a copy of original protons + #add atom and save it + print "Protonating atoms with these InChI numbers: " +\ + str([x+1 for x in atoms]) + protlayers = [] + fprotlayers = [] + for atom in atoms: + if atom+1 not in tlist[0]: + extraprotons = list(protons) + extrafprotons = list(fprotons) + extraprotons[atom] += 1 + if tautomerics == []: + protlayers.append(WriteProtonCounts(extraprotons)) + else: + protlayers.append(WriteProtonCounts(extraprotons) + + ',' + ','.join(tautomerics)) + fprotlayers.append(WriteProtonCounts(extrafprotons)) + else: + extraprotons = list(protons) + extrafprotons = list(fprotons) + extratautomerics = list(tautomerics) + extrafprotons[atom] += 1 + 
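            #the atom belongs to a mobile-H (tautomeric) group, so in the
            #main h layer the extra proton is added to that group's count
            #rather than to the atom itself; the fixed-H layer still records
            #it on the specific atom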
#which tautomeric system atom belongs to? + tindex = tlist[0].index(atom+1) + tindex = tlist[1][tindex] + temp = tautomerics[tindex].split(',') + #Get the proton count and increase by 1 + protcount = int(temp[0][2:]) + 1 + #Write the proton count back + temp[0] = temp[0][:2] + str(protcount) + extratautomerics[tindex] = ','.join(temp) + protlayers.append(WriteProtonCounts(extraprotons)+',' + + ','.join(extratautomerics)) + fprotlayers.append(WriteProtonCounts(extrafprotons)) + protinchis = [] + protinchis.append(inchi) + for l in range(0, len(protlayers)): + layers = inchi.split('/') + MainLayerPassed = False + ChargeAdded = False + i = 1 + while (i < len(layers)): + if 'h' in layers[i]: + if not MainLayerPassed: + layers[i] = protlayers[l] + MainLayerPassed = True + if 'q' not in inchi: + layers.insert(i+1, 'q+1') + ChargeAdded = True + else: + layers[i] = fprotlayers[l] + if ('q' in layers[i]) and ChargeAdded is False: + charge = int(layers[i][1:]) + layers[i] = 'q'+"%+d" % charge + if 'C' in layers[i] and 'H' in layers[i]: + layers[i] = formula + i += 1 + #insert charge layer here + protinchis.append('/'.join(layers)) + return protinchis + + +def WriteProtonCounts(protons): + collectedprotons = [[], [], [], []] + + i = 1 + lastcount = protons[0] + start = 0 + while i < len(protons): + if protons[i] != lastcount: + if start == i-1: + collectedprotons[lastcount].append(str(start+1)) + else: + collectedprotons[lastcount].append(str(start+1)+'-'+str(i)) + lastcount = protons[i] + start = i + i += 1 + + if start == i-1: + collectedprotons[lastcount].append(str(start+1)) + else: + collectedprotons[lastcount].append(str(start+1)+'-'+str(i)) + + hlayer = 'h' + if len(collectedprotons[1]) > 0: + hlayer += ','.join(collectedprotons[1]) + hlayer += 'H,' + if len(collectedprotons[2]) > 0: + hlayer += ','.join(collectedprotons[2]) + hlayer += 'H2,' + if len(collectedprotons[3]) > 0: + hlayer += ','.join(collectedprotons[3]) + hlayer += 'H3,' + hlayer = hlayer[:-1] + return hlayer + + +def ReadProtonCounts(inchi): + import re + + #Get inchi layers + layers = inchi.split('/') + ProtLayer = '' + FixedLayer = '' + for l in layers[1:]: + if 'C' in l and 'H' in l: + atoms = re.findall(r"[a-zA-Z]+", l) + indexes = [int(x) for x in re.findall(r"\d+", l)] + formula = [list(x) for x in zip(atoms, indexes)] + if 'h' in l and ProtLayer != '': + FixedLayer = l[1:] + if 'h' in l and ProtLayer == '': + ProtLayer = l[1:] + + #initialize proton list + nheavy = sum([x[1] for x in formula if x[0] != 'H']) + + #Find, save and remove tautomeric portions from main proton layer + tautomerics = re.findall(r"\(.*?\)", ProtLayer) + ProtLayer = re.sub(r"\(.*?\)", "", ProtLayer) + if ProtLayer[-1] == ',': + ProtLayer = ProtLayer[:-1] + + #Read the main and the fixed proton layer + protons = ReadPSections(ProtLayer, nheavy) + fprotons = ReadPSections(FixedLayer, nheavy) + + return protons, formula, tautomerics, fprotons + + +def ReadPSections(ProtLayer, nheavy): + import re + protons = [0 for x in range(0, nheavy)] + #seperate the 1proton, 2proton and 3proton atoms, then seperate the records + psections = [x for x in re.findall(r".*?(?=H)", ProtLayer) if x != ''] + secvals = [0 for x in range(0, len(psections))] + psections[0] = psections[0].split(',') + + #interpret each record and fill in the proton table + #start by finding the proton count value for each section + for i in range(1, len(psections)): + if psections[i][0] == ',': + secvals[i-1] = 1 + psections[i] = psections[i][1:].split(',') + else: + secvals[i-1] = 
int(psections[i][0]) + psections[i] = psections[i][2:].split(',') + if ProtLayer[-1] != 'H': + secvals[-1] = int(ProtLayer[-1]) + else: + secvals[-1] = 1 + + #now expand each entry in the sections and fill the corresponding value + #in proton table + for i in range(0, len(psections)): + for s in psections[i]: + if '-' in s: + [start, finish] = [int(x) for x in s.split('-')] + protons[start-1:finish] = [secvals[i] for x in + range(0, len(protons[start-1:finish]))] + else: + protons[int(s)-1] = secvals[i] + return protons + + +def GetTautProtons(inchi): + #get the tautomer layer and pickup the data + layers = inchi.split('/') + + for l in layers: + if 'h' in l: + ProtLayer = l + ProtList = list(ProtLayer) + starts = [] + ends = [] + for i in range(0, len(ProtList)): + if ProtList[i] == '(': + starts.append(i) + if ProtList[i] == ')': + ends.append(i) + TautProts = [] + for i in range(0, len(starts)): + TautProts.append((ProtLayer[starts[i]+1:ends[i]]).split(',')) + + return TautProts + + +def GenTautomers(structf): + + f = structf + ".sdf" + inchi, aux = GetInchi(f) + + abc = 'abcdefghijklmnopqrstuvwxyz' + + t_inchis = GenTautInchis(inchi, aux, structf) + filenames = [] + for ds in range(0, len(t_inchis)): + Inchi2Struct(t_inchis[ds], f[:-4] + abc[ds], aux) + RestoreNumsSDF(f[:-4] + abc[ds] + '.sdf', f, aux) + filenames.append(f[:-4] + abc[ds]) + return len(filenames), filenames + + +#For now only works on one tautomeric system - for nucleosides don't need more +def GenTautInchis(inchi, aux, structf): + + resinchis = [] # Inchis of all tautomers, including the parent structure + + #Get tautomeric protons and atoms they are connected to + TautProts = GetTautProtons(inchi) + + #The total number of tautomeric protons in the system + if str(TautProts[0][0][1:]) != '': + totprotons = int(str(TautProts[0][0][1:])) + else: + totprotons = 1 + + #Check for and remove non-hetero atoms + temp = [int(x) for x in TautProts[0][1:] if IsHetero(int(x), inchi)] + + #Get the numbering map, to see which atomes are the tautomeric ones in the + #original structure file. 
From there determine their type and valency + #based on connectivity + amap = GetInchiRenumMap(aux) + OldTautProts = [amap[prot-1] for prot in temp] + valencies = GetTautValency(structf, OldTautProts) + + #the multivalent atoms will always have at least one proton + superfixed = [] + for i in range(0, len(valencies)): + if valencies[i] > 1: + superfixed.append(TautProts[0][i+1]) + #TautProts.append(['H', TautProts[0][i+1]]) + + #Generate all the possible proton positions + #with repetitions for multivalency + fixedprotons = list(itertools.combinations(TautProts[0][1:], + r=totprotons-len(superfixed))) + + for i in range(0, len(fixedprotons)): + fixedprotons[i] = superfixed + list(fixedprotons[i]) + fixedprotons[i].sort(key=int) + + #Count the occurences of protons positions, save the counts and + #remove redundant positions + counts = [] + for i in range(0, len(fixedprotons)): + counts.append([]) + j = 0 + while j < len(fixedprotons[i]): + counts[i].append(fixedprotons[i].count(fixedprotons[i][j])) + if counts[i][-1] > 1: + for _ in range(0, counts[i][-1]-1): + fixedprotons[i].remove(fixedprotons[i][j]) + #j+=counts[i][-1] + j += 1 + + fixprefix = '/f/h' + tauts = [] + for i in range(0, len(fixedprotons)): + tauts.append(fixprefix) + for j in range(0, len(fixedprotons[i])): + if j > 0: + tauts[i] += ',' + tauts[i] += fixedprotons[i][j] + 'H' + if counts[i][j] > 1: + tauts[i] += str(counts[i][j]) + + for taut in tauts: + resinchis.append(inchi + taut) + + return resinchis + + +def GetTautValency(structf, tautatoms): + #Read molecule from file + obconversion = OBConversion() + obconversion.SetInFormat("sdf") + obmol = OBMol() + obconversion.ReadFile(obmol, structf + ".sdf") + + protvalency = [] + for idx in tautatoms: + atom = obmol.GetAtom(idx) + corenum = atom.GetAtomicNum() + #If atom is hydrogen, check what it is connected to + nheavynbr = 0 + for NbrAtom in OBAtomAtomIter(atom): + nbrnum = NbrAtom.GetAtomicNum() + if nbrnum > 1: + nheavynbr += 1 + if corenum == 8: + protvalency.append(1) + if corenum == 7: + protvalency.append(3-nheavynbr) + + return protvalency + + +#utility function that determines if an atom is a heteroatom +def IsHetero(n, inchi): + layers = inchi.split('/') + nC = int((layers[1].split('H'))[0][1:]) + if (n > nC): + return True + else: + return False + + +def GenSelectDiastereomers(structf, atoms): + + f = structf + ".sdf" + inchi, aux = GetInchi(f) + amap = GetInchiRenumMap(aux) + + translated_atoms = [] + for atom in atoms: + translated_atoms.append(amap.index(atom)+1) + + ds_inchis = GenSelectDSInchis(inchi, translated_atoms) + ds_inchis = [FixTautProtons(f, i, aux) for i in ds_inchis] + filenames = [] + for ds in range(0, len(ds_inchis)): + Inchi2Struct(ds_inchis[ds], f[:-4] + str(ds+1), aux) + RestoreNumsSDF(f[:-4] + str(ds+1) + '.sdf', f, aux) + filenames.append(f[:-4] + str(ds+1)) + return len(filenames), filenames + + +def GenSelectDSInchis(inchi, atoms): + #Inchis of all diastereomers, including the parent structure + resinchis = [] + + #get the number of potential diastereomers + layers = inchi.split('/') + for l in layers: + if 't' in l: + slayer = l + sc = l[1:].split(',') + + ignore = [] + for i in range(0, len(sc)): + if not int(sc[i][:-1]) in atoms: + ignore.append(sc[i]) + sc = [x for x in sc if x not in ignore] + + if len(sc) == 0: + "No stereocentres remaining, no diastereomers will be generated." 
+ return 0 + + numds = 2**(len(sc)) + print "Number of diastereomers to be generated: " + str(numds) + temps = [] + #Generate inversion patterns - essentially just binary strings + for i in range(0, numds): + template = bin(i)[2:].zfill(len(sc)) + temps.append(template) + + #For each 1 in template, invert the corresponding stereocentre + #and add the resulting diastereomer to the list + invert = {'+': '-', '-': '+'} + + reslayers = [] + for ds in range(0, numds): + newds = list(sc) + for stereocentre in range(0, len(sc)): + if temps[ds][stereocentre] == '1': + tlist = list(newds[stereocentre]) + tlist[-1] = invert[tlist[-1]] + newds[stereocentre] = "".join(tlist) + newlayer = str(slayer) + for stereocentre in range(0, len(sc)): + newlayer = newlayer.replace(sc[stereocentre], newds[stereocentre]) + reslayers.append(newlayer) + print reslayers + resinchis = [] + for layer in reslayers: + resinchis.append(inchi.replace(slayer, layer)) + return resinchis + + +def GenDiastereomers(structf): + + f = structf + ".sdf" + inchi, aux = GetInchi(f) + + print inchi + + ds_inchis = GenDSInchis(inchi) + ds_inchis = [FixTautProtons(f, i, aux) for i in ds_inchis] + filenames = [] + for ds in range(0, len(ds_inchis)): + Inchi2Struct(ds_inchis[ds], f[:-4] + str(ds+1), aux) + RestoreNumsSDF(f[:-4] + str(ds+1) + '.sdf', f, aux) + filenames.append(f[:-4] + str(ds+1)) + return len(filenames), filenames + + +def GenDSInchis(inchi): + + ilist = list(inchi) + #Inchis of all diastereomers, including the parent structure + resinchis = [] + + #get the number of potential diastereomers + layers = inchi.split('/') + for l in layers: + if 't' in l: + numds = 2**(len(l.translate(None, 't,1234567890'))-1) + + print "Number of diastereomers to be generated: " + str(numds) + + #find configuration sites (+ and -) + bs = ilist.index('t') + es = ilist[bs:].index('/') + spos = [] + for s in range(bs, bs+es): + if ilist[s] == '+' or ilist[s] == '-': + spos.append(s) + + temps = [] + #Generate inversion patterns - essentially just binary strings + for i in range(0, numds): + template = bin(i)[2:].zfill(len(spos)-1) + temps.append(template) + + #For each 1 in template, invert the corresponding stereocentre + #and add the resulting diastereomer to the list + invert = {'+': '-', '-': '+'} + + for ds in range(0, numds): + t = list(ilist) + for stereocentre in range(1, len(spos)): + if temps[ds][stereocentre-1] == '1': + t[spos[stereocentre]] = invert[t[spos[stereocentre]]] + resinchis.append(''.join(t)) + + return resinchis diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a83d17d --- /dev/null +++ b/LICENSE @@ -0,0 +1,23 @@ +PyDP4 integrated workflow for the running of MM, DFT GIAO calculations and +DP4 analysis +v0.4 + +Copyright (c) 2015 Kristaps Ermanis, Jonathan M. Goodman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/MacroModel.py b/MacroModel.py new file mode 100644 index 0000000..2fd9981 --- /dev/null +++ b/MacroModel.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 12 13:15:47 2015 + +@author: ke291 + +Contains all of the MacroModel specific code for input generation, calculation +execution and output interpretation. Called by PyDP4.py. +""" + +import os +import sys +import subprocess +import shutil +import time + + +def SetupMacromodel(numDS, settings, *args): + + for f in args: + if settings.Rot5Cycle is True: + if not os.path.exists(f+'rot.sdf'): + import FiveConf + #Generate the flipped fivemembered ring, + #result is in '*rot.sdf' file + FiveConf.main(f + '.sdf', settings) + + scriptdir = getScriptPath() + cwd = os.getcwd() + + #Convert input structure to mae file + convinp = settings.SCHRODINGER + '/utilities/sdconvert -isd ' + outp = subprocess.check_output(convinp + f + '.sdf -omae ' + f + + '.mae', shell=True) + + #Copy default com file to directory + shutil.copyfile(scriptdir + '/default.com', cwd + '/' + f + '.com') + #Change input and output file names in com file + comf = open(f + '.com', 'r+') + com = comf.readlines() + com[0] = f + '.mae\n' + com[1] = f + '-out.mae\n' + + #Change the molecular mechanics step count in the com file + cycles = (str(settings.MMstepcount)).rjust(6) + temp = list(com[7]) + temp[7:13] = list(cycles) + com[7] = "".join(temp) + comf.truncate(0) + comf.seek(0) + comf.writelines(com) + + #Change the forcefield in the com file + if (settings.ForceField).lower() == "opls": + temp = list(com[3]) + + temp[11:13] = list('14') + com[3] = "".join(temp) + comf.truncate(0) + + comf.seek(0) + comf.writelines(com) + + comf.close() + + if settings.Rot5Cycle is True: + #Convert input structure to mae file + convinp = settings.SCHRODINGER + '/utilities/sdconvert -isd ' + outp = subprocess.check_output(convinp + f + 'rot.sdf -omae ' + f + + 'rot.mae', shell=True) + + #Copy default com file to directory + shutil.copyfile(scriptdir + '/default.com', cwd + '/' + f + + 'rot.com') + #Change input and output file names in com file + comf = open(f + 'rot.com', 'r+') + com = comf.readlines() + com[0] = f + 'rot.mae\n' + com[1] = f + 'rot-out.mae\n' + + #Change the molecular mechanics step count in the com file + cycles = (str(settings.MMstepcount)).rjust(6) + temp = list(com[7]) + temp[7:13] = list(cycles) + com[7] = "".join(temp) + comf.truncate(0) + comf.seek(0) + comf.writelines(com) + comf.close() + print "Macromodel input for " + f + " prepared." 
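#Illustrative sketch only, not part of the original file: SetFixedField is a
#hypothetical helper showing how the column edits in SetupMacromodel above
#work - the MacroModel .com file uses fixed-width fields, so a value is
#right-justified into its column range rather than appended. The code above
#does this for the step count (columns 7:13 of line 8) and the OPLS force
#field switch (columns 11:13 of line 4).
def SetFixedField(line, start, end, value):
    chars = list(line)
    chars[start:end] = list(str(value).rjust(end - start))
    return ''.join(chars)

#e.g. com[7] = SetFixedField(com[7], 7, 13, settings.MMstepcount)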
+ + +def RunMacromodel(numDS, settings, *args): + #Run Macromodel conformation search for all diastereomeric inputs + NCompleted = 0 + + for ds in args: + if not os.path.exists(ds+'.log'): + print settings.SCHRODINGER + '/bmin ' + ds + outp = subprocess.check_output(settings.SCHRODINGER + '/bmin ' + + ds, shell=True) + else: + print ds + ".log exists, skipping" + continue + + time.sleep(60) + while(not IsMMCompleted(ds + '.log')): + time.sleep(30) + NCompleted = NCompleted + 1 + + if settings.Rot5Cycle is True: + print "Macromodel job " + str(NCompleted) + " of " + str(numDS*2)\ + + " completed." + + print settings.SCHRODINGER + '/bmin ' + ds + 'rot' + outp = subprocess.check_output(settings.SCHRODINGER + '/bmin ' + + ds+'rot', shell=True) + time.sleep(60) + while(not IsMMCompleted(ds + 'rot.log')): + time.sleep(30) + NCompleted = NCompleted + 1 + print "Macromodel job " + str(NCompleted) + " of " + str(numDS*2)\ + + " completed." + else: + print "Macromodel job " + str(NCompleted) + " of " + str(numDS) +\ + " completed." + + +def ReadMacromodel(MMoutp, settings): + + conformers = [] + conformer = -1 + AbsEs = [] + + atoms = [] + charge = 0 + MaeInps = [] + + MaeFile = file(MMoutp + '-out.mae', 'r') + MaeInps.append(MaeFile.readlines()) + MaeFile.close() + + if settings.Rot5Cycle is True: + MaeFile = file(MMoutp + 'rot-out.mae', 'r') + MaeInps.append(MaeFile.readlines()) + MaeFile.close() + + for MaeInp in MaeInps: + index = 0 + AbsEOffsets = [] + #find conformer description blocks + blocks = [] + for i in range(0, len(MaeInp)): + if 'f_m_ct' in MaeInp[i]: + blocks.append(i) + if 'p_m_ct' in MaeInp[i]: + blocks.append(i) + + #find absolute energy offsets + for block in blocks: + for i in range(block, len(MaeInp)): + if 'mmod_Potential_Energy' in MaeInp[i]: + AbsEOffsets.append(i-block) + break + + #Get absolute energies for conformers + for i in range(0, len(blocks)): + for line in range(blocks[i], len(MaeInp)): + if ':::' in MaeInp[line]: + AbsEs.append(float(MaeInp[line+AbsEOffsets[i]])) + break + + #find geometry descriptions for each block + for i in range(0, len(blocks)): + for line in (MaeInp[blocks[i]:]): + if 'm_atom' in line: + blocks[i] = blocks[i] + MaeInp[blocks[i]:].index(line) + break + + #find start of atom coordinates for each block + for i in range(0, len(blocks)): + for line in (MaeInp[blocks[i]:]): + if ':::' in line: + blocks[i] = blocks[i] + MaeInp[blocks[i]:].index(line) + break + + #Read the atom numbers and coordinates + for block in blocks: + conformers.append([]) + conformer = conformer + 1 + index = block+1 + atom = 0 + while not ':::' in MaeInp[index]: + line = MaeInp[index].split(' ') + line = [word for word in line if word != ''] + conformers[conformer].append([]) + if conformer == 0: + atoms.append(GetMacromodelSymbol(int(line[1]))) + conformers[0][atom].append(line[0]) # add atom number + conformers[0][atom].append(line[2]) # add X + conformers[0][atom].append(line[3]) # add Y + conformers[0][atom].append(line[4]) # add Z + charge = charge + int(line[21]) + else: + if blocks.index(block) == 0: + conformers[conformer][atom].append(line[0]) # add atom number + conformers[conformer][atom].append(line[2]) # add X + conformers[conformer][atom].append(line[3]) # add Y + conformers[conformer][atom].append(line[4]) # add Z + else: + conformers[conformer][atom].append(line[0]) # add atom number + conformers[conformer][atom].append(line[1]) # add X + conformers[conformer][atom].append(line[2]) # add Y + conformers[conformer][atom].append(line[3]) # add Z + + index = 
index + 1 # Move to next line + atom = atom + 1 # Move to next atom + #Pick only the conformers in the energy window + MinE = min(AbsEs) + + conformers2 = [] + for i in range(0, len(conformers)): + if AbsEs[i] < MinE+settings.MaxCutoffEnergy: + conformers2.append(conformers[i]) + + return atoms, conformers2, charge + + +def GetMacromodelSymbol(atomType): + + Lookup = ['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', + 'C', 'C', 'C', 'O', 'O', 'O', ' ', 'O', ' ', 'O', + 'O', ' ', 'O', 'N', 'N', 'N', ' ', ' ', ' ', ' ', + 'N', 'N', ' ', ' ', ' ', ' ', ' ', 'N', 'N', 'N', + 'H', 'H', 'H', 'H', 'H', ' ', ' ', 'H', 'S', ' ', + 'S', 'S', 'P', 'B', 'B', 'F', 'Cl', 'Br', 'I', 'Si', + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'S', + 'S', 'Cl', 'B', 'F', ' ', ' ', ' ', ' ', 'S', 'S', + ' ', ' ', 'S', 'S'] + + if Lookup[atomType-1] == ' ': + print 'Unknown atom type' + + return Lookup[atomType-1] + + +def getScriptPath(): + return os.path.dirname(os.path.realpath(sys.argv[0])) + + +def IsMMCompleted(f): + Gfile = open(f, 'r') + outp = Gfile.readlines() + Gfile.close() + + if "normal termination" in outp[-3]: + return True + else: + return False diff --git a/NMRDP4GTF.py b/NMRDP4GTF.py new file mode 100644 index 0000000..b8df8af --- /dev/null +++ b/NMRDP4GTF.py @@ -0,0 +1,469 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Jan 12 14:42:47 2015 + +@author: ke291 + +Takes care of all the NMR description interpretation, equivalent atom +averaging, Boltzmann averaging, tautomer population optimisation (if used) +and DP4 input preparation and running either DP4.jar or DP4.py. Called by +PyDP4.py +""" + +import Gaussian +import NWChem + +import subprocess +import sys +import scipy.optimize as sciopt +import math +import os + +TMS_SC_C13 = 191.69255 +TMS_SC_H1 = 31.7518583 + + +def main(numDS, settings, *args): + + #This function runs nmrPredict for each diastereomer and collects + #the outputs + print '\nRunning NMRpredict script...' 
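    # Both branches below collect Boltzmann-averaged isotropic shieldings and
    # conformer populations for every candidate structure. Further down, each
    # shielding (sigma) is converted to a chemical shift by referencing to TMS:
    #     delta = (sigma_TMS - sigma) / (1 - sigma_TMS * 1e-6)
    # As an illustrative number only: a carbon shielding of 150.0 with
    # TMS_SC_C13 = 191.69255 gives delta of about 41.70 ppm.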
+ + if settings.DFT == 'z' or settings.DFT == 'g': + (outputs, NumFiles, Ntaut) = Gaussian.RunNMRPredict(numDS, *args[:-1]) + Noutp = len(outputs) + elif settings.DFT == 'n' or settings.DFT == 'w': + (RelEs, populations, labels, BoltzmannShieldings, Ntaut) = \ + NWChem.RunNMRPredict(numDS, *args) + Noutp = len(BoltzmannShieldings) + + #Reads the experimental NMR data from the file + ExpNMR = args[-1] + ExpNMR_file = open(ExpNMR, 'r') + Cexp = ExpNMR_file.readline() + ExpNMR_file.readline() + Hexp = ExpNMR_file.readline() + + expHlabels = [] + expHvalues = [] + + #Loops through the experimental proton NMR data and picks out values + # and atom numbers and places them in expHlabels, expHvalues, expClabels, + #expCvalues + + expbuf = Hexp.split(',') + + for i in range(0, len(expbuf)): + shiftend = expbuf[i].find('(') + expHvalues.append(float(expbuf[i][0:shiftend])) + labelend = expbuf[i].find(')') + expHlabels.append(expbuf[i][shiftend+1:labelend]) + + #Check if exp NMR file contains info about equivalent atoms and read it + #into an array + #Also reads a list of atoms to omit from analysis + + equivalents = [] + omits = [] + + ExpNMR_file.readline() + for line in ExpNMR_file: + if not 'OMIT' in line and len(line) > 1: + equivalents.append(line[:-1].split(',')) + else: + omits.append(line[5:-1].split(',')) + ExpNMR_file.close() + + Clabels = [] + Hlabels = [] + Cvalues = [] + Hvalues = [] + + #This loops through predictNMR outputs for each diastereomer and collects + #NMR data + + flatequiv = [val for sublist in equivalents for val in sublist] + flatomits = [val for sublist in omits for val in sublist] + + if settings.DFT == 'z' or settings.DFT == 'g': + for DS in range(0, len(outputs)): + + Cvalues.append([]) + Hvalues.append([]) + + nmrdata = outputs[DS].split('\n') + + RelEnergies = nmrdata[2].split(',') + buf = '' + for val in RelEnergies[1:NumFiles[DS]]: + num = float(val) + buf = buf + "{:5.2f}".format(num) + ', ' + buf = buf + "{:5.2f}".format(float(RelEnergies[NumFiles[DS]])) + print '\nConformer relative energies (kJ/mol): ' + buf + + #Print population(%) for diagnostic purposes + Populations = nmrdata[3].split(',') + buf = '' + for val in Populations[1:NumFiles[DS]]: + num = float(val) + buf = buf + "{:4.1f}".format(num) + ', ' + buf = buf + "{:4.1f}".format(float(Populations[NumFiles[DS]])) + print '\nPopulations (%): ' + buf + + #loops through particular output and collects shielding constants + #and calculates shifts relative to TMS + for row in nmrdata[4:]: + data = row.split(',') + shift = 0 + if len(data) > NumFiles[DS]: + if data[0][23] == 'C' and not data[0][23:] in flatomits: + # only read labels once, i.e. the first diastereomer + if DS == 0: + Clabels.append(str(data[0][23:])) + shift = (TMS_SC_C13-float(data[NumFiles[DS]+1])) / \ + (1-(TMS_SC_C13/10**6)) + Cvalues[DS].append(shift) + + if data[0][23] == 'H' and not data[0][23:] in flatomits: + # only read labels once, i.e. 
the first diastereomer + if DS == 0: + Hlabels.append(str(data[0][23:])) + shift = (TMS_SC_H1-float(data[NumFiles[DS]+1])) / \ + (1-(TMS_SC_H1/10**6)) + Hvalues[DS].append(shift) + + elif settings.DFT == 'n' or settings.DFT == 'w': + for DS in range(0, numDS): + + Cvalues.append([]) + Hvalues.append([]) + + buf = '' + for val in RelEs[DS]: + num = float(val) + buf = buf + "{:5.2f}".format(num) + ', ' + print '\nConformer relative energies (kJ/mol): ' + buf[:-2] + + buf = '' + for val in populations[DS]: + num = float(val) + buf = buf + "{:4.1f}".format(num*100) + ', ' + print '\nPopulations (%): ' + buf[:-2] + + #loops through particular output and collects shielding constants + #and calculates shifts relative to TMS + for atom in range(0, len(BoltzmannShieldings[DS])): + shift = 0 + if labels[atom][0] == 'C' and not labels[atom] in flatomits: + # only read labels once, i.e. the first diastereomer + if DS == 0: + Clabels.append(labels[atom]) + shift = (TMS_SC_C13-BoltzmannShieldings[DS][atom]) / \ + (1-(TMS_SC_C13/10**6)) + Cvalues[DS].append(shift) + + if labels[atom][0] == 'H' and not labels[atom] in flatomits: + # only read labels once, i.e. the first diastereomer + if DS == 0: + Hlabels.append(labels[atom]) + shift = (TMS_SC_H1-BoltzmannShieldings[DS][atom]) / \ + (1-(TMS_SC_H1/10**6)) + Hvalues[DS].append(shift) + + #Looks for equivalent atoms in the computational data, averages the shifts + #and removes the redundant signals + for eqAtoms in equivalents: + + eqSums = [0.0]*Noutp + eqAvgs = [0.0]*Noutp + + if eqAtoms[0][0] == 'H': + #print eqAtoms, Hlabels + for atom in eqAtoms: + eqIndex = Hlabels.index(atom) + for ds in range(0, Noutp): + eqSums[ds] = eqSums[ds] + Hvalues[ds][eqIndex] + for ds in range(0, Noutp): + eqAvgs[ds] = eqSums[ds]/len(eqAtoms) + + #Place the new average value in the first atom shifts place + target_index = Hlabels.index(eqAtoms[0]) + for ds in range(0, Noutp): + Hvalues[ds][target_index] = eqAvgs[ds] + + #Delete the redundant atoms from the computed list + #start with second atom - e.g. don't delete the original one + for atom in range(1, len(eqAtoms)): + del_index = Hlabels.index(eqAtoms[atom]) + del Hlabels[del_index] + for ds in range(0, Noutp): + del Hvalues[ds][del_index] + + if eqAtoms[0][0] == 'C': + for atom in eqAtoms: + eqIndex = Clabels.index(atom) + for ds in range(0, Noutp): + eqSums[ds] = eqSums[ds] + Cvalues[ds][eqIndex] + for ds in range(0, Noutp): + eqAvgs[ds] = eqSums[ds]/len(eqAtoms) + + #Place the new average value in the first atom shifts place + target_index = Clabels.index(eqAtoms[0]) + for ds in range(0, Noutp): + Cvalues[ds][target_index] = eqAvgs[ds] + + #Delete the redundant atoms from the computed list + #start with second atom - e.g. don't delete the original one + for atom in range(1, len(eqAtoms)): + del_index = Clabels.index(eqAtoms[atom]) + del Clabels[del_index] + for ds in range(0, Noutp): + del Cvalues[ds][del_index] + + tstart = 0 + OptCvalues = [] + OptHvalues = [] + + for tindex in range(0, len(Ntaut)): + print 'looking at tautomers ' + str(tstart) + ' to ' + \ + str(tstart+Ntaut[tindex]) + if Ntaut[tindex] == 1: + print "Only one tautomer found, skipping optimisation." 
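            # A single tautomer needs no population fitting, so its computed
            # shifts are passed through unchanged; otherwise OptTautPop below
            # fits the tautomer weights against the experimental shifts.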
+ OptCvalues.append(Cvalues[tstart]) + OptHvalues.append(Hvalues[tstart]) + tstart = tstart + Ntaut[tindex] + else: + (BuffC, BuffH) = OptTautPop(Clabels, + Cvalues[tstart:tstart+Ntaut[tindex]], + Hlabels, + Hvalues[tstart:tstart+Ntaut[tindex]], + Cexp, Hexp) + OptCvalues.append(BuffC) + OptHvalues.append(BuffH) + tstart = tstart + Ntaut[tindex] + + #Output the seperated shifts to terminal and DP4 input file + #along with the experimental NMR data + if (not settings.PDP4) and (not settings.EP5): + + WriteDP4input(Clabels, OptCvalues, Cexp, Hlabels, OptHvalues, Hexp) + #Run the DP4 java file and collect the output + javafolder = getScriptPath() + DP4outp = subprocess.check_output('CLASSPATH=' + javafolder + + ' java -jar ' + javafolder + '/DP4.jar DP4inp.inp', + shell=True) + print '\n' + DP4outp + + else: + import DP4 + DP4outp = DP4.main(Clabels, OptCvalues, Hlabels, OptHvalues, Cexp, + Hexp, settings) + + return '\n'.join(DP4outp) + '\n' + + +def WriteDP4input(Clabels, Cvalues, Cexp, Hlabels, Hvalues, Hexp): + + print '\nWriting input file for DP4...' + DP4_file = open('DP4inp.inp', 'w') + + DP4_file.write(','.join(Clabels) + '\n') + print '\n' + ','.join(Clabels) + for line in Cvalues: + print ','.join(format(v, "4.2f") for v in line) + DP4_file.write(','.join(format(v, "4.2f") for v in line) + '\n') + + DP4_file.write('\n' + Cexp) + print '\n' + Cexp + + DP4_file.write('\n' + ','.join(Hlabels) + '\n') + print '\n' + ','.join(Hlabels) + for line in Hvalues: + print ','.join(format(v, "4.2f") for v in line) + DP4_file.write(','.join(format(v, "4.2f") for v in line) + '\n') + + DP4_file.write('\n' + Hexp + '\n') + print '\n' + Hexp + + DP4_file.close() + + +def OptTautPop(Clabels, Cvalues, Hlabels, Hvalues, Cexp, Hexp): + #Pairwise match exp signals to computed ones based on assignments first, + #on erorrs afterwards + ExpCvalues = [-1 for i in range(0, len(Clabels))] + ExpHvalues = [-1 for i in range(0, len(Hlabels))] + + Hdata = Hexp.split(',') + for s in range(0, len(Hdata)): + Hdata[s] = Hdata[s].strip() + + Cdata = Cexp.split(',') + for s in range(0, len(Cdata)): + Cdata[s] = Cdata[s].strip() + + UAExpCshifts = list(Cdata) + UAExpHshifts = list(Hdata) + + #Assign known(experimentally assigned) signals first + for l in range(0, len(Clabels)): + for s in Cdata: + if (Clabels[l] + ')') in s and (not 'or' in s) and (not 'OR' in s): + shiftend = s.find('(') + ExpCvalues[l] = float(s[:shiftend]) + UAExpCshifts.remove(s) + break + for l in range(0, len(Hlabels)): + for s in Hdata: + if (Hlabels[l] + ')') in s and (not 'or' in s) and (not 'OR' in s): + shiftend = s.find('(') + ExpHvalues[l] = float(s[:shiftend]) + UAExpHshifts.remove(s) + break + + #Prepare unassigned experimental values for matching + for i in range(0, len(UAExpHshifts)): + shiftend = UAExpHshifts[i].find('(') + UAExpHshifts[i] = float(UAExpHshifts[i][:shiftend]) + + for i in range(0, len(UAExpCshifts)): + shiftend = UAExpCshifts[i].find('(') + UAExpCshifts[i] = float(UAExpCshifts[i][:shiftend]) + + #Try to assign unassigned values based on every calculated tautomer + MinMAE = 1000 + for k in range(0, len(Cvalues)): + #Pick out unassigned computational values for matching + UACompCshifts = [] + UACompHshifts = [] + for i in range(0, len(ExpCvalues)): + if ExpCvalues[i] == -1: + UACompCshifts.append(Cvalues[k][i]) + for i in range(0, len(ExpHvalues)): + if ExpHvalues[i] == -1: + UACompHshifts.append(Hvalues[k][i]) + + #Sort both sets of data - this essentially pairs them up + UAExpCshifts.sort() + UAExpHshifts.sort() + 
UACompCshifts.sort() + UACompHshifts.sort() + + #Go through half-assigned experimental data and fill in the holes with + #paired data + for i in range(0, len(ExpCvalues)): + if ExpCvalues[i] == -1 and len(UAExpCshifts) > 0: + j = UACompCshifts.index(Cvalues[k][i]) + ExpCvalues[i] = UAExpCshifts[j] + for i in range(0, len(ExpHvalues)): + if ExpHvalues[i] == -1 and len(UAExpHshifts) > 0: + j = UACompHshifts.index(Hvalues[k][i]) + ExpHvalues[i] = UAExpHshifts[j] + + #Optimize tautomer populations for + #tpops is 1 shorter than the number of tautomers, + #the remaining weight is 1-sum(rest) + tpops = [1.0/len(Cvalues) for i in range(len(Cvalues)-1)] + f = lambda w: TautError(Cvalues, ExpCvalues, Hvalues, ExpHvalues, w) + res = sciopt.minimize(f, tpops, method='nelder-mead') + if float(res.fun) < MinMAE: + print "New min MAE: " + str(res.fun) + MinMAE = float(res.fun) + NewPops = list(res.x) + NewPops.append(1-sum(NewPops)) + print NewPops + + NewCvalues = [] + NewHvalues = [] + + #calculate the new C values + for atom in range(0, len(Clabels)): + C = 0 + for taut in range(0, len(Cvalues)): + C = C + Cvalues[taut][atom]*NewPops[taut] + NewCvalues.append(C) + + for atom in range(0, len(Hlabels)): + H = 0 + for taut in range(0, len(Hvalues)): + H = H + Hvalues[taut][atom]*NewPops[taut] + NewHvalues.append(H) + + #Return the new Cvalues and Hvalues + return (NewCvalues, NewHvalues) + + +def TautError(Cs, CExp, Hs, HExp, TPopsIn): + + if len(Cs) != len(TPopsIn)+1 or len(Hs) != len(TPopsIn)+1: + print len(Cs), len(Hs), len(TPopsIn) + print ("Input dimensions in TautError don't match, exiting...") + return 1000 + TPops = list(TPopsIn) + TPops.append(1-sum(TPops)) + SumC = [] + SumH = [] + #print len(Cs), len(Hs), len(TPops) + #print TPops + for i in range(0, len(TPops)): + if TPops[i] < 0: + return 100 + if sum(TPops) > 1: + s = sum(TPops) + for i in range(0, len(TPops)): + TPops[i] = TPops[i]/s + + for atom in range(0, len(CExp)): + C = 0 + for taut in range(0, len(Cs)): + C = C + Cs[taut][atom]*TPops[taut] + SumC.append(C) + + for atom in range(0, len(HExp)): + H = 0 + for taut in range(0, len(Hs)): + H = H + Hs[taut][atom]*TPops[taut] + SumH.append(H) + + ErrC = MAE(SumC, CExp) + #ErrC = RMSE(SumC, CExp) + ErrH = MAE(SumH, HExp) + #ErrH = RMSE(SumH, HExp) + #print 'MAE for C: ' + str(ErrC) + #print 'MAE for H: ' + str(ErrH) + + return ErrC + 20*ErrH + + +def getScriptPath(): + return os.path.dirname(os.path.realpath(sys.argv[0])) + + +def MAE(L1, L2): + + if len(L1) != len(L2): + return -1 + else: + L = [] + for i in range(0, len(L1)): + L.append(abs(L1[i]-L2[i])) + return sum(L)/len(L) + + +def RMSE(L1, L2): + + if len(L1) != len(L2): + return -1 + else: + L = [] + for i in range(0, len(L1)): + L.append((L1[i]-L2[i])**2) + return math.sqrt(sum(L)/len(L)) + +if __name__ == '__main__': + #print sys.argv + cpargs = sys.argv[2:] + numDS = int(sys.argv[1]) + for ds in range(0, numDS): + cpargs[ds*2+1] = int(cpargs[ds*2+1]) + main(numDS, *cpargs) diff --git a/NWChem.py b/NWChem.py new file mode 100644 index 0000000..98e8e27 --- /dev/null +++ b/NWChem.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Nov 19 15:56:54 2014 + +@author: ke291 + +Contains all of the NWChem specific code for input generation and calculation +execution. Called by PyDP4.py. 
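A .nw input deck written by WriteNWChemFile below looks roughly like the
following (abridged sketch assembled from that function's write calls; the
title, charge and coordinates are illustrative, and the scratch_dir/cosmo
lines are only added for the relevant settings):

    memory stack 1500 mb heap 1500 mb global 3000 mb
    echo

    start molecule

    title "Candidate1nwinp001"
    charge 0

    geometry units angstroms print xyz autosym
     C 1.234 0.567 -0.890
     ...
    end

    basis
     * library 6-31G**
    end

    dft
     xc b3lyp
     mult 1
    end

    task dft energy

    property
     shielding
    end
    task dft property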
+""" + +import Tinker +import MacroModel +import nmrPredictNWChem + +import pyximport +pyximport.install() +import ConfPrune +import glob +import os +import subprocess +import time +import socket + + +def SetupNWChem(MMoutp, NWCheminp, numDigits, settings, adjRMSDcutoff): + + #Reads conformer geometry, energies and atom labels from Tinker output + #(atoms, conformers) = ReadConformers(MMoutp, MaxEnergy) + + if settings.MMTinker: + #Reads conformer geometry, energies and atom labels from Tinker output + (atoms, conformers, charge) = Tinker.ReadTinker(MMoutp, settings) + else: + (atoms, conformers, charge) = MacroModel.ReadMacromodel(MMoutp, + settings) + + #Prune similar conformations, if the number exceeds the limit + if len(conformers) > settings.PerStructConfLimit: + pruned = ConfPrune.RMSDPrune(conformers, atoms, adjRMSDcutoff) + else: + pruned = conformers + + print str(len(conformers) - len(pruned)) +\ + " or " + "{:.1f}".format(100 * (len(conformers) - len(pruned)) / + len(conformers)) + "% of conformations have been pruned based on " + \ + str(adjRMSDcutoff) + " angstrom cutoff" + + for num in range(0, len(pruned)): + filename = NWCheminp+str(num+1).zfill(3) + WriteNWChemFile(filename, pruned[num], atoms, charge, settings) + + print str(len(pruned)) + " .nw files written" + + +#Adjust the RMSD cutoff to keep the conformation numbers reasonable +def AdaptiveRMSD(MMoutp, settings): + + if settings.MMTinker: + #Reads conformer geometry, energies and atom labels from Tinker output + (atoms, conformers, charge) = Tinker.ReadTinker(MMoutp, settings) + else: + (atoms, conformers, charge) = MacroModel.ReadMacromodel(MMoutp, + settings) + + return ConfPrune.AdaptRMSDPrune(conformers, atoms, + settings.InitialRMSDcutoff, + settings.PerStructConfLimit) + + +def WriteNWChemFile(NWCheminp, conformer, atoms, charge, settings): + + f = file(NWCheminp + '.nw', 'w') + f.write('memory stack 1500 mb heap 1500 mb global 3000 mb\n') + if settings.DFT == 'w': + f.write('scratch_dir /scratch/' + settings.user + '/' + NWCheminp + '\n') + f.write('echo\n\nstart molecule\n\ntitle "'+NWCheminp+'"\n') + f.write('echo\n\nstart\n\n') + f.write('charge ' + str(charge) + '\n\n') + f.write('geometry units angstroms print xyz autosym\n') + + natom = 0 + for atom in conformer: + f.write(' ' + atoms[natom] + ' ' + atom[1] + ' ' + atom[2] + ' ' + + atom[3] + '\n') + natom = natom + 1 + + f.write('end\n\nbasis\n * library 6-31G**\nend\n\n') + f.write('dft\n xc b3lyp\n mult 1\nend\n\n') + if settings.Solvent != "": + f.write('cosmo\n do_cosmo_smd true\n solvent ' + settings.Solvent + '\n') + f.write('end\n\n') + f.write('task dft energy\n\n') + f.write('property\n shielding\nend\n') + f.write('task dft property\n') + f.close() + + +def GetFiles2Run(inpfiles, settings): + #Get the names of all relevant input files + NinpFiles = [] + for filename in inpfiles: + NinpFiles = NinpFiles + glob.glob(filename + 'nwinp???.nw') + + Files2Run = [] + + #for every input file check that there is a completed output file, + #delete the incomplete outputs and add the inputs to be done to Files2Run + for filename in NinpFiles: + if not os.path.exists(filename[:-3]+'.nwo'): + Files2Run.append(filename) + else: + if IsNWChemCompleted(filename[:-3] + '.nwo'): + continue + else: + os.remove(filename[:-3] + '.nwo') + Files2Run.append(filename) + + return Files2Run + + +def RunNWChem(inpfiles, settings): + + NCompleted = 0 + NWChemPrefix = "nwchem " + + for f in inpfiles: + print NWChemPrefix + f + ' > ' + f[:-2] + 'nwo' + outp = 
subprocess.check_output(NWChemPrefix + f + ' > ' + f[:-2] + + 'nwo', shell=True) + NCompleted += 1 + print "NWChem job " + str(NCompleted) + " of " + str(len(inpfiles)) + \ + " completed." + + +def RunNMRPredict(numDS, *args): + + NWNames = [] + NTaut = [] + + for val in range(0, numDS): + NTaut.append(args[val*2]) + NWNames.append(args[val*2+1]) + + RelEs = [] + populations = [] + BoltzmannShieldings = [] + + print NWNames + print NTaut + #This loop runs nmrPredict for each diastereomer and collects + #the outputs + for isomer in NWNames: + + NWFiles = glob.glob(isomer + 'nwinp*.nwo') + for f in range(0, len(NWFiles)): + NWFiles[f] = NWFiles[f][:-4] + + #Runs nmrPredictNWChem Name001, ... and collects output + (x, y, labels, z) = nmrPredictNWChem.main(*NWFiles) + RelEs.append(x) + populations.append(y) + BoltzmannShieldings.append(z) + + return (RelEs, populations, labels, BoltzmannShieldings, NTaut) + + +def IsNWChemCompleted(f): + Nfile = open(f, 'r') + outp = Nfile.readlines() + Nfile.close() + outp = "".join(outp) + if "AUTHORS" in outp: + return True + else: + return False + + +def RunOnZiggy(folder, queue, NWFiles, settings): + + print "ziggy NWChem job submission script\n" + + #Check that folder does not exist, create job folder on ziggy + outp = subprocess.check_output('ssh ziggy ls', shell=True) + if folder in outp: + print "Folder exists on ziggy, choose another folder name." + return + + outp = subprocess.check_output('ssh ziggy mkdir ' + folder, shell=True) + #Write the qsub scripts + for f in NWFiles: + WriteSubScript(f[:-3], queue, folder, settings) + print str(len(NWFiles)) + ' .qsub scripts generated' + + #Upload .com files and .qsub files to directory + print "Uploading files to ziggy..." + for f in NWFiles: + outp = subprocess.check_output('scp ' + f +' ziggy:~/' + folder, + shell=True) + outp = subprocess.check_output('scp ' + f[:-3] +'.qsub ziggy:~/' + + folder, shell=True) + + print str(len(NWFiles)) + ' .nw and .qsub files uploaded to ziggy' + + #Launch the calculations + for f in NWFiles: + job = '~/' + folder + '/' + f[:-3] + outp = subprocess.check_output('ssh ziggy qsub -q ' + queue + ' -o ' + + job + '.log -e ' + job + '.err -l nodes=1:ppn=1:ivybridge ' + + job + '.qsub', shell=True) + time.sleep(3) + + print str(len(NWFiles)) + ' jobs submitted to the queue on ziggy' + + outp = subprocess.check_output('ssh ziggy showq', shell=True) + if settings.user in outp: + print "Jobs are running on ziggy" + + Jobs2Complete = list(NWFiles) + n2complete = len(Jobs2Complete) + + #Check and report on the progress of calculations + while len(Jobs2Complete) > 0: + JustCompleted = [job for job in Jobs2Complete if + IsZiggyGComplete(job[:-2] + 'nwo', folder, settings)] + Jobs2Complete[:] = [job for job in Jobs2Complete if + not IsZiggyGComplete(job[:-2] + 'nwo', folder, settings)] + if n2complete != len(Jobs2Complete): + n2complete = len(Jobs2Complete) + print str(n2complete) + " remaining." + + time.sleep(60) + + #When done, copy the results back + print "\nCopying the output files back to localhost..." + print 'ssh ziggy scp /home/' + settings.user + '/' + folder + '/*.nwo ' +\ + socket.getfqdn() + ':' + os.getcwd() + outp = subprocess.check_output('ssh ziggy scp /home/' + settings.user + '/' + + folder + '/*.nwo ' + socket.getfqdn() + ':' + + os.getcwd(), shell=True) + + +def WriteSubScript(NWJob, queue, ZiggyJobFolder, settings): + + if not (os.path.exists(NWJob+'.nw')): + print "The input file " + NWJob + ".nw does not exist. Exiting..." 
+ return + + #Create the submission script + QSub = open(NWJob + ".qsub", 'w') + + #Choose the queue + QSub.write('#PBS -q ' + queue + '\n#PBS -l nodes=1:ppn=1\n#\n') + + #define input files and output files + QSub.write('file=' + NWJob + '\n\n') + QSub.write('inpfile=${file}.nw\noutfile=${file}.nwo\n') + + #define cwd and scratch folder and ask the machine + #to make it before running the job + QSub.write('HERE=/home/' + settings.user + '/' + ZiggyJobFolder + '\n') + QSub.write('SCRATCH=/sharedscratch/' + settings.user + '/' + NWJob + '\n') + QSub.write('LSCRATCH=/scratch/' + settings.user + '/' + NWJob + '\n') + QSub.write('mkdir ${SCRATCH}\n') + QSub.write('mkdir ${LSCRATCH}\n') + + #load relevant modules + QSub.write('set OMP_NUM_THREADS=1\n') + QSub.write('module load anaconda\n') + QSub.write('module load gcc/4.8.3\n') + QSub.write('module load mpi/openmpi/gnu/1.8.1\n') + QSub.write('module load nwchem\n') + + #copy the input file to scratch + QSub.write('cp ${HERE}/${inpfile} $SCRATCH\ncd $SCRATCH\n') + + #write useful info to the job output file (not the gaussian) + QSub.write('echo "Starting job $PBS_JOBID"\necho\n') + QSub.write('echo "PBS assigned me this node:"\ncat $PBS_NODEFILE\necho\n') + + QSub.write('ln -s $HERE/$outfile $SCRATCH/$outfile\n') + QSub.write('nwchem $inpfile > $outfile\n') + + #Cleanup + QSub.write('rm -rf ${SCRATCH}/\n') + QSub.write('rm -rf ${LSCRATCH}/\n') + QSub.write('qstat -f $PBS_JOBID\n') + + QSub.close() + + +def IsZiggyGComplete(f, folder, settings): + + path = '/home/' + settings.user + '/' + folder + '/' + try: + outp1 = subprocess.check_output('ssh ziggy ls ' + path, shell=True) + except subprocess.CalledProcessError, e: + print "ssh ziggy ls failed: " + str(e.output) + return False + if f in outp1: + try: + outp2 = subprocess.check_output('ssh ziggy cat ' + path + f, + shell=True) + except subprocess.CalledProcessError, e: + print "ssh ziggy cat failed: " + str(e.output) + return False + if "AUTHORS" in outp2: + return True + return False diff --git a/NucCErr.pkl b/NucCErr.pkl new file mode 100644 index 0000000..f6f629b Binary files /dev/null and b/NucCErr.pkl differ diff --git a/NucCErrOpt.pkl b/NucCErrOpt.pkl new file mode 100644 index 0000000..e763dd4 Binary files /dev/null and b/NucCErrOpt.pkl differ diff --git a/NucHErr.pkl b/NucHErr.pkl new file mode 100644 index 0000000..8b7f33f Binary files /dev/null and b/NucHErr.pkl differ diff --git a/NucHErrOpt.pkl b/NucHErrOpt.pkl new file mode 100644 index 0000000..ccda636 Binary files /dev/null and b/NucHErrOpt.pkl differ diff --git a/PyDP4.py b/PyDP4.py new file mode 100755 index 0000000..4aaeb2b --- /dev/null +++ b/PyDP4.py @@ -0,0 +1,435 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +PyDP4 integrated workflow for the running of MM, DFT GIAO calculations and +DP4 analysis +v0.4 + +Copyright (c) 2015 Kristaps Ermanis, Jonathan M. Goodman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Created on Wed Nov 19 15:26:32 2014 +Updated on Mon Jul 30 2015 + +@author: ke291 + +The main file, that should be called to start the PyDP4 workflow. +Interprets the arguments and takes care of the general workflow logic. +""" + +from __future__ import division + +import Gaussian +import NMRDP4GTF +import Tinker +import MacroModel +import NWChem + +import glob +import sys +import os +import datetime +import argparse +import math + + +#Assigning the config default values +class Settings: + MMTinker = True + MMMacromodel = False + DFT = 'z' + Rot5Cycle = False + RingAtoms = [] + GenDS = True + GenTaut = False + GenProt = False + Solvent = '' + DFTOpt = False + PDP4 = False + EP5 = False + queue = 's1' + TinkerPath = '~/tinker7/bin/scan ' + OBPath = '/home/ke291/Tools/openbabel-install/lib/python2.7/site-packages/' + SCHRODINGER = '/usr/local/shared/schrodinger/current' + ScriptDir = '' + user = 'ke291' + MMstepcount = 6000 + MMfactor = 500 # nsteps = MMfactor*degrees of freedom + HardConfLimit = 10000 + MaxConcurrentJobs = 75 + PerStructConfLimit = 100 + InitialRMSDcutoff = 0.75 + MaxCutoffEnergy = 10.0 + NTaut = 1 + LogFile = 'PyDP4.log' + AssumeDone = False + GenOnly = False + SelectedStereocentres = [] + charge = None + BasicAtoms = [] + ForceField = 'mmff' + +settings = Settings() + + +def main(filename, ExpNMR, nfiles): + + print "==========================" + print "PyDP4 script,\nintegrating Tinker/MacroModel," + print "Gaussian/NWChem and DP4\nv0.4" + print "\nCopyright (c) 2015 Kristaps Ermanis, Jonathan M. 
Goodman" + print "Distributed under MIT license" + print "==========================\n\n" + + if nfiles < settings.NTaut or nfiles % settings.NTaut != 0: + print "Invalid number of tautomers/input files - number of input files\ + must be a multiple of number of tautomers" + quit() + + #Check the number of input files, generate some if necessary + if nfiles == 1: + import InchiGen + if len(settings.SelectedStereocentres) > 0: + numDS, inpfiles = InchiGen.GenSelectDiastereomers(filename, + settings.SelectedStereocentres) + else: + numDS, inpfiles = InchiGen.GenDiastereomers(filename) + if settings.GenTaut: + newinpfiles = [] + for ds in inpfiles: + print "Generating tautomers for " + ds + settings.NTaut, files = InchiGen.GenTautomers(ds) + newinpfiles.extend(files) + inpfiles = list(newinpfiles) + if settings.GenProt: + newinpfiles = [] + for ds in inpfiles: + print "Generating protomers for " + ds + settings.NTaut, files = InchiGen.GenProtomers(ds, + settings.BasicAtoms) + newinpfiles.extend(files) + inpfiles = list(newinpfiles) + else: + inpfiles = filename + if settings.GenTaut: + numDS = nfiles + import InchiGen + newinpfiles = [] + for ds in inpfiles: + print "Generating tautomers for " + ds + settings.NTaut, files = InchiGen.GenTautomers(ds) + newinpfiles.extend(files) + inpfiles = list(newinpfiles) + else: + numDS = int(nfiles/settings.NTaut) + if (numDS == 1): + import InchiGen + for f in filename: + tdiastereomers = [] + numDS, tinpfiles = InchiGen.GenDiastereomers(f) + tdiastereomers.append(tinpfiles) + tinpfiles = zip(*tdiastereomers) + inpfiles = [] + for ds in tinpfiles: + inpfiles.extend(list(ds)) + + print inpfiles + + #Check the existence of mm output files + MMRun = False + + if settings.MMTinker: + #Check if there already are Tinker output files with the right names + tinkfiles = glob.glob('*.tout') + mminpfiles = [] + for filename in inpfiles: + if filename + '.tout' in tinkfiles and (filename + 'rot.tout' in + tinkfiles or settings.Rot5Cycle is False): + if len(mminpfiles) == 0: + MMRun = True + else: + MMRun = False + mminpfiles.append(filename) + else: + #Check if there already are Tinker output files with the right names + mmfiles = glob.glob('*.log') + mminpfiles = [] + for filename in inpfiles: + if filename + '.log' in mmfiles and (filename + 'rot.log' in + mmfiles or settings.Rot5Cycle is False): + if len(mminpfiles) == 0: + MMRun = True + else: + MMRun = False + mminpfiles.append(filename) + + if MMRun: + print 'Conformation search has already been run for these inputs.\ + \nSkipping...' + if settings.GenOnly: + print "Input files generated, quitting..." + quit() + else: + if settings.MMTinker: + print 'Some Tinker files missing.' + print '\Seting up Tinker files...' + Tinker.SetupTinker(len(inpfiles), settings, *mminpfiles) + if settings.GenOnly: + print "Input files generated, quitting..." + quit() + print '\nRunning Tinker...' + Tinker.RunTinker(len(inpfiles), settings, *mminpfiles) + else: + print 'Some Macromodel files missing.' + print '\nSetting up Macromodel files...' + MacroModel.SetupMacromodel(len(inpfiles), settings, *mminpfiles) + if settings.GenOnly: + print "Input files generated, quitting..." + quit() + print '\nRunning Macromodel...' 
+ MacroModel.RunMacromodel(len(inpfiles), settings, *mminpfiles) + + if not settings.AssumeDone: + if settings.DFT == 'z' or settings.DFT == 'g': + adjRMSDcutoff = Gaussian.AdaptiveRMSD(inpfiles[0], settings) + elif settings.DFT == 'n' or settings.DFT == 'w': + adjRMSDcutoff = NWChem.AdaptiveRMSD(inpfiles[0], settings) + print 'RMSD cutoff adjusted to ' + str(adjRMSDcutoff) + #Run NWChem setup script for every diastereomer + print '\nRunning DFT setup...' + i = 1 + for ds in inpfiles: + if settings.DFT == 'z' or settings.DFT == 'g': + print "\nGaussian setup for file " + ds + " (" + str(i) +\ + " of " + str(len(inpfiles)) + ")" + Gaussian.SetupGaussian(ds, ds + 'ginp', 3, settings, + adjRMSDcutoff) + elif settings.DFT == 'n' or 'w': + print "\nNWChem setup for file " + ds +\ + " (" + str(i) + " of " + str(len(inpfiles)) + ")" + NWChem.SetupNWChem(ds, ds + 'nwinp', 3, settings, + adjRMSDcutoff) + i += 1 + QRun = False + else: + QRun = True + + if settings.DFT == 'z' or settings.DFT == 'g': + Files2Run = Gaussian.GetFiles2Run(inpfiles, settings) + elif settings.DFT == 'n' or 'w': + Files2Run = NWChem.GetFiles2Run(inpfiles, settings) + print Files2Run + if len(Files2Run) == 0: + QRun = True + + if len(Files2Run) > settings.HardConfLimit: + print "Hard conformation count limit exceeded, DFT calculations aborted." + quit() + + if QRun: + print 'DFT has already been run for these inputs. Skipping...' + else: + if settings.DFT == 'z': + print '\nRunning Gaussian on Ziggy...' + + #Run Gaussian jobs on Ziggy cluster in folder named after date + #and time in the short 1processor job queue + #and wait until the last file is completed + now = datetime.datetime.now() + MaxCon = settings.MaxConcurrentJobs + if settings.DFTOpt: + for i in range(len(Files2Run)): + Files2Run[i] = Files2Run[i][:-5] + '.com' + if len(Files2Run) < MaxCon: + Gaussian.RunOnZiggy(now.strftime('%d%b%H%M'), settings.queue, + Files2Run, settings) + else: + print "The DFT calculations will be done in " +\ + str(math.ceil(len(Files2Run)/MaxCon)) + " batches" + i = 0 + while((i+1)*MaxCon < len(Files2Run)): + print "Starting batch nr " + str(i+1) + Gaussian.RunOnZiggy(now.strftime('%d%b%H%M')+str(i+1), + settings.queue, Files2Run[(i*MaxCon):((i+1)*MaxCon)], settings) + i += 1 + print "Starting batch nr " + str(i+1) + Gaussian.RunOnZiggy(now.strftime('%d%b%H%M')+str(i+1), + settings.queue, Files2Run[(i*MaxCon):], settings) + + elif settings.DFT == 'n': + print '\nRunning NWChem locally...' + NWChem.RunNWChem(Files2Run, settings) + elif settings.DFT == 'w': + print '\nRunning NWChem on Ziggy...' + + #Run NWChem jobs on Ziggy cluster in folder named after date + #and time in the short 1 processor job queue + #and wait until the last file is completed + now = datetime.datetime.now() + MaxCon = settings.MaxConcurrentJobs + if len(Files2Run) < MaxCon: + NWChem.RunOnZiggy(now.strftime('%d%b%H%M'), settings.queue, + Files2Run, settings) + else: + print "The DFT calculations will be done in " +\ + str(math.ceil(len(Files2Run)/MaxCon)) + " batches" + i = 0 + while((i+1)*MaxCon < len(Files2Run)): + print "Starting batch nr " + str(i+1) + NWChem.RunOnZiggy(now.strftime('%d%b%H%M')+str(i+1), + settings.queue, Files2Run[(i*MaxCon):((i+1)*MaxCon)], settings) + i += 1 + print "Starting batch nr " + str(i+1) + NWChem.RunOnZiggy(now.strftime('%d%b%H%M')+str(i+1), + settings.queue, Files2Run[(i*MaxCon):], settings) + + if (numDS < 2): + print "DP4 requires at least 2 candidate structures!" 
+ else: + allargs = [] + for i in range(numDS): + allargs.append(settings.NTaut) + allargs.extend(inpfiles[i*settings.NTaut:(i+1)*settings.NTaut]) + allargs.append(ExpNMR) + DP4outp = NMRDP4GTF.main(numDS, settings, *allargs) + print '\nWriting the DP4 output to DP4outp' + if not settings.EP5: + if nfiles == 1: + DP4_ofile = open(filename + '.dp4', 'w') + else: + DP4_ofile = open(filename[0] + '.dp4', 'w') + else: + if nfiles == 1: + DP4_ofile = open(filename + '_emp.dp4', 'w') + else: + DP4_ofile = open(filename[0] + '_emp.dp4', 'w') + DP4_ofile.write(DP4outp) + DP4_ofile.close() + print 'DP4 process completed successfully.' + + +def getScriptPath(): + return os.path.dirname(os.path.realpath(sys.argv[0])) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='PyDP4 script to setup\ + and run Tinker, Gaussian (on ziggy) and DP4') + parser.add_argument('-m', '--mm', help="Select molecular mechanics program,\ + t for tinker or m for macromodel, default is t", choices=['t', 'm'], + default='t') + parser.add_argument('-d', '--dft', help="Select DFT program, j for Jaguar,\ + g for Gaussian, n for NWChem, z for Gaussian on ziggy, w for NWChem on\ + ziggy, default is z", choices=['j', 'g', 'n', 'z', 'w'], default='z') + parser.add_argument('--StepCount', help="Specify\ + stereocentres for diastereomer generation") + parser.add_argument('StructureFiles', nargs='+', default=['-'], help= + "One or more SDF file for the structures to be verified by DP4. At least one\ + is required, if automatic diastereomer and tautomer generation is used.\ + One for each candidate structure, if automatic generation is not used") + parser.add_argument("ExpNMR", help="Experimental NMR description, assigned\ + with the atom numbers from the structure file") + parser.add_argument("-s", "--solvent", help="Specify solvent to use\ + for dft calculations") + parser.add_argument("-q", "--queue", help="Specify queue for job submission\ + on ziggy", default='s1') + parser.add_argument("-t", "--ntaut", help="Specify number of explicit\ + tautomers per diastereomer given in structure files, must be a multiple\ + of structure files", type=int, default=1) + parser.add_argument("-r", "--rot5", help="Manually generate conformers for\ + 5-memebered rings", action="store_true") + parser.add_argument('--ra', help="Specify ring atoms, for the ring to be\ + rotated, useful for molecules with several 5-membered rings") + parser.add_argument("--AssumeDFTDone", help="Assume RMSD pruning, DFT setup\ + and DFT calculations have been run already", action="store_true") + parser.add_argument("-g", "--GenOnly", help="Only generate diastereomers\ + and tinker input files, but don't run any calculations", action="store_true") + parser.add_argument('-c', '--StereoCentres', help="Specify\ + stereocentres for diastereomer generation") + parser.add_argument('-T', '--GenTautomers', help="Automatically generate\ + tautomers", action="store_true") + parser.add_argument('-o', '--DFTOpt', help="Optimize geometries at DFT\ + level before NMR prediction", action="store_true") + parser.add_argument('--pd', help="Use python port of DP4", action="store_true") + parser.add_argument('--ep5', help="Use EP5", action="store_true") + parser.add_argument('-n', '--Charge', help="Specify\ + charge of the molecule. 
Do not use when input files have different charges") + parser.add_argument('-b', '--BasicAtoms', help="Generate protonated states\ + on the specified atoms and consider as tautomers") + parser.add_argument('-f', '--ff', help="Selects force field for the \ + conformational search, implemented options 'mmff' and 'opls' (2005\ + version)", choices=['mmff', 'opls'], default='mmff') + args = parser.parse_args() + print args.StructureFiles + print args.ExpNMR + settings.NTaut = args.ntaut + settings.DFT = args.dft + settings.queue = args.queue + settings.ScriptDir = getScriptPath() + settings.ForceField = args.ff + + if args.pd: + settings.PDP4 = True + settings.EP5 = False + else: + settings.PDP4 = False + + if args.ep5: + settings.EP5 = True + settings.PDP4 = False + else: + settings.EP5 = False + + if args.mm == 't': + settings.MMTinker = True + settings.MMMacromodel = False + else: + settings.MMMacromodel = True + settings.MMTinker = False + if args.DFTOpt: + settings.DFTOpt = True + if args.BasicAtoms is not None: + settings.BasicAtoms =\ + [int(x) for x in (args.BasicAtoms).split(',')] + settings.GenProt = True + if args.StepCount is not None: + settings.MMstepcount = int(args.StepCount) + if args.Charge is not None: + settings.charge = int(args.Charge) + if args.GenTautomers: + settings.GenTaut = True + if args.StereoCentres is not None: + settings.SelectedStereocentres =\ + [int(x) for x in (args.StereoCentres).split(',')] + if args.GenOnly: + settings.GenOnly = True + if args.AssumeDFTDone: + settings.AssumeDone = True + if args.solvent: + settings.Solvent = args.solvent + if args.rot5: + settings.Rot5Cycle = True + if args.ra is not None: + settings.RingAtoms =\ + [int(x) for x in (args.ra).split(',')] + if len(args.StructureFiles) == 1: + main(args.StructureFiles[0], args.ExpNMR, 1) + else: + main(args.StructureFiles, args.ExpNMR, len(args.StructureFiles)) + #main() diff --git a/README b/README new file mode 100644 index 0000000..515d494 --- /dev/null +++ b/README @@ -0,0 +1,273 @@ +=============================================================== + +PyDP4 workflow integrating MacroModel/TINKER, Gaussian/NWChem +and DP4 analysis + +version 0.4 + +Copyright (c) 2015 Kristaps Ermanis, Jonathan M. Goodman + +distributed under MIT license + +=============================================================== + +CONTENTS +1) Requirements and Setup +2) Usage +3) NMR Description Format +4) Included Utilites +5) Code Organization + +=============================================================== + +REQUIREMENTS AND SETUP + +All the python and Java files and one utility to convert to and from TINKER +nonstandard xyz file are in the attached archive. They are set up to work +from a centralised location. It is not a requirement, but it is probably +best to add the location to the PATH variable. + +The script currently is set up to use MacroModel for molecular mechanics and +NWChem for DFT and it runs NWChem locally. Gaussian and TINKER is also supported +This setup has several requirements. + +1) One should have MacroModel or TINKER and NWChem or Gaussian. +The beginning PyDP4.py file contains a structure "Settings", where the location +of the TINKER scan executable or MacroModel bmin executable should be specified. + +2) The various manipulations of sdf files (renumbering, ring corner flipping) +requires OpenBabel, including Python bindings. 
The following links provide +instructions for building OpenBabel with Python bindings: +http://openbabel.org/docs/dev/UseTheLibrary/PythonInstall.html +http://openbabel.org/docs/dev/Installation/install.html#compile-bindings +The Settings structure contain path to OpenBabel, but currently it also +needs to be specified in InchiGen.py and FiveConf.py +This dependency can be ignored, if no diastereomer generation or +5 membered ring flipping is done. + +3) Finally, to run calculations on a computational cluster, a passwordless +ssh connection should be set up in both directions - +desktop -> cluster and cluster -> desktop. In most cases the modification +of the relevant functions in Gaussian.py or NWChem.py will be required +to fit your situation. + +4) All development and testing was done on Linux. However, both the scripts +and all the backend software should work equally well on windows with little +modification. + +=================== + +USAGE + +To call the script: +1) With all diastereomer generation: + +PyDP4.py Candidate CandidateNMR + +where Candidate is the sdf file containing 3D coordinates of the candidate +structure (without the extension), +and CandidateNMR contains the NMR description. The NMR description largely +follows the DP4 format, but see bellow for differences. + +Alternatively: + +PyDP4.py -s chloroform Candidate CandidateNMR + +specifies solvent for DFT calculation. If solvent is not given, no solvent is used. + +2) With explicit diastereomer/other candidate structures: + +PyDP4.py Candidate1 Candidate2 Candidate3 ... CandidateNMR + +The script does not attempt to generate diastereomers, simply carries out the +DP4 on the specified candidate structures. + +Script has several other switches, including switching the molecular mechanics and dft software etc. 
+ + -m {t,m}, --mm {t,m} Select molecular mechanics program, t for tinker or m + for macromodel, default is t + + -d {j,g,n,z,w}, --dft {j,g,n,z,w} + Select DFT program, j for Jaguar, g for Gaussian, n + for NWChem, z for Gaussian on ziggy, w for NWChem on + ziggy, default is z (jaguar is not yet implemented) + + --StepCount STEPCOUNT + Specify stereocentres for diastereomer generation + + -s SOLVENT, --solvent SOLVENT + Specify solvent to use for dft calculations + + -q QUEUE, --queue QUEUE + Specify queue for job submission on ziggy + (default is s1) + + -t NTAUT, --ntaut NTAUT + Specify number of explicit tautomers per diastereomer + given in structure files, must be a multiple of + structure files + + -r, --rot5 Manually generate conformers for 5-memebered rings + + --ra RA Specify ring atoms, for the ring to be rotated, useful + for molecules with several 5-membered rings + + --AssumeDFTDone Assume RMSD pruning, DFT setup and DFT calculations + have been run already (saves time when repeating DP4 + analysis) + + -g, --GenOnly Only generate diastereomers and tinker input files, + but don't run any calculations (useful for diastereomer + generation for calculations ran on computers + without OpenBabel) + + -c STEREOCENTRES, --StereoCentres STEREOCENTRES + Specify stereocentres for diastereomer generation + + -T, --GenTautomers Automatically generate tautomers + + -o, --DFTOpt Optimize geometries at DFT level before NMR prediction + + --pd Use python port of DP4 + + -b BASICATOMS, --BasicAtoms BASICATOMS + Generate protonated states on the specified atoms and + consider as tautomers + +More information on those can be obtained by running PyDP4.py -h + +====================== + +NMR DESCRIPTION FORMAT + +NMRFILE example begins: +59.58(C3),127.88(C11),127.52(C10),115.71(C9),157.42(C8),133.98(C23),118.22(C22),115.79(C21),158.00(C20),167.33(C1),59.40(C2),24.50(C31),36.36(C34),71.05(C37),142.14(C42),127.50(C41),114.64(C40),161.02(C39) + +4.81(H5),7.18(H15),6.76(H14),7.22(H28),7.13(H27),3.09(H4),1.73(H32 or H33),1.83(H32 or H33),1.73(H36 or H35),1.73(H36 or H35),4.50(H38),7.32(H47),7.11(H46) + +H15,H16 +H14,H17 +H28,H29 +H27,H30 +H47,H48 +H46,H49 +C10,C12 +C9,C13 +C22,C24 +C21,C25 +C41,C43 +C40,C44 + +OMIT H19,H51 + +:example ends + +Sections are seperated by empty lines. +1) The first section is assigned C shifts, can also be (any). +2) Second section is (un)assigned H shifts. +3) This section defines chemically equivalent atoms. Each line is a new set, +all atoms in a line are treated as equivalent, their computed shifts averaged. +4) Final section, starting with a keyword OMIT defines atoms to be ignored. +Atoms defined in this section do not need a corresponding shift in the NMR +description + + +===================== + +UTILITIES + +There are 2 utilities included, not necessary for the process, but sometimes +useful. + +If the DP4 workflow fails at the TINKER stage, the 2 likely reasons are either +lack of 1gb of free memory or TINKER not accepting the numbering of the sdf +file (this is a bug in TINKER). The latter can be fixed by running the following +script: + +TreeRenum.py Candidate CandidateNMR + +It takes the sdf file and performs a spanning tree renumbering - making sure, +that there are as many connected atoms in sequence as possible. So far this +has always solved the TINKER problem. +The script also renumbers the NMR description file, if it contains any atom numbers. +The renumbered files are saved as Candidater and CandidateNMRr (r appended to their +original name). 
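For scripting around the NMR description format above, it is convenient that
the file splits on blank lines into the four sections (carbon shifts, proton
shifts, equivalent-atom sets, OMIT list). A minimal reader sketch, assuming a
file laid out exactly as in the example; this helper is illustrative and not
part of the PyDP4 scripts themselves:

def read_nmr_description(path):
    # Split the description into its blank-line separated sections.
    with open(path) as f:
        sections = [s.strip() for s in f.read().split('\n\n') if s.strip()]
    cshifts, hshifts = sections[0], sections[1]
    equivalents = [line.split(',') for line in sections[2].splitlines()
                   if not line.startswith('OMIT')]
    omits = [line[5:].split(',') for sec in sections[2:]
             for line in sec.splitlines() if line.startswith('OMIT')]
    return cshifts, hshifts, equivalents, omits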
+ +---------------------- +Another utility is NMRhelper (called by simply typing NMRhelper.py in shell). +It is a script with GUI interface, that assists in describing and assigning the +NMR. In the top textbox a structure file can be chosen. This allows the utility +to automatically detect protons attached to heteroatoms and add them to the +OMIT list, as well as detect the chemically eqivalent atoms (currently only +implemented for methyl groups). It also lets the script to help tracking which +atoms are yet to be assigned (show in the bottom 2 text boxes). +The next 2 large textboxes are for pasting raw NMR descriptions.Based on the pasted +text, the script will try to detect the shifts and make up a rough draft of the +description file. After this the final version can be prepared in the main textbox. +At any point the button to generate the NMR file can be pressed and this will write +the file to the NMRhelper folder with the name CandidateNMR, where Candidate is the +name of the structure file. +IMPORTANT NOTE: Do not edit the raw data textboxes, if you have done any work in the +main textbox, as this will cause the main textbox to revert to the rough +automatically generated version + + +===================== + +CODE ORGANIZATION + +The code is organized in several python script files, as well as several java +files. + +PyDP4.py +Main file, that should be called to start the PyDP4 workflow. Interprets the +arguments and takes care of the general workflow logic. + +InchiGen.py +Gets called if diastereomer and/or tautomer and/or protomer generation is +used. Called by PyDP4.py. + +FiveConf.py +Gets called if automatic 5-membered cycle corner-flipping is used. Called by +PyDP4.py. + +MacroModel.py +Contains all of the MacroModel specific code for input generation, calculation +execution and output interpretation. Called by PyDP4.py. + +Tinker.py +Contains all of the Tinker specific code for input generation, calculation +execution and output interpretation. Called by PyDP4.py. + +ConfPrune.pyx +Cython file for conformer alignment and RMSD pruning. Called by Gaussian.py +and NWChem.py + +Gaussian.py +Contains all of the Gaussian specific code for input generation and calculation +execution. Called by PyDP4.py. + +NWChem.py +Contains all of the NWChem specific code for input generation and calculation +execution. Called by PyDP4.py. + +NMRDP4GTF.py +Takes care of all the NMR description interpretation, equivalent atom +averaging, Boltzmann averaging, tautomer population optimisation (if used) +and DP4 input preparation and running either DP4.jar or DP4.py. Called by +PyDP4.py + +nmrPredictNWChem.py +Extracts NMR shifts from NWChem output files + +nmrPredictGaussian.java +Extracts NMR shifts from Gaussian output files + +DP4.jar +Original DP4 implementation as in J. Am. Chem. Soc. 2010, 132, 12946. + +DP4.py +Equivalent and compact port to python of the same DP4 process. The results +produced are essentially equivalent, but not identical due to different +floating point precision used in the Python (53 bits) and Java (32 bits) +implementation. diff --git a/Tinker.py b/Tinker.py new file mode 100755 index 0000000..8edbb08 --- /dev/null +++ b/Tinker.py @@ -0,0 +1,264 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 12 12:52:21 2015 + +@author: ke291 + +Contains all of the Tinker specific code for input generation, calculation +execution and output interpretation. Called by PyDP4.py. 
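ReadTinker below keeps every conformer whose energy lies within
settings.MaxCutoffEnergy of the global minimum before the geometries are
passed on. A minimal sketch of that window filter (names are illustrative,
energies being whatever unit the scan output reports):

    def within_window(energies, cutoff):
        # Return the indices of conformers inside the energy window.
        emin = min(energies)
        return [i for i, e in enumerate(energies) if e < emin + cutoff]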
+""" + +import os +import sys +import subprocess + + +def SetupTinker(numDS, settings, *args): + + print args + for ds in args: + if settings.Rot5Cycle is True: + if not os.path.exists(ds+'rot.sdf'): + import FiveConf + #Generate the flipped fivemembered ring, + #result is put in '*rot.sdf' file + FiveConf.main(ds + '.sdf', settings) + + #Makes sure that the name in the title line matches filename + #sdf2tinkerxyz uses this as the output file name + f = open(ds + '.sdf', 'r+') + sdf = f.readlines() + sdf[0] = ds + '\n' + f.seek(0) + f.writelines(sdf) + f.close() + + if settings.Rot5Cycle is True: + f = open(ds + 'rot.sdf', 'r+') + sdf = f.readlines() + sdf[0] = ds + 'rot\n' + if ds in sdf[-3]: + sdf[-3] = ds + 'rot.1\n' + f.seek(0) + f.writelines(sdf) + f.close() + + scriptdir = getScriptPath() + convinp = 'sdf2tinkerxyz -k ' + scriptdir + '/default.key <' + + outp = subprocess.check_output(convinp + ds + '.sdf', shell=True) + + print "Tinker input for " + ds + " prepared." + + if settings.Rot5Cycle is True: + outp = subprocess.check_output(convinp + ds + 'rot.sdf', + shell=True) + print "Tinker input for " + ds + "rot prepared." + + +def RunTinker(numDS, settings, *args): + #Run Tinker scan for all diastereomeric inputs + + NCompleted = 0 + + for ds in args: + print settings.TinkerPath + ds + ' 0 10 20 0.00001 | tee ./' + ds + \ + '.tout' + outp = subprocess.check_output(settings.TinkerPath + ds + + ' 0 10 20 0.00001 | tee ./' + ds + '.tout', shell=True) + NCompleted = NCompleted + 1 + print "Tinker job " + str(NCompleted) + " of " + str(numDS) + \ + " completed." + + if settings.Rot5Cycle is True: + print settings.TinkerPath + ds + 'rot 0 10 20 0.00001 | tee ./' + \ + ds + 'rot.tout' + outp = subprocess.check_output(settings.TinkerPath + ds + + 'rot 0 10 20 0.00001 | tee ./' + ds + 'rot.tout', shell=True) + NCompleted = NCompleted + 1 + print "Tinker job " + str(NCompleted) + " of " + str(numDS*2) + \ + " completed." + + +#Reads the relevant tinker geometry files +#v0.2 - reads seperate rot file as well +def ReadTinker(TinkerOutput, settings): + + #Get conformer energies + ETable, charge = GetEnergiesCharge(TinkerOutput, settings) + + if settings.Rot5Cycle is True: + #Get conformer energies for the flipped 5-membered ring + ETableRot, charge = GetEnergiesCharge(TinkerOutput + 'rot', settings) + + #Determine which conformers we want + MinE = 10000 + MinE = min([float(x[1]) for x in ETable]) + + if settings.Rot5Cycle is True: + MinERot = 10000 + MinERot = min([float(x[1]) for x in ETableRot]) + if MinE > MinERot: + MinE = MinERot + + FileNums = [] + RotFileNums = [] + + AcceptedEs = [] + + for conf in ETable: + if float(conf[1]) < MinE + settings.MaxCutoffEnergy: + #Dealing with special case when nconf>100 000 + if 'Minimum' in conf[0]: + data = conf[0].strip() + FileNums.append(data[7:].strip()) + else: + FileNums.append(conf[0].strip()) + AcceptedEs.append(float(conf[1])) + + if settings.Rot5Cycle is True: + for conf in ETableRot: + if float(conf[1]) < MinE + settings.MaxCutoffEnergy: + RotFileNums.append(conf[0].strip()) + AcceptedEs.append(float(conf[1])) + + print "Number of accepted conformers by energies: " + str(len(AcceptedEs)) + + Files = [] + #Generate conformer filenames + for num in FileNums: + Files.append(TinkerOutput + '.' + num.zfill(3)) + if settings.Rot5Cycle is True: + for num in RotFileNums: + Files.append(TinkerOutput + 'rot.' 
+ num.zfill(3)) + + conformers = [] + conformer = 0 + atoms = [] + + for f in Files: + conformers.append([]) + + atom = 0 + infile = open(f, 'r') + inp = infile.readlines() + + for line in inp[1:]: + data = line.split(' ') + data = filter(None, data) + if conformer == 0: + atoms.append(GetTinkerSymbol(int(data[5]))) # Add atom symbol + conformers[conformer].append([]) # Add new atom + conformers[conformer][atom].append(data[0]) # add atom number + conformers[conformer][atom].append(data[2]) # add X + conformers[conformer][atom].append(data[3]) # add Y + conformers[conformer][atom].append(data[4]) # add Z + atom = atom + 1 # Move to the next atom + + infile.close() + conformer = conformer + 1 # Move to the next conformer + + return atoms, conformers, charge + + +# Get energies of conformers from tinker output file +def GetEnergiesCharge(TinkerOutput, settings): + + infile = open(TinkerOutput + '.tout', 'r') + + inp = infile.readlines() + if len(inp) < 56: + print "Tinker output " + TinkerOutput + " is corrupted, aborting." + quit() + + #Get the conformer energies from the file + ETable = [] + for line in inp[13:]: + data = line.split(' ') + data = filter(None, data) + if len(data) >= 3: + if 'Map' in data[0] and 'Minimum' in data[1]: + ETable.append(data[-2:]) + #print data + + infile.close() + if settings.charge is None: + if os.path.exists(TinkerOutput+'.inchi'): + return ETable, GetInchiCharge(TinkerOutput) + else: + return ETable, GetSDFCharge(TinkerOutput) + else: + return ETable, settings.charge + + +#translate Tinker atom types to element symbols for NWChem file +def GetTinkerSymbol(atomType): + + Lookup = ['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', ' ', 'C', + 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', + 'C', 'C', 'H', 'H', 'O', 'O', 'O', 'O', 'O', 'O', + 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', + 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'N', 'N', + 'N', 'N', 'N', 'N', 'N', 'F', 'Cl', 'Br', 'I', 'S', + 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', + 'Si', 'C', 'H', 'H', 'H', 'C', 'H', 'H', 'H', 'H', + 'H', 'H', 'H', 'H', 'P', 'P', 'P', 'P', 'P', 'P', + 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', + 'H', 'H', 'H', 'H', 'C', 'H', 'O', 'O', 'O', 'O', + 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', + 'O', 'H', 'N', 'O', 'O', 'H', 'H', 'H', 'H', 'H', + 'H', 'H', 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'C', + 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'S', 'N', 'N', + 'N', 'N', 'N', 'O', 'H', 'O', 'H', 'N', 'N', 'N', + 'N', 'N', 'C', 'C', 'N', 'O', 'C', 'N', 'N', 'C', + 'C', 'N', 'N', 'N', 'N', 'N', 'O', 'H', 'H', 'H', + 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'P', 'N', 'Cl', + 'C', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'N', + 'Fe', 'Fe', 'F', 'Cl', 'Br', 'Li', 'Na', 'K', 'Zn', 'Zn', + 'Ca', 'Cu', 'Cu', 'Mg'] + + if Lookup[atomType-1] == ' ': + print 'Unknown atom type' + + return Lookup[atomType-1] + + +def GetInchiCharge(inchifile): + + infile = open(inchifile + '.inchi', 'r') + inp = infile.readlines() + infile.close() + + ChargeFound = False + #Get inchi layers + layers = inp[0].split('/') + for l in layers[1:]: + if 'q' in l: + charge = int(l[1:]) + ChargeFound = True + break + if 'p' in l: + charge = int(l[1:]) + ChargeFound = True + break + if not ChargeFound: + charge = 0 + + return charge + + +def GetSDFCharge(sdf): + import sys + sys.path.append('/home/ke291/Tools/openbabel-install/lib/python2.7/site-packages/') + import openbabel + + obconversion = openbabel.OBConversion() + obconversion.SetInFormat("sdf") + obmol = openbabel.OBMol() + obconversion.ReadFile(obmol, 
sdf+'.sdf') + + return obmol.GetTotalCharge() + + +def getScriptPath(): + return os.path.dirname(os.path.realpath(sys.argv[0])) diff --git a/TreeRenum.py b/TreeRenum.py new file mode 100755 index 0000000..0eecdde --- /dev/null +++ b/TreeRenum.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Created on Tue Mar 31 14:54:17 2015 + +@author: ke291 +""" +import sys +sys.path.append('/home/ke291/Tools/openbabel-install/lib/python2.7/site-packages/') +from openbabel import * + + +def FindAllPaths(molgraph, start, end, path=[]): + path = path + [start] + if start == end: + return [path] + if start not in [x[0] for x in molgraph]: + print "No such node in graph" + return [] + paths = [] + for node in molgraph[start-1][1:]: + if node not in path: + newpaths = FindAllPaths(molgraph, node, end, path) + for newpath in newpaths: + paths.append(newpath) + return paths + + +def FindTerminatingPaths(molgraph, start, trunk, path=[]): + path = path + [start] + if start not in [x[0] for x in molgraph]: + print "No such node in graph" + return [] + paths = [] + for node in molgraph[start-1][1:]: + if node not in path and node not in trunk: + newpaths = FindTerminatingPaths(molgraph, node, trunk, path) + for newpath in newpaths: + paths.append(newpath) + if paths == []: + return [path] + else: + return paths + + +def FindTreeMap(f): + #get molecular graph + molgraph = GenMolGraph(f) + + #Find potential endpoints - atoms with only one neighbour + endpoints = [] + for node in molgraph: + if len(node) < 3: + endpoints.append(node[0]) + + #get the longest paths for all endpoint combinations + maxpaths = [] + for i in range(0, len(endpoints)): + for j in range(0, len(endpoints)): + if i != j: + maxpaths.append(max(FindAllPaths(molgraph, endpoints[i], + endpoints[j]), key=len)) + #get the absolute longest path possible in the molecule + molmap = max(maxpaths, key=len) + #Add longest branches to the longest trunk + for atom in molmap: + for node in molgraph[atom-1]: + if node not in molmap: + maxbranch = [] + branches = FindTerminatingPaths(molgraph, node, molmap) + if branches != []: + maxbranch = max(branches, key=len) + if maxbranch != []: + molmap.extend(maxbranch) + return molmap + + +def TreeRenumSDF(f, ExpNMR): + + molmap = FindTreeMap(f) + + #Read molecule from file + obconversion = OBConversion() + obconversion.SetInFormat("sdf") + obmol = OBMol() + obconversion.ReadFile(obmol, f) + + temp = [] + anums = [] + for atom in OBMolAtomIter(obmol): + temp.append(atom) + anums.append(atom.GetAtomicNum()) + + newmol = OBMol() + for atom in molmap: + newmol.AddAtom(temp[atom-1]) + + #Restore the bonds + newmol.ConnectTheDots() + newmol.PerceiveBondOrders() + #Write renumbered molecule to file + obconversion.SetOutFormat("sdf") + obconversion.WriteFile(newmol, f[:-4] + 'r.sdf') + + #Prepare NMR translation + NMRmap = [] + i = 1 + for atom in molmap: + anum = anums[atom-1] + if anum == 1: + NMRmap.append(['H' + str(atom), 'H' + str(i)]) + if anum == 6: + NMRmap.append(['C' + str(atom), 'C' + str(i)]) + i += 1 + print NMRmap + RenumNMR(ExpNMR, NMRmap) + + +def RenumNMR(ExpNMR, NMRmap): + f = open(ExpNMR, 'r') + NMRfile = f.read(1000000) + f.close() + + print '\nOld NMR file:\n' + NMRfile + + #Replace old atom labels with new atom labels + #tag replacements with '_' to avoid double replacement + for atom in NMRmap: + NMRfile = NMRfile.replace(atom[0] + ')', atom[1] + '_)') + NMRfile = NMRfile.replace(atom[0] + ' ', atom[1] + '_ ') + NMRfile = NMRfile.replace(atom[0] + ',', atom[1] + '_,') + 
+        NMRfile = NMRfile.replace(atom[0] + '\n', atom[1] + '_\n')
+
+    #Strip temporary underscore tags
+    NMRfile = NMRfile.replace('_', '')
+
+    print '\nNew NMR file:\n' + NMRfile
+    f = open(ExpNMR + 'r', 'w')
+    f.write(NMRfile)
+    f.close()
+
+
+def GenMolGraph(f):
+    obconversion = OBConversion()
+    obconversion.SetInFormat("sdf")
+    obmol = OBMol()
+    obconversion.ReadFile(obmol, f)
+
+    molgraph = []
+
+    for atom in OBMolAtomIter(obmol):
+        idx = atom.GetIdx()
+        molgraph.append([])
+        molgraph[idx-1].append(idx)
+
+        for NbrAtom in OBAtomAtomIter(atom):
+            molgraph[idx-1].append(NbrAtom.GetIdx())
+
+    return molgraph
+
+if __name__ == '__main__':
+    #print sys.argv
+    ExpNMR = sys.argv[2]
+    filename = sys.argv[1] + '.sdf'
+    TreeRenumSDF(filename, ExpNMR)
+    #main()
\ No newline at end of file
diff --git a/default.com b/default.com
new file mode 100644
index 0000000..a8e1a29
--- /dev/null
+++ b/default.com
@@ -0,0 +1,18 @@
+confsearch.mae
+confsearch-out.maegz
+ MMOD 0 1 0 0 0.0000 0.0000 0.0000 0.0000
+ FFLD 10 1 0 0 1.0000 0.0000 0.0000 0.0000
+ BDCO 0 0 0 0 41.5692 99999.0000 0.0000 0.0000
+ READ 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ CRMS 0 0 0 0 0.0000 0.2500 0.0000 0.0000
+ LMCS 5000 0 0 0 0.0000 0.0000 3.0000 6.0000
+ NANT 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ MCNV 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ MCSS 2 0 0 0 21.0000 0.0000 0.0000 0.0000
+ MCOP 1 0 0 0 0.5000 0.0000 0.0000 0.0000
+ DEMX 0 333333 0 0 21.0000 42.0000 0.0000 0.0000
+ MSYM 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ AUOP 0 0 0 0 500.0000 0.0000 0.0000 0.0000
+ AUTO 0 2 1 1 0.0000 -1.0000 0.0000 2.0000
+ CONV 2 0 0 0 0.0010 0.0000 0.0000 0.0000
+ MINI 1 0 999999 0 0.0000 0.0000 0.0000 0.0000
diff --git a/default.key b/default.key
new file mode 100644
index 0000000..2feb51d
--- /dev/null
+++ b/default.key
@@ -0,0 +1,11 @@
+
+# Force Field Selection
+PARAMETERS /home/ke291/tinker/params/mmff.prm
+
+# Random Number
+RANDOMSEED 123456789
+
+# Constraint And Restraint
+ENFORCE-CHIRALITY
+
+
diff --git a/default1.com b/default1.com
new file mode 100644
index 0000000..9468638
--- /dev/null
+++ b/default1.com
@@ -0,0 +1,17 @@
+default1.mae
+default1-out.maegz
+ MMOD 0 1 0 0 0.0000 0.0000 0.0000 0.0000
+ FFLD 10 1 0 0 1.0000 0.0000 0.0000 0.0000
+ BDCO 0 0 0 0 41.5692 99999.0000 0.0000 0.0000
+ READ 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ CRMS 0 0 0 0 0.0000 0.2500 0.0000 0.0000
+ LMCS 6000 0 0 0 0.0000 0.0000 3.0000 6.0000
+ MCNV 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ MCSS 2 0 0 0 21.0000 0.0000 0.0000 0.0000
+ MCOP 1 0 0 0 0.5000 0.0000 0.0000 0.0000
+ DEMX 0 333333 0 0 21.0000 42.0000 0.0000 0.0000
+ MSYM 0 0 0 0 0.0000 0.0000 0.0000 0.0000
+ AUOP 0 0 0 0 300.0000 0.0000 0.0000 0.0000
+ AUTO 0 2 1 1 0.0000 -1.0000 0.0000 3.0000
+ CONV 2 0 0 0 0.0010 0.0000 0.0000 0.0000
+ MINI 1 0 999999 0 0.0000 0.0000 0.0000 0.0000
diff --git a/nmrPredictGaussian.class b/nmrPredictGaussian.class
new file mode 100644
index 0000000..197792a
Binary files /dev/null and b/nmrPredictGaussian.class differ
diff --git a/nmrPredictGaussian.java b/nmrPredictGaussian.java
new file mode 100644
index 0000000..1d7da3e
--- /dev/null
+++ b/nmrPredictGaussian.java
@@ -0,0 +1,208 @@
+import java.io.*;
+import java.util.*;
+import java.math.*;
+
+
+/*
+This program reads Gaussian output files and pulls out the NMR shielding constants for each atom in each conformer. It also extracts energies and calculates a Boltzmann-averaged shielding constant for each atom.
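+
+(Illustrative sketch only; sigma_i, E_i and E_min below are not identifiers from this file.
+The Boltzmann averaging referred to above has the standard form
+
+    sigma_avg = sum_i sigma_i * w_i,   w_i = exp(-(E_i - E_min)/(R*T)) / sum_j exp(-(E_j - E_min)/(R*T))
+
+where E_i is the energy of conformer i, R the gas constant and T the temperature; see the
+gasConstant, temperature and hartreeEnergy constants declared below.)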
+
+Input syntax is the names of the output files without the .out:
+
+java nmrPredictGaussian myMoleculeCnf001 myMoleculeCnf002 myMoleculeCnf003 ....
+
+(or java nmrPredictGaussian `ls myMoleculeCnf*.out | cut -d"." -f1`)
+
+The program prints the output as a comma-separated list suitable for pasting into a spreadsheet.
+
+*/
+
+
+class nmrPredictGaussian
+{
+
+    static double gasConstant = 8.3145;
+    static double temperature = 298.15;
+    static double hartreeEnergy = 2625.499629554010;
+
+
+    public static void main(String[] args)
+    {
+        int nstructs = args.length;
+        int natoms = 0;
+
+        String[] fileName = new String[nstructs];
+        for(int i=0;i