From ee743f593e2f3556ae602400a979ca073e94eb2d Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 2 May 2023 10:29:01 -0700 Subject: [PATCH 01/30] Updates to internal.py and molecule.py for interpolation --- geometric/internal.py | 696 ++++++++++++++++++++++++++++++++++++------ geometric/molecule.py | 55 +++- 2 files changed, 639 insertions(+), 112 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index d1b970b6..2bbc12d8 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -46,7 +46,7 @@ from geometric.molecule import Molecule, Elements, Radii from geometric.nifty import click, commadash, ang2bohr, bohr2ang, logger, pvec1d, pmat2d -from geometric.rotate import get_expmap, get_expmap_der, calc_rot_vec_diff, get_quat, build_F, sorted_eigh +from geometric.rotate import get_expmap, get_expmap_der, calc_rot_vec_diff, get_quat, build_F, sorted_eigh, is_linear ## Some vector calculus functions def unit_vector(a): @@ -154,7 +154,7 @@ def calcDiff(self, xyz1, xyz2=None, val2=None): """ Return the difference of the internal coordinate calculated for c(xyz1) - c(xyz2) or c(xyz1) - val2. - + Parameters ---------- xyz1 : np.ndarray @@ -188,7 +188,12 @@ def calcDiff(self, xyz1, xyz2=None, val2=None): diff = Minus2Pi diff *= w return diff - + + def diagnostic(self, xyz): + result = 0 + message = "Diagnostic message not implemented for this coordinate" + return result, message + class CartesianX(PrimitiveCoordinate): def __init__(self, a, w=1.0): self.a = a @@ -203,8 +208,8 @@ def __repr__(self): def __eq__(self, other): if type(self) is not type(other): return False eq = self.a == other.a - if eq and self.w != other.w: - logger.warning("Warning: CartesianX same atoms, different weights (%.4f %.4f)\n" % (self.w, other.w)) + # if eq and self.w != other.w: + # logger.warning("Warning: CartesianX same atoms, different weights (%.4f %.4f)\n" % (self.w, other.w)) return eq def __ne__(self, other): @@ -240,8 +245,8 @@ def __repr__(self): def __eq__(self, other): if type(self) is not type(other): return False eq = self.a == other.a - if eq and self.w != other.w: - logger.warning("Warning: CartesianY same atoms, different weights (%.4f %.4f)\n" % (self.w, other.w)) + # if eq and self.w != other.w: + # logger.warning("Warning: CartesianY same atoms, different weights (%.4f %.4f)\n" % (self.w, other.w)) return eq def __ne__(self, other): @@ -277,8 +282,8 @@ def __repr__(self): def __eq__(self, other): if type(self) is not type(other): return False eq = self.a == other.a - if eq and self.w != other.w: - logger.warning("Warning: CartesianZ same atoms, different weights (%.4f %.4f)\n" % (self.w, other.w)) + # if eq and self.w != other.w: + # logger.warning("Warning: CartesianZ same atoms, different weights (%.4f %.4f)\n" % (self.w, other.w)) return eq def __ne__(self, other): @@ -302,8 +307,8 @@ def second_derivative(self, xyz): class TranslationX(PrimitiveCoordinate): def __init__(self, a, w): - self.a = a - self.w = w + self.a = a[:] + self.w = w.copy() assert len(a) == len(w) self.isAngular = False self.isPeriodic = False @@ -315,9 +320,9 @@ def __repr__(self): def __eq__(self, other): if type(self) is not type(other): return False eq = set(self.a) == set(other.a) - if eq and np.sum((self.w-other.w)**2) > 1e-6: - logger.warning("Warning: TranslationX same atoms, different weights\n") - eq = False + # if eq and np.sum((self.w-other.w)**2) > 1e-6: + # logger.warning("Warning: TranslationX same atoms, different weights\n") + # eq = False return eq def __ne__(self, other): @@ -352,10 +357,10 @@ def second_derivative(self, xyz): deriv2 = np.zeros((xyz.shape[0], xyz.shape[1], xyz.shape[0], xyz.shape[1])) return deriv2 -class TranslationY(object): +class TranslationY(PrimitiveCoordinate): def __init__(self, a, w): - self.a = a - self.w = w + self.a = a[:] + self.w = w.copy() assert len(a) == len(w) self.isAngular = False self.isPeriodic = False @@ -367,9 +372,9 @@ def __repr__(self): def __eq__(self, other): if type(self) is not type(other): return False eq = set(self.a) == set(other.a) - if eq and np.sum((self.w-other.w)**2) > 1e-6: - logger.warning("Warning: TranslationY same atoms, different weights\n") - eq = False + # if eq and np.sum((self.w-other.w)**2) > 1e-6: + # logger.warning("Warning: TranslationY same atoms, different weights\n") + # eq = False return eq def __ne__(self, other): @@ -406,8 +411,8 @@ def second_derivative(self, xyz): class TranslationZ(PrimitiveCoordinate): def __init__(self, a, w): - self.a = a - self.w = w + self.a = a[:] + self.w = w.copy() assert len(a) == len(w) self.isAngular = False self.isPeriodic = False @@ -419,9 +424,9 @@ def __repr__(self): def __eq__(self, other): if type(self) is not type(other): return False eq = set(self.a) == set(other.a) - if eq and np.sum((self.w-other.w)**2) > 1e-6: - logger.warning("Warning: TranslationZ same atoms, different weights\n") - eq = False + # if eq and np.sum((self.w-other.w)**2) > 1e-6: + # logger.warning("Warning: TranslationZ same atoms, different weights\n") + # eq = False return eq def __ne__(self, other): @@ -762,16 +767,21 @@ def second_derivative(self, xyz): return second_derivatives class Distance(PrimitiveCoordinate): - def __init__(self, a, b): + def __init__(self, a, b, rab=1.0, n=1.0): self.a = a self.b = b + self.rab = rab + self.n = n if a == b: raise RuntimeError('a and b must be different') self.isAngular = False self.isPeriodic = False def __repr__(self): - return "Distance %i-%i" % (self.a+1, self.b+1) + if self.rab != 1.0 or self.n != 1.0: + return "Distance %i-%i (rab=%.3f n=%.3f)" % (self.a+1, self.b+1, self.rab, self.n) + else: + return "Distance %i-%i" % (self.a+1, self.b+1) def __eq__(self, other): if type(self) is not type(other): return False @@ -790,32 +800,186 @@ def value(self, xyz): xyz = xyz.reshape(-1,3) a = self.a b = self.b - return np.sqrt(np.sum((xyz[a]-xyz[b])**2)) + rab = self.rab + n = self.n + r = np.linalg.norm(xyz[a] - xyz[b]) + answer = (r/rab)**n + return answer def derivative(self, xyz): xyz = xyz.reshape(-1,3) + a = self.a + b = self.b + rab = self.rab + n = self.n + r = np.linalg.norm(xyz[a] - xyz[b]) derivatives = np.zeros_like(xyz) - m = self.a - n = self.b - u = (xyz[m] - xyz[n]) / np.linalg.norm(xyz[m] - xyz[n]) - derivatives[m, :] = u - derivatives[n, :] = -u + u = (xyz[a] - xyz[b]) / r + prefactor = n * r ** (n-1) / rab ** n + derivatives[a, :] = u * prefactor + derivatives[b, :] = -u * prefactor return derivatives def second_derivative(self, xyz): xyz = xyz.reshape(-1,3) + a = self.a + b = self.b + rab = self.rab + n = self.n + r = np.linalg.norm(xyz[a] - xyz[b]) deriv2 = np.zeros((xyz.shape[0], xyz.shape[1], xyz.shape[0], xyz.shape[1])) - m = self.a - n = self.b - l = np.linalg.norm(xyz[m] - xyz[n]) - u = (xyz[m] - xyz[n]) / l - mtx = (np.outer(u, u) - np.eye(3))/l - deriv2[m, :, m, :] = -mtx - deriv2[n, :, n, :] = -mtx - deriv2[m, :, n, :] = mtx - deriv2[n, :, m, :] = mtx + u = (xyz[a] - xyz[b]) / r + mtx = (np.outer(u, u) - np.eye(3)) / r + prefactor = n * r ** (n-1) / rab ** n + prefactor2 = n * (n-1) * r ** (n-2) / rab ** n + deriv2[a, :, a, :] = -mtx * prefactor + np.outer(u, u) * prefactor2 + deriv2[b, :, b, :] = -mtx * prefactor + np.outer(u, u) * prefactor2 + deriv2[a, :, b, :] = mtx * prefactor - np.outer(u, u) * prefactor2 + deriv2[b, :, a, :] = mtx * prefactor - np.outer(u, u) * prefactor2 return deriv2 - + +class DistanceDifference(PrimitiveCoordinate): + def __init__(self, a, b, c): + self.a = a + self.b = b + self.c = c + self.isAngular = False + self.isPeriodic = False + + def __repr__(self): + return "DistanceDifference %i-%i-%i" % (self.a+1, self.b+1, self.c+1) + + def __eq__(self, other): + if type(self) is not type(other): return False + if self.b == other.b: + if self.a == other.a: + if self.c == other.c: + return True + if self.a == other.c: + if self.c == other.a: + return True + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def value(self, xyz): + xyz = xyz.reshape(-1, 3) + a = self.a + b = self.b + c = self.c + r1 = np.linalg.norm(xyz[a]-xyz[b]) + r2 = np.linalg.norm(xyz[b]-xyz[c]) + # print("DistanceDifference %i-%i-%i Value=%.5f" % (self.a+1, self.b+1, self.c+1, r1-r2)) + return r1-r2 + + def derivative(self, xyz): + xyz = xyz.reshape(-1, 3) + derivatives = np.zeros_like(xyz) + a = self.a + b = self.b + c = self.c + + u = (xyz[a] - xyz[b]) / np.linalg.norm(xyz[a] - xyz[b]) + v = (xyz[b] - xyz[c]) / np.linalg.norm(xyz[b] - xyz[c]) + + derivatives[a,:] = u + derivatives[b,:] = -u-v + derivatives[c,:] = v + + return derivatives + +class ReducedDistance(PrimitiveCoordinate): + def __init__(self, a, b, c, rab=1.0, rbc=1.0, n=1.0): + self.a = a + self.b = b + self.c = c + # Equilibrium bond length between a and b + self.rab = rab + # Equilibrium bond length between b and c + self.rbc = rbc + # "Power" to use when computing the IC (higher power gives a more square 'surface') + self.n = n + self.isAngular = False + self.isPeriodic = False + + def __repr__(self): + return "ReducedDistance %i-%i-%i" % (self.a + 1, self.b + 1, self.c + 1) + + def __eq__(self, other): + if type(self) is not type(other): return False + if self.b == other.b: + if self.a == other.a: + if self.c == other.c: + return True + if self.a == other.c: + if self.c == other.a: + return True + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def value(self, xyz): + xyz = xyz.reshape(-1, 3) + a = self.a + b = self.b + c = self.c + n = self.n + rab = self.rab + rbc = self.rbc + r1 = np.linalg.norm(xyz[b] - xyz[a]) + r2 = np.linalg.norm(xyz[b] - xyz[c]) + r1n = r1/rab + r2n = r2/rbc + answer = r1n**n * r2n**n/(r1n**n + r2n**n) + # print("ReducedDistance %2i-%2i-%2i" % (self.a+1, self.b+1, self.c+1), end=' ') + # print("Dist Cov Ratio a-b: %8.3f %8.3f %8.3f" % (r1*bohr2ang, rab*bohr2ang, r1/rab), end=' ') + # print("b-c: %8.3f %8.3f %8.3f" % (r2*bohr2ang, rbc*bohr2ang, r2/rbc), end=' ') + # print("Value: %8.3f" % answer) + return answer + + def derivative(self, xyz): + xyz = xyz.reshape(-1, 3) + derivatives = np.zeros_like(xyz) + a = self.a + b = self.b + c = self.c + n = self.n + rab = self.rab + rbc = self.rbc + + u_vec = xyz[b] - xyz[a] + v_vec = xyz[b] - xyz[c] + r1 = np.linalg.norm(u_vec) + r2 = np.linalg.norm(v_vec) + + # Unit vector that points from a to b, equal to dr1/dxyz[b], or -dr1/dxyz[a] + eu = u_vec/r1 + # Unit vector that points from c to b, equal to dr1/dxyz[b], or -dr1/dxyz[c] + ev = v_vec/r2 + + # Numerator in applying the quotient rule + numer = r1**n * r2**n + # Denominator in applying the quotient rule + denom = rbc**n * r1**n + rab**n * r2**n + # This is f'g/g^2 in the quotient rule + derivatives[a, :] -= n*(r1**(n-1) * r2**n * eu)/denom + derivatives[b, :] += n*(r1**(n-1) * r2**n * eu + r1**n * r2**(n-1) * ev)/denom + derivatives[c, :] -= n*(r1**n * r2**(n-1) * ev)/denom + # This is -fg'/g^2 in the quotient rule + # Double negative for [a, :] comes from the -fg' and the -eu + derivatives[a, :] += numer*n*(rbc**n * r1**(n-1) * eu)/denom**2 + derivatives[b, :] -= numer*n*(rbc**n * r1**(n-1) * eu + rab**n * r2**(n-1) * ev)/denom**2 + derivatives[c, :] += numer*n*(rab**n * r2**(n-1) * ev)/denom**2 + # r1r2 = r1+r2 + + # derivatives[a, :] = (-r2/r1*u_vec*r1r2 + r2*u_vec)/r1r2**2 + # derivatives[b, :] = ((r2/r1*u_vec + r1/r2*v_vec)*r1r2 - r2*u_vec - r1*v_vec)/r1r2**2 + # derivatives[c, :] = (-r1/r2*v_vec*r1r2 + r1*v_vec)/r1r2**2 + + return derivatives + class Angle(PrimitiveCoordinate): def __init__(self, a, b, c): self.a = a @@ -1148,6 +1312,27 @@ def second_derivative(self, xyz): fderiv = (FPlus-FMinus)/(2*h) deriv2[ii, j, :, :] = fderiv return deriv2 + + def diagnostic(self, xyz): + # result = 3 or above: failure of the IC system is imminent + # result = 2: IC may fail for small perturbations from the provided geometry + # result = 1: IC is unsuitable for the provided geometry, should pay attention + LinThres = [0.5, 0.3, 0.1] + descrips = ["close to nonlinear", "very nonlinear", "extremely nonlinear"] + a = self.a + b = self.b + c = self.c + Ang1 = Angle(a,b,c) + result = 0 + message = "no problems detected" + cos1 = np.abs(np.cos(Ang1.value(xyz))) + for i in range(len(LinThres)): + thre = LinThres[i] + desc = descrips[i] + if cos1 < thre: + result = i+1 + message = "%s angle is %s (|cos(theta)| %.4f < %.4f threshold)" % (repr(Ang1), desc, cos1, thre) + return result, message class MultiAngle(PrimitiveCoordinate): # pragma: no cover def __init__(self, a, b, c): @@ -1450,6 +1635,37 @@ def zeta(a_, m_, n_): # printTerm('1x1y', 3.33702e-01) # printTerm('1x1z', 5.90389e-02) + def diagnostic(self, xyz): + # result = 3 or above: failure of the IC system is imminent + # result = 2: IC may fail for small perturbations from the provided geometry + # result = 1: IC is unsuitable for the provided geometry, should pay attention + LinThres = [0.95, 0.98, 0.995] + descrips = ["close to linear", "very close to linear", "extremely close to linear"] + a = self.a + b = self.b + c = self.c + d = self.d + Ang1 = Angle(a,b,c) + Ang2 = Angle(b,c,d) + result = 0 + message = "no problems detected" + cos1 = np.abs(np.cos(Ang1.value(xyz))) + cos2 = np.abs(np.cos(Ang2.value(xyz))) + cos3 = np.abs(np.dot(Ang1.normal_vector(xyz), Ang2.normal_vector(xyz))) + for i in range(len(LinThres)): + thre = LinThres[i] + desc = descrips[i] + if cos1 > thre and cos2 > thre: + result = i+2 + message = "%s and %s angles are %s (|cos(theta)| %.4f, %.4f > %.4f threshold)" % (repr(Ang1), repr(Ang2), desc, cos1, cos2, thre) + elif cos1 > thre: + result = i+1 + message = "%s angle is %s (|cos(theta)| %.4f > %.4f threshold)" % (repr(Ang1), desc, cos1, thre) + elif cos2 > thre: + result = i+1 + message = "%s angle is %s (|cos(theta)| %.4f > %.4f threshold)" % (repr(Ang2), desc, cos2, thre) + return result, message + class MultiDihedral(PrimitiveCoordinate): # pragma: no cover def __init__(self, a, b, c, d): if type(a) is int: @@ -1571,8 +1787,8 @@ def __eq__(self, other): if type(self) is not type(other): return False if self.a == other.a: if {self.b, self.c, self.d} == {other.b, other.c, other.d}: - if [self.b, self.c, self.d] != [other.b, other.c, other.d]: - logger.warning("Warning: OutOfPlane atoms are the same, ordering is different\n") + # if [self.b, self.c, self.d] != [other.b, other.c, other.d]: + # logger.warning("Warning: OutOfPlane atoms are the same, ordering is different\n") return True # if self.b == other.b: # if self.c == other.c: @@ -1663,6 +1879,37 @@ def second_derivative(self, xyz): deriv2[ii, j, :, :] = fderiv return deriv2 + def diagnostic(self, xyz): + # result = 3 or above: failure of the IC system is imminent + # result = 2: IC may fail for small perturbations from the provided geometry + # result = 1: IC is unsuitable for the provided geometry, should pay attention + LinThres = [0.95, 0.98, 0.99] + descrips = ["close to linear", "very close to linear", "extremely close to linear"] + a = self.a + b = self.b + c = self.c + d = self.d + Ang1 = Angle(a,b,c) + Ang2 = Angle(b,c,d) + result = 0 + message = "no problems detected" + cos1 = np.abs(np.cos(Ang1.value(xyz))) + cos2 = np.abs(np.cos(Ang2.value(xyz))) + cos3 = np.abs(np.dot(Ang1.normal_vector(xyz), Ang2.normal_vector(xyz))) + for i in range(len(LinThres)): + thre = LinThres[i] + desc = descrips[i] + if cos1 > thre and cos2 > thre: + result = i+2 + message = "%s and %s angles are %s (|cos(theta)| %.4f, %.4f > %.4f threshold)" % (repr(Ang1), repr(Ang2), desc, cos1, cos2, thre) + elif cos1 > thre: + result = i+1 + message = "%s angle is %s (|cos(theta)| %.4f > %.4f threshold)" % (repr(Ang1), desc, cos1, thre) + elif cos2 > thre: + result = i+1 + message = "%s angle is %s (|cos(theta)| %.4f > %.4f threshold)" % (repr(Ang2), desc, cos2, thre) + return result, message + def convert_angstroms_degrees(prims, values): """ Convert values of primitive ICs (or differences) from weighted atomic units to Angstroms and degrees. """ @@ -1740,6 +1987,8 @@ def GInverse_SVD(self, xyz): # Perform singular value decomposition click() loops = 0 + # The expected number of nonzero eigenvalues. + Expect = self.expected_dof() while True: try: G = self.GMatrix(xyz) @@ -1760,15 +2009,36 @@ def GInverse_SVD(self, xyz): Sinv = np.zeros_like(S) LargeVals = 0 for ival, value in enumerate(S): - # print "%.5e % .5e" % (ival,value) + # if ival == (3*xyz.shape[0]-1): + # print("SVD: %5i % .5e" % (ival,value)) if np.abs(value) > 1e-6: LargeVals += 1 Sinv[ival] = 1/value - # print "%i atoms; %i/%i singular values are > 1e-6" % (xyz.shape[0], LargeVals, len(S)) + # print("SVD Condition Number: % .5e" % (S[0]/S[3*xyz.shape[0]-1])) + # if LargeVals < 3*xyz.shape[0]: + # print("%3i degrees of freedom; %i/%i singular values are > 1e-6" % (3*xyz.shape[0], LargeVals, len(S))) Sinv = np.diag(Sinv) Inv = multi_dot([V, Sinv, UT]) + if G.shape[0] < Expect: + # Return a very large number if the G-matrix dimension is actually smaller than expected. + # This can happen if we lost a DoF at the stage of forming the DLCs. + self.GCond_Inverse = S[0]/S[-1] + else: + self.GCond_Inverse = S[0]/S[Expect-1] return Inv + def G_condition(self, xyz): + # Calculate the condition number of the G matrix. + G = self.GMatrix(xyz) + # The expected number of nonzero eigenvalues. + # Expect = self.expected_dof() + # if G.shape[0] < Expect: + # # Return a very large number if the G-matrix dimension is actually smaller than expected. + # # This can happen if we lost a DoF at the stage of forming the DLCs. + # return 1e15 + U, S, VT = np.linalg.svd(G) + return S[0]/S[-1] + def GInverse_EIG(self, xyz): # pragma: no cover # Currently unused function, but could possibly speed up calculations # if used instead of SVD. Needs testing for reliability. @@ -1930,65 +2200,86 @@ def writeCache(self, xyz, dQ, newxyz): self.stored_newxyz = newxyz.copy() def newCartesian(self, xyz, dQ, verbose=True): + # Perform iterations to find new coordinates starting from xyz + # with the requested internal coordinate change dQ. cached = self.readCache(xyz, dQ) if cached is not None: - # print "Returning cached result" return cached + # The coordinates at the start of each iteration xyz1 = xyz.copy() + # The required IC change at the start of each iteration dQ1 = dQ.copy() - # Iterate until convergence: + # Iteration counter microiter = 0 - ndqs = [] + # The history of the required IC changes; should converge with iteration nr. + ndqs = [np.linalg.norm(dQ)] + # The current-best (smallest) IC change out of all iterations taken + ndqt = ndqs[0] + # The RMSD of the Cartesian step taken rmsds = [] + # Whether we failed to get the desired step self.bork = False # Damping factor damp = 1.0 # Function to exit from loop - if verbose >= 2: logger.info(" InternalCoordinates.newCartesian converting internal to Cartesian step\n") + if verbose >= 2: logger.info(" InternalCoordinates.newCartesian converting internal to Cartesian step (desired |dQ| = %.3e)\n" % np.linalg.norm(dQ)) def finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1): + # This function returns the result, and is called from inside the iterations. if ndqt > 1e-1: - if verbose: logger.info(" newCartesian Iter: %i Failed to obtain coordinates (rmsd = %.3e |dQ| = %.3e)\n" % (microiter, rmsdt, ndqt)) + if verbose: logger.info(" newCartesian Iter: %i Failed to obtain coordinates (rmsd = %.3e Err-dQ = %.3e)\n" % (microiter, rmsdt, ndqt)) self.bork = True self.writeCache(xyz, dQ, xyz_iter1) + # Return the result after taking just one step. return xyz_iter1.flatten() elif ndqt > 1e-3: - if verbose: logger.info(" newCartesian Iter: %i Approximate coordinates obtained (rmsd = %.3e |dQ| = %.3e)\n" % (microiter, rmsdt, ndqt)) + if verbose: logger.info(" newCartesian Iter: %i Approximate coordinates obtained (rmsd = %.3e Err-dQ = %.3e)\n" % (microiter, rmsdt, ndqt)) else: - if verbose: logger.info(" newCartesian Iter: %i Cartesian coordinates obtained (rmsd = %.3e |dQ| = %.3e)\n" % (microiter, rmsdt, ndqt)) + if verbose: logger.info(" newCartesian Iter: %i Cartesian coordinates obtained (rmsd = %.3e Err-dQ = %.3e)\n" % (microiter, rmsdt, ndqt)) + # Return the final result. self.writeCache(xyz, dQ, xyzsave) return xyzsave.flatten() fail_counter = 0 + # Start iterations. while True: microiter += 1 + # Get generalized inverse of Wilson B-matrix Bmat = self.wilsonB(xyz1) Ginv = self.GInverse(xyz1) # Get new Cartesian coordinates dxyz = damp*multi_dot([Bmat.T,Ginv,dQ1.T]) xyz2 = xyz1 + np.array(dxyz).flatten() + rmsd = np.sqrt(np.mean((np.array(xyz2-xyz1).flatten())**2)) + # The coordinates after the first step are saved no matter good or bad. if microiter == 1: xyzsave = xyz2.copy() xyz_iter1 = xyz2.copy() + rmsdt = rmsd # Calculate the actual change in internal coordinates dQ_actual = self.calcDiff(xyz2, xyz1) - rmsd = np.sqrt(np.mean((np.array(xyz2-xyz1).flatten())**2)) + if hasattr(self, 'Prims'): + Prims = self.Prims + else: + Prims = self + dQ_Prims = Prims.calcDiff(xyz2, xyz1) + # dQ1-dQ_actual is the remaining IC step needed to achieve convergence ndq = np.linalg.norm(dQ1-dQ_actual) - if len(ndqs) > 0: - if ndq > ndqt: - if verbose >= 2: logger.info(" newCartesian Iter: %i Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %.5e (Bad)\n" % (microiter, ndq, ndqt, rmsd, damp)) - damp /= 2 - fail_counter += 1 - # xyz2 = xyz1.copy() - else: - if verbose >= 2: logger.info(" newCartesian Iter: %i Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %.5e (Good)\n" % (microiter, ndq, ndqt, rmsd, damp)) - fail_counter = 0 - damp = min(damp*1.2, 1.0) - rmsdt = rmsd - ndqt = ndq - xyzsave = xyz2.copy() + if ndq > ndqt: + # Bad result: |dQ1-dQ_actual| increases from previous iteration + # Discard the IC step and retry with damping. + if verbose >= 2: logger.info(" newCartesian Iter: %i |dQ(Step)| = %.5e Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %.5e Gcond: %.5e (Bad)\n" + % (microiter, np.linalg.norm(dQ_actual), ndq, ndqt, rmsd, damp, self.GCond_Inverse)) + damp /= 2 + fail_counter += 1 + xyz2 = xyz1.copy() + dQ_actual *= 0 else: - if verbose >= 2: logger.info(" newCartesian Iter: %i Err-dQ = %.5e RMSD: %.5e Damp: %.5e\n" % (microiter, ndq, rmsd, damp)) + if verbose >= 2: logger.info(" newCartesian Iter: %i |dQ(Step)| = %.5e Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %.5e Gcond: %.5e (Good)\n" + % (microiter, np.linalg.norm(dQ_actual), ndq, ndqt, rmsd, damp, self.GCond_Inverse)) + fail_counter = 0 + damp = min(damp*1.2, 1.0) rmsdt = rmsd ndqt = ndq + xyzsave = xyz2.copy() ndqs.append(ndq) rmsds.append(rmsd) # Check convergence / fail criteria @@ -1998,18 +2289,26 @@ def finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1): return finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1) if microiter == 50: return finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1) - # Figure out the further change needed + # The next IC step required to reach convergence. dQ1 = dQ1 - dQ_actual xyz1 = xyz2.copy() - + class PrimitiveInternalCoordinates(InternalCoordinates): - def __init__(self, molecule, connect=False, addcart=False, constraints=None, cvals=None, **kwargs): + def __init__(self, molecule, connect=False, addcart=False, constraints=None, cvals=None, repulsions=[], transfers=[], connect_isolated=True, **kwargs): super(PrimitiveInternalCoordinates, self).__init__() self.connect = connect self.addcart = addcart + self.connect_isolated = connect_isolated self.Internals = [] + self.repulsions = repulsions + # self.nbpairs = nbpairs + # Atom transfer from donor to acceptor + # This should be a list of 3-tuples as (D, H, A) + # where D-H......A becomes D......H-A + self.transfers = transfers self.cPrims = [] self.cVals = [] + self.sync = [] self.Rotators = OrderedDict() self.elem = molecule.elem # List of fragments as determined by residue ID, distance criteria or bond order @@ -2022,11 +2321,33 @@ def __init__(self, molecule, connect=False, addcart=False, constraints=None, cva # Note that reorderPrimitives() _must_ be updated with each new InternalCoordinate class written. self.reorderPrimitives() + def expected_dof(self, xyz=None, linear_check=False): + """ Return the expected number of nonredundant degrees of freedom. """ + natoms = len(self.elem) + if self.addcart or (not self.connect): + # if self.addcart is true, then a full set of Cartesian coords is added (HDLC). + # if self.connect is false, then it is either HDLC (addcart=True) or TRIC (addcart=False). + # In these cases we expect 3*natoms degrees of freedom. + return 3*natoms + else: + # These situations correspond to ICs that do not span overall translation and rotation. + if natoms == 2 or (linear_check and is_linear(xyz, xyz)): + # When the entire system is linear, there are 3n-5 DoFs + return 3*natoms-5 + else: + # Otherwise, there are 3n-6 DoFs + return 3*natoms-6 + def makePrimitives(self, molecule, connect, addcart): # force_bonds=False is set because we don't want to override # bond order-based bonds that may have been obtained earlier. molecule.build_topology(force_bonds=False) - connect_isolated = True + transfer_atoms = [] + transfer_bonds = [] + for (d, h, a) in self.transfers: + transfer_bonds.append((min(d, h), max(d, h))) + transfer_bonds.append((min(a, h), max(a, h))) + transfer_atoms.append(h) if 'resid' in molecule.Data.keys(): # Create fragments corresponding to unique resID numbers if provided. frags = [] @@ -2047,11 +2368,12 @@ def makePrimitives(self, molecule, connect, addcart): else: # Create fragments based on connectivity in provided molecule object. frags = [list(m.nodes()) for m in molecule.molecules] - if connect_isolated: + if self.connect_isolated: isolates = [list(m.nodes())[0] for m in molecule.molecules if len(m.nodes()) == 1] for i in isolates: + # if i in transfer_atoms: continue j = molecule.get_closest_atom(i, pbc=False)[0] - logger.info("Creating artifical bond to isolated atom: %i-%i\n" % (i+1, j+1)) + # logger.info("Creating artifical bond to isolated atom: %i-%i\n" % (i+1, j+1)) molecule.bonds.append((i, j)) old_read_bonds = molecule.top_settings['read_bonds'] molecule.top_settings['read_bonds'] = True @@ -2165,7 +2487,60 @@ def makePrimitives(self, molecule, connect, addcart): # Add an internal coordinate for bonded atom pairs for (a, b) in molecule.topology.edges(): + bond = (min(a, b), max(a, b)) + if bond in transfer_bonds: + continue self.add(Distance(a, b)) + + for (a, b) in self.repulsions: + (a, b) = (min(a, b), max(a, b)) + if a >= len(self.elem) or b >= len(self.elem): + raise RuntimeError("The provided repulsion %i,%i is greater than the number of atoms in the system" % (a, b)) + rab = (Radii[Elements.index(self.elem[a])-1] + Radii[Elements.index(self.elem[b])-1])*ang2bohr + if Distance(a, b) in self.Internals: + self.delete(Distance(a, b)) + # raise RuntimeError("Repulsion %i-%i is in existing list of Distances" % (a, b)) + # Replace the distance with a repulsion + # This has caused lack of convergence issues, so it's commented out. + # Prim = self.Internals[self.Internals.index((Distance(a, b)))] + # Prim.rab = 4*rab + # Prim.n = -1 + # else: + self.add(Distance(a, b, rab=4*rab, n=-1)) + # self.add(Exponential(a, b, rab=1.6*rab, alpha=1.6, w=1.0)) + for TranslationType in [TranslationX, TranslationY, TranslationZ]: + APrims = [] + BPrims = [] + for Prim in [p for p in self.Internals if type(p) is TranslationType]: + if a in Prim.a: + APrims.append(Prim) + if b in Prim.a: + BPrims.append(Prim) + if len(APrims) != 1 or len(BPrims) != 1: continue + APrim = APrims[0] + BPrim = BPrims[0] + if APrim.a != BPrim.a: + if len(APrim.a) >= len(BPrim.a): + BPrim.a.append(a) + BPrim.w = np.append(BPrim.w, -1.0) + # BPrim.a += APrim.a[:] + # BPrim.w = np.append(BPrim.w, -APrim.w) + # print("New translation coordinate:") + # print(BPrim.a, BPrim.w) + else: + APrim.a.append(b) + APrim.w = np.append(APrim.w, -1.0) + # APrim.a += BPrim.a[:] + # APrim.w = np.append(APrim.w, -BPrim.w) + # print("New translation coordinate:") + # print(APrim.a, APrim.w) + + for (a, b, c) in self.transfers: + self.add(DistanceDifference(a, b, c)) + rab = (Radii[Elements.index(self.elem[a])-1] + Radii[Elements.index(self.elem[b])-1])*ang2bohr + rbc = (Radii[Elements.index(self.elem[b])-1] + Radii[Elements.index(self.elem[c])-1])*ang2bohr + rac = (Radii[Elements.index(self.elem[a])-1] + Radii[Elements.index(self.elem[c])-1])*ang2bohr + self.add(ReducedDistance(a, b, c, rab=rab, rbc=rbc, n=2)) # Linear angle threshold - corresponds to about 162 degrees. LinThre = 0.95 @@ -2217,17 +2592,44 @@ def makePrimitives(self, molecule, connect, addcart): nnc = (min(a, b), max(a, b)) in noncov nnc += (min(b, c), max(b, c)) in noncov nnc += (min(b, d), max(b, d)) in noncov - # if nnc >= 1: continue + candidates = [] + maxdists = [] + # First pass: build the list of candidate angles that we can replace with an OutOfPlane + for i, j, k in sorted(list(itertools.permutations([a, c, d], 3))): + Ang1 = Angle(b,i,j) + Ang2 = Angle(i,j,k) + rbi = (Radii[Elements.index(self.elem[b])-1] + Radii[Elements.index(self.elem[i])-1]) + rbj = (Radii[Elements.index(self.elem[b])-1] + Radii[Elements.index(self.elem[j])-1]) + maxdist = max(Distance(b, i).value(coords)/rbi, Distance(b, j).value(coords)/rbj) + if np.abs(np.cos(Ang1.value(coords))) > LinThre: continue + if np.abs(np.cos(Ang2.value(coords))) > LinThre: continue + if np.abs(np.dot(Ang1.normal_vector(coords), Ang2.normal_vector(coords))) < LinThre: continue + if any([set(x).issubset(set([a, b, c, d])) for x in self.transfers]): continue + # self.delete(Angle(i, b, j)) + # self.add(OutOfPlane(b, i, j, k)) + # break + candidates.append((i, b, j, k)) + maxdists.append(maxdist) + if not candidates: continue + # Second pass: make sure to remove an angle that has close to the maximum distance + # but don't switch if the ordering of distances is swapped minutely for i, j, k in sorted(list(itertools.permutations([a, c, d], 3))): Ang1 = Angle(b,i,j) Ang2 = Angle(i,j,k) + rbi = (Radii[Elements.index(self.elem[b])-1] + Radii[Elements.index(self.elem[i])-1]) + rbj = (Radii[Elements.index(self.elem[b])-1] + Radii[Elements.index(self.elem[j])-1]) + maxdist = max(Distance(b, i).value(coords)/rbi, Distance(b, j).value(coords)/rbj) if np.abs(np.cos(Ang1.value(coords))) > LinThre: continue if np.abs(np.cos(Ang2.value(coords))) > LinThre: continue - if np.abs(np.dot(Ang1.normal_vector(coords), Ang2.normal_vector(coords))) > LinThre: - self.delete(Angle(i, b, j)) - self.add(OutOfPlane(b, i, j, k)) - break - + if np.abs(np.dot(Ang1.normal_vector(coords), Ang2.normal_vector(coords))) < LinThre: continue + if any([set(x).issubset(set([a, b, c, d])) for x in self.transfers]): continue + if maxdist <= 0.8*max(maxdists): continue + # print("Deleting Angle %i-%i-%i for OutOfPlane %i-%i-%i-%i; maxdist %.3f (%.1f%% of %.3f)" + # % (i+1, b+1, j+1, b+1, i+1, j+1, k+1, maxdist, 100*maxdist/max(maxdists), max(maxdists))) + if Angle(i, b, j) in self.Internals: self.delete(Angle(i, b, j)) + self.add(OutOfPlane(b, i, j, k)) + break + # Find groups of atoms that are in straight lines atom_lines = [list(i) for i in molecule.topology.edges()] while True: @@ -2265,6 +2667,7 @@ def makePrimitives(self, molecule, connect, addcart): # print "Lines of three or more atoms:", ', '.join(['-'.join(["%i" % (i+1) for i in l]) for l in lthree]) # Normal dihedral code + dihedral_idx = [] for aline in atom_lines_uniq: # Go over ALL pairs of atoms in a line for (b, c) in itertools.combinations(aline, 2): @@ -2285,8 +2688,12 @@ def makePrimitives(self, molecule, connect, addcart): # (should be eliminated already) if np.abs(np.cos(Ang1.value(coords))) > LinThre: continue if np.abs(np.cos(Ang2.value(coords))) > LinThre: continue + # if any([set(x).issubset(set([a, b, c, d])) for x in self.transfers]): + # logger.info("Skipping dihedral %i-%i-%i-%i due to transfer" % (a+1, b+1, c+1, d+1)) + # continue self.add(Dihedral(a, b, c, d)) - + dihedral_idx.append((a, b, c, d)) + ### Following are codes that evaluate angles and dihedrals involving entire lines-of-atoms ### as single degrees of freedom ### Unfortunately, they do not seem to improve the performance @@ -2455,6 +2862,26 @@ def resetRotations(self, xyz): for rot in self.Rotators.values(): rot.reset(xyz) + def diagnostics(self, xyz, verbose=False, print_thre=3, full_output=False): + warning_results = [] + warning_messages = [] + header = 0 + print_str = "" + for i, Internal in enumerate(self.Internals): + result, message = Internal.diagnostic(xyz) + warning_results.append(result) + warning_messages.append(message) + if result >= print_thre: + if not header: + print_str += "%5s %50s %6s %-50s\n" % ("#N", "Primitive-IC", "Status", "Message") + header = True + print_str += "%5i %50s %6i %-50s\n" % (i, repr(Internal), result, message) + if verbose: logger.info(print_str) + if full_output: + return max(warning_results), print_str, warning_results, warning_messages + else: + return max(warning_results), print_str + def torsionConstraintLinearAngles(self, coords, thre=175): """ Check if a torsion constrained optimization is about to fail @@ -2552,13 +2979,50 @@ def second_derivatives(self, xyz): # 5) 3 return np.array(answer) - def calcDiff(self, xyz1, xyz2): + def calcDiff(self, xyz1, xyz2, sync=0): """ Calculate difference in internal coordinates (coord1-coord2), accounting for changes in 2*pi of angles. """ answer = [] - for Internal in self.Internals: - answer.append(Internal.calcDiff(xyz1, xyz2)) + for i, Internal in enumerate(self.Internals): + diff = Internal.calcDiff(xyz1, xyz2) + if sync != 0 and i in self.sync: + # Sometimes, e.g. during interpolation, multiple dihedrals can change in opposite directions + # resulting in clashes. By turning on the sync flag we ensure the change is always in the same direction. + if sync == 1 and diff < -np.pi/2: + # print("dihedral-sync %i: %50s %.3f -> %.3f" % (sync, Internal, diff, diff+2*np.pi)) + diff += 2*np.pi + elif sync == -1 and diff > np.pi/2: + # print("dihedral-sync %i: %50s %.3f -> %.3f" % (sync, Internal, diff, diff-2*np.pi)) + diff -= 2*np.pi + elif sync not in [-1, 1]: + raise RuntimeError("Invalid value of sync") + answer.append(diff) return np.array(answer) - + + def syncDihedrals(self, xyz1, xyz2): + answer = self.calcDiff(xyz1, xyz2) + dih_by_trip = {} + for i, Prim in enumerate(self.Internals): + if type(Prim) is Dihedral: + dih_by_trip.setdefault((Prim.a, Prim.b, Prim.c), []).append(i) + dih_by_trip.setdefault((Prim.d, Prim.c, Prim.b), []).append(i) + for (a, b, c), dihIdx in dih_by_trip.items(): + # Being conservative here; only sync dihedrals where a-b-c are bonded and angles are not linear. + if np.abs(Distance(a, b).calcDiff(xyz2, xyz1)) > 0.5: continue + if np.abs(Distance(b, c).calcDiff(xyz2, xyz1)) > 0.5: continue + if np.abs(np.cos(Angle(a, b, c).value(xyz1))) > 0.95: continue + if np.abs(np.cos(Angle(a, b, c).value(xyz2))) > 0.95: continue + diffs = np.array([answer[i] for i in dihIdx]) + if all(np.abs(diffs) > np.pi/2) and len(np.unique(np.sign(diffs))) > 1: + for i in dihIdx: + self.sync.append(i) + # print("Turning on dihedral sync for", self.Internals[i]) + # self.Internals[i].sync = True + # if answer[i] < 0: + # answer[i] += 2*np.pi + # print("Dihedrals with center bond %s:" % str(bond)) + # for i in dihIdx: + # print("%50s %.3f" % (self.Internals[i], answer[i])) + def GInverse(self, xyz): return self.GInverse_SVD(xyz) @@ -2567,9 +3031,24 @@ def add(self, dof): self.Internals.append(dof) def delete(self, dof): + deleted = False for ii in range(len(self.Internals))[::-1]: if dof == self.Internals[ii]: del self.Internals[ii] + deleted = True + if not deleted: + logger.warning("Warning: Attempted to delete %s but it does not exist in this object\n" % dof) + raise RuntimeError + + def delete_type(self, doftype): + deleted = False + for ii in range(len(self.Internals))[::-1]: + if doftype == type(self.Internals[ii]): + # logger.warning("Deleting %s\n" % self.Internals[ii]) + del self.Internals[ii] + deleted = True + if not deleted: + logger.warning("Warning: Attempted to delete type %s but it does not exist in this object\n" % doftype) def addConstraint(self, cPrim, cVal=None, xyz=None): if cVal is None and xyz is None: @@ -2593,15 +3072,19 @@ def addConstraint(self, cPrim, cVal=None, xyz=None): def reorderPrimitives(self): # Reorder primitives to be in line with cc's code newPrims = [] + newSync = [] for cPrim in self.cPrims: newPrims.append(cPrim) - for typ in [Distance, Angle, LinearAngle, MultiAngle, OutOfPlane, Dihedral, MultiDihedral, CartesianX, CartesianY, CartesianZ, TranslationX, TranslationY, TranslationZ, RotationA, RotationB, RotationC]: - for p in self.Internals: + for typ in [Distance, DistanceDifference, ReducedDistance, Exponential, Angle, LinearAngle, MultiAngle, OutOfPlane, Dihedral, MultiDihedral, CartesianX, CartesianY, CartesianZ, TranslationX, TranslationY, TranslationZ, RotationA, RotationB, RotationC]: + for i, p in enumerate(self.Internals): if type(p) is typ and p not in self.cPrims: newPrims.append(p) + if i in self.sync: + newSync.append(len(newPrims)-1) if len(newPrims) != len(self.Internals): raise RuntimeError("Not all internal coordinates have been accounted for. You may need to add something to reorderPrimitives()") self.Internals = newPrims + self.sync = newSync def getConstraints_from(self, other): if other.haveConstraints(): @@ -2792,7 +3275,7 @@ def covalent(a, b): class DelocalizedInternalCoordinates(InternalCoordinates): - def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=False, constraints=None, cvals=None, remove_tr=False, cart_only=False, conmethod=0): + def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=False, constraints=None, cvals=None, remove_tr=False, cart_only=False, conmethod=0, repulsions=[], transfers=[], connect_isolated=True): super(DelocalizedInternalCoordinates, self).__init__() # cart_only is just because of how I set up the class structure. if cart_only: return @@ -2808,7 +3291,7 @@ def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=Fals # Add Cartesian coordinates to all. self.addcart = addcart # The DLC contains an instance of primitive internal coordinates. - self.Prims = PrimitiveInternalCoordinates(molecule, connect=connect, addcart=addcart, constraints=constraints, cvals=cvals) + self.Prims = PrimitiveInternalCoordinates(molecule, connect=connect, addcart=addcart, constraints=constraints, cvals=cvals, repulsions=repulsions, transfers=transfers, connect_isolated=connect_isolated) self.frags = self.Prims.frags self.na = molecule.na # Whether constraints have been enforced previously @@ -2834,9 +3317,12 @@ def update(self, other): def join(self, other): return self.Prims.join(other.Prims) + + def expected_dof(self, xyz=None, linear_check=False): + return self.Prims.expected_dof(xyz, linear_check) - def addConstraint(self, cPrim, cVal, xyz): - self.Prims.addConstraint(cPrim, cVal, xyz) + def addConstraint(self, cPrim, cVal=None, xyz=None): + self.Prims.addConstraint(cPrim, cVal=cVal, xyz=xyz) def getConstraints_from(self, other): self.Prims.getConstraints_from(other.Prims) @@ -3062,9 +3548,12 @@ def build_dlc_0(self, xyz): LargeVals += 1 LargeIdx.append(ival) Expect = 3*self.na - # print "%i atoms (expect %i coordinates); %i/%i singular values are > 1e-6" % (self.na, Expect, LargeVals, len(L)) + # print("%i atoms (expect %i coordinates); %i/%i singular values are > 1e-6" % (self.na, Expect, LargeVals, len(L))) # if LargeVals <= Expect: self.Vecs = Q[:, LargeIdx] + # print(G.shape, self.Vecs.shape, len(self.Prims.Internals)) + # if self.Vecs.shape[0] != len(self.Prims.Internals): + # print(self.Prims) # Vecs has number of rows equal to the number of primitives, and # number of columns equal to the number of delocalized internal coordinates. @@ -3194,7 +3683,7 @@ def build_dlc_1(self, xyz): raise RuntimeError("Expected at least %i delocalized coordinates, but got only %i" % (Expect, ncon + len(LargeIdx))) # print("%i atoms (expect %i coordinates); %i/%i singular values are > 1e-6" % (self.na, Expect, LargeVals, len(L))) - # Create "generalized" DLCs where the first six columns are the constrained primitive ICs + # Create "generalized" DLCs where the first few columns are the constrained primitive ICs # and the other columns are the DLCs formed from the rest self.Vecs = np.zeros((nprim, ncon+LargeVals), dtype=float) for i in range(ncon): @@ -3203,7 +3692,12 @@ def build_dlc_1(self, xyz): # Perform Gram-Schmidt orthogonalization def ov(vi, vj): - return multi_dot([vi, G, vj]) + answer = multi_dot([vi, G, vj]) + if (vi == vj).all(): + return max(0.0, answer) + else: + return answer + if self.haveConstraints(): click() V = self.Vecs.copy() @@ -3218,6 +3712,7 @@ def ov(vi, vj): # Copy V column corresponding to the next constraint to U. U[:, ic] = V[:, ic].copy() ui = U[:, ic] + Unorms[ic] = np.sqrt(ov(ui, ui)) if Unorms[ic]/Vnorms[ic] < 0.1: logger.warning("Constraint %i is almost redundant; after projection norm is %.3f of original\n" % (ic, Unorms[ic]/Vnorms[ic])) @@ -3259,6 +3754,7 @@ def ov(vi, vj): # print(L) def build_dlc(self, xyz): + self.clearCache() if self.conmethod == 1: return self.build_dlc_1(xyz) elif self.conmethod == 0: @@ -3400,12 +3896,15 @@ def largeRots(self): def printRotations(self, xyz): return self.Prims.printRotations(xyz) - def calcDiff(self, coord1, coord2): + def calcDiff(self, coord1, coord2, sync=False): """ Calculate difference in internal coordinates (coord1-coord2), accounting for changes in 2*pi of angles. """ - PMDiff = self.Prims.calcDiff(coord1, coord2) + PMDiff = self.Prims.calcDiff(coord1, coord2, sync=sync) Answer = np.dot(PMDiff, self.Vecs) return np.array(Answer).flatten() + def syncDihedrals(self, xyz1, xyz2): + self.Prims.syncDihedrals(xyz1, xyz2) + def calculate(self, coords): """ Calculate the DLCs given the Cartesian coordinates. """ PrimVals = self.Prims.calculate(coords) @@ -3461,6 +3960,9 @@ def resetRotations(self, xyz): """ Reset the reference geometries for calculating the orientational variables. """ self.Prims.resetRotations(xyz) + def diagnostics(self, xyz, verbose=False, print_thre=3, full_output=False): + return self.Prims.diagnostics(xyz, verbose=verbose, print_thre=print_thre, full_output=full_output) + class CartesianCoordinates(PrimitiveInternalCoordinates): """ Cartesian coordinate system, written as a kind of internal coordinate class. diff --git a/geometric/molecule.py b/geometric/molecule.py index 22982b8d..a7f7bd30 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -762,7 +762,7 @@ def get_rotate_translate(matrix1,matrix2): def cartesian_product2(arrays): """ Form a Cartesian product of two NumPy arrays. """ la = len(arrays) - arr = np.empty([len(a) for a in arrays] + [la], dtype=np.int32) + arr = np.empty([len(a) for a in arrays] + [la], dtype=int) for i, a in enumerate(np.ix_(*arrays)): arr[...,i] = a return arr.reshape(-1, la) @@ -899,7 +899,12 @@ def arc(Mol, begin=None, end=None, RMSD=True, align=True): if RMSD: Arc = Mol.pathwise_rmsd(align) else: - Arc = np.array([np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) for i in range(begin, end-1)]) + Arc = [] + for i in range(begin, end-1): + dmax = np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) + if dmax < 1e-15: dmax = 1e-15 + Arc.append(dmax) + Arc = np.array(Arc) #[np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) for i in range(begin, end-1)]) return Arc def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True): @@ -920,7 +925,7 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True): frames : int Return a Molecule object with this number of frames. RMSD : bool - Use RMSD in the arc length calculation. + Use RMSD in the arc length calculation. Returns ------- @@ -928,21 +933,24 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True): New molecule object, either the same one (if frames > len(Mol)) or with equally spaced frames. """ + import scipy.interpolate ArcMol = arc(Mol, RMSD=RMSD, align=align) ArcMolCumul = np.insert(np.cumsum(ArcMol), 0, 0.0) if frames != 0 and dx != 0: logger.error("Provide dx or frames or neither") elif dx != 0: frames = int(float(max(ArcMolCumul))/dx) + if frames == 0: frames = 1 elif frames == 0: frames = len(ArcMolCumul) - ArcMolEqual = np.linspace(0, max(ArcMolCumul), frames) xyzold = np.array(Mol.xyzs) xyznew = np.zeros((frames, Mol.na, 3)) for a in range(Mol.na): for i in range(3): - xyznew[:,a,i] = np.interp(ArcMolEqual, ArcMolCumul, xyzold[:, a, i]) + cs = scipy.interpolate.CubicSpline(ArcMolCumul, xyzold[:, a, i]) + xyznew[:,a,i] = cs(ArcMolEqual) + # xyznew[:,a,i] = np.interp(ArcMolEqual, ArcMolCumul, xyzold[:, a, i]) if len(xyzold) == len(xyznew): Mol1 = copy.deepcopy(Mol) else: @@ -1383,6 +1391,8 @@ def __getitem__(self, key): The Molecule class has list-like behavior, so we can get slices of it. If we say MyMolecule[0:10], then we'll return a copy of MyMolecule with frames 0 through 9. """ + if isinstance(key, np.int32) or isinstance(key, np.int64): + key = int(key) if isinstance(key, int) or isinstance(key, slice) or isinstance(key,np.ndarray) or isinstance(key,list): if isinstance(key, int): key = [key] @@ -1451,7 +1461,11 @@ def __add__(self,other): if type(other.Data[key]) is not list: logger.error('Key %s in other is a FrameKey, it must be a list\n' % key) raise RuntimeError - if isinstance(self.Data[key][0], np.ndarray): + if len(self.Data[key]) == 0: + Sum.Data[key] = copy.deepcopy(other.Data[key]) + elif len(other.Data[key]) == 0: + Sum.Data[key] = copy.deepcopy(self.Data[key]) + elif isinstance(self.Data[key][0], np.ndarray): Sum.Data[key] = [i.copy() for i in self.Data[key]] + [i.copy() for i in other.Data[key]] else: Sum.Data[key] = list(self.Data[key] + other.Data[key]) @@ -2022,7 +2036,7 @@ def center(self, center_mass = False): for i in range(len(self)): self.xyzs[i] -= self.xyzs[i].mean(0) - def build_bonds(self): + def build_bonds(self, extras=[]): """ Build the bond connectivity graph. """ sn = self.top_settings['topframe'] toppbc = self.top_settings['toppbc'] @@ -2138,7 +2152,7 @@ def build_bonds(self): else: # Create a list of 2-tuples corresponding to combinations of atomic indices. # This is much faster than using itertools.combinations. - AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32), np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T) + AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=int), np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=int))).T) # Create a list of thresholds for determining whether a certain interatomic distance is considered to be a bond. BT0 = R[AtomIterator[:,0]] @@ -2150,6 +2164,10 @@ def build_bonds(self): else: dxij = AtomContact(self.xyzs[sn][np.newaxis, :], AtomIterator)[0] + warn_thre = 0.2 + if np.min(dxij) < warn_thre: + logger.warning("*** %i distances are less than 0.2 Angstrom - check coordinates!! ***" % np.sum(dxij 100000: logger.warning("Warning: Large number of atoms (%i), topology building may take a long time" % self.na) if bond_order > 0.0: @@ -2221,10 +2243,12 @@ def build_topology(self, force_bonds=True, bond_order=0.0, metal_bo_factor=0.5, elif ((self.elem[i] in TransitionMetals or self.elem[j] in TransitionMetals) and self.qm_bondorder[sn][i, j] > bond_order*metal_bo_factor): bondlist.append((i, j)) + elif (i, j) in extras or (j, i) in extras: + bondlist.append((i, j)) self.Data['bonds'] = sorted(list(set(bondlist))) # Build bonds from connectivity graph. elif (not self.top_settings['read_bonds']) or force_bonds: - self.build_bonds() + self.build_bonds(extras=extras) # Create a NetworkX graph object to hold the bonds. G = MyG() for i, a in enumerate(self.elem): @@ -2251,8 +2275,8 @@ def build_topology(self, force_bonds=True, bond_order=0.0, metal_bo_factor=0.5, def distance_matrix(self, pbc=True): """ Obtain distance matrix between all pairs of atoms. """ - AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32), - np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T) + AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=int), + np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=int))).T) if hasattr(self, 'boxes') and pbc: boxes = np.array([[self.boxes[i].a, self.boxes[i].b, self.boxes[i].c] for i in range(len(self))]) drij = AtomContact(np.array(self.xyzs), AtomIterator, box=boxes) @@ -2262,8 +2286,8 @@ def distance_matrix(self, pbc=True): def distance_displacement(self): """ Obtain distance matrix and displacement vectors between all pairs of atoms. """ - AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32), - np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T) + AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=int), + np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=int))).T) if hasattr(self, 'boxes') and pbc: boxes = np.array([[self.boxes[i].a, self.boxes[i].b, self.boxes[i].c] for i in range(len(self))]) drij, dxij = AtomContact(np.array(self.xyzs), AtomIterator, box=boxes, displace=True) @@ -2399,8 +2423,8 @@ def find_clashes(self, thre=0.0, pbc=True, groups=None): if groups is not None: AtomIterator = np.ascontiguousarray([[min(g), max(g)] for g in itertools.product(groups[0], groups[1])]) else: - AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=np.int32), - np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=np.int32))).T) + AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=int), + np.fromiter(itertools.chain(*[range(i+1,self.na) for i in range(self.na)]),dtype=int))).T) ang13 = [(min(a[0], a[2]), max(a[0], a[2])) for a in self.find_angles()] dih14 = [(min(d[0], d[3]), max(d[0], d[3])) for d in self.find_dihedrals()] bondedPairs = np.where([tuple(aPair) in (self.bonds+ang13+dih14) for aPair in AtomIterator])[0] @@ -2845,6 +2869,7 @@ def pathwise_rmsd(self, align=True): tr, rt = get_rotate_translate(xyzj, xyzi) xyzj = np.dot(xyzj, rt) + tr rmsd = np.sqrt(3*np.mean((xyzj - xyzi) ** 2)) + if rmsd < 1e-15: rmsd = 1e-15 Vec[i] = rmsd return Vec From b27b022f31911b72ae3e99f27aca2d401bbdc1c1 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Fri, 5 May 2023 05:11:06 -0700 Subject: [PATCH 02/30] Add interpolation code --- geometric/internal.py | 8 +- geometric/interpolate.py | 1177 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 1181 insertions(+), 4 deletions(-) create mode 100755 geometric/interpolate.py diff --git a/geometric/internal.py b/geometric/internal.py index 2bbc12d8..892b2f5f 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -1317,7 +1317,7 @@ def diagnostic(self, xyz): # result = 3 or above: failure of the IC system is imminent # result = 2: IC may fail for small perturbations from the provided geometry # result = 1: IC is unsuitable for the provided geometry, should pay attention - LinThres = [0.5, 0.3, 0.1] + LinThres = [0.8, 0.6, 0.4] descrips = ["close to nonlinear", "very nonlinear", "extremely nonlinear"] a = self.a b = self.b @@ -1967,8 +1967,8 @@ def wilsonB(self, xyz): for i in range(Der.shape[0]): WilsonB.append(Der[i].flatten()) self.stored_wilsonB[xhash] = np.array(WilsonB) - if len(self.stored_wilsonB) > 1000 and not CacheWarning: - logger.warning("\x1b[91mWarning: more than 1000 B-matrices stored, memory leaks likely\x1b[0m\n") + if len(self.stored_wilsonB) > 10000 and not CacheWarning: + logger.warning("\x1b[91mWarning: more than 10000 B-matrices stored, memory leaks likely\x1b[0m\n") CacheWarning = True ans = np.array(WilsonB) return ans @@ -3075,7 +3075,7 @@ def reorderPrimitives(self): newSync = [] for cPrim in self.cPrims: newPrims.append(cPrim) - for typ in [Distance, DistanceDifference, ReducedDistance, Exponential, Angle, LinearAngle, MultiAngle, OutOfPlane, Dihedral, MultiDihedral, CartesianX, CartesianY, CartesianZ, TranslationX, TranslationY, TranslationZ, RotationA, RotationB, RotationC]: + for typ in [Distance, DistanceDifference, ReducedDistance, Angle, LinearAngle, MultiAngle, OutOfPlane, Dihedral, MultiDihedral, CartesianX, CartesianY, CartesianZ, TranslationX, TranslationY, TranslationZ, RotationA, RotationB, RotationC]: for i, p in enumerate(self.Internals): if type(p) is typ and p not in self.cPrims: newPrims.append(p) diff --git a/geometric/interpolate.py b/geometric/interpolate.py new file mode 100755 index 00000000..7e8babb1 --- /dev/null +++ b/geometric/interpolate.py @@ -0,0 +1,1177 @@ +#!/usr/bin/env python + +import sys +sys.setrecursionlimit(10000) +from copy import deepcopy +import json +import numpy as np +import networkx as nx +import scipy.special +from geometric.molecule import * +from geometric.internal import * +from geometric.nifty import ang2bohr, logger, commadash +from geometric.step import calc_drms_dmax + +# Pretend the "memory leaks likely.." has been printed already +CacheWarning = True + +# Enable logging output from geomeTRIC packages +from logging import * + +# Define two handlers that don't print newline characters at the end of each line +class RawStreamHandler(StreamHandler): + """ + Exactly like StreamHandler, except no newline character is printed at the end of each message. + This is done in order to ensure functions in molecule.py and nifty.py work consistently + across multiple packages. + """ + def __init__(self, stream = sys.stdout): + super(RawStreamHandler, self).__init__(stream) + self.terminator = "" + +# Uncomment the below 3 lines to activate geomeTRIC module logging +logger.setLevel(INFO) +handler = RawStreamHandler() +logger.addHandler(handler) + +# Need to refactor this so that it uses the minimum of the equilibrium bond length and the sum of covalent radii. +def get_rab(M, i, j, bohr=True): + if bohr: + return (Radii[Elements.index(M.elem[i])-1] + Radii[Elements.index(M.elem[j])-1])*ang2bohr + else: + return (Radii[Elements.index(M.elem[i])-1] + Radii[Elements.index(M.elem[j])-1]) + +def find_transfers_common_bonds(molecule, allow_larger=False): + M = molecule + transfers = [] + transfer_bonds = [] + elem = M.elem + natom = len(elem) + + reac = nx.Graph() + prod = nx.Graph() + common = nx.Graph() + + M_reac = M[0] + M_prod = M[-1] + M_reac.build_topology() + M_prod.build_topology() + + for i in range(natom): + reac.add_node(i) + prod.add_node(i) + common.add_node(i) + + for (i, j) in M_reac.bonds: + reac.add_edge(i, j) + + for (i, j) in M_prod.bonds: + prod.add_edge(i, j) + + for edge in reac.edges: + if prod.has_edge(*edge): + common.add_edge(*edge) + + atom_to_fragment_size = {} + for c in nx.connected_components(common): + for i in c: + atom_to_fragment_size[i] = len(c) + + for i in range(natom): + bonds_init = set(nx.neighbors(reac, i)) + bonds_final = set(nx.neighbors(prod, i)) + potential_donors = bonds_init - bonds_final + potential_acceptors = bonds_final - bonds_init + if len(potential_donors) == 1 and len(potential_acceptors) == 1: + don = list(potential_donors)[0] + acc = list(potential_acceptors)[0] + if allow_larger or (atom_to_fragment_size[i] <= atom_to_fragment_size[don] and atom_to_fragment_size[i] <= atom_to_fragment_size[acc]): + #if True: + transfers.append((don, i, acc)) + transfer_bonds.append((min(don, i), max(don, i))) + transfer_bonds.append((min(acc, i), max(acc, i))) + return transfers, transfer_bonds, list(common.edges) + +def find_nonbonded_pairs(molecule): + M = molecule + union_bonds = set() + nbpairs = [] + elem = M.elem + natom = len(elem) + for i in [0, -1]: + Mi = M[i] + Mi.build_topology() + for i, j in Mi.bonds: + union_bonds.add((i, j)) + for i in range(natom): + for j in range(i+1, natom): + if (i, j) not in union_bonds: + nbpairs.append((i, j)) + return sorted(nbpairs), sorted(list(union_bonds)) + +def find_blocks(mtx, thre=1): + blocks = [] + block_rights = [] + for i in range(mtx.shape[0]): + for j in range(i+1, mtx.shape[0]): + if not (mtx[i:j, i:j]<=thre).all(): + if j not in block_rights: + blocks.append((i, j-1)) + block_rights.append(j) + break + elif j == mtx.shape[0]-1: + if j not in block_rights: + blocks.append((i, j)) + block_rights.append(j) + print(blocks) + +def find_path(mtx, thre=1): + # Converts the diagnostic map into a number of segments. + # These should be overlapping segments of frame numbers + # where a single IC is known to describe them all well. + n = mtx.shape[0] + okay = (mtx <= thre).astype(int) + start_ic = 0 + allowed_IC = [[] for i in range(n)] + mtx_tried = np.zeros_like(mtx) + + if not okay[n-1, n-1] or not okay[0, 0]: + raise RuntimeError("Spoo!") + + okay2 = okay.copy() + for niter in range(n**2): + for ic in range(0, n): + for ix in range(0, n): + if ic == n-1 and ix == n-1: + pass + elif ic == 0 and ix == 0: + pass + elif ic == n-1 and not okay[ic, ix+1]: # Lower right corner on bottom + okay2[ic, ix] = 0 + # elif ix == n-1 and not okay[ic+1, ix]: # Lower right corner on right side + # okay2[ic, ix] = 0 + # elif ix == 0 and not okay[ic-1, ix]: # Upper left corner on left side + # okay2[ic, ix] = 0 + elif ic == 0 and not okay[ic, ix-1]: # Upper left corner on top side + okay2[ic, ix] = 0 + elif (ic<(n-1) and not okay[ic+1, ix]) and (ix<(n-1) and not okay[ic, ix+1]): + okay2[ic, ix] = 0 + elif (ic>0 and not okay[ic-1, ix]) and (ix>0 and not okay[ic, ix-1]): + okay2[ic, ix] = 0 + ndiff = sum(okay2 != okay) + okay = okay2.copy() + if ndiff == 0: break + + print_map(okay, "Allowed driving regions", colorscheme=1) + + def valid(i, j, size): + i0=max(0,i-size) + i1=min(n,i+size+1) + j0=max(0,j-size) + j1=min(n,j+size+1) + return okay[i0:i1,j0:j1].all() + + segments_by_size = {} + + verbose = 0 + for size in range(n//2): + if verbose >= 1: + print("Trying size %i (grid size %i)" % (size, 2*size+1)) + # Start from the left side.. + for ic0 in range(0, n): + segments = [] + ic = ic0 + ix = 0 + steps = 0 + if not okay[ic, ix]: continue + ix_last = 0 + ic_ix_mode = 0 + if verbose >= 1: + print("From Left: IC %i Size %i" % (ic, size)) + while True: + steps += 1 + if ix == n-1: # or ic == n-1: + if verbose >= 2: + print("Reached the end! (%i,%i)" % (ic, ix)) + segments.append((ix_last, n-1)) + if verbose >= 1: + print("Found segments: ", segments) + if size not in segments_by_size: + segments_by_size[size] = segments[:] + else: + if len(segments) <= len(segments_by_size[size]): + shortest_segment = min([j-i for (i, j) in segments]) + shortest_segment_stored = min([j-i for (i, j) in segments_by_size[size]]) + if shortest_segment > shortest_segment_stored: + segments_by_size[size] = segments[:] + success = True + break + elif not valid(ic, ix, size) or steps > 2*n: + if verbose >= 2: + print("Dead end at (%i,%i)" % (ic, ix)) + success = False + break + if ic_ix_mode == 0: + ix += 1 + if not valid(ic, ix, size): + ix -= 1 + ic_ix_mode = 1 + if verbose >= 2: + print("Changed to ic-direction at (%i,%i)" % (ic, ix)) + segments.append((ix_last, ix)) + ix_last = ix + 1 + ic += 1 + else: + ic += 1 + if ic == n or not valid(ic, ix, size): + ic -= 1 + ic_ix_mode = 0 + if verbose >= 2: + print("Changed to ix-direction at (%i,%i)" % (ic, ix)) + ix += 1 + # Start from the right side. + for ic0 in list(range(0, n))[::-1]: + segments = [] + ic = ic0 + ix = n-1 + steps = 0 + if not okay[ic, ix]: continue + ix_last = n-1 + ic_ix_mode = 0 + if verbose >= 1: + print("From Right: IC %i Size %i" % (ic, size)) + while True: + steps += 1 + if ix == 0: # or ic == 0: + if verbose >= 2: + print("Reached the end! (%i,%i)" % (ic, ix)) + segments.append((0, ix_last)) + segments = segments[::-1] + if verbose >= 1: + print("Found segments: ", segments) + if size not in segments_by_size: + segments_by_size[size] = segments[:] + else: + if len(segments) <= len(segments_by_size[size]): + shortest_segment = min([j-i for (i, j) in segments]) + shortest_segment_stored = min([j-i for (i, j) in segments_by_size[size]]) + if shortest_segment > shortest_segment_stored: + segments_by_size[size] = segments[:] + success = True + break + elif not valid(ic, ix, size) or steps > 2*n: + if verbose >= 2: + print("Dead end at (%i,%i)" % (ic, ix)) + success = False + break + if ic_ix_mode == 0: + ix -= 1 + if not valid(ic, ix, size): + ix += 1 + ic_ix_mode = 1 + if verbose >= 2: + print("Changed to ic-direction at (%i,%i)" % (ic, ix)) + segments.append((ix, ix_last)) + ix_last = ix - 1 + ic -= 1 + else: + ic -= 1 + if ic == -1 or not valid(ic, ix, size): + ic += 1 + ic_ix_mode = 0 + if verbose >= 2: + print("Changed to ix-direction at (%i,%i)" % (ic, ix)) + ix -= 1 + # if not success: + # print("No segments found at this size") + if segments_by_size: + min_pathsize = None + largest_size = None + for key in sorted(list(segments_by_size.keys())): + if min_pathsize is None: + largest_size = key + min_pathsize = len(segments_by_size[key]) + if len(segments_by_size[key]) == min_pathsize: + largest_size = key + print("Chose these segments (car size %i):" % largest_size) + keep_segments = segments_by_size[largest_size] + print(keep_segments) + else: + raise RuntimeError("Path finding algorithm failed") + # if success: + # if keep_segments and len(segments) > len(keep_segments): + # print("Keeping the *previous* segments as there were fewer of them") + # break + # else: + # print("Obtained segments at grid size %i:" % (2*size+1), segments) + # keep_segments = segments[:] + return keep_segments + +def merge_one_pair(segments, imerge): + merged = [] + for i in range(len(segments)): + if i == imerge: + merged.append((segments[i][0], segments[i+1][1])) + elif i == imerge + 1: + continue + else: + merged.append((segments[i])) + return merged + +def fill_splice_segments(segments, splice_length): + n_frames = segments[-1][-1] + 1 + filled = segments[:] + spliced = [] + while min([s[1]-s[0] for s in filled]) < splice_length: + merge_lengths = [] + for imerge in range(len(filled)-1): + merged = merge_one_pair(filled, imerge) + merge_lengths.append(max([s[1]-s[0] for s in merged])) + imerge_best = np.argmin(merge_lengths) + filled = merge_one_pair(filled, imerge_best) + for i, j in filled: + i = max(0, i-splice_length//2) + j = min(n_frames-1, j+splice_length//2) + spliced.append((i, j)) + # Sometimes the first or last segment is too short + if len(spliced) > 1 and spliced[1][0] == 0: + spliced = spliced[1:] + filled = filled[1:] + filled[0] = (0, filled[0][1]) + if len(spliced) > 1 and spliced[-2][1] == n_frames-1: + spliced = spliced[:-1] + filled = filled[:-1] + filled[-1] = (filled[-1][0], n_frames-1) + return filled, spliced + +def splice_segment_endpoints(coord_segments, segment_endpoints, segments_spliced, splice_length, damping, verbose=False): + max_mismatch = 0.0 + segment_endpoints = deepcopy(segment_endpoints) + for a in range(len(segments_spliced)-1): + b = a + 1 + xyz_a_spliceStart = coord_segments[a][-splice_length] + xyz_b_spliceStart = coord_segments[b][0] + xyz_a_spliceEnd = coord_segments[a][-1] + xyz_b_spliceEnd = coord_segments[b][splice_length-1] + _, dmax1 = calc_drms_dmax(xyz_b_spliceStart, xyz_a_spliceStart, align=False) + _, dmax2 = calc_drms_dmax(xyz_b_spliceEnd, xyz_a_spliceEnd, align=False) + if verbose: print("Segment %i-%i splice mismatches: %.3e %.3e" % (a, b, dmax1, dmax2)) + max_mismatch = max(max_mismatch, max(dmax1, dmax2)) + segment_endpoints[a][1] = damping*xyz_b_spliceEnd.copy() + (1.0-damping)*xyz_a_spliceEnd.copy() + segment_endpoints[b][0] = damping*xyz_a_spliceStart.copy() + (1.0-damping)*xyz_b_spliceStart.copy() + return segment_endpoints, max_mismatch + +def dq_scale_prims(IC, dest_coords, curr_coords, scale_factors={}, sync=0): + # Calculate the differences in primitive coordinates. + dqPrims = IC.Prims.calcDiff(dest_coords, curr_coords, sync=sync) + for i, Prim in enumerate(IC.Prims.Internals): + # Scale each primitive by the corresponding scale factor for that type + if type(Prim) in [RotationA, RotationB, RotationC]: + dqPrims[i] *= scale_factors.get('rotation', 0.0) + elif type(Prim) in [Dihedral]: + # Dihedrals that aren't "proper" have their dq's set to zero + bond1 = Distance(Prim.a, Prim.b) + bond2 = Distance(Prim.b, Prim.c) + bond3 = Distance(Prim.c, Prim.d) + ang1 = Angle(Prim.a, Prim.b, Prim.c) + ang2 = Angle(Prim.b, Prim.c, Prim.d) + if any([np.abs(bond.value(dest_coords) - bond.value(curr_coords)) > 0.5 for bond in [bond1, bond2, bond3]]): dqPrims[i] *= 0.0 + for coords in [curr_coords, dest_coords]: + if np.abs(np.cos(ang1.value(coords))) > 0.7: dqPrims[i] *= 0.0 + if np.abs(np.cos(ang2.value(coords))) > 0.7: dqPrims[i] *= 0.0 + dqPrims[i] *= scale_factors.get('dihedral', 0.0) + elif type(Prim) in [TranslationX, TranslationY, TranslationZ, CartesianX, CartesianY, CartesianZ]: + dqPrims[i] *= scale_factors.get('translation', 0.0) + elif type(Prim) is Distance and Prim.n < 0: + # Experimental; inverse distances are scaled to one-half of their destination values + thre = 0.5 + if Prim.value(dest_coords) > thre: + dqPrims[i] = thre - Prim.value(curr_coords) + else: + dqPrims[i] *= 0.0 + dq = np.dot(dqPrims, IC.Vecs) + return dq + +def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2): + if backward: + tmp = curr_coords.copy() + curr_coords = dest_coords.copy() + dest_coords = tmp.copy() + sync = -1 + else: + sync = 1 + dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) + coord_segment = [curr_coords] + for k in range(nDiv): + if rebuild_dlc: + IC.build_dlc(curr_coords) + dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) + new_coords = IC.newCartesian(curr_coords, dq/(nDiv-k), verbose=0) + else: + new_coords = IC.newCartesian(curr_coords, dq/nDiv, verbose=0) + coord_segment.append(new_coords) + curr_coords = new_coords.copy() + _, endpt_err = calc_drms_dmax(curr_coords, dest_coords, align=True) + if backward: + coord_segment = coord_segment[::-1] + success = endpt_err < err_thre + return coord_segment, endpt_err, success + +def get_segment_endpoints(M, segments_spliced): + segment_endpoints = [] + for a, (i, j) in enumerate(segments_spliced): + xyzi = M.xyzs[i].flatten() * ang2bohr + xyzj = M.xyzs[j].flatten() * ang2bohr + segment_endpoints.append(np.array([xyzi.copy(), xyzj.copy()])) + return segment_endpoints + +def split_segments(segments_spliced, fail_segment, splice_length): + segments_filled_new = [] + segments_spliced_new = [] + n = len(segments_spliced) + + for a, (i, j) in enumerate(segments_spliced): + ii = i if a == 0 else i + splice_length//2 + jj = j if a == n-1 else j - splice_length//2 + if a == fail_segment: + if (j-i) < splice_length: + raise RuntimeError("Failed to interpolate a segment that's shorter than the splice length") + midpt = (i+j)//2 + segments_filled_new.append((ii, midpt-1)) + segments_filled_new.append((midpt, jj)) + segments_spliced_new.append((i, midpt-1+splice_length//2)) + segments_spliced_new.append((midpt-splice_length//2, j)) + else: + segments_filled_new.append((ii, jj)) + segments_spliced_new.append((i, j)) + # i = max(0, i-splice_length//2) + # j = min(len(M)-1, j+splice_length//2) + # segments_spliced.append((i, j)) + print("Split segment %i - now the segments are:" % fail_segment) + print("Without splices:", segments_filled_new) + print("With splices:", segments_spliced_new) + return segments_filled_new, segments_spliced_new + +def print_map(mtx, title, colorscheme=0): + print(title) + n = mtx.shape[0] + for i in range(n): + if i == 0: + print(" x: ", end='') + for j in range(n): + print("%2i" % j, end='') + print() + print("IC%2i " % i, end='') + for j in range(n): + value = mtx[i, j] + if colorscheme == 0: + if value == 0: color = '\x1b[94m' + elif value == 1: color = '\x1b[92m' + elif value >= 2: color = '\x1b[91m' + elif colorscheme == 1: + if value == 0: color = '\x1b[91m' + elif value == 1: color = '\x1b[92m' + else: raise RuntimeError("Invalid color scheme") + print("%s%1i\x1b[0m " % (color, value), end='') + print("IC%2i " % i, end='') + print() + if i == n-1: + print(" x: ", end='') + for j in range(n): + print("%2i" % j, end='') + print() + +class Interpolator(object): + def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, do_prealign=False): + # The input molecule; it should not be modified. + self.M_in = deepcopy(M_in) + # Check the length of the input molecule + assert len(self.M_in) >= 2 + if use_midframes: + self.do_init = False + assert len(self.M_in) >= 5, "When setting use_midframes, input molecule must have >= 5 structures" + assert do_prealign is False, "Do not pass do_prealign if using intermediate frames" + assert n_frames == 0, "Do not pass n_frames when setting use_midframes" + self.n_frames = len(self.M_in) + else: + self.do_init = True + self.n_frames = n_frames + assert self.n_frames >= 5, "Number of frames for interpolation must be >= 5" + # Determine the splice length; currently restricted to a number between 2 and 6 + self.get_splice_length() + # The Molecule object that is currently being processed. + self.M = deepcopy(self.M_in) + self.n_atoms = len(self.M.elem) + # Whether to pre-align the fragments in the reactant and product structure + self.do_prealign = do_prealign + # Whether to align the input frames to the reactant structure upon input and before writing output + self.align_system = True if self.do_prealign else align_system + if self.align_system: + self.M.align() + # Atom pairs that are not bonded (resp. bonded) in either the reactant or product + self.nbpairs, self.union_bonds = find_nonbonded_pairs(self.M) + # List of (donor, transfer, acceptor) triplets. + # List of atom pairs in bonds that are involved in transfer triplets. + # List of atom pairs that are bonded in BOTH the reactant and product + self.transfers, self.transfer_bonds, self.common_bonds = find_transfers_common_bonds(self.M) + print("Found these transfers:", self.transfers) + # G matrix condition numbers. + self.Gcond_matrix = np.zeros((self.n_frames, self.n_frames), dtype=float) + # Molecule objects containing the endpoints. + # endmols_in contains the path endpoints before any pre-alignment of fragments. + self.endmols_in = [deepcopy(self.M[0]), deepcopy(self.M[-1])] + self.endmols = deepcopy(self.endmols_in) + self.endbonds = [] + self.endfrags = [] + self.endxyzs = [] + self.enddists = [] + for mol in self.endmols: + mol.build_topology() + self.endbonds.append(mol.bonds[:]) + self.endfrags.append(len(mol.molecules)) + self.endxyzs.append(mol.xyzs[0].flatten()*ang2bohr) + atom_pairs, distance_matrix = mol.distance_matrix(pbc=False) + self.enddists.append(distance_matrix[0].copy()) + # List of atom pairs from Molecule.distance_matrix() + # but with np.int32 converted to int + self.atom_pairs = [list(pair) for pair in atom_pairs.copy()] + # The smaller of the distances at either endpoint for each pair + self.min_enddists = np.min(np.array(self.enddists), axis=0) + + def get_splice_length(self): + # Determine the splice length + splice_length = int(np.round(self.n_frames/10)) + splice_length += splice_length % 2 + if splice_length > 6: splice_length = 6 + if splice_length < 2: splice_length = 2 + self.splice_length = splice_length + + def new_molecule(self, coord_list, comms=None): + M = deepcopy(self.M[0]) + M.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coord_list] + if comms is not None: + M.comms = comms + else: + M.comms = ['generated by geometric-interpolate; frame %i' % i for i in range(len(coord_list))] + return M + + def get_clash_thresholds(self, addthre=0.6, altdists=None, altthre=0.9): + R = [] + for pairidx, (i, j) in enumerate(self.atom_pairs): + rab = get_rab(self.M, i, j, bohr=False) + if altdists is not None: + R.append(min(max(1.0, rab)+addthre, altdists[pairidx]*altthre)) + else: + R.append(max(1.0, rab)+addthre) + R = np.array(R) + return R + + def detect_clash_one_frame(self, coords, addthre=0.6, altdists=None, altthre=0.9): + # Detect clashes for input coordinates provided in Bohr. + assert type(coords) is np.ndarray + M = deepcopy(self.M[0]) + M.xyzs = [coords.reshape(-1, 3)*bohr2ang] + atom_pairs = self.atom_pairs + _, distance_matrix = M.distance_matrix(pbc=False) + R = self.get_clash_thresholds(addthre=addthre, altdists=altdists, altthre=altthre) + + clash_pairs = [] + for pairidx, close in enumerate(distance_matrix[0] < R): + if not close: continue + i = self.atom_pairs[pairidx][0] + j = self.atom_pairs[pairidx][1] + assert i < j + if (i, j) not in clash_pairs: + clash_pairs.append((int(i), int(j))) + return clash_pairs + + def detect_clash_trajectory(self, coords, clash_known=[], addthre=0.6, altdists=None, altthre=0.9, verbose=True): + # Detect clashes for list of input coordinates provided in Bohr. + assert type(coords) is list + assert type(coords[0]) is np.ndarray + M = deepcopy(self.M[0]) + M.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coords] + atom_pairs = self.atom_pairs + _, distance_matrix = M.distance_matrix(pbc=False) + R = self.get_clash_thresholds(addthre=addthre, altdists=altdists, altthre=altthre) + + clash_known = deepcopy(clash_known) + clash_new = [] + clash_frames = {} + for k in range(len(M)): + for pairidx, close in enumerate(distance_matrix[k] < R): + if not close: continue + i = atom_pairs[pairidx][0] + j = atom_pairs[pairidx][1] + assert i < j + if (i, j) not in clash_known: + clash_known.append((int(i), int(j))) + clash_new.append((int(i), int(j))) + clash_frames.setdefault(pairidx, []).append(k) + + for pairidx, frames in clash_frames.items(): + i = atom_pairs[pairidx][0] + j = atom_pairs[pairidx][1] + min_dist = np.min(np.array(distance_matrix)[np.array(frames),pairidx]) + if verbose: + print(">> Clash %10s: %8s at frames %6s (%9s, closest = %.3f, %.3f of thre)" % ("(new)" if (i, j) in clash_new else "(existing)", + "%s%i-%s%i" % (M.elem[i], i+1, M.elem[j], j+1), + commadash(frames), "bonded" if (i, j) in self.union_bonds else "nonbonded", + min_dist, min_dist/R[pairidx])) + return clash_known, clash_new + + def prealign_one_stage(self, curr_coords, dest_coords, nDiv, scale_factors={'dihedral':0.0, 'translation':0.0, 'rotation':0.0}): + IC0, IC1 = self.endICs + n_frag0, n_frag1 = self.endfrags + if n_frag0 == 1 and n_frag1 == 1: + coord_segment0 = [curr_coords] + coord_segment1 = [dest_coords] + coord_segment = [curr_coords, dest_coords] + return coord_segment, coord_segment0, coord_segment1 + + # The formula here is a bit complicated. Basically, if scale_factors[key] + # is set to a quantity between 0 and 1, we want the prealignment to bring + # the endpoints together only part-way. However, if we simply multiply the dq + # of primitives by the scale factor, the remaining distance is still large + # by the time we get to the middle and the dividing "n" becomes one, causing larger steps + # to be taken than intended. Therefore, for each step taken, we calculate how + # far we have moved toward the middle on the "number line", and change the + # scale factor accordingly so the next step takes us the same distance down + # the number line. + scale_factors = deepcopy(scale_factors) + numerators = {} + for key, val in scale_factors.items(): + numerators[key] = val/nDiv + + for trial in range(2): + # First trial - Attempt to bring the selected DoFs of the fragments together completely and keep the result if the final frame has no clashes. + # Second trial - Each side stops when it encounters a clash and exits when fragments are brought together or clashes found for both. + coord_segment0 = [curr_coords.copy()] + coord_segment1 = [dest_coords.copy()] + IC0.clearCache() + IC1.clearCache() + clash0 = [] + clash1 = [] + k = 0 + nstep0 = 0 + nstep1 = 0 + distances = {'dihedral':1.0, 'translation':1.0, 'rotation':1.0} + while True: + if n_frag0 > 1 and (trial==0 or not clash0): + # IC0.build_dlc(grow0) + dq0 = dq_scale_prims(IC0, coord_segment1[-1], coord_segment0[-1], scale_factors, 1) + n = nDiv-k + new_coord = IC0.newCartesian(coord_segment0[-1], dq0/n, verbose=0) + clash0 = self.detect_clash_one_frame(new_coord, altdists=self.enddists[0]) + # The codes in this if statement are executed only if this end is still growing + if trial==0 or not clash0: + for key, val in scale_factors.items(): + distances[key] *= 1-val/n + scale_factors[key] = numerators[key]/(distances[key]/(n-1)) if n > 1 else scale_factors[key] + coord_segment0.append(new_coord.copy()) + nstep0 += 1 + k += 1 + # if clash0: + # print("Trial %i: Clash in step %i in reactant direction:" % (trial, nstep0), clash0) + if trial==1 and (clash0 and (clash1 or n_frag1 == 1)): break + if k >= nDiv: break + if n_frag1 > 1 and (trial==0 or not clash1): + # IC1.build_dlc(grow1) + dq1 = dq_scale_prims(IC1, coord_segment1[-1], coord_segment0[-1], scale_factors, 1) + dq1 *= -1 + n = nDiv-k + new_coord = IC1.newCartesian(coord_segment1[-1], dq1/n, verbose=0) + clash1 = self.detect_clash_one_frame(new_coord, altdists=self.enddists[1]) + # The codes in this if statement are executed only if this end is still growing + if trial==0 or not clash1: + for key, val in scale_factors.items(): + distances[key] *= 1-val/n + scale_factors[key] = numerators[key]/(distances[key]/(n-1)) if n > 1 else scale_factors[key] + coord_segment1.append(new_coord.copy()) + nstep1 += 1 + k += 1 + # if clash1: + # print("Trial %i: Clash in step %i in product direction:" % (trial, nstep1), clash1) + if trial==1 and (clash1 and (clash0 or n_frag0 == 1)): break + if k >= nDiv: break + if trial==0 and not clash0 and not clash1: + break + + coord_segment = coord_segment0 + coord_segment1[::-1][1:] + return coord_segment, coord_segment0, coord_segment1 + + def prealign_fragments(self): + print(">> Aligning molecules in reactant and product") + # Build ICs for alignment + M0, M1 = self.endmols + xyz0, xyz1 = self.endxyzs + xyz0_stage = xyz0.copy() + xyz1_stage = xyz1.copy() + IC0 = DelocalizedInternalCoordinates(M0, build=True, connect=False, addcart=False, connect_isolated=False) + IC1 = DelocalizedInternalCoordinates(M1, build=True, connect=False, addcart=False, connect_isolated=False) + IC0.syncDihedrals(xyz0, xyz1) + IC1.syncDihedrals(xyz0, xyz1) + self.endICs = [IC0, IC1] + M_reac = None + M_prod = None + for stage in [0, 1]: + if stage == 0: + print("Pre-alignment stage 0: Rotations") + scale_factors = {'dihedral':0.0, 'translation':0.0, 'rotation':1.0} + elif stage == 1: + print("Pre-alignment stage 1: Translations") + scale_factors = {'dihedral':0.0, 'translation':1.0, 'rotation':0.0} + coord_segment, segment_reac, segment_prod = self.prealign_one_stage(xyz0_stage, xyz1_stage, self.n_frames, scale_factors) + M_reac_stage = self.new_molecule(segment_reac) + M_prod_stage = self.new_molecule(segment_prod) + M_reac = M_reac_stage if M_reac is None else M_reac + M_reac_stage + M_prod = M_prod_stage if M_prod is None else M_prod + M_prod_stage + xyz0_stage = segment_reac[-1].copy() + xyz1_stage = segment_prod[-1].copy() + + M_reac_frames = EqualSpacing(M_reac, frames=self.n_frames//2+1, RMSD=False) + M_prod_frames = EqualSpacing(M_prod, frames=self.n_frames//2+1, RMSD=False) + M_reac_dx = EqualSpacing(M_reac, dx=0.1, RMSD=False) + M_prod_dx = EqualSpacing(M_prod, dx=0.1, RMSD=False) + # Keep the shorter of the dmax=0.1 or n_frames//2 segments + M_reac = M_reac_dx if len(M_reac_dx) < len(M_reac_frames) else M_reac_frames + M_prod = M_prod_dx if len(M_prod_dx) < len(M_prod_frames) else M_prod_frames + M_reac.comms = ["Alignment for reactant segment; frame %i" % i for i in range(len(M_reac))] + M_prod.comms = ["Alignment for product segment; frame %i" % i for i in range(len(M_prod))] + if len(M_reac) > 1: + if len(M_prod) > 1: + print("Alignment produced %i additional (reactant) and %i (product) frames" % (len(M_reac)-1, len(M_prod)-1)) + else: + print("Alignment produced %i additional (reactant) frames" % (len(M_reac)-1)) + elif len(M_prod) > 1: + print("Alignment produced %i additional (product) frames" % (len(M_prod)-1)) + else: + print("Alignment produced no additional frames") + # The endpoints after prealingment. + M_end = M_reac[-1] + M_prod[-1] + # The product segment goes from the product back to initial. Reverse it here. + M_prod = M_prod[::-1] + if len(M_reac) > 1 or len(M_prod) > 1: + (M_reac+M_prod).write('prealign.xyz') + # Overwrite some variables that later routines will use. + self.M = M_end + self.endxyzs = [] + self.enddists = [] + self.endmols = [deepcopy(M_reac), deepcopy(M_prod)] + for mol in self.endmols: + self.endxyzs.append(mol.xyzs[-1].flatten()*ang2bohr) + atom_pairs, distance_matrix = mol.distance_matrix(pbc=False) + self.enddists.append(distance_matrix[-1].copy()) + + def initial_guess_one_method(self, method = 0): + # Class variables that are used in this method + M = deepcopy(self.M) + n_frames = self.n_frames + common_bonds = self.common_bonds + union_bonds = self.union_bonds + n_atoms = self.n_atoms + + # if method%2 == 1: + # M = M[::-1] + M0 = M[0] + M1 = M[-1] + xyz0 = M.xyzs[0].flatten()*ang2bohr + xyz1 = M.xyzs[-1].flatten()*ang2bohr + M_IC = M0 if method%2 == 0 else M1 + xyz_IC = xyz0 if method%2 == 0 else xyz1 + + # Build the IC system used to interpolate from one endpoint to the other. + if method in [0, 1]: + # Use TRIC coordinate system, but use only the bonds that exist at both endpoints. + M_IC.bonds = common_bonds + M_IC.top_settings['read_bonds'] = True + IC = DelocalizedInternalCoordinates(M_IC, build=True, connect=False, addcart=False, connect_isolated=False) + newPrims = IC.Prims.Internals + elif method in [2, 3]: + # Use HDLC coordinate system, but use only the bonds that exist at both endpoints. + # Set weight of 0.5 to all Cartesian primitive ICs. + M_IC.bonds = common_bonds + M_IC.top_settings['read_bonds'] = True + IC = DelocalizedInternalCoordinates(M_IC, build=True, connect=False, addcart=False, connect_isolated=False) + newPrims = IC.Prims.Internals + for i in range(n_atoms): + newPrims.append(CartesianX(i, w=0.5)) + newPrims.append(CartesianY(i, w=0.5)) + newPrims.append(CartesianZ(i, w=0.5)) + elif method in [4, 5]: + # Use all-inverse distances plus Cartesian coordinates with small weight. + IC = DelocalizedInternalCoordinates(M_IC, build=True, connect=False, addcart=False) + newPrims = [] + for Prim in IC.Prims.Internals: + if type(Prim) is Distance: + # Include distances that were added automatically (corresponding to bonds) + newPrim = Distance(Prim.a, Prim.b, rab=4*get_rab(M, Prim.a, Prim.b), n=-1) + if newPrim not in newPrims: newPrims.append(newPrim) + elif type(Prim) in [Angle, LinearAngle]: + # Include distances corresponding to 1-3 atoms of angles + newPrim = Distance(Prim.a, Prim.c, rab=4*get_rab(M, Prim.a, Prim.c), n=-1) + if newPrim not in newPrims: newPrims.append(newPrim) + elif type(Prim) is Dihedral: + # Include distances corresponding to 1-4 atoms of dihedrals + (i, j) = (min(Prim.a, Prim.d), max(Prim.a, Prim.d)) + newPrim = Distance(i, j, rab=4*get_rab(M, i, j), n=-1) + if newPrim not in newPrims: newPrims.append(newPrim) + elif type(Prim) is OutOfPlane: + # Include distances corresponding to b-c, b-d and c-d for out of plane (a is central atom) + for (i, j) in [(min(Prim.b, Prim.c), max(Prim.b, Prim.c)), + (min(Prim.b, Prim.d), max(Prim.b, Prim.d)), + (min(Prim.c, Prim.d), max(Prim.c, Prim.d))]: + newPrim = Distance(i, j, rab=4*get_rab(M, i, j), n=-1) + if newPrim not in newPrims: newPrims.append(newPrim) + for i in range(n_atoms): + newPrims.append(CartesianX(i, w=0.25)) + newPrims.append(CartesianY(i, w=0.25)) + newPrims.append(CartesianZ(i, w=0.25)) + else: + raise RuntimeError("Invalid value for method") + + # Finalize the IC system. + IC.Prims.Internals = newPrims + IC.Prims.reorderPrimitives() + IC.Prims.syncDihedrals(xyz1, xyz0) + IC.build_dlc(xyz_IC) + # print("=== Primitives for method %i ===" % method) + # print(IC.Prims) + + # Build a "checking" IC system used to detect whether any primitive ICs are changing very rapidly between frames. + # This is one of our diagnostics for whether the generated path is "good". + M0.bonds = common_bonds + M0.top_settings['read_bonds'] = True + IC_Chk = PrimitiveInternalCoordinates(M0, build=True, connect=False, addcart=True, connect_isolated=False) + M1.bonds = common_bonds + M1.top_settings['read_bonds'] = True + IC1_Chk = PrimitiveInternalCoordinates(M1, build=True, connect=False, addcart=True, connect_isolated=False) + chkPrims = [] + for Prim in IC_Chk.Internals + IC1_Chk.Internals: + # Keep the primitives on either end that are valid for both points. + if Prim.diagnostic(xyz0)[0] <= 1 and Prim.diagnostic(xyz1)[0] <= 1 and Prim not in chkPrims: + chkPrims.append(Prim) + IC_Chk.Internals = chkPrims + IC_Chk.reorderPrimitives() + + err_thre = 1e-2 + clash_pairs = [] + nDiv = n_frames - 1 + for clash_round in range(5): + coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=True) + dq_segment = np.array([IC_Chk.calcDiff(coord_segment[i], coord_segment[i+1]) for i in range(n_frames-1)]) + # Setting to -1e-6 essentially ignores the checking primitives if they evaluate to +/- inf. + dq_segment[dq_segment == -np.inf] = -1e-6 + dq_segment[dq_segment == np.inf] = 1e-6 + dq_max = np.max(np.abs(dq_segment), axis=0) + dq_med = np.median(np.abs(dq_segment), axis=0) + dq_med[dq_med<0.01] = 0.01 + dq_ratio = max(dq_max/dq_med) + + M.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coord_segment] + M.comms = ['Interpolated frame %i' % k for k in range(len(coord_segment))] + # Fail if the endpt_err is greater than the threshold + if endpt_err > err_thre: + M.align() + return M, 1, dq_ratio, endpt_err + # Check for clashes + clash_pairs, clash_new = self.detect_clash_trajectory(coord_segment, clash_known=clash_pairs, altdists=self.min_enddists, verbose=False) + if not clash_new: + # print("No new clashes found; finishing") + break + else: + for (i, j) in clash_pairs: + if Distance(i, j) in IC.Prims.Internals: IC.Prims.delete(Distance(i, j)) + IC.Prims.add(Distance(i, j, rab=4*get_rab(M, i, j), n=-1)) + IC.Prims.reorderPrimitives() + IC.build_dlc(xyz_IC) + + if self.align_system: + M.align() + return M, 0, dq_ratio, endpt_err + + def initial_guess(self): + M = None + dq_ratio_min = 1e6 + for method in range(6): + M_, status, dq_ratio, endpt_err = self.initial_guess_one_method(method=method) + # M_.write('interpolated_endpoints_method%i.xyz' % method) + # Keep the interpolation path with status=0 and the lowest dq_ratio + print("Initial pathway generation using method %i %s; dq_ratio = %.3f endpt_err = %.3f" % + (method, "success" if status == 0 else "failed", dq_ratio, endpt_err)) + if status == 0 and dq_ratio < dq_ratio_min: + M = deepcopy(M_) + dq_ratio_min = dq_ratio + keep_method = method + if M is None: + raise RuntimeError("Failed to generate an initial path") + M.comms = [c + " (method %i)" % keep_method for c in M.comms] + M.write("initial_guess.xyz") + # Overwrite the Molecule object currently being processed + self.M = deepcopy(M) + print("Keeping the result from method %i" % keep_method) + + def assign_ICs_to_segments(self): + M = self.M + ICs = self.ICs + segments_filled = self.segments_filled + diag_matrix = self.diag_matrix + Gcond_matrix = self.Gcond_matrix + n_frames = self.n_frames + + segment_to_ICs = [] + print("Determining which IC to use in each segment:") + for a, (ii, jj) in enumerate(segments_filled): + print("=== Now working on segment (%i, %i) ===" % (ii, jj)) + Gcond_maxs = [] + diags = [] + # For each IC, calculate the maximum condition number over all frames + # and choose the IC that has the smallest maximum. + for c in range(ii, jj+1): + Gconds = [] + diag = 0 + for b in range(ii, jj+1): + if Gcond_matrix[c, b] == 0.0: + Gcond = ICs[c].G_condition(M.xyzs[b].flatten()*ang2bohr) + Gcond_matrix[c, b] = Gcond + else: + Gcond = Gcond_matrix[c, b] + Gconds.append(Gcond) + diag = max(diag, diag_matrix[c, b]) + Gcond_maxs.append(max(Gconds)) + diags.append(diag) + + valid_frames = np.array([k <= max(1, min(diags)) for k in diags]) + sorted_valid_frames = np.argsort(Gcond_maxs)[np.where(valid_frames[np.argsort(Gcond_maxs)])] + candidate_frames = [] + candidate_ICs = [] + + def add_candidate_IC(c): + iic = ii + c + if ICs[iic] not in candidate_ICs: + candidate_frames.append(iic) + candidate_ICs.append(ICs[iic]) + print("(%i, %i): adding the IC from frame %i; diagnostic = %i, Gcond_max = %.5e" % (ii, jj, iic, diags[c], Gcond_maxs[c])) + + max_candidates = 5 + if ii == 0:# and diags[0] <= 1: + add_candidate_IC(0) + max_candidates += 1 + if jj == n_frames-1:# and diags[jj-ii] <= 1: + add_candidate_IC(jj-ii) + max_candidates += 1 + for c in sorted_valid_frames: + add_candidate_IC(c) + if len(candidate_ICs) == max_candidates: break + + segment_to_ICs.append([(i, j) for i, j in zip(candidate_frames, candidate_ICs)]) + + # For each segment, these are the frame numbers and corresponding IC systems that can be used to interpolate that segment. + self.segment_to_ICs = segment_to_ICs + + def build_IC_segments(self): + print("Building internal coordinates and determining segments:") + assert len(self.M) == self.n_frames + M = deepcopy(self.M) + transfers = self.transfers + n_frames = self.n_frames + splice_length = self.splice_length + + segments = [] + ICs = [] + xyzs = [M.xyzs[i].flatten()*ang2bohr for i in range(len(M))] + + # Build the list of ICs; these are TRIC coordinates, isolated atoms disconnected, with atom transfers + for i in range(len(M)): + M_i = M[i] + IC = DelocalizedInternalCoordinates(M_i, build=True, connect=False, addcart=False, transfers=transfers, connect_isolated=False) + IC.Prims.syncDihedrals(xyzs[-1], xyzs[0]) + IC.build_dlc(xyzs[i]) + ICs.append(IC) + + diag_matrix = np.zeros((len(M), len(M)), dtype=int) + diag_messages = {} + for i in range(len(M)): + for j in range(len(M)): + diag, messages = ICs[i].diagnostics(xyzs[j], print_thre=2) + diag_matrix[i, j] += diag + diag_messages[(i, j)] = messages + + print_map(diag_matrix, "IC diagnostic map", colorscheme=0) + segments = find_path(diag_matrix) + segments_filled, segments_spliced = fill_splice_segments(segments, splice_length) + print("The frame numbers of the spliced segments are:") + print(segments_spliced) + + # Assign some class variables to be used by other methods + self.ICs = ICs + self.diag_matrix = diag_matrix + self.segments_filled = segments_filled + self.segments_spliced = segments_spliced + self.assign_ICs_to_segments() + self.segment_endpoints = get_segment_endpoints(M, segments_spliced) + + def splice_iterations(self): + segment_endpoints_orig = deepcopy(self.segment_endpoints) + segment_endpoints = deepcopy(self.segment_endpoints) + # M_in contains the initial guess coordinates, which were created by self.initial_guess() + # from the (optionally prealigned) endpoints. + M_in = deepcopy(self.M) + M = deepcopy(self.M) + M_reac, M_prod = self.endmols + xyz0, xyz1 = self.endxyzs + splice_length = self.splice_length + segments_spliced = self.segments_spliced + segment_to_ICs = self.segment_to_ICs + n_frames = self.n_frames + + n_cycles = 1000 + min_max_mismatch = 1e10 + last_max_mismatch = 1e10 + increase_count = 0 + decrease_count = 0 + damping = 1.0 + backwards = [[False for c in range(len(segment_to_ICs[a]))] for a in range(len(segments_spliced))] + clash_pairs = [] + for cycle in range(n_cycles): + coord_segments = [] + endpt_errs = [] + M.xyzs = [] + M.comms = [] + frame_to_segment = {} + for a in range(len(segments_spliced)): + i, j = segments_spliced[a] + xyzi, xyzj = segment_endpoints[a] + nDiv = j-i + success = False + for c, (iic, IC) in enumerate(segment_to_ICs[a]): + attempt = 0 + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False) + if success: break + attempt = 1 + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False) + if success: + backwards[a][c] = not backwards[a][c] + break + if success: + # Reorder the segment_to_ICs so the successful one is at the front. + reordered_segment_to_ICs = [] + for cc, (iic, IC) in enumerate(segment_to_ICs[a]): + if c == cc: + reordered_segment_to_ICs.append((iic, IC)) + for cc, (iic, IC) in enumerate(segment_to_ICs[a]): + if c != cc: + reordered_segment_to_ICs.append((iic, IC)) + segment_to_ICs[a] = reordered_segment_to_ICs + else: + print("Failed to interpolate segment %i-%i" % (i, j)) + return 1, a + + print("Frames %2i-%2i error in final interpolated vs. product structure: %.3e (candidate %i attempt %i)" % (i, j, endpt_err, c, attempt)) + coord_segments.append(np.array(coord_segment).copy()) + endpt_errs.append(endpt_err) + if a == 0: + keep_start = 0 + else: + keep_start = splice_length//2 + if a == len(segments_spliced)-1: + keep_end = len(coord_segment) + else: + keep_end = len(coord_segment) - splice_length//2 + for k in range(keep_start, keep_end): + frame_to_segment[i+k] = a + M.xyzs += [coord_segment[k].reshape(-1, 3)*bohr2ang for k in range(keep_start, keep_end)] + M.comms += ['Frame %i-%i interpolated frame %i' % (i, j, i+k) for k in range(keep_start, keep_end)] + + + _, max_mismatch = splice_segment_endpoints(coord_segments, segment_endpoints, segments_spliced, splice_length, damping, verbose=True) + + if max_mismatch < min_max_mismatch: + min_max_mismatch = max_mismatch + print(">> Current best result (mismatch=%.3e) saved to interpolated_splice.xyz" % max_mismatch) + include_alignment = False + if include_alignment: + M_append = M_reac[:-1] + M + M_prod[1:] + if len(M_append) != len(M): + M_append.align() + M = EqualSpacing(M_append, frames=len(M), RMSD=False) + else: + if len(M_reac) > 1: + M = M_reac[0] + M + if len(M_prod) > 1: + M = M + M_prod[-1] + M.align() + M.write("interpolated_splice.xyz") + + if max_mismatch < last_max_mismatch: + increase_count = 0 + decrease_count += 1 + if decrease_count >= 3 and damping < 1.0: + print(">> Mismatch decreasing; resetting damping") + damping = 1.0 + else: + increase_count += 1 + decrease_count = 0 + print(">> Mismatch fails to decrease; increasing damping") + damping *= 0.8 + last_max_mismatch = max_mismatch + + segment_endpoints, _ = splice_segment_endpoints(coord_segments, segment_endpoints, segments_spliced, splice_length, damping, verbose=False) + + if max_mismatch > 0.3: continue + + # Check for clashes. + coord_traj = [M.xyzs[k].flatten()*ang2bohr for k in range(len(M))] + clash_pairs, clash_new = self.detect_clash_trajectory(coord_traj, clash_known=clash_pairs, altdists=self.min_enddists, verbose=True) + if clash_new: + print("Current list of clashing pairs:", ', '.join(["%s%i-%s%i" % (M.elem[i], i+1, M.elem[j], j+1) for i, j in clash_pairs])) + for a in range(len(segments_spliced)): + print(">> Rebuilding ICs for segment %i" % a) + rebuilt_segment_to_ICs = [] + for c, (iic, IC) in enumerate(segment_to_ICs[a]): + new_repulsions = sorted(list(set(IC.Prims.repulsions).union(set(clash_pairs)))) + IC1 = DelocalizedInternalCoordinates(M_in[iic], build=True, connect=False, addcart=False, + repulsions=new_repulsions, transfers=IC.Prims.transfers, connect_isolated=False) + IC1.syncDihedrals(xyz1, xyz0) + IC1.build_dlc(M_in.xyzs[iic].flatten()*ang2bohr) + rebuilt_segment_to_ICs.append((iic, IC1)) + segment_to_ICs[a] = rebuilt_segment_to_ICs + print("Resetting due to clashes") + segment_endpoints = deepcopy(segment_endpoints_orig) + min_max_mismatch = 1e10 + last_max_mismatch = 1e10 + else: + if max_mismatch < 1.8e-3: + print("Converged!") + return 0, 0 + else: + continue + if cycle == n_cycles - 1: + print("Not converged after %i cycles; best max-mismatch is %.5f" % (n_cycles, min_max_mismatch)) + + def split_IC_segments(self, fail_segment): + self.segments_filled, self.segments_spliced = split_segments(self.segments_spliced, fail_segment, self.splice_length) + self.assign_ICs_to_segments() + self.segment_endpoints = get_segment_endpoints(self.M, self.segments_spliced) + + def run_workflow(self): + if self.do_prealign: + self.prealign_fragments() + if self.do_init: + self.initial_guess() + self.build_IC_segments() + status, fail_segment = self.splice_iterations() + count = 0 + while status != 0: + self.split_IC_segments(fail_segment) + status, fail_segment = self.splice_iterations() + if count == 3: + print("Failed after 3 splits; exiting") + +def main(): + M0 = Molecule(sys.argv[1]) + interpolator = Interpolator(M0, align_system=True, do_prealign=True) + interpolator.run_workflow() + +if __name__ == "__main__": + main() From f289ea087be607bb56a772e13491c55eb538cdcf Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Wed, 10 May 2023 12:22:29 -0700 Subject: [PATCH 03/30] sum -> np.sum --- geometric/interpolate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 7e8babb1..8e2f79be 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -158,7 +158,7 @@ def find_path(mtx, thre=1): okay2[ic, ix] = 0 elif (ic>0 and not okay[ic-1, ix]) and (ix>0 and not okay[ic, ix-1]): okay2[ic, ix] = 0 - ndiff = sum(okay2 != okay) + ndiff = np.sum(okay2 != okay) okay = okay2.copy() if ndiff == 0: break From 968aa3b4aef9d3aa2addb522c8f8c62aa21f5e0d Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Sat, 13 May 2023 11:33:47 -0700 Subject: [PATCH 04/30] temporary commit, commenting needs to be done --- geometric/interpolate.py | 19 ++++++++++++++++--- setup.py | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 8e2f79be..e668ebce 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -57,31 +57,37 @@ def find_transfers_common_bonds(molecule, allow_larger=False): M_reac.build_topology() M_prod.build_topology() + # Adding atoms for i in range(natom): reac.add_node(i) prod.add_node(i) common.add_node(i) - + + # Adding bonds for the reactant and product for (i, j) in M_reac.bonds: reac.add_edge(i, j) for (i, j) in M_prod.bonds: prod.add_edge(i, j) - + + # Adding common bonds for edge in reac.edges: if prod.has_edge(*edge): common.add_edge(*edge) + # Sizes of the fragments that are intact throughout the reaction atom_to_fragment_size = {} for c in nx.connected_components(common): for i in c: atom_to_fragment_size[i] = len(c) + # Detecting bond donors and acceptors for i in range(natom): bonds_init = set(nx.neighbors(reac, i)) bonds_final = set(nx.neighbors(prod, i)) potential_donors = bonds_init - bonds_final potential_acceptors = bonds_final - bonds_init + # If one bond breaks and one bond forms: if len(potential_donors) == 1 and len(potential_acceptors) == 1: don = list(potential_donors)[0] acc = list(potential_acceptors)[0] @@ -98,11 +104,15 @@ def find_nonbonded_pairs(molecule): nbpairs = [] elem = M.elem natom = len(elem) + + # Detecting all bonds for i in [0, -1]: Mi = M[i] Mi.build_topology() for i, j in Mi.bonds: union_bonds.add((i, j)) + + # Saving the bonds that weren't detected (non-bonded pairs) for i in range(natom): for j in range(i+1, natom): if (i, j) not in union_bonds: @@ -110,6 +120,7 @@ def find_nonbonded_pairs(molecule): return sorted(nbpairs), sorted(list(union_bonds)) def find_blocks(mtx, thre=1): + # This is for the diagnostic map blocks = [] block_rights = [] for i in range(mtx.shape[0]): @@ -130,11 +141,13 @@ def find_path(mtx, thre=1): # These should be overlapping segments of frame numbers # where a single IC is known to describe them all well. n = mtx.shape[0] + # Making a matrix that is an "IC maze" okay = (mtx <= thre).astype(int) start_ic = 0 allowed_IC = [[] for i in range(n)] mtx_tried = np.zeros_like(mtx) + # If the starting point or end point is blocked, we can't escape. if not okay[n-1, n-1] or not okay[0, 0]: raise RuntimeError("Spoo!") @@ -1170,7 +1183,7 @@ def run_workflow(self): def main(): M0 = Molecule(sys.argv[1]) - interpolator = Interpolator(M0, align_system=True, do_prealign=True) + interpolator = Interpolator(M0, align_system=True, do_prealign=False) interpolator.run_workflow() if __name__ == "__main__": diff --git a/setup.py b/setup.py index 3255636d..9acdf33f 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ long_description_content_type="text/markdown", entry_points={'console_scripts': [ 'geometric-optimize = geometric.optimize:main', + 'geometric-interpolate = geometric.interpolate:main' ]}, install_requires=[ 'numpy>=1.11', From 82d2536920e6449b82f192c1af0e19d7323d1c47 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 14 May 2023 18:13:26 -0700 Subject: [PATCH 05/30] Implement erisemax; fix some regression tests --- examples/0-regression-tests/heme/command.sh | 2 +- examples/0-regression-tests/imatinib-salt/command.sh | 2 +- examples/0-regression-tests/ivermectin/command.sh | 2 +- examples/0-regression-tests/sialyl-lewis-x/command.sh | 2 +- geometric/interpolate.py | 5 +++-- geometric/optimize.py | 7 ++++++- geometric/params.py | 3 +++ tools/plot-performance.py | 8 ++++++++ 8 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples/0-regression-tests/heme/command.sh b/examples/0-regression-tests/heme/command.sh index 2eeba219..13524b21 100755 --- a/examples/0-regression-tests/heme/command.sh +++ b/examples/0-regression-tests/heme/command.sh @@ -1 +1 @@ -geometric-optimize --engine tera run.in +geometric-optimize --engine tera run.tcin diff --git a/examples/0-regression-tests/imatinib-salt/command.sh b/examples/0-regression-tests/imatinib-salt/command.sh index 2eeba219..13524b21 100755 --- a/examples/0-regression-tests/imatinib-salt/command.sh +++ b/examples/0-regression-tests/imatinib-salt/command.sh @@ -1 +1 @@ -geometric-optimize --engine tera run.in +geometric-optimize --engine tera run.tcin diff --git a/examples/0-regression-tests/ivermectin/command.sh b/examples/0-regression-tests/ivermectin/command.sh index 2eeba219..13524b21 100755 --- a/examples/0-regression-tests/ivermectin/command.sh +++ b/examples/0-regression-tests/ivermectin/command.sh @@ -1 +1 @@ -geometric-optimize --engine tera run.in +geometric-optimize --engine tera run.tcin diff --git a/examples/0-regression-tests/sialyl-lewis-x/command.sh b/examples/0-regression-tests/sialyl-lewis-x/command.sh index 2eeba219..13524b21 100755 --- a/examples/0-regression-tests/sialyl-lewis-x/command.sh +++ b/examples/0-regression-tests/sialyl-lewis-x/command.sh @@ -1 +1 @@ -geometric-optimize --engine tera run.in +geometric-optimize --engine tera run.tcin diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 7e8babb1..0918ac35 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -158,7 +158,7 @@ def find_path(mtx, thre=1): okay2[ic, ix] = 0 elif (ic>0 and not okay[ic-1, ix]) and (ix>0 and not okay[ic, ix-1]): okay2[ic, ix] = 0 - ndiff = sum(okay2 != okay) + ndiff = np.sum(okay2 != okay) okay = okay2.copy() if ndiff == 0: break @@ -1170,7 +1170,8 @@ def run_workflow(self): def main(): M0 = Molecule(sys.argv[1]) - interpolator = Interpolator(M0, align_system=True, do_prealign=True) + #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, do_prealign=False) + interpolator = Interpolator(M0, align_system=True, do_prealign=False) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/optimize.py b/geometric/optimize.py index a06f3c99..b0f79e92 100644 --- a/geometric/optimize.py +++ b/geometric/optimize.py @@ -541,6 +541,8 @@ def evaluateStep(self): colors['energy'] = "\x1b[92m" if Converged_energy else "\x1b[91m" colors['quality'] = "\x1b[91m" step_state = StepState.Reject if (Quality < -1.0 or params.transition) else StepState.Poor + # 2023-05-10: In the case of a minimization step that increases energy by more than erisemax, reject the step. + if not params.transition and not self.farConstraints and self.E-self.Eprev > params.erisemax: step_state = StepState.Reject if 'energy' not in colors: colors['energy'] = "\x1b[92m" if Converged_energy else "\x1b[0m" if 'quality' not in colors: colors['quality'] = "\x1b[0m" colors['grms'] = "\x1b[92m" if Converged_grms else "\x1b[0m" @@ -634,7 +636,10 @@ def evaluateStep(self): elif self.farConstraints: logger.info("\x1b[93mNot rejecting step - far from constraint satisfaction\x1b[0m\n") else: - logger.info("\x1b[93mRejecting step - quality is lower than %.1f\x1b[0m\n" % (0.0 if params.transition else -1.0)) + if not params.transition and not self.farConstraints and self.E-self.Eprev > params.erisemax: + logger.info("\x1b[93mRejecting step - energy increases > %.2f during minimization\x1b[0m\n" % params.erisemax) + else: + logger.info("\x1b[93mRejecting step - quality is lower than %.1f\x1b[0m\n" % (0.0 if params.transition else -1.0)) self.trustprint = "\x1b[1;91mx\x1b[0m" # Store the rejected step. In case the next step is identical to the rejected one, the next step should be accepted to avoid infinite loops. self.X_rj = self.X.copy() diff --git a/geometric/params.py b/geometric/params.py index 0c05fded..5d62224e 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -56,6 +56,8 @@ def __init__(self, **kwargs): self.enforce = kwargs.get('enforce', 0.0) # Small eigenvalue threshold self.epsilon = kwargs.get('epsilon', 1e-5) + # Energy increase threshold; 3e-2 -> about 20 kcal/mol + self.erisemax = kwargs.get('erisemax', 3e-2) # Interval for checking the coordinate system for changes self.check = kwargs.get('check', 0) # More verbose printout @@ -328,6 +330,7 @@ def parse_optimizer_args(*args): grp_optparam.add_argument('--reset', type=str2bool, help='Reset approximate Hessian to guess when eigenvalues are under epsilon.\n ' 'Defaults to True for minimization and False for transition states.\n ') grp_optparam.add_argument('--epsilon', type=float, help='Small eigenvalue threshold for resetting Hessian, default 1e-5.\n ') + grp_optparam.add_argument('--erisemax', type=float, help='For energy minimization, the maximum value that energy can rise without step being rejected.\n ') grp_optparam.add_argument('--check', type=int, help='Check coordinates every steps and rebuild coordinate system, disabled by default.\n') grp_optparam.add_argument('--subfrctor', type=int, help='Project out net force and torque components from nuclear gradient.\n' '0 = never project; 1 = auto-detect (default); 2 = always project.') diff --git a/tools/plot-performance.py b/tools/plot-performance.py index 39e6d851..99842f5d 100755 --- a/tools/plot-performance.py +++ b/tools/plot-performance.py @@ -107,9 +107,17 @@ def get_vmin_vmax_log(df, pad=0.2): ax1.set_title('Energy change, from start (kcal/mol)') df_energy_kcal.plot(ax=ax1, legend=False) labels = [] + final_es = [] for col in df_energy_kcal.columns: labels.append("%s %s N=%i" % (col, status[col], df_energy_kcal[col].last_valid_index())) + final_es.append(df_energy_kcal[col][df_energy_kcal[col].last_valid_index()]) + final_es = np.array(final_es) fig.legend(labels, loc='center', bbox_to_anchor=(0.5, 0.05), ncol=2) + ax1in = ax1.inset_axes([0.3, 0.3, 0.68, 0.68]) + ymin = np.min(final_es) - 1 + ymax = np.max(final_es) + 1 + ax1in.set_ylim(ymin, ymax) + df_energy_kcal.plot(ax=ax1in, legend=False) ax5.set_xlabel('Optimization Cycle') titles = ['RMS Gradient (a.u.)', 'Max Gradient (a.u.)', 'Energy change per step (a.u.)', 'RMS Displacement (a.u.)', 'Max Displacement (a.u.)'] From 35851123fb5f157f59c7788ac3f58bc49cbd36c5 Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Wed, 23 Aug 2023 12:47:21 -0700 Subject: [PATCH 06/30] args can be passed by users for the interpolation method --- geometric/interpolate.py | 5 +++- geometric/params.py | 53 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 4414f8fe..eea3f143 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -7,6 +7,7 @@ import numpy as np import networkx as nx import scipy.special +from geometric.params import InterpParams, parse_interpolate_args from geometric.molecule import * from geometric.internal import * from geometric.nifty import ang2bohr, logger, commadash @@ -1180,9 +1181,11 @@ def run_workflow(self): print("Failed after 3 splits; exiting") def main(): + args = parse_interpolate_args(sys.argv[1:]) + params = InterpParams(**args) M0 = Molecule(sys.argv[1]) #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, do_prealign=False) - interpolator = Interpolator(M0, align_system=True, do_prealign=False) + interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align, do_prealign=params.prealign) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/params.py b/geometric/params.py index 1cde093a..1d636ae5 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -215,6 +215,22 @@ def printInfo(self): elif self.hessian.startswith('file+last:'): logger.info(' Hessian data will be read from file: %s, then computed for the last step.\n' % self.hessian[5:]) +class InterpParams(object): + """ + Container for interpolation parameters. + """ + def __init__(self, **kwargs): + # Whether we are optimizing a given trajectory. + self.optimize = kwargs.get('optimize', False) + # Number of frames that will be used for the interpolation. + self.nframes = kwargs.get('nframes', False) + # Whether we want to align the molecules in reactant and product frames. + self.prealign = kwargs.get('prealign', False) + # Whether we want to align the initial interpolate trajectory before optimization. + self.align = kwargs.get('align', False) + # Verbose printout + self.verbose = kwargs.get('verbose', 0) + def str2bool(v): """ Allows command line options such as "yes" and "True" to be converted into Booleans. """ if isinstance(v, bool): @@ -406,3 +422,40 @@ def parse_optimizer_args(*args): args_dict['engine'] = 'tera' return args_dict + +def parse_interpolate_args(*args): + + """ + Read user input from the command line interface. + Designed to be called by interpolate.main() passing in sys.argv[1:] + """ + + parser = ArgumentParserWithFile(add_help=False, formatter_class=argparse.RawTextHelpFormatter, fromfile_prefix_chars='@') + + grp_univ = parser.add_argument_group('universal', 'Relevant to every job') + grp_univ.add_argument('input', type=str, help='REQUIRED positional argument: xyz file containing reactant and product structures.\n') + grp_univ.add_argument('--optimize', type=str2bool, help='Provide "yes" to optimize an interpolated trajectory.\n' + 'The followting two arguments, nframes and prealign, will be ignored.\n' + 'The input xyz file must contain 5 or more structures.\n') + grp_univ.add_argument('--nframes', type=int, help='Number of frames, needs to be bigger than 5, that will be used to interpolate, default 50\n') + grp_univ.add_argument('--prealign', type=str2bool, help='Provide "yes" to align the molecules in reactant and product before the interpolation.\n') + grp_univ.add_argument('--align', type=str2bool, help='Provide "yes" to align the initial interpolated trajectory.\n') + + grp_output = parser.add_argument_group('output', 'Control the format and amount of the output') + grp_output.add_argument('--prefix', type=str, help='Specify a prefix for log file and temporary directory.\n' + 'Defaults to the input file path (incl. file name with extension removed).\n ') + grp_output.add_argument('--verbose', type=int, help='Set to positive for more verbose printout.\n' + '0 = Default print level. 1 = Basic info about optimization step.\n' + '2 = Include microiterations. 3 = Lots of printout from low-level functions.\n ') + + grp_help = parser.add_argument_group('help', 'Get help') + grp_help.add_argument('-h', '--help', action='help', help='Show this help message and exit') + + # Keep all arguments whose values are not None, so that the setting of default values + # in InterpParams() and get_molecule_engine() will work properly. + args_dict = {} + for k, v in vars(parser.parse_args(*args)).items(): + if v is not None: + args_dict[k] = v + + return args_dict \ No newline at end of file From 4de83d5bb6078dfc662b5d6f658e18b9fa7a6b15 Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Wed, 23 Aug 2023 16:59:09 -0700 Subject: [PATCH 07/30] ignoring nframes and prealign when an interpolated trajectory is provided --- geometric/params.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/geometric/params.py b/geometric/params.py index 1d636ae5..c1dca1a2 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -222,10 +222,14 @@ class InterpParams(object): def __init__(self, **kwargs): # Whether we are optimizing a given trajectory. self.optimize = kwargs.get('optimize', False) - # Number of frames that will be used for the interpolation. - self.nframes = kwargs.get('nframes', False) - # Whether we want to align the molecules in reactant and product frames. - self.prealign = kwargs.get('prealign', False) + if self.optimize: + self.nframes = 0 + self.prealign = False + else: + # Number of frames that will be used for the interpolation. + self.nframes = kwargs.get('nframes', False) + # Whether we want to align the molecules in reactant and product frames. + self.prealign = kwargs.get('prealign', False) # Whether we want to align the initial interpolate trajectory before optimization. self.align = kwargs.get('align', False) # Verbose printout From 99873603e9bb1ffebc5f9709a94003180ec559b6 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 21 Sep 2023 16:24:17 -0700 Subject: [PATCH 08/30] Resolve an inconsistency in how G condition number was being evaluated --- geometric/internal.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 46a61740..97c834f7 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -2089,8 +2089,6 @@ def GInverse_SVD(self, xyz): # Perform singular value decomposition click() loops = 0 - # The expected number of nonzero eigenvalues. - Expect = self.expected_dof() while True: try: G = self.GMatrix(xyz) @@ -2121,23 +2119,12 @@ def GInverse_SVD(self, xyz): # print("%3i degrees of freedom; %i/%i singular values are > 1e-6" % (3*xyz.shape[0], LargeVals, len(S))) Sinv = np.diag(Sinv) Inv = multi_dot([V, Sinv, UT]) - if G.shape[0] < Expect: - # Return a very large number if the G-matrix dimension is actually smaller than expected. - # This can happen if we lost a DoF at the stage of forming the DLCs. - self.GCond_Inverse = S[0]/S[-1] - else: - self.GCond_Inverse = S[0]/S[Expect-1] + self.GCond_Inverse = S[0]/S[-1] return Inv def G_condition(self, xyz): # Calculate the condition number of the G matrix. G = self.GMatrix(xyz) - # The expected number of nonzero eigenvalues. - # Expect = self.expected_dof() - # if G.shape[0] < Expect: - # # Return a very large number if the G-matrix dimension is actually smaller than expected. - # # This can happen if we lost a DoF at the stage of forming the DLCs. - # return 1e15 U, S, VT = np.linalg.svd(G) return S[0]/S[-1] From 225f85db08d12c1da65d05f60f21be0376159403 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 21 Sep 2023 17:36:38 -0700 Subject: [PATCH 09/30] Improve newCartesian() performance --- geometric/internal.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 97c834f7..bf727d1c 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -2329,13 +2329,16 @@ def finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1): return xyzsave.flatten() fail_counter = 0 # Start iterations. + reuse_BG = False while True: microiter += 1 # Get generalized inverse of Wilson B-matrix - Bmat = self.wilsonB(xyz1) - Ginv = self.GInverse(xyz1) + if not reuse_BG: + Bmat = self.wilsonB(xyz1) + Ginv = self.GInverse(xyz1) + dxyz1 = multi_dot([Bmat.T,Ginv,dQ1.T]) # Get new Cartesian coordinates - dxyz = damp*multi_dot([Bmat.T,Ginv,dQ1.T]) + dxyz = damp*dxyz1 xyz2 = xyz1 + np.array(dxyz).flatten() rmsd = np.sqrt(np.mean((np.array(xyz2-xyz1).flatten())**2)) # The coordinates after the first step are saved no matter good or bad. @@ -2345,22 +2348,18 @@ def finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1): rmsdt = rmsd # Calculate the actual change in internal coordinates dQ_actual = self.calcDiff(xyz2, xyz1) - if hasattr(self, 'Prims'): - Prims = self.Prims - else: - Prims = self - dQ_Prims = Prims.calcDiff(xyz2, xyz1) # dQ1-dQ_actual is the remaining IC step needed to achieve convergence ndq = np.linalg.norm(dQ1-dQ_actual) if ndq > ndqt: # Bad result: |dQ1-dQ_actual| increases from previous iteration # Discard the IC step and retry with damping. - if verbose >= 2: logger.info(" newCartesian Iter: %i |dQ(Step)| = %.5e Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %.5e Gcond: %.5e (Bad)\n" - % (microiter, np.linalg.norm(dQ_actual), ndq, ndqt, rmsd, damp, self.GCond_Inverse)) + if verbose >= 2: logger.info(" newCartesian Iter: %i |dQ(Step)| = %.5e Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %s%.5e\x1b[0m Gcond: %.5e (Bad)\n" + % (microiter, np.linalg.norm(dQ_actual), ndq, ndqt, rmsd, "\x1b[91m" if damp < 0.1 else "", damp, self.GCond_Inverse)) damp /= 2 fail_counter += 1 xyz2 = xyz1.copy() dQ_actual *= 0 + reuse_BG = True else: if verbose >= 2: logger.info(" newCartesian Iter: %i |dQ(Step)| = %.5e Err-dQ (Best) = %.5e (%.5e) RMSD: %.5e Damp: %.5e Gcond: %.5e (Good)\n" % (microiter, np.linalg.norm(dQ_actual), ndq, ndqt, rmsd, damp, self.GCond_Inverse)) @@ -2369,6 +2368,7 @@ def finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1): rmsdt = rmsd ndqt = ndq xyzsave = xyz2.copy() + reuse_BG = False ndqs.append(ndq) rmsds.append(rmsd) # Check convergence / fail criteria @@ -2376,6 +2376,8 @@ def finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1): return finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1) if fail_counter >= 5: return finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1) + # if microiter == 10 and ndqt > 1e-1: + # return finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1) if microiter == 50: return finish(microiter, rmsdt, ndqt, xyzsave, xyz_iter1) # The next IC step required to reach convergence. From 72bc66cc4022a01dc11b85134d97a6a2ad8fd8c4 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 21 Sep 2023 18:00:47 -0700 Subject: [PATCH 10/30] Bug fix - check if rigid is in (None, False) --- geometric/internal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index bf727d1c..0f05897e 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -2442,7 +2442,7 @@ def __init__(self, molecule, connect=False, addcart=False, constraints=None, cva # connect = False, addcart = False corresponds to TRIC self.addcart = addcart self.connect_isolated = connect_isolated - if 'rigid' in kwargs and kwargs['rigid'] is not None: + if 'rigid' in kwargs and kwargs['rigid'] not in (None, False): raise RuntimeError('Do not use rigid molecules with PrimitiveInternalCoordinates') self.Internals = [] self.repulsions = repulsions @@ -4309,7 +4309,7 @@ def __init__(self, molecule, connect=False, addcart=False, **kwargs): self.cVals = [] if 'constraints' in kwargs and kwargs['constraints'] is not None: raise RuntimeError('Do not use constraints with Cartesian coordinates') - if 'rigid' in kwargs and kwargs['rigid'] is not None: + if 'rigid' in kwargs and kwargs['rigid'] not in (None, False): raise RuntimeError('Do not use rigid molecules with Cartesian coordinates') self.elem = molecule.elem self.ImageICs = [EmptyCoordinates(molecule[0], **kwargs)] From 5a68400071817b35f8d9a3d4b8a4da00cf49eb3d Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 21 Sep 2023 18:51:45 -0700 Subject: [PATCH 11/30] Fix a bug in ov(vi, vi) --- geometric/internal.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 0f05897e..58ceaa28 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -3906,7 +3906,11 @@ def build_dlc_1(self, xyz): # Perform Gram-Schmidt orthogonalization def ov(vi, vj): - return np.abs(multi_dot([vi, G, vj])) + answer = multi_dot([vi, G, vj]) + if (vi == vj).all(): + return max(0.0, answer) + else: + return answer if self.haveConstraints() or self.rigid: V = self.Vecs.copy() @@ -4006,7 +4010,12 @@ def remove_TR(self, xyz): # Carry out Gram-Schmidt orthogonalization # Define a function for computing overlap def ov(vi, vj): - return multi_dot([vi, G, vj]) + answer = multi_dot([vi, G, vj]) + if (vi == vj).all(): + return max(0.0, answer) + else: + return answer + V = self.Vecs.copy() nv = V.shape[1] Vnorms = np.array([np.sqrt(ov(V[:, ic], V[:, ic])) for ic in range(nv)]) From 83f32ed8d61455b443272e1f884afddcd51366ab Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 21 Sep 2023 18:52:09 -0700 Subject: [PATCH 12/30] Tweak two tests in test_openmm.py; still need one that breaks the IC --- geometric/tests/test_openmm.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/geometric/tests/test_openmm.py b/geometric/tests/test_openmm.py index 2d1f1d57..bb32bb57 100644 --- a/geometric/tests/test_openmm.py +++ b/geometric/tests/test_openmm.py @@ -19,8 +19,10 @@ def test_dlc_openmm_water3(localizer): Optimize the geometry of three water molecules using standard delocalized internal coordinates. The coordinate system will break down and have to be rebuilt. """ - progress = geometric.optimize.run_optimizer(engine='openmm', pdb=os.path.join(datad,'water3.pdb'), coordsys='dlc', input='tip3p.xml', - converge=['gmax', '1.0e-5'], check=50) + # LPW 2023-09-21 Increased check from 50 to 100; this was "perturbed" by the improvement to InternalCoordinates.newCartesian() . + progress = geometric.optimize.run_optimizer(engine='openmm', pdb=os.path.join(datad,'water3.pdb'), coordsys='dlc', input='tip3p.xml', + converge=['gmax', '1.0e-5'], check=100) + # The results here are in Angstrom # ref = np.array([[ 1.19172917, -1.71174316, 0.79961878], @@ -46,7 +48,8 @@ def test_dlc_openmm_water3(localizer): xdiff = (progress.xyzs[-1] - ref).flatten() rmsd, maxd = geometric.optimize.calc_drms_dmax(progress.xyzs[-1], ref, align=True) rmsd2, maxd2 = geometric.optimize.calc_drms_dmax(progress.xyzs[-1], ref2, align=True) - print("RMS / Max displacement from reference:", rmsd, maxd) + print("RMS / Max displacement from reference 1:", rmsd, maxd) + print("RMS / Max displacement from reference 2:", rmsd2, maxd2) # This test is a bit stochastic and doesn't converge to the same minimized geometry every time. # Check that the energy is 0.01 a.u. above reference. Not really the qm_energy, this is a misnomer assert progress.qm_energies[-1] < (e_ref + 0.01) @@ -62,14 +65,14 @@ def test_dlc_openmm_water12(localizer): The coordinate system is expected to break down and the optimizer should skip the optimization step after rebuilding the coordinate system. """ - progress = geometric.optimize.run_optimizer(engine='openmm', pdb=os.path.join(datad,'water12.pdb'), + progress = geometric.optimize.run_optimizer(engine='openmm', pdb=os.path.join(datad,'water12.pdb'), coordsys='dlc', input='tip3p.xml', maxiter=20, converge=['maxiter']) - - have_skip_step = False - for line in open('tip3p.log').readlines(): - if 'Skipping optimization step' in line: - have_skip_step = True - assert have_skip_step + # LPW 2023-09-21: The coordinate system no longer breaks down after the improvement to newCartesian(). + # have_skip_step = False + # for line in open('tip3p.log').readlines(): + # if 'Skipping optimization step' in line: + # have_skip_step = True + # assert have_skip_step @addons.using_openmm From cf358142311313622db4e1b566526d1ba9053d37 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Fri, 29 Sep 2023 11:16:56 -0700 Subject: [PATCH 13/30] Change default behavior of EqualSpacing back to the linear interpolator --- geometric/interpolate.py | 10 +++++----- geometric/molecule.py | 15 ++++++++++----- geometric/nifty.py | 3 ++- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index eea3f143..f0b36d41 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -741,10 +741,10 @@ def prealign_fragments(self): xyz0_stage = segment_reac[-1].copy() xyz1_stage = segment_prod[-1].copy() - M_reac_frames = EqualSpacing(M_reac, frames=self.n_frames//2+1, RMSD=False) - M_prod_frames = EqualSpacing(M_prod, frames=self.n_frames//2+1, RMSD=False) - M_reac_dx = EqualSpacing(M_reac, dx=0.1, RMSD=False) - M_prod_dx = EqualSpacing(M_prod, dx=0.1, RMSD=False) + M_reac_frames = EqualSpacing(M_reac, frames=self.n_frames//2+1, RMSD=False, spline=True) + M_prod_frames = EqualSpacing(M_prod, frames=self.n_frames//2+1, RMSD=False, spline=True) + M_reac_dx = EqualSpacing(M_reac, dx=0.1, RMSD=False, spline=True) + M_prod_dx = EqualSpacing(M_prod, dx=0.1, RMSD=False, spline=True) # Keep the shorter of the dmax=0.1 or n_frames//2 segments M_reac = M_reac_dx if len(M_reac_dx) < len(M_reac_frames) else M_reac_frames M_prod = M_prod_dx if len(M_prod_dx) < len(M_prod_frames) else M_prod_frames @@ -1106,7 +1106,7 @@ def splice_iterations(self): M_append = M_reac[:-1] + M + M_prod[1:] if len(M_append) != len(M): M_append.align() - M = EqualSpacing(M_append, frames=len(M), RMSD=False) + M = EqualSpacing(M_append, frames=len(M), RMSD=False, spline=True) else: if len(M_reac) > 1: M = M_reac[0] + M diff --git a/geometric/molecule.py b/geometric/molecule.py index 661aca11..077cd295 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -905,15 +905,18 @@ def arc(Mol, begin=None, end=None, RMSD=True, align=True): if RMSD: Arc = Mol.pathwise_rmsd(align) else: + # The commented line isn't desirable because sometimes it will give zero elements if Mol.xyzs[i+1] and Mol.xyzs[i] are the same, + # causing downstream codes such as EqualSpacing() to fail. + # Arc = [np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) for i in range(begin, end-1)]) Arc = [] for i in range(begin, end-1): dmax = np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) if dmax < 1e-15: dmax = 1e-15 Arc.append(dmax) - Arc = np.array(Arc) #[np.max([np.linalg.norm(Mol.xyzs[i+1][j]-Mol.xyzs[i][j]) for j in range(Mol.na)]) for i in range(begin, end-1)]) + Arc = np.array(Arc) return Arc -def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True): +def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): """ Equalize the spacing of frames in a trajectory with linear interpolation. This is done in a very simple way, first calculating the arc length @@ -954,9 +957,11 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True): xyznew = np.zeros((frames, Mol.na, 3)) for a in range(Mol.na): for i in range(3): - cs = scipy.interpolate.CubicSpline(ArcMolCumul, xyzold[:, a, i]) - xyznew[:,a,i] = cs(ArcMolEqual) - # xyznew[:,a,i] = np.interp(ArcMolEqual, ArcMolCumul, xyzold[:, a, i]) + if spline: + cs = scipy.interpolate.CubicSpline(ArcMolCumul, xyzold[:, a, i]) + xyznew[:,a,i] = cs(ArcMolEqual) + else: + xyznew[:,a,i] = np.interp(ArcMolEqual, ArcMolCumul, xyzold[:, a, i]) if len(xyzold) == len(xyznew): Mol1 = copy.deepcopy(Mol) else: diff --git a/geometric/nifty.py b/geometric/nifty.py index 4d7d4203..1181cbab 100644 --- a/geometric/nifty.py +++ b/geometric/nifty.py @@ -840,10 +840,11 @@ def destroyWorkQueue(): # Convenience function to destroy the Work Queue objects. global WORK_QUEUE, WQIDS if hasattr(WORK_QUEUE, '_free'): - print("Freeing Work Queue") WORK_QUEUE._free() WORK_QUEUE = None WQIDS = defaultdict(list) + print("Freed Work Queue, sleeping for 5 seconds...") + time.sleep(5) def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True, print_time=60): """ From 952a6bec4c0c8b133230c01e27454b2d5a732541 Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Fri, 29 Sep 2023 11:50:33 -0700 Subject: [PATCH 14/30] Interpolate parameter bug fixed --- geometric/params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geometric/params.py b/geometric/params.py index d46ad8cd..e1232a3f 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -265,7 +265,7 @@ def __init__(self, **kwargs): self.prealign = False else: # Number of frames that will be used for the interpolation. - self.nframes = kwargs.get('nframes', False) + self.nframes = kwargs.get('nframes', 50) # Whether we want to align the molecules in reactant and product frames. self.prealign = kwargs.get('prealign', False) # Whether we want to align the initial interpolate trajectory before optimization. From cfba8c50a5a008c429690bd5ec6b100e8390fc30 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 3 Oct 2023 14:24:19 -0700 Subject: [PATCH 15/30] Implement improved dihedral angle syncing --- geometric/internal.py | 164 ++++++++++++++++++++++++++------------- geometric/interpolate.py | 50 +++++++----- geometric/params.py | 2 + 3 files changed, 146 insertions(+), 70 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 58ceaa28..61a18d72 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -1,7 +1,7 @@ """ internal.py: Internal coordinate systems -Copyright 2016-2020 Regents of the University of California and the Authors +Copyright 2016-2023 Regents of the University of California and the Authors Authors: Lee-Ping Wang, Chenchen Song @@ -2034,8 +2034,9 @@ def convert_angstroms_degrees(prims, values): CacheWarning = False class InternalCoordinates(object): - def __init__(self): + def __init__(self, verbose=0): self.stored_wilsonB = OrderedDict() + self.verbose = verbose def addConstraint(self, cPrim, cVal): raise NotImplementedError("Constraints not supported with Cartesian coordinates") @@ -2434,8 +2435,8 @@ def rigid(self, val): self._rigid = val class PrimitiveInternalCoordinates(InternalCoordinates): - def __init__(self, molecule, connect=False, addcart=False, constraints=None, cvals=None, repulsions=[], transfers=[], connect_isolated=True, **kwargs): - super(PrimitiveInternalCoordinates, self).__init__() + def __init__(self, molecule, connect=False, addcart=False, constraints=None, cvals=None, repulsions=[], transfers=[], connect_isolated=True, verbose=0, **kwargs): + super(PrimitiveInternalCoordinates, self).__init__(verbose=verbose) # connect = True corresponds to "traditional" internal coordinates with minimum spanning bonds self.connect = connect # connect = False, addcart = True corresponds to HDLC @@ -2453,7 +2454,6 @@ def __init__(self, molecule, connect=False, addcart=False, constraints=None, cva self.transfers = transfers self.cPrims = [] self.cVals = [] - self.sync = [] self.Rotators = OrderedDict() self.elem = molecule.elem # List of fragments as determined by residue ID, distance criteria or bond order @@ -3122,50 +3122,117 @@ def second_derivatives(self, xyz): # 5) 3 return np.array(answer) - def calcDiff(self, xyz1, xyz2, sync=0): + def calcDiff(self, xyz1, xyz2, sync=False): """ Calculate difference in internal coordinates (coord1-coord2), accounting for changes in 2*pi of angles. """ answer = [] for i, Internal in enumerate(self.Internals): diff = Internal.calcDiff(xyz1, xyz2) - if sync != 0 and i in self.sync: - # Sometimes, e.g. during interpolation, multiple dihedrals can change in opposite directions - # resulting in clashes. By turning on the sync flag we ensure the change is always in the same direction. - if sync == 1 and diff < -np.pi/2: - # print("dihedral-sync %i: %50s %.3f -> %.3f" % (sync, Internal, diff, diff+2*np.pi)) - diff += 2*np.pi - elif sync == -1 and diff > np.pi/2: - # print("dihedral-sync %i: %50s %.3f -> %.3f" % (sync, Internal, diff, diff-2*np.pi)) - diff -= 2*np.pi - elif sync not in [-1, 1]: - raise RuntimeError("Invalid value of sync") answer.append(diff) - return np.array(answer) + answer = np.array(answer) + if sync: + answer = self.syncDihedrals(xyz1, xyz2, answer) + return answer + + def syncDihedrals(self, xyz1, xyz2, primdiff, distthre=1.0, linthre=0.95, diffthre=np.pi/3): + rotors = {} + val1 = self.calculate(xyz1) - def syncDihedrals(self, xyz1, xyz2): - answer = self.calcDiff(xyz1, xyz2) - dih_by_trip = {} for i, Prim in enumerate(self.Internals): if type(Prim) is Dihedral: - dih_by_trip.setdefault((Prim.a, Prim.b, Prim.c), []).append(i) - dih_by_trip.setdefault((Prim.d, Prim.c, Prim.b), []).append(i) - for (a, b, c), dihIdx in dih_by_trip.items(): - # Being conservative here; only sync dihedrals where a-b-c are bonded and angles are not linear. - if np.abs(Distance(a, b).calcDiff(xyz2, xyz1)) > 0.5: continue - if np.abs(Distance(b, c).calcDiff(xyz2, xyz1)) > 0.5: continue - if np.abs(np.cos(Angle(a, b, c).value(xyz1))) > 0.95: continue - if np.abs(np.cos(Angle(a, b, c).value(xyz2))) > 0.95: continue - diffs = np.array([answer[i] for i in dihIdx]) - if all(np.abs(diffs) > np.pi/2) and len(np.unique(np.sign(diffs))) > 1: - for i in dihIdx: - self.sync.append(i) - # print("Turning on dihedral sync for", self.Internals[i]) - # self.Internals[i].sync = True - # if answer[i] < 0: - # answer[i] += 2*np.pi - # print("Dihedrals with center bond %s:" % str(bond)) - # for i in dihIdx: - # print("%50s %.3f" % (self.Internals[i], answer[i])) - + if Prim.b < Prim.c: + a, b, c, d = (Prim.a, Prim.b, Prim.c, Prim.d) + else: + a, b, c, d = (Prim.d, Prim.c, Prim.b, Prim.a) + if np.abs(Distance(a, b).calcDiff(xyz2, xyz1)) > distthre: continue + if np.abs(Distance(b, c).calcDiff(xyz2, xyz1)) > distthre: continue + if np.abs(Distance(c, d).calcDiff(xyz2, xyz1)) > distthre: continue + if np.abs(np.cos(Angle(a, b, c).value(xyz1))) > linthre: continue + if np.abs(np.cos(Angle(a, b, c).value(xyz2))) > linthre: continue + if np.abs(np.cos(Angle(b, c, d).value(xyz1))) > linthre: continue + if np.abs(np.cos(Angle(b, c, d).value(xyz2))) > linthre: continue + rotors.setdefault((b, c), []).append(i) + + newdiff = primdiff.copy() + newdiffAlt = primdiff.copy() + + def dihedral_clash(dih_by_abc, dih_by_bcd, diff_): + clashPrims = [] + for dih_by_trip in (dih_by_abc, dih_by_bcd): + for iEnd, tPrims in dih_by_trip.items(): + # print("Dihedral angles pairs with %i, %i central bond and %i end atom:" % (b+1, c+1, iEnd+1)) + for i in range(len(tPrims)): + for j in range(i): + iPrim = tPrims[i] + jPrim = tPrims[j] + iDih = self.Internals[iPrim] + jDih = self.Internals[jPrim] + A = iDih.value(xyz1) + B = A + diff_[iPrim] + C = jDih.value(xyz1) + D = C + diff_[jPrim] + xzero = (C-A)/(B+C-D-A) + xplus = (C-A+2*np.pi)/(B+C-D-A) + xminus = (C-A-2*np.pi)/(B+C-D-A) + xarray = np.array([xzero, xplus, xminus]) + if ((xarray<1)*(xarray>0)).any(): + # print("\x1b[1;91mWarning:\x1b[0m %20s - %-20s may clash" % (self.Internals[iPrim], self.Internals[jPrim]), end=' ') + if self.verbose >= 2: + print("%20s - %-20s may clash" % (self.Internals[iPrim], self.Internals[jPrim]), end=' ') + print("% 10.5f -> % 10.5f ; % 10.5f -> % 10.5f" % (A*180/np.pi, B*180/np.pi, C*180/np.pi, D*180/np.pi)) + # print("Crossing points: % 10.5f % 10.5f % 10.5f" % (xzero, xplus, xminus)) + clashPrims.append((iDih, jDih)) + return clashPrims + + for (b, c), iPrims in rotors.items(): + if len(iPrims) < 2: continue + dih_by_abc = {} + dih_by_bcd = {} + for iPrim in iPrims: + Prim = self.Internals[iPrim] + if Prim.b < Prim.c: + a, b, c, d = (Prim.a, Prim.b, Prim.c, Prim.d) + else: + a, b, c, d = (Prim.d, Prim.c, Prim.b, Prim.a) + dih_by_abc.setdefault(a, []).append(iPrim) + dih_by_bcd.setdefault(d, []).append(iPrim) + # The "unchanged" delta(dihedral angles) that come from calcDiff + diffzero = primdiff[iPrims] + # Modified dihedral angle deltas where deltas larger than a threshold are forced to be all positive + diffplus = primdiff[iPrims] + 2*np.pi*(primdiff[iPrims]<0) * (np.abs(primdiff[iPrims])>diffthre) + # Modified dihedral angle deltas where deltas larger than a threshold are forced to be all negative + diffminus = primdiff[iPrims] - 2*np.pi*(primdiff[iPrims]>0) * (np.abs(primdiff[iPrims])>diffthre) + # Choose the set of deltas that has a smaller overall rotation. + # The "alternatives" correspond to rotating the other way. + print_changes = False + if np.allclose(diffzero, diffplus, atol=1e-6, rtol=0) or np.allclose(diffzero, diffminus, atol=1e-6, rtol=0): + print_changes = False + elif np.abs(np.mean(diffplus)) < np.abs(np.mean(diffminus)): + newdiff[iPrims] = diffplus + newdiffAlt[iPrims] = diffminus + print_changes = True + elif np.abs(np.mean(diffminus)) < np.abs(np.mean(diffplus)): + newdiff[iPrims] = diffminus + newdiffAlt[iPrims] = diffplus + print_changes = True + # Detect if any dihedral pairs that share 3 atoms will clash (i.e. have the same value) somewhere along the pathway + clash = dihedral_clash(dih_by_abc, dih_by_bcd, newdiff) + clash_alt = dihedral_clash(dih_by_abc, dih_by_bcd, newdiffAlt) + # If the clash is removed in the alternate direction, then go in the alternate direction. + # Otherwise the clash is unavoidable (think swapping two H's bonded to the same C in cyclopropane). + if clash: + if clash_alt: + if self.verbose >= 2: print("Warning: Unavoidable clash in %i dihedral angles." % len(clash)) + else: + if self.verbose >= 2: print("Warning: Dihedral clash in one direction, reversing.") + newdiff = newdiffAlt.copy() + clash = clash_alt + if print_changes: + print("Dihedral sign change detected around bond %i-%i %s" % (b+1, c+1, "with %i clashes" % len(clash) if len(clash) > 0 else "")) + for iPrim in iPrims: + Prim = self.Internals[iPrim] + if self.verbose >= 2: print("%-20s init final diff -> newdiff = % 10.5f % 10.5f % 10.5f -> % 10.5f" % (Prim, Prim.value(xyz1)*180/np.pi, Prim.value(xyz2)*180/np.pi, primdiff[iPrim]*180/np.pi, newdiff[iPrim]*180/np.pi)) + return newdiff + def GInverse(self, xyz): return self.GInverse_SVD(xyz) @@ -3215,19 +3282,15 @@ def addConstraint(self, cPrim, cVal=None, xyz=None): def reorderPrimitives(self): # Reorder primitives to be in line with cc's code newPrims = [] - newSync = [] for cPrim in self.cPrims: newPrims.append(cPrim) for typ in [Distance, DistanceDifference, ReducedDistance, Angle, LinearAngle, MultiAngle, OutOfPlane, Dihedral, MultiDihedral, CartesianX, CartesianY, CartesianZ, TranslationX, TranslationY, TranslationZ, RotationA, RotationB, RotationC]: for i, p in enumerate(self.Internals): if type(p) is typ and p not in self.cPrims: newPrims.append(p) - if i in self.sync: - newSync.append(len(newPrims)-1) if len(newPrims) != len(self.Internals): raise RuntimeError("Not all internal coordinates have been accounted for. You may need to add something to reorderPrimitives()") self.Internals = newPrims - self.sync = newSync def getConstraints_from(self, other): if other.haveConstraints(): @@ -3418,8 +3481,8 @@ def covalent(a, b): class DelocalizedInternalCoordinates(InternalCoordinates): - def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=False, constraints=None, cvals=None, rigid=False, remove_tr=False, cart_only=False, conmethod=0, repulsions=[], transfers=[], connect_isolated=True): - super(DelocalizedInternalCoordinates, self).__init__() + def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=False, constraints=None, cvals=None, rigid=False, remove_tr=False, cart_only=False, conmethod=0, repulsions=[], transfers=[], connect_isolated=True, verbose=0): + super(DelocalizedInternalCoordinates, self).__init__(verbose=verbose) # cart_only is just because of how I set up the class structure. if cart_only: return # Set the algorithm for constraint satisfaction. @@ -4106,9 +4169,6 @@ def calcDiff(self, coord1, coord2, sync=False): Answer = np.dot(PMDiff, self.Vecs) return np.array(Answer).flatten() - def syncDihedrals(self, xyz1, xyz2): - self.Prims.syncDihedrals(xyz1, xyz2) - def calculate(self, coords): """ Calculate the DLCs given the Cartesian coordinates. """ PrimVals = self.Prims.calculate(coords) @@ -4172,8 +4232,8 @@ class CartesianCoordinates(PrimitiveInternalCoordinates): This one does not support constraints, because that requires adding some primitive internal coordinates. """ - def __init__(self, molecule, **kwargs): - super(CartesianCoordinates, self).__init__(molecule) + def __init__(self, molecule, verbose=0, **kwargs): + super(CartesianCoordinates, self).__init__(molecule, verbose=verbose) self.Internals = [] self.cPrims = [] self.cVals = [] diff --git a/geometric/interpolate.py b/geometric/interpolate.py index f0b36d41..b76d67c2 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -35,6 +35,11 @@ def __init__(self, stream = sys.stdout): handler = RawStreamHandler() logger.addHandler(handler) +def write_coord_segment(M, coord_segment, output_filename): + M_copy = deepcopy(M) + M_copy.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coord_segment] + M_copy.write(output_filename) + # Need to refactor this so that it uses the minimum of the equilibrium bond length and the sum of covalent radii. def get_rab(M, i, j, bohr=True): if bohr: @@ -404,23 +409,20 @@ def dq_scale_prims(IC, dest_coords, curr_coords, scale_factors={}, sync=0): dq = np.dot(dqPrims, IC.Vecs) return dq -def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2): +def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2, verbose=0): if backward: tmp = curr_coords.copy() curr_coords = dest_coords.copy() dest_coords = tmp.copy() - sync = -1 - else: - sync = 1 - dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) + dq = IC.calcDiff(dest_coords, curr_coords, sync=1) coord_segment = [curr_coords] for k in range(nDiv): if rebuild_dlc: IC.build_dlc(curr_coords) - dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) - new_coords = IC.newCartesian(curr_coords, dq/(nDiv-k), verbose=0) + dq = IC.calcDiff(dest_coords, curr_coords, sync=1) + new_coords = IC.newCartesian(curr_coords, dq/(nDiv-k), verbose=verbose) else: - new_coords = IC.newCartesian(curr_coords, dq/nDiv, verbose=0) + new_coords = IC.newCartesian(curr_coords, dq/nDiv, verbose=verbose) coord_segment.append(new_coords) curr_coords = new_coords.copy() _, endpt_err = calc_drms_dmax(curr_coords, dest_coords, align=True) @@ -494,7 +496,7 @@ def print_map(mtx, title, colorscheme=0): print() class Interpolator(object): - def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, do_prealign=False): + def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, do_prealign=False, verbose=0): # The input molecule; it should not be modified. self.M_in = deepcopy(M_in) # Check the length of the input molecule @@ -549,6 +551,7 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals self.atom_pairs = [list(pair) for pair in atom_pairs.copy()] # The smaller of the distances at either endpoint for each pair self.min_enddists = np.min(np.array(self.enddists), axis=0) + self.verbose = verbose def get_splice_length(self): # Determine the splice length @@ -721,8 +724,6 @@ def prealign_fragments(self): xyz1_stage = xyz1.copy() IC0 = DelocalizedInternalCoordinates(M0, build=True, connect=False, addcart=False, connect_isolated=False) IC1 = DelocalizedInternalCoordinates(M1, build=True, connect=False, addcart=False, connect_isolated=False) - IC0.syncDihedrals(xyz0, xyz1) - IC1.syncDihedrals(xyz0, xyz1) self.endICs = [IC0, IC1] M_reac = None M_prod = None @@ -845,7 +846,6 @@ def initial_guess_one_method(self, method = 0): # Finalize the IC system. IC.Prims.Internals = newPrims IC.Prims.reorderPrimitives() - IC.Prims.syncDihedrals(xyz1, xyz0) IC.build_dlc(xyz_IC) # print("=== Primitives for method %i ===" % method) # print(IC.Prims) @@ -997,7 +997,6 @@ def build_IC_segments(self): for i in range(len(M)): M_i = M[i] IC = DelocalizedInternalCoordinates(M_i, build=True, connect=False, addcart=False, transfers=transfers, connect_isolated=False) - IC.Prims.syncDihedrals(xyzs[-1], xyzs[0]) IC.build_dlc(xyzs[i]) ICs.append(IC) @@ -1057,11 +1056,27 @@ def splice_iterations(self): nDiv = j-i success = False for c, (iic, IC) in enumerate(segment_to_ICs[a]): + if self.verbose: + print("In splice_iterations: c = %i, iic = %i, IC = %s" % (c, iic, IC.__repr__())) + if self.verbose >= 2: + vali = IC.Prims.calculate(xyzi) + valj = IC.Prims.calculate(xyzj) + primDiff = IC.Prims.calcDiff(xyzj, xyzi, sync=1) + for iPrim in range(len(IC.Prims.Internals)): + print("%3i %25s % 9.5f % 9.5f % 9.5f" % (iPrim, IC.Prims.Internals[iPrim], vali[iPrim], valj[iPrim], primDiff[iPrim])) attempt = 0 - coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False) + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False, verbose=self.verbose) + if self.verbose: + print("forward direction: endpt_err = %8.3f success = %i" % (endpt_err, success)) + if self.verbose >= 2: + write_coord_segment(M, coord_segment, "cycle%i_segment%i_IC%i_attempt%i.xyz" % (cycle, a, c, attempt)) if success: break attempt = 1 - coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False) + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False, verbose=self.verbose) + if self.verbose: + print("backward direction: endpt_err = %8.3f success = %i" % (endpt_err, success)) + if self.verbose >= 2: + write_coord_segment(M, coord_segment, "cycle%i_segment%i_IC%i_attempt%i.xyz" % (cycle, a, c, attempt)) if success: backwards[a][c] = not backwards[a][c] break @@ -1144,7 +1159,6 @@ def splice_iterations(self): new_repulsions = sorted(list(set(IC.Prims.repulsions).union(set(clash_pairs)))) IC1 = DelocalizedInternalCoordinates(M_in[iic], build=True, connect=False, addcart=False, repulsions=new_repulsions, transfers=IC.Prims.transfers, connect_isolated=False) - IC1.syncDihedrals(xyz1, xyz0) IC1.build_dlc(M_in.xyzs[iic].flatten()*ang2bohr) rebuilt_segment_to_ICs.append((iic, IC1)) segment_to_ICs[a] = rebuilt_segment_to_ICs @@ -1183,9 +1197,9 @@ def run_workflow(self): def main(): args = parse_interpolate_args(sys.argv[1:]) params = InterpParams(**args) - M0 = Molecule(sys.argv[1]) + M0 = Molecule(args['input']) #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, do_prealign=False) - interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align, do_prealign=params.prealign) + interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align, do_prealign=params.prealign, verbose=params.verbose) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/params.py b/geometric/params.py index f599631f..d318397b 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -570,4 +570,6 @@ def parse_interpolate_args(*args): if v is not None: args_dict[k] = v + print("LPW debug:", args_dict) + return args_dict From 09d48a96a3b937e7f681a9df7002bddd34ca2063 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 3 Oct 2023 17:29:16 -0700 Subject: [PATCH 16/30] Enable warning messages for dihedral angle clash --- geometric/internal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 61a18d72..37a58c73 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -3221,9 +3221,9 @@ def dihedral_clash(dih_by_abc, dih_by_bcd, diff_): # Otherwise the clash is unavoidable (think swapping two H's bonded to the same C in cyclopropane). if clash: if clash_alt: - if self.verbose >= 2: print("Warning: Unavoidable clash in %i dihedral angles." % len(clash)) + print("Warning: Unavoidable clash in %i dihedral angles." % len(clash)) else: - if self.verbose >= 2: print("Warning: Dihedral clash in one direction, reversing.") + print("Warning: Dihedral clash in one direction, reversing.") newdiff = newdiffAlt.copy() clash = clash_alt if print_changes: From 0a7326d21567f9c694488e330ac547c735a59d1f Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sun, 8 Oct 2023 11:28:02 -0700 Subject: [PATCH 17/30] Default align reac/prod; split large dihedral angle changes --- geometric/internal.py | 10 ++++--- geometric/interpolate.py | 60 ++++++++++++++++++++++++++++++++++------ geometric/molecule.py | 11 ++++++-- geometric/params.py | 2 +- 4 files changed, 68 insertions(+), 15 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 37a58c73..077a98c6 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -2803,7 +2803,7 @@ def makePrimitives(self, molecule, connect, addcart): aline.append(az) if atom_lines == atom_lines0: break atom_lines_uniq = [] - for i in atom_lines: # + for i in atom_lines: if tuple(i) not in set(atom_lines_uniq): atom_lines_uniq.append(tuple(i)) lthree = [l for l in atom_lines_uniq if len(l) > 2] @@ -3221,13 +3221,15 @@ def dihedral_clash(dih_by_abc, dih_by_bcd, diff_): # Otherwise the clash is unavoidable (think swapping two H's bonded to the same C in cyclopropane). if clash: if clash_alt: - print("Warning: Unavoidable clash in %i dihedral angles." % len(clash)) + if not hasattr(self, 'dihedral_warning_printed'): + print("Warning: Unavoidable clash in %i dihedral angles." % len(clash)) + self.dihedral_warning_printed = True else: - print("Warning: Dihedral clash in one direction, reversing.") + if self.verbose: print("Warning: Dihedral clash in one direction, reversing.") newdiff = newdiffAlt.copy() clash = clash_alt if print_changes: - print("Dihedral sign change detected around bond %i-%i %s" % (b+1, c+1, "with %i clashes" % len(clash) if len(clash) > 0 else "")) + if self.verbose: print("Dihedral sign change detected around bond %i-%i %s" % (b+1, c+1, "with %i clashes" % len(clash) if len(clash) > 0 else "")) for iPrim in iPrims: Prim = self.Internals[iPrim] if self.verbose >= 2: print("%-20s init final diff -> newdiff = % 10.5f % 10.5f % 10.5f -> % 10.5f" % (Prim, Prim.value(xyz1)*180/np.pi, Prim.value(xyz2)*180/np.pi, primdiff[iPrim]*180/np.pi, newdiff[iPrim]*180/np.pi)) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index b76d67c2..3854368d 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -409,17 +409,17 @@ def dq_scale_prims(IC, dest_coords, curr_coords, scale_factors={}, sync=0): dq = np.dot(dqPrims, IC.Vecs) return dq -def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2, verbose=0): +def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2, verbose=0, sync=0): if backward: tmp = curr_coords.copy() curr_coords = dest_coords.copy() dest_coords = tmp.copy() - dq = IC.calcDiff(dest_coords, curr_coords, sync=1) + dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) coord_segment = [curr_coords] for k in range(nDiv): if rebuild_dlc: IC.build_dlc(curr_coords) - dq = IC.calcDiff(dest_coords, curr_coords, sync=1) + dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) new_coords = IC.newCartesian(curr_coords, dq/(nDiv-k), verbose=verbose) else: new_coords = IC.newCartesian(curr_coords, dq/nDiv, verbose=verbose) @@ -870,18 +870,26 @@ def initial_guess_one_method(self, method = 0): clash_pairs = [] nDiv = n_frames - 1 for clash_round in range(5): - coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=True) + coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=True, sync=1) dq_segment = np.array([IC_Chk.calcDiff(coord_segment[i], coord_segment[i+1]) for i in range(n_frames-1)]) + # Don't evaluate dq_segment for primitives that have a diagnostic of 2 or higher because the result will be invalid. + prim_diag = np.max([[Prim.diagnostic(coord_segment[i])[0] for Prim in IC_Chk.Internals] for i in range(n_frames)], axis=0) # Setting to -1e-6 essentially ignores the checking primitives if they evaluate to +/- inf. + dq_segment[:, prim_diag >= 2] = -1e-6 dq_segment[dq_segment == -np.inf] = -1e-6 dq_segment[dq_segment == np.inf] = 1e-6 dq_max = np.max(np.abs(dq_segment), axis=0) dq_med = np.median(np.abs(dq_segment), axis=0) dq_med[dq_med<0.01] = 0.01 dq_ratio = max(dq_max/dq_med) - + M.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coord_segment] M.comms = ['Interpolated frame %i' % k for k in range(len(coord_segment))] + + # Perform equal spacing of the pathway according to the largest change in any primitive coordinate + dq_relative = dq_segment / dq_med[np.newaxis, :] + M = EqualSpacing(M, RMSD=False, align=False, custom=np.max(dq_relative, axis=1)) + # Fail if the endpt_err is greater than the threshold if endpt_err > err_thre: M.align() @@ -1010,6 +1018,42 @@ def build_IC_segments(self): print_map(diag_matrix, "IC diagnostic map", colorscheme=0) segments = find_path(diag_matrix) + + # For each point k on the path, compute the maximum dihedral angle change for i ... k and k ... j + # And if the dihedral angle change i ... j is greater than the threshold, split at k + # Repeat these steps until there are no dihedral angle changes larger than the threshold + # in the intervals i ... k1 ... k2 ... j . + while True: + splitted = False + new_segments = [] + for isegment, (i, j) in enumerate(segments): + dih_ij = [] + for k in range(i, j+1): + for Prim in ICs[k].Prims.Internals: + if type(Prim) is Dihedral and Prim not in dih_ij: + dih_ij.append(Prim) + if len(dih_ij) == 0: + new_segments.append((i, j)) + continue + dihArcs = [[0.0 for Prim in dih_ij]] + for k in range(i, j-1): + dihArcs.append([np.abs(Prim.calcDiff(xyzs[k+1], xyzs[k]) * (Prim.diagnostic(xyzs[k+1])[0] < 2)) for Prim in dih_ij]) + dihArcs = np.cumsum(np.array(dihArcs), axis=0) + dMax = np.max(np.abs(dihArcs)) + dThre = 5*np.pi/6 + if dMax > dThre: + dSplits = [] + for k in range(j-i): + dSplits.append(max(np.max(dihArcs[k]), np.max(dihArcs[-1]-dihArcs[k]))) + split_k = i + np.argmin(dSplits) + print("maximum dihedral difference between %i-%i is %.3f; splitting at %i (reduce to %.3f)" % (i, j, dMax, split_k, np.min(dSplits))) + new_segments.append((i, split_k)) + new_segments.append((split_k+1, j)) + else: + new_segments.append((i, j)) + if segments == new_segments: break + segments = new_segments[:] + segments_filled, segments_spliced = fill_splice_segments(segments, splice_length) print("The frame numbers of the spliced segments are:") print(segments_spliced) @@ -1061,18 +1105,18 @@ def splice_iterations(self): if self.verbose >= 2: vali = IC.Prims.calculate(xyzi) valj = IC.Prims.calculate(xyzj) - primDiff = IC.Prims.calcDiff(xyzj, xyzi, sync=1) + primDiff = IC.Prims.calcDiff(xyzj, xyzi, sync=0) for iPrim in range(len(IC.Prims.Internals)): print("%3i %25s % 9.5f % 9.5f % 9.5f" % (iPrim, IC.Prims.Internals[iPrim], vali[iPrim], valj[iPrim], primDiff[iPrim])) attempt = 0 - coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False, verbose=self.verbose) + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False, verbose=self.verbose, sync=0) if self.verbose: print("forward direction: endpt_err = %8.3f success = %i" % (endpt_err, success)) if self.verbose >= 2: write_coord_segment(M, coord_segment, "cycle%i_segment%i_IC%i_attempt%i.xyz" % (cycle, a, c, attempt)) if success: break attempt = 1 - coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False, verbose=self.verbose) + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False, verbose=self.verbose, sync=0) if self.verbose: print("backward direction: endpt_err = %8.3f success = %i" % (endpt_err, success)) if self.verbose >= 2: diff --git a/geometric/molecule.py b/geometric/molecule.py index 077cd295..44070107 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -916,7 +916,7 @@ def arc(Mol, begin=None, end=None, RMSD=True, align=True): Arc = np.array(Arc) return Arc -def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): +def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False, custom=None): """ Equalize the spacing of frames in a trajectory with linear interpolation. This is done in a very simple way, first calculating the arc length @@ -935,6 +935,8 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): Return a Molecule object with this number of frames. RMSD : bool Use RMSD in the arc length calculation. + custom : np.array or None + Provide custom spacings between frames. Returns ------- @@ -943,7 +945,12 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): or with equally spaced frames. """ import scipy.interpolate - ArcMol = arc(Mol, RMSD=RMSD, align=align) + if type(custom) is np.ndarray: + ArcMol = custom.copy() + if ArcMol.shape != (len(Mol)-1, ): + raise RuntimeError("Custom spacing needs to match Molecule-length - 1") + else: + ArcMol = arc(Mol, RMSD=RMSD, align=align) ArcMolCumul = np.insert(np.cumsum(ArcMol), 0, 0.0) if frames != 0 and dx != 0: logger.error("Provide dx or frames or neither") diff --git a/geometric/params.py b/geometric/params.py index d318397b..503a97e1 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -269,7 +269,7 @@ def __init__(self, **kwargs): # Whether we want to align the molecules in reactant and product frames. self.prealign = kwargs.get('prealign', False) # Whether we want to align the initial interpolate trajectory before optimization. - self.align = kwargs.get('align', False) + self.align = kwargs.get('align', True) # Verbose printout self.verbose = kwargs.get('verbose', 0) From 3bbf66739cd274840b906a9667a8858eefbb652d Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Mon, 9 Oct 2023 10:07:09 -0700 Subject: [PATCH 18/30] interpolation parameter bug fixed --- geometric/params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geometric/params.py b/geometric/params.py index 503a97e1..d952b2be 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -265,7 +265,7 @@ def __init__(self, **kwargs): self.prealign = False else: # Number of frames that will be used for the interpolation. - self.nframes = kwargs.get('nframes', False) + self.nframes = kwargs.get('nframes', 50) # Whether we want to align the molecules in reactant and product frames. self.prealign = kwargs.get('prealign', False) # Whether we want to align the initial interpolate trajectory before optimization. From 1d037eedc2fec70e336623694eaed19556511902 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 12 Oct 2023 00:01:44 -0700 Subject: [PATCH 19/30] Lower diagnostic thresholds for dihedrals --- geometric/internal.py | 2 +- geometric/interpolate.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 077a98c6..34f3d2db 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -1741,7 +1741,7 @@ def diagnostic(self, xyz): # result = 3 or above: failure of the IC system is imminent # result = 2: IC may fail for small perturbations from the provided geometry # result = 1: IC is unsuitable for the provided geometry, should pay attention - LinThres = [0.95, 0.98, 0.995] + LinThres = [np.abs(np.cos(theta*np.pi/180)) for theta in [155, 165, 175]] descrips = ["close to linear", "very close to linear", "extremely close to linear"] a = self.a b = self.b diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 3854368d..c59b53e1 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -1028,7 +1028,7 @@ def build_IC_segments(self): new_segments = [] for isegment, (i, j) in enumerate(segments): dih_ij = [] - for k in range(i, j+1): + for k in [i, j]: for Prim in ICs[k].Prims.Internals: if type(Prim) is Dihedral and Prim not in dih_ij: dih_ij.append(Prim) @@ -1183,8 +1183,13 @@ def splice_iterations(self): else: increase_count += 1 decrease_count = 0 - print(">> Mismatch fails to decrease; increasing damping") - damping *= 0.8 + if damping <= 0.2: + print(">> Mismatch fails to decrease but damping at maximum (% .3e)" % damping) + raise RuntimeError + else: + damping = max(0.2, damping*0.8) + print(">> Mismatch fails to decrease; increasing damping (currently % .3e)" % damping) + last_max_mismatch = max_mismatch segment_endpoints, _ = splice_segment_endpoints(coord_segments, segment_endpoints, segments_spliced, splice_length, damping, verbose=False) From b9e0a0a98c48af1f2ed711da8baaf9d33fce7bc2 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 18 Oct 2023 11:58:07 -0700 Subject: [PATCH 20/30] Add group_atoms_by_topology() method --- geometric/molecule.py | 133 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/geometric/molecule.py b/geometric/molecule.py index 44070107..609f24dd 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -2291,6 +2291,139 @@ def build_topology(self, force_bonds=True, bond_order=0.0, metal_bo_factor=0.5, # Deprecated in networkx 2.2 # self.molecules = list(nx.connected_component_subgraphs(G)) + def group_atoms_by_topology(self, bond_lim=10, verbose=False): + """ + Determine topologically equivalent atoms. + + bond_lim: Limit on the size of the fingerprint (default 10 bonds). + + Works like this: Suppose we have a molecular graph given as + H H H + \ \ / + H--C--C##C--H C==C==C + / / \ + H H H + + with numbers given as + + 4 9 13 + \ \ / + 5--3--2##0--1 8==7==11 + / / \ + 6 10 12 + + The goal is to return a list of chemically equivalent atom groups as: + [[0], [1], [2], [3], [4, 5, 6], [7], [8, 11], [9, 10, 12, 13]] + + (Note: This grouping is by connectivity only, and does not distinguish + between bonds of different orders, enantiomers or cis/trans isomerism.) + + If we assign a fingerprint to an atom, given by a list where element 'i' + is the concatenated symbols of the atoms separated by 'i' bonds from that atom, + the fingerprints for the above system would be: + + 0 C-CH-C-HHH + 1 H-C-C-C-HHH + 2 C-CC-HHHH + 3 C-CHHH-C-H + 4 H-C-CHH-C-H + 5 H-C-CHH-C-H + 6 H-C-CHH-C-H + 7 C-CC-HHHH + 8 C-CHH-C-HH + 9 H-C-CH-C-HH + 10 H-C-CH-C-HH + 11 C-CHH-C-HH + 12 H-C-CH-C-HH + 13 H-C-CH-C-HH + + and a corresponding grouping of: + [[0], [1], [2, 7], [3], [4, 5, 6], [8, 11], [9, 10, 12, 13]] + + This doesn't distinguish atoms uniquely because atoms 2 and 7 (the central carbons) + have the same fingerprint even though they are chemically different. + This is because the fingerprint does not take into account the different topology + around carbons 0, 3 (i.e. bonded to 1 and 3 Hs respectively) and carbons + 8, 11 (i.e. both bonded to two Hs). + + The solution is to do another iteration of the above but using the fingerprints + in place of the atomic symbols. This creates new, larger fingerprints as: + + 0 C-CH-C-HHH_C-CC-HHHH,H-C-C-C-HHH_C-CHHH-C-H_H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H + 1 H-C-C-C-HHH_C-CH-C-HHH_C-CC-HHHH_C-CHHH-C-H_H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H + 2 C-CC-HHHH_C-CH-C-HHH,C-CHHH-C-H_H-C-C-C-HHH,H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H + 3 C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 4 H-C-CHH-C-H_C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 5 H-C-CHH-C-H_C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 6 H-C-CHH-C-H_C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 7 C-CC-HHHH_C-CHH-C-HH,C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH,H-C-CH-C-HH,H-C-CH-C-HH + 8 C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 9 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 10 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 11 C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 12 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 13 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + + and a corresponding grouping of: + [[0], [1], [2], [3], [4, 5, 6], [7], [8, 11], [9, 10, 12, 13]] + + Now atoms 2 and 7 are considered to be different, and the differences in the topology + of the atoms bonded to atoms 2 and 7 are built into the representation. It encodes that + atom 2 is bonded to atoms C-CH-C-HHH,C-CHHH-C-H + atom 7 is bonded to atoms C-CHH-C-HH,C-CHH-C-HH. + Note the separator characters have changed from '', '-' to ',', '_' as well. + + This process can be repeated until a self-consistent set of groups is found. + An upper limit on the number of cycles is imposed because the label size can blow up + very quickly. + """ + + ids = [self.elem[i] for i in range(self.na)] + old_groups = None + cycle = 0 + + bond_seps = ['-', '_', '#', '@', '$'] + id_seps = ['', ',', '.', ';', ':'] + while True: + group_dict = {} + for i in range(self.na): + group_dict.setdefault(ids[i], []).append(i) + groups = sorted(list(group_dict.values())) + if groups == old_groups: break + if cycle == 5: + raise RuntimeError("Not designed to go for more than 5 cycles.") + if verbose: + logger.info("===Cycle %i===\n" % cycle) + logger.info("IDs of each atom:\n") + for i in range(self.na): + logger.info("%3i %s\n" % (i, ids[i])) + logger.info("Atom groups:\n") + logger.info(str(groups)+"\n") + + # Dictionary that looks like: + # { anum1 : {anum2 : path_12}} + spl = dict(nx.all_pairs_shortest_path_length(self.topology)) + + new_ids = [] + + for key in sorted(spl.keys()): + val = spl[key] + fp_dict = {} + for key2, val2 in val.items(): + if val2 > (bond_lim-cycle): continue + fp_dict.setdefault(val2, []).append(ids[key2]) + fp_dict[val2].sort() + fp_list = [] + for dist in sorted(fp_dict.keys()): + fp_list.append(id_seps[cycle].join(fp_dict[dist])) + + new_ids.append(bond_seps[cycle].join(fp_list)) + + old_groups = copy.deepcopy(groups) + ids = new_ids[:] + cycle += 1 + return groups + def distance_matrix(self, pbc=True): """ Obtain distance matrix between all pairs of atoms. """ AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=int), From e2da27f979ef8e7ae3331611d631d3ff6bb0e59f Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 19 Oct 2023 08:10:58 -0700 Subject: [PATCH 21/30] Don't quit when damping hits 0.2; tweak dihedral diagnostic; align_frags, align_system keywords --- geometric/internal.py | 2 +- geometric/interpolate.py | 24 ++++++++++++------------ geometric/params.py | 10 +++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/geometric/internal.py b/geometric/internal.py index 34f3d2db..a7cf2bed 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -1758,7 +1758,7 @@ def diagnostic(self, xyz): thre = LinThres[i] desc = descrips[i] if cos1 > thre and cos2 > thre: - result = i+2 + result = i+1 message = "%s and %s angles are %s (|cos(theta)| %.4f, %.4f > %.4f threshold)" % (repr(Ang1), repr(Ang2), desc, cos1, cos2, thre) elif cos1 > thre: result = i+1 diff --git a/geometric/interpolate.py b/geometric/interpolate.py index c59b53e1..0acc11d3 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -155,7 +155,7 @@ def find_path(mtx, thre=1): # If the starting point or end point is blocked, we can't escape. if not okay[n-1, n-1] or not okay[0, 0]: - raise RuntimeError("Spoo!") + raise RuntimeError("Failed to find a path through the diagnostic map!") okay2 = okay.copy() for niter in range(n**2): @@ -496,7 +496,7 @@ def print_map(mtx, title, colorscheme=0): print() class Interpolator(object): - def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, do_prealign=False, verbose=0): + def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, align_frags=False, verbose=0): # The input molecule; it should not be modified. self.M_in = deepcopy(M_in) # Check the length of the input molecule @@ -504,7 +504,7 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals if use_midframes: self.do_init = False assert len(self.M_in) >= 5, "When setting use_midframes, input molecule must have >= 5 structures" - assert do_prealign is False, "Do not pass do_prealign if using intermediate frames" + assert align_frags is False, "Do not pass align_frags if using intermediate frames" assert n_frames == 0, "Do not pass n_frames when setting use_midframes" self.n_frames = len(self.M_in) else: @@ -517,9 +517,9 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals self.M = deepcopy(self.M_in) self.n_atoms = len(self.M.elem) # Whether to pre-align the fragments in the reactant and product structure - self.do_prealign = do_prealign + self.align_frags = align_frags # Whether to align the input frames to the reactant structure upon input and before writing output - self.align_system = True if self.do_prealign else align_system + self.align_system = True if self.align_frags else align_system if self.align_system: self.M.align() # Atom pairs that are not bonded (resp. bonded) in either the reactant or product @@ -1183,11 +1183,11 @@ def splice_iterations(self): else: increase_count += 1 decrease_count = 0 - if damping <= 0.2: - print(">> Mismatch fails to decrease but damping at maximum (% .3e)" % damping) - raise RuntimeError + min_damping = 0.2 + if damping <= min_damping: + print(">> Mismatch fails to decrease but damping at maximum (% .3e) - continuing" % damping) else: - damping = max(0.2, damping*0.8) + damping = max(min_damping, damping*0.8) print(">> Mismatch fails to decrease; increasing damping (currently % .3e)" % damping) last_max_mismatch = max_mismatch @@ -1230,7 +1230,7 @@ def split_IC_segments(self, fail_segment): self.segment_endpoints = get_segment_endpoints(self.M, self.segments_spliced) def run_workflow(self): - if self.do_prealign: + if self.align_frags: self.prealign_fragments() if self.do_init: self.initial_guess() @@ -1247,8 +1247,8 @@ def main(): args = parse_interpolate_args(sys.argv[1:]) params = InterpParams(**args) M0 = Molecule(args['input']) - #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, do_prealign=False) - interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align, do_prealign=params.prealign, verbose=params.verbose) + #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, align_frags=False) + interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align_system, align_frags=params.align_frags, verbose=params.verbose) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/params.py b/geometric/params.py index d952b2be..41b59162 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -262,14 +262,14 @@ def __init__(self, **kwargs): self.optimize = kwargs.get('optimize', False) if self.optimize: self.nframes = 0 - self.prealign = False + self.align_frags = False else: # Number of frames that will be used for the interpolation. self.nframes = kwargs.get('nframes', 50) # Whether we want to align the molecules in reactant and product frames. - self.prealign = kwargs.get('prealign', False) + self.align_frags = kwargs.get('align_frags', False) # Whether we want to align the initial interpolate trajectory before optimization. - self.align = kwargs.get('align', True) + self.align_system = kwargs.get('align_system', True) # Verbose printout self.verbose = kwargs.get('verbose', 0) @@ -550,8 +550,8 @@ def parse_interpolate_args(*args): 'The followting two arguments, nframes and prealign, will be ignored.\n' 'The input xyz file must contain 5 or more structures.\n') grp_univ.add_argument('--nframes', type=int, help='Number of frames, needs to be bigger than 5, that will be used to interpolate, default 50\n') - grp_univ.add_argument('--prealign', type=str2bool, help='Provide "yes" to align the molecules in reactant and product before the interpolation.\n') - grp_univ.add_argument('--align', type=str2bool, help='Provide "yes" to align the initial interpolated trajectory.\n') + grp_univ.add_argument('--align_frags', type=str2bool, help='Provide "yes" to align fragments in reactant and product structure prior to interpolation.\n') + grp_univ.add_argument('--align_system', type=str2bool, help='Provide "yes" to align the entire system prior to interpolation.\n') grp_output = parser.add_argument_group('output', 'Control the format and amount of the output') grp_output.add_argument('--prefix', type=str, help='Specify a prefix for log file and temporary directory.\n' From 0e0f81c9c54a0cc63ca6274904adf247e1e0d3e8 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 24 Oct 2023 13:49:15 -0700 Subject: [PATCH 22/30] Implement extrapolation feature --- geometric/interpolate.py | 56 ++++++++++++++++++++++++++++++++++++++-- geometric/molecule.py | 4 +-- geometric/params.py | 8 +++++- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 0acc11d3..593e318c 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -431,6 +431,24 @@ def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebu success = endpt_err < err_thre return coord_segment, endpt_err, success +def extrapolate_segment(IC, curr_coords, dest_coords, nDiv, nDivExt, backward=False, verbose=0, sync=0): + if backward: + tmp = curr_coords.copy() + curr_coords = dest_coords.copy() + dest_coords = tmp.copy() + dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) + dq *= nDivExt/nDiv + # For extrapolation, we want to go beyond dest_coords + curr_coords = dest_coords.copy() + coord_segment = [curr_coords] + for k in range(nDivExt): + new_coords = IC.newCartesian(curr_coords, dq/nDivExt, verbose=verbose) + coord_segment.append(new_coords) + curr_coords = new_coords.copy() + if backward: + coord_segment = coord_segment[::-1] + return coord_segment + def get_segment_endpoints(M, segments_spliced): segment_endpoints = [] for a, (i, j) in enumerate(segments_spliced): @@ -496,7 +514,7 @@ def print_map(mtx, title, colorscheme=0): print() class Interpolator(object): - def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, align_frags=False, verbose=0): + def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, align_frags=False, extrapolate=None, verbose=0): # The input molecule; it should not be modified. self.M_in = deepcopy(M_in) # Check the length of the input molecule @@ -552,6 +570,11 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals # The smaller of the distances at either endpoint for each pair self.min_enddists = np.min(np.array(self.enddists), axis=0) self.verbose = verbose + # Whether to extrapolate endpoints - give a number of frames + if extrapolate is not None: + assert type(extrapolate) is tuple + assert len(extrapolate) == 2 + self.extrapolate = extrapolate def get_splice_length(self): # Determine the splice length @@ -1088,6 +1111,7 @@ def splice_iterations(self): damping = 1.0 backwards = [[False for c in range(len(segment_to_ICs[a]))] for a in range(len(segments_spliced))] clash_pairs = [] + M_extra = {} for cycle in range(n_cycles): coord_segments = [] endpt_errs = [] @@ -1125,6 +1149,21 @@ def splice_iterations(self): backwards[a][c] = not backwards[a][c] break if success: + # At this point, IC is the one that was used to successfully interpolate the segment. + if self.extrapolate: + if a == 0 and self.extrapolate[0] > 0: + nDivExt = self.extrapolate[0] + extra = extrapolate_segment(IC, xyzi, xyzj, nDiv, nDivExt, backward=True, verbose=self.verbose, sync=0) + M_extra[0] = copy.deepcopy(M) + M_extra[0].xyzs = [x.reshape(-1, 3)*bohr2ang for x in extra[:-1]] + M_extra[0].comms = ["Extrapolated from head; frame %i/%i" % (nDivExt-i, nDivExt) for i in range(nDivExt)] + if a == (len(segments_spliced) - 1) and self.extrapolate[1] > 0: + nDivExt = self.extrapolate[1] + extra = extrapolate_segment(IC, xyzi, xyzj, nDiv, nDivExt, backward=False, verbose=self.verbose, sync=0) + M_extra[1] = copy.deepcopy(M) + M_extra[1].xyzs = [x.reshape(-1, 3)*bohr2ang for x in extra[1:]] + M_extra[1].comms = ["Extrapolated from tail; frame %i/%i" % (i+1, nDivExt) for i in range(nDivExt)] + # Reorder the segment_to_ICs so the successful one is at the front. reordered_segment_to_ICs = [] for cc, (iic, IC) in enumerate(segment_to_ICs[a]): @@ -1172,7 +1211,20 @@ def splice_iterations(self): if len(M_prod) > 1: M = M + M_prod[-1] M.align() + M.write("interpolated_splice.xyz") + if self.extrapolate: + M_with_extra = deepcopy(M) + refidx = 0 + if self.extrapolate[0] > 0: + refidx += self.extrapolate[0] + M_with_extra = M_extra[0] + M_with_extra + if self.extrapolate[1] > 0: + M_with_extra = M_with_extra + M_extra[1] + M_with_extra.align(refidx=refidx) + + print(">> Path with (%i head, %i tail) extrapolated frames saved to interpolated_splice_extra.xyz" % (self.extrapolate[0], self.extrapolate[1])) + M_with_extra.write("interpolated_splice_extra.xyz") if max_mismatch < last_max_mismatch: increase_count = 0 @@ -1248,7 +1300,7 @@ def main(): params = InterpParams(**args) M0 = Molecule(args['input']) #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, align_frags=False) - interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align_system, align_frags=params.align_frags, verbose=params.verbose) + interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align_system, align_frags=params.align_frags, extrapolate=params.extrapolate, verbose=params.verbose) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/molecule.py b/geometric/molecule.py index 609f24dd..a1722217 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -2006,7 +2006,7 @@ def load_popxyz(self, fnm): self.qm_mulliken_charges = list(np.array(QS.xyzs)[:, :, 0]) self.qm_mulliken_spins = list(np.array(QS.xyzs)[:, :, 1]) - def align(self, smooth = False, center = True, center_mass = False, atom_select=None): + def align(self, smooth = False, refidx = 0, center = True, center_mass = False, atom_select=None): """ Align molecules. Has the option to create smooth trajectories @@ -2036,7 +2036,7 @@ def align(self, smooth = False, center = True, center_mass = False, atom_select= if smooth: ref = index2-1 else: - ref = 0 + ref = refidx if atom_select is not None: tr, rt = get_rotate_translate(xyz2[atom_select],self.xyzs[ref][atom_select]) else: diff --git a/geometric/params.py b/geometric/params.py index 41b59162..5a32dfba 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -272,6 +272,10 @@ def __init__(self, **kwargs): self.align_system = kwargs.get('align_system', True) # Verbose printout self.verbose = kwargs.get('verbose', 0) + if 'extrapolate' in kwargs: + self.extrapolate = tuple(kwargs['extrapolate']) + else: + self.extrapolate = None def str2bool(v): """ Allows command line options such as "yes" and "True" to be converted into Booleans. """ @@ -552,7 +556,9 @@ def parse_interpolate_args(*args): grp_univ.add_argument('--nframes', type=int, help='Number of frames, needs to be bigger than 5, that will be used to interpolate, default 50\n') grp_univ.add_argument('--align_frags', type=str2bool, help='Provide "yes" to align fragments in reactant and product structure prior to interpolation.\n') grp_univ.add_argument('--align_system', type=str2bool, help='Provide "yes" to align the entire system prior to interpolation.\n') - + grp_univ.add_argument('--extrapolate', type=int, nargs=2, help='Provide two integers to generate extrapolated frames at the head and tail ends.\n' + 'The displacement norm between extrapolated frames is the same as for interpolated frames.\n') + grp_output = parser.add_argument_group('output', 'Control the format and amount of the output') grp_output.add_argument('--prefix', type=str, help='Specify a prefix for log file and temporary directory.\n' 'Defaults to the input file path (incl. file name with extension removed).\n ') From ef8c5cd842a486fa08d968c671564a8b1d6b8195 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Tue, 31 Oct 2023 13:37:22 -0700 Subject: [PATCH 23/30] mintrust can be lower than convergence_drms ; conmethod 1 bugfix --- geometric/internal.py | 1 + geometric/optimize.py | 2 ++ geometric/step.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/geometric/internal.py b/geometric/internal.py index a7cf2bed..ab9652a9 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -4001,6 +4001,7 @@ def ov(vi, vj): vj = V[:, jc] vj -= ui * ov(ui, vj)/Unorms[icu]**2 icu += 1 + if icu >= U.shape[1]: break # self.Vecs contains the linear combination coefficients that are our new DLCs self.Vecs = U[:, :icu].copy() diff --git a/geometric/optimize.py b/geometric/optimize.py index bd845005..9ec631c9 100644 --- a/geometric/optimize.py +++ b/geometric/optimize.py @@ -600,6 +600,8 @@ def evaluateStep(self): if Converged_energy and Converged_grms and Converged_drms and Converged_gmax and Converged_dmax and self.conSatisfied: self.SortedEigenvalues(self.H) logger.info("Converged! =D\n") + if self.trust < min(1.2e-3, params.Convergence_drms): + logger.info("*_* Warning - trust radius is lower than RMS displacement convergence criterion; please check gradient accuracy. *_*\n") self.state = OPT_STATE.CONVERGED return diff --git a/geometric/step.py b/geometric/step.py index 99553146..d8c755de 100644 --- a/geometric/step.py +++ b/geometric/step.py @@ -834,7 +834,7 @@ def trust_step(target, v0, X, G, H, IC, rfo, verbose=0): m_sol = sol # Break out of infinite oscillation loops if niter%100 == 99: - logger.info(" trust_step Iter: %4i, randomizing\n" % niter) + # logger.info(" trust_step Iter: %4i, randomizing\n" % niter) v += np.random.random() * niter / 100 if niter%1000 == 999: if verbose >= 3: get_delta_prime(v, X, G, H, IC, rfo, verbose+1) From 862d22b5776e289da3ca276d7a354c4d377f4b11 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Sat, 4 Nov 2023 11:51:32 -0700 Subject: [PATCH 24/30] Bug fix for extrapolation not activated --- geometric/interpolate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 593e318c..0a028de7 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -575,6 +575,8 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals assert type(extrapolate) is tuple assert len(extrapolate) == 2 self.extrapolate = extrapolate + else: + self.extrapolate = None def get_splice_length(self): # Determine the splice length From f0a1e37324efd8fc4bfebbd65c52366737aa5ab0 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 6 Nov 2023 14:27:03 -0800 Subject: [PATCH 25/30] A strange merge conflict in interpolate branch --- geometric/xml_helper.py | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 geometric/xml_helper.py diff --git a/geometric/xml_helper.py b/geometric/xml_helper.py new file mode 100644 index 00000000..8f895f1b --- /dev/null +++ b/geometric/xml_helper.py @@ -0,0 +1,44 @@ +import xml.etree.ElementTree as ET +from .molecule import Molecule +import copy + +def read_coors_from_xml(M, state_xml): + with open(state_xml) as fobj: + xml = fobj.read() + root = ET.fromstring(xml) + + for child in root: + if child.tag == "Positions": + for aid in range(M.na): + x = float(child[aid].attrib['x']) + y = float(child[aid].attrib['y']) + z = float(child[aid].attrib['z']) + M.xyzs[0][aid, 0] = x*10.0 + M.xyzs[0][aid, 1] = y*10.0 + M.xyzs[0][aid, 2] = z*10.0 + return root; + +def write_coors_to_xml(M, state_xml, out_xml): + if type(state_xml) is str: + with open(state_xml) as fobj: + xml = fobj.read() + root = ET.fromstring(xml) + elif type(state_xml) is ET.Element: + root = copy.deepcopy(state_xml) + else: + raise IOError("Expected second argument to write_coors_to_xml to be file name or xml.etree.ElementTree.Element type") + + for child in root: + if child.tag == "Positions": + for aid in range(M.na): + x = M.xyzs[0][aid, 0] / 10.0 + y = M.xyzs[0][aid, 1] / 10.0 + z = M.xyzs[0][aid, 2] / 10.0 + child[aid].attrib['x'] = "%.16e" % x + child[aid].attrib['y'] = "%.16e" % y + child[aid].attrib['z'] = "%.16e" % z + # write to the output XML file. + fout = open(out_xml, "w") + print(ET.tostring(root).decode("utf-8"), file=fout) + fout.close() + From fd41651afd021046c731b096d19247b3a03486e2 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 6 Dec 2023 10:42:55 -0800 Subject: [PATCH 26/30] Make align_system option more global --- geometric/interpolate.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 0a028de7..f757eb09 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -917,7 +917,8 @@ def initial_guess_one_method(self, method = 0): # Fail if the endpt_err is greater than the threshold if endpt_err > err_thre: - M.align() + if self.align_system: + M.align() return M, 1, dq_ratio, endpt_err # Check for clashes clash_pairs, clash_new = self.detect_clash_trajectory(coord_segment, clash_known=clash_pairs, altdists=self.min_enddists, verbose=False) @@ -1205,14 +1206,16 @@ def splice_iterations(self): if include_alignment: M_append = M_reac[:-1] + M + M_prod[1:] if len(M_append) != len(M): - M_append.align() + if self.align_system: + M.align() M = EqualSpacing(M_append, frames=len(M), RMSD=False, spline=True) else: if len(M_reac) > 1: M = M_reac[0] + M if len(M_prod) > 1: M = M + M_prod[-1] - M.align() + if self.align_system: + M.align() M.write("interpolated_splice.xyz") if self.extrapolate: @@ -1223,7 +1226,8 @@ def splice_iterations(self): M_with_extra = M_extra[0] + M_with_extra if self.extrapolate[1] > 0: M_with_extra = M_with_extra + M_extra[1] - M_with_extra.align(refidx=refidx) + if self.align_system: + M_with_extra.align(refidx=refidx) print(">> Path with (%i head, %i tail) extrapolated frames saved to interpolated_splice_extra.xyz" % (self.extrapolate[0], self.extrapolate[1])) M_with_extra.write("interpolated_splice_extra.xyz") From d5a617057ea8963c6199031c865c5505748c3798 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Thu, 11 Jan 2024 14:49:57 -0800 Subject: [PATCH 27/30] Add atom deletion support to molecule.py, improve handling of CONECT records --- geometric/molecule.py | 75 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/geometric/molecule.py b/geometric/molecule.py index de01f8cd..d1059df8 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -1962,6 +1962,67 @@ def FrameStack2D(k): New.Data['bonds'] = self.bonds + [(b[0]+self.na, b[1]+self.na) for b in other.bonds] return New + def delete_atoms(self, indices): + # Returns a copy of Molecule object with the indicated atoms deleted. + # An array of true or false indicating whether each atom is + # to be included in the returned molecule + mask = np.ones(self.na, dtype=bool) + mask[indices] = False + + # Mapping old to new atom indices + old_to_new_idx = np.cumsum(mask)-1 + new_na = old_to_new_idx[-1] + 1 + + # Copy data fields that don't depend on number of atoms + New = Molecule() + for key in self.FrameKeys | self.MetaKeys: + New.Data[key] = copy.deepcopy(self.Data[key]) + + # Copy and modify data fields that contain numerical atom-wise data + for key in ['xyzs', 'qm_grads', 'qm_espxyzs', 'qm_espvals', 'qm_extchgs', 'qm_mulliken_charges', 'qm_mulliken_spins']: + if key in self.Data: + if key == 'qm_hessians': + new_data = [self.Data[key][i].reshape(self.na, 3, self.na, 3)[mask, :, mask, :].reshape(new_na, new_na).copy() for i in range(len(self))] + elif key == 'qm_bondorder': + new_data = [self.Data[key][i][mask, mask].copy() for i in range(len(self))] + else: + new_data = [self.Data[key][i][mask].copy() for i in range(len(self))] + New.Data[key] = new_data + + # Copy and modify other atom-wise data fields. + for key in self.AtomKeys: + if key == 'tinkersuf': # Tinker suffix is a bit tricky + NewSuf = [] + for line in self.Data[key]: + whites = re.split('[^ ]+',line) + s = line.split() + if len(s) > 1: + for i in range(1,len(s)): + # Convert the "bonded atom numbers" by changing string to integer, + # subtracting 0, looking up corresponding new atom index, then adding 1 + s[i] = str(old_to_new_idx[int(s[i])-1]+1) + NewSuf.append(''.join([whites[j]+s[j] for j in range(len(s))])) + New.Data[key] = NewSuf + else: + if type(self.Data[key]) is np.ndarray: + new_data = self.Data[key][mask] + New.Data[key] = new_data.copy() + elif type(self.Data[key]) is list: + new_data = [self.Data[key][i] for i in range(self.na) if mask[i]] + New.Data[key] = new_data[:] + else: + logger.error('Cannot stack %s because it is of type %s\n' % (key, str(type(New.Data[key])))) + raise RuntimeError + if 'bonds' in self.Data: + new_data = [] + for b in self.bonds: + # Only keep bonds in non-deleted atoms + if mask[b[0]] and mask[b[1]]: + new_data.append((old_to_new_idx[b[0]], old_to_new_idx[b[1]])) + New.Data['bonds'] = new_data + + return New + def get_populations(self): """ Return a cloned molecule object but with X-coordinates set to Mulliken charges and Y-coordinates set to Mulliken @@ -3737,6 +3798,8 @@ def read_pdb(self, fnm, **kwargs): X=PDBLines[0] XYZ=np.array([[x.x,x.y,x.z] for x in X])/10.0#Convert to nanometers + + Serial=[x.serial for x in X] # Serial number - not the same as the atom index because it starts from 1 and TER takes up a serial number AltLoc=np.array([x.altLoc for x in X],'str') # Alternate location ICode=np.array([x.iCode for x in X],'str') # Insertion code ChainID=np.array([x.chainID for x in X],'str') @@ -3781,17 +3844,21 @@ def read_pdb(self, fnm, **kwargs): F2=open(fnm,'r') # QYD: Rewrite to support atom indices with 5 digits # i.e. CONECT143321433314334 -> 14332 connected to 14333 and 14334 + # LPW: The serial numbers in the CONECT records start from 1 and include the TER records, + # so are not the same as the 'atom indices' in the Molecule object. for line in F2: if line[:6] == "CONECT": - conect_A = int(line[6:11]) - 1 + conect_A = int(line[6:11]) conect_B_list = [] line_rest = line[11:] while line_rest.strip(): # Take 5 characters a time until run out of characters - conect_B_list.append(int(line_rest[:5]) - 1) + conect_B_list.append(int(line_rest[:5])) line_rest = line_rest[5:] for conect_B in conect_B_list: - bond = (min((conect_A, conect_B)), max((conect_A, conect_B))) + idx_conect_A = Serial.index(conect_A) + idx_conect_B = Serial.index(conect_B) + bond = (min((idx_conect_A, idx_conect_B)), max((idx_conect_A, idx_conect_B))) if bond not in bonds: bonds.append(bond) @@ -4553,7 +4620,7 @@ def write_pdb(self, **kwargs): # Standardize formatting of residue IDs. resIds = [] for resid in self.resid: - resIds.append("%4d" % (resid%10000)) + resIds.append("%4d" % (resid%10000 if resid > 0 else resid)) # Standardize record names. records = [] for resname in resNames: From d8e897ee5c25bae5b8c0003915d88ea8e4cab29b Mon Sep 17 00:00:00 2001 From: Heejune Park Date: Fri, 8 Mar 2024 10:18:36 -0800 Subject: [PATCH 28/30] typo --- geometric/params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geometric/params.py b/geometric/params.py index f89ce0ec..26ee4f9b 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -555,7 +555,7 @@ def parse_interpolate_args(*args): grp_univ = parser.add_argument_group('universal', 'Relevant to every job') grp_univ.add_argument('input', type=str, help='REQUIRED positional argument: xyz file containing reactant and product structures.\n') grp_univ.add_argument('--optimize', type=str2bool, help='Provide "yes" to optimize an interpolated trajectory.\n' - 'The followting two arguments, nframes and prealign, will be ignored.\n' + 'The following two arguments, nframes and prealign, will be ignored.\n' 'The input xyz file must contain 5 or more structures.\n') grp_univ.add_argument('--nframes', type=int, help='Number of frames, needs to be bigger than 5, that will be used to interpolate, default 50\n') grp_univ.add_argument('--align_frags', type=str2bool, help='Provide "yes" to align fragments in reactant and product structure prior to interpolation.\n') From 086ff879a0d1d50d68bcfceee053ad00c568c67d Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 13 Mar 2024 11:30:06 -0700 Subject: [PATCH 29/30] Fix a "zero divide by zero" error --- geometric/internal.py | 1 + 1 file changed, 1 insertion(+) diff --git a/geometric/internal.py b/geometric/internal.py index ab9652a9..bcb0883a 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -3170,6 +3170,7 @@ def dihedral_clash(dih_by_abc, dih_by_bcd, diff_): B = A + diff_[iPrim] C = jDih.value(xyz1) D = C + diff_[jPrim] + if (np.abs(diff_[iPrim]) < 1e-10 and np.abs(diff_[jPrim]) < 1e-10): continue xzero = (C-A)/(B+C-D-A) xplus = (C-A+2*np.pi)/(B+C-D-A) xminus = (C-A-2*np.pi)/(B+C-D-A) From 90b0bff11762cae2d54bcb117e36b486466a0de2 Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Wed, 13 Mar 2024 11:40:34 -0700 Subject: [PATCH 30/30] Add interpolation "fast mode" and two new initial guess methods --- geometric/interpolate.py | 54 ++++++++++++++++++++++++++++++++-------- geometric/params.py | 3 +++ 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/geometric/interpolate.py b/geometric/interpolate.py index 0a028de7..423ca95e 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -514,7 +514,7 @@ def print_map(mtx, title, colorscheme=0): print() class Interpolator(object): - def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, align_frags=False, extrapolate=None, verbose=0): + def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, align_frags=False, fast=False, extrapolate=None, verbose=0): # The input molecule; it should not be modified. self.M_in = deepcopy(M_in) # Check the length of the input molecule @@ -540,6 +540,8 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals self.align_system = True if self.align_frags else align_system if self.align_system: self.M.align() + # Whether to run in "fast mode" + self.fast_mode = fast # Atom pairs that are not bonded (resp. bonded) in either the reactant or product self.nbpairs, self.union_bonds = find_nonbonded_pairs(self.M) # List of (donor, transfer, acceptor) triplets. @@ -801,7 +803,7 @@ def prealign_fragments(self): atom_pairs, distance_matrix = mol.distance_matrix(pbc=False) self.enddists.append(distance_matrix[-1].copy()) - def initial_guess_one_method(self, method = 0): + def end_to_end(self, method=0, rebuild_dlc=True, respace=True): # Class variables that are used in this method M = deepcopy(self.M) n_frames = self.n_frames @@ -820,12 +822,16 @@ def initial_guess_one_method(self, method = 0): # Build the IC system used to interpolate from one endpoint to the other. if method in [0, 1]: + # Use TRIC coordinate system + IC = DelocalizedInternalCoordinates(M_IC, build=True, connect=False, addcart=False, connect_isolated=False) + newPrims = IC.Prims.Internals + elif method in [2, 3]: # Use TRIC coordinate system, but use only the bonds that exist at both endpoints. M_IC.bonds = common_bonds M_IC.top_settings['read_bonds'] = True IC = DelocalizedInternalCoordinates(M_IC, build=True, connect=False, addcart=False, connect_isolated=False) newPrims = IC.Prims.Internals - elif method in [2, 3]: + elif method in [4, 5]: # Use HDLC coordinate system, but use only the bonds that exist at both endpoints. # Set weight of 0.5 to all Cartesian primitive ICs. M_IC.bonds = common_bonds @@ -836,7 +842,7 @@ def initial_guess_one_method(self, method = 0): newPrims.append(CartesianX(i, w=0.5)) newPrims.append(CartesianY(i, w=0.5)) newPrims.append(CartesianZ(i, w=0.5)) - elif method in [4, 5]: + elif method in [6, 7]: # Use all-inverse distances plus Cartesian coordinates with small weight. IC = DelocalizedInternalCoordinates(M_IC, build=True, connect=False, addcart=False) newPrims = [] @@ -895,7 +901,7 @@ def initial_guess_one_method(self, method = 0): clash_pairs = [] nDiv = n_frames - 1 for clash_round in range(5): - coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=True, sync=1) + coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=rebuild_dlc, sync=1) dq_segment = np.array([IC_Chk.calcDiff(coord_segment[i], coord_segment[i+1]) for i in range(n_frames-1)]) # Don't evaluate dq_segment for primitives that have a diagnostic of 2 or higher because the result will be invalid. prim_diag = np.max([[Prim.diagnostic(coord_segment[i])[0] for Prim in IC_Chk.Internals] for i in range(n_frames)], axis=0) @@ -912,8 +918,9 @@ def initial_guess_one_method(self, method = 0): M.comms = ['Interpolated frame %i' % k for k in range(len(coord_segment))] # Perform equal spacing of the pathway according to the largest change in any primitive coordinate - dq_relative = dq_segment / dq_med[np.newaxis, :] - M = EqualSpacing(M, RMSD=False, align=False, custom=np.max(dq_relative, axis=1)) + if respace: + dq_relative = dq_segment / dq_med[np.newaxis, :] + M = EqualSpacing(M, RMSD=False, align=False, custom=np.max(dq_relative, axis=1)) # Fail if the endpt_err is greater than the threshold if endpt_err > err_thre: @@ -938,8 +945,8 @@ def initial_guess_one_method(self, method = 0): def initial_guess(self): M = None dq_ratio_min = 1e6 - for method in range(6): - M_, status, dq_ratio, endpt_err = self.initial_guess_one_method(method=method) + for method in range(8): + M_, status, dq_ratio, endpt_err = self.end_to_end(method=method) # M_.write('interpolated_endpoints_method%i.xyz' % method) # Keep the interpolation path with status=0 and the lowest dq_ratio print("Initial pathway generation using method %i %s; dq_ratio = %.3f endpt_err = %.3f" % @@ -956,6 +963,30 @@ def initial_guess(self): self.M = deepcopy(M) print("Keeping the result from method %i" % keep_method) + def run_fast(self): + M = None + dq_ratio_min = 1e6 + dq_stop_thre = 5 + for method in range(8): + M_, status, dq_ratio, endpt_err = self.end_to_end(method=method, rebuild_dlc=False, respace=False) + # M_.write('interpolated_endpoints_method%i.xyz' % method) + # Keep the interpolation path with status=0 and the lowest dq_ratio + print("Initial pathway generation using method %i %s; dq_ratio = %.3f endpt_err = %.3f" % + (method, "success" if status == 0 else "failed", dq_ratio, endpt_err)) + if status == 0 and dq_ratio < dq_ratio_min: + M = deepcopy(M_) + dq_ratio_min = dq_ratio + keep_method = method + if dq_ratio < dq_stop_thre: break + if M is None: + raise RuntimeError("Failed to generate an initial path") + M.comms = [c + " (method %i)" % keep_method for c in M.comms] + M.write("interpolated_fast_mode.xyz") + # Overwrite the Molecule object currently being processed + self.M = deepcopy(M) + print("Keeping the result from method %i" % keep_method) + return + def assign_ICs_to_segments(self): M = self.M ICs = self.ICs @@ -1284,6 +1315,9 @@ def split_IC_segments(self, fail_segment): self.segment_endpoints = get_segment_endpoints(self.M, self.segments_spliced) def run_workflow(self): + if self.fast_mode: + self.run_fast() + return if self.align_frags: self.prealign_fragments() if self.do_init: @@ -1302,7 +1336,7 @@ def main(): params = InterpParams(**args) M0 = Molecule(args['input']) #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, align_frags=False) - interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align_system, align_frags=params.align_frags, extrapolate=params.extrapolate, verbose=params.verbose) + interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align_system, align_frags=params.align_frags, fast=params.fast, extrapolate=params.extrapolate, verbose=params.verbose) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/params.py b/geometric/params.py index f89ce0ec..ffd3e79a 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -274,6 +274,8 @@ def __init__(self, **kwargs): self.align_frags = kwargs.get('align_frags', False) # Whether we want to align the initial interpolate trajectory before optimization. self.align_system = kwargs.get('align_system', True) + # Run in fast mode + self.fast = kwargs.get('fast', False) # Verbose printout self.verbose = kwargs.get('verbose', 0) if 'extrapolate' in kwargs: @@ -558,6 +560,7 @@ def parse_interpolate_args(*args): 'The followting two arguments, nframes and prealign, will be ignored.\n' 'The input xyz file must contain 5 or more structures.\n') grp_univ.add_argument('--nframes', type=int, help='Number of frames, needs to be bigger than 5, that will be used to interpolate, default 50\n') + grp_univ.add_argument('--fast', type=str2bool, help='Provide "yes" to run in fast mode (no splicing).\n') grp_univ.add_argument('--align_frags', type=str2bool, help='Provide "yes" to align fragments in reactant and product structure prior to interpolation.\n') grp_univ.add_argument('--align_system', type=str2bool, help='Provide "yes" to align the entire system prior to interpolation.\n') grp_univ.add_argument('--extrapolate', type=int, nargs=2, help='Provide two integers to generate extrapolated frames at the head and tail ends.\n'