-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrite atom typer for UFF in Gulp. #59
Comments
I have begun working on this in uff_typer_tests branch on my fork, where I have added test molecules and made those tests pass and match the expected result. However, a full rewrite is ideal. |
Does this gulp wrapper have a better solution to atom typing? https://github.com/learningmatter-mit/gulpy |
It sounds like it could be a good option. Would you want to include it as a dependency? |
Prepared a test script with some molecules optimised with stko.UFF() [uses rdkit engine] and stko.GulpUFFOptimizer() that currently plots parities of bonds, angles and dihedrals. The effect of the optimisation engine could be at play. Bond distances are reasonable, save a few deviations (perhaps incorrect aromaticity assignment). Angles are also reasonable - although I would expect better agreement. Dihedrals show a horrible scatter - unsure what to make of this. Need to go further into this in the future. import stk
import stko
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import euclidean
import rdkit.Chem.AllChem as rdkit
def molecules():
smiles = (
'C#C',
'C1=CC=CC=C1',
'C1=CC=C2C=CC=CC2=C1',
'c1cncc(c1)C#Cc2cc(cc(c2)c3ccc(cc3)c4cc(cc(c4)C#Cc5cccnc5)C#Cc6cccnc6)C#Cc7cccnc7',
'C1=CC(=CC(=C1)C#CC2=CN=CC=C2)C#CC3=CN=CC=C3',
'CS[CH]=[C]1C[CH2]C1',
'CC1CCCCC(C1)c2cc(C)cc(O)c2',
'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',
'CCC(C)C(C(=O)O)N',
'CC(C)CC(C(=O)O)N',
'CC(C)C(C(=O)O)N',
'CC(C(C(=O)O)N)O',
'CSCCC(C(=O)O)N',
'C1=CC=C(C=C1)CC(C(=O)O)N',
'C1=CC=C2C(=C1)C(=CN2)CC(C(=O)O)N',
'C(CCN)CC(C(=O)O)N',
)
for smi in smiles:
yield stk.BuildingBlock(smi)
def parity(uff, gulp, filename, lim):
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(uff, gulp, c='gold', edgecolors='k',
alpha=1.0, s=40)
ax.plot(np.linspace(min(lim) - 1, max(lim) + 1, 2),
np.linspace(min(lim) - 1, max(lim) + 1, 2),
c='k', alpha=0.4)
# Set number of ticks for x-axis
ax.tick_params(axis='both', which='major', labelsize=16)
ax.set_xlabel('uff', fontsize=16)
ax.set_ylabel('gulp', fontsize=16)
ax.set_xlim(lim)
ax.set_ylim(lim)
fig.tight_layout()
fig.savefig(
f'{filename}.pdf',
dpi=720,
bbox_inches='tight'
)
def get_bond_length(atom1_id, atom2_id, position_matrix):
distance = euclidean(
u=position_matrix[atom1_id],
v=position_matrix[atom2_id]
)
return float(distance)
def get_dihedral(pt1, pt2, pt3, pt4):
p0 = np.asarray(pt1)
p1 = np.asarray(pt2)
p2 = np.asarray(pt3)
p3 = np.asarray(pt4)
b0 = -1.0 * (p1 - p0)
b1 = p2 - p1
b2 = p3 - p2
# normalize b1 so that it does not influence magnitude of vector
# rejections that come next
b1 /= np.linalg.norm(b1)
# vector rejections
# v = projection of b0 onto plane perpendicular to b1
# = b0 minus component that aligns with b1
# w = projection of b2 onto plane perpendicular to b1
# = b2 minus component that aligns with b1
v = b0 - np.dot(b0, b1) * b1
w = b2 - np.dot(b2, b1) * b1
# angle between v and w in a plane is the torsion angle
# v and w may not be normalized but that's fine since tan is y/x
x = np.dot(v, w)
y = np.dot(np.cross(b1, v), w)
return np.degrees(np.arctan2(y, x))
def unit_vector(vector):
return vector / np.linalg.norm(vector)
def angle_between(v1, v2, normal=None):
v1_u = unit_vector(v1)
v2_u = unit_vector(v2)
angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
if normal is not None:
# Get normal vector and cross product to determine sign.
cross = np.cross(v1_u, v2_u)
if np.dot(normal, cross) < 0:
angle = -angle
return angle
def main():
u_dists = []
u_angles = []
u_dihedrals = []
g_dists = []
g_angles = []
g_dihedrals = []
for i, molecule in enumerate(molecules()):
print(f'doing {molecule}')
# Run optimisations.
uff = stko.UFF()
u = uff.optimize(molecule)
u.write(f'uff_{i}.mol')
gulp = stko.GulpUFFOptimizer(
gulp_path='/home/atarzia/software/gulp-5.1/Src/gulp/gulp',
output_dir=f'gulp_{i}',
conjugate_gradient=False,
)
# Assign the force field.
gulp.assign_FF(molecule)
g = gulp.optimize(molecule)
g.write(f'gulp_{i}.mol')
for bond in molecule.get_bonds():
u_dists.append(get_bond_length(bond.get_atom1().get_id(), bond.get_atom2().get_id(), u.get_position_matrix()))
g_dists.append(get_bond_length(bond.get_atom1().get_id(), bond.get_atom2().get_id(), g.get_position_matrix()))
paths = rdkit.FindAllPathsOfLengthN(
mol=molecule.to_rdkit_mol(),
length=3,
useBonds=False,
useHs=True,
)
for atom_ids in paths:
pos_mat = u.get_position_matrix()
vector1 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[0]]
vector2 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[2]]
u_angles.append(np.degrees(
angle_between(vector1, vector2)
))
pos_mat = g.get_position_matrix()
vector1 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[0]]
vector2 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[2]]
g_angles.append(np.degrees(
angle_between(vector1, vector2)
))
paths = rdkit.FindAllPathsOfLengthN(
mol=molecule.to_rdkit_mol(),
length=4,
useBonds=False,
useHs=True,
)
for atom_ids in paths:
u_dihedrals.append(abs(get_dihedral(
pt1=tuple(
u.get_atomic_positions(atom_ids[0])
)[0],
pt2=tuple(
u.get_atomic_positions(atom_ids[1])
)[0],
pt3=tuple(
u.get_atomic_positions(atom_ids[2])
)[0],
pt4=tuple(
u.get_atomic_positions(atom_ids[3])
)[0],
)))
g_dihedrals.append(abs(get_dihedral(
pt1=tuple(
g.get_atomic_positions(atom_ids[0])
)[0],
pt2=tuple(
g.get_atomic_positions(atom_ids[1])
)[0],
pt3=tuple(
g.get_atomic_positions(atom_ids[2])
)[0],
pt4=tuple(
g.get_atomic_positions(atom_ids[3])
)[0],
)))
parity(u_dists, g_dists, 'dists', (min([min(u_dists), min(g_dists)]), max([max(u_dists), max(g_dists)])))
parity(u_angles, g_angles, 'angles', (min([min(u_angles), min(g_angles)]), max([max(u_angles), max(g_angles)])))
parity(u_dihedrals, g_dihedrals, 'dihedrals', (min([min(u_dihedrals), min(g_dihedrals)]), max([max(u_dihedrals), max(g_dihedrals)])))
if __name__ == "__main__":
main() |
Previous commits of this file contain test cases that failed due to this issue: https://github.com/JelfsMaterialsGroup/stko/blob/0a7c74ab2ce2bbef2469478474363dc70fc3ef30/tests/optimizers/gulp/test_uff_assign_ff.py Removed as part of #175 |
Found issues with the handling of conjugation in stko.GulpUFF.assign_FF().
The method was written based on rdkit code, but the rdkit code also uses the bond order to set bond types, which we do not do here, leading to conjugated atoms having
resonant
bonds in Gulp/UFF.The text was updated successfully, but these errors were encountered: