-
Notifications
You must be signed in to change notification settings - Fork 0
/
transform.py
84 lines (73 loc) · 3.63 KB
/
transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import argparse
from rdkit import Chem
from rdkit.Chem import AllChem, rdChemReactions
# Named functional-group rewrites expressed as RDKit reaction SMARTS.
# The rules are applied one after another in the order listed, each one to the
# result of the previous one, so the order of this mapping is significant.
_TRANSFORMATIONS = {
    "Aromatic N-Oxide": "[N:1]=[O:2]>>[N+:1]-[O-:2]",
    # Disabled rules, kept for reference:
    # "Azide": "[N:1][N:2]=[N:3]>>[N:1]=[N+:2]=[N-:3]",
    # "Diazo": "[C:1][N:2]=[N:3]>>[C:1]=[N+:2]=[N-:3]",
    "Diazonium": "[C:1]-[N:2]=[N+:3]>>[C:1][N+:2]#[N:3]",
    "Azo Oxide": "[O-:1][N:2]=[N+:3]>>[O-:1]-[N+:2]=[N:3]",
    "Diazo 2": "[N-:1]=[N:2]=[N+:3]>>[N:1]=[N+:2]=[N-:3]",
    # "Iminium": "[C:1]-[N:2]>>[C:1]=[N+:2]",
    "Isocyanate": "[N+:1][C:2]=[O:3]>>[N:1]=[C:2]=[O:3]",
    "Nitrilium": "[C+:1]=[N:2]>>[C:1]#[N+:2]",
    "Nitro": "[O:1]=[N:2]=[O:3]>>[O-:3]-[N+:2]=[O:1]",
    "Nitrone Nitronate": "[C:1]=[N:2]=[O:3]>>[O-:3]-[N+:2]=[C:1]",
    "Nitroso": "[C:1]-[N:2]-[O:3]>>[C:1]-[N:2]=[O:3]",
    "Phosphonic": "[P+:1]([O:2])([O:3])[O-:4]>>[P:1]([O:2])([O:3])=[O:4]",
    "Phosphonium Ylide": "[P-:1]([C:2])([C:3])[C+:4]>>[P:1]([C:2])([C:3])=[C:4]",
    "Selenite": "[O:1][Se+:2][O:3][O-:4]>>[O:1][Se:2]([O:3])=[O:4]",
    "Sulfine": "[C:1]-[S+:2][O-:3]>>[C:1]=[S:2]=[O:3]",
    "Sulfoxide": "[C:1][S+:2][O-:3]>>[C:1][S:2]=[O:3]",
    # Same SMARTS as "Aromatic N-Oxide" above; kept to preserve the rule list.
    "Tertiary N-Oxide": "[N:1]=[O:2]>>[N+:1]-[O-:2]",
}


def _compile_transformations(transformations):
    """Parse each reaction SMARTS once into (name, reactant query, reaction)."""
    compiled = []
    for name, reaction_smarts in transformations.items():
        rxn = rdChemReactions.ReactionFromSmarts(reaction_smarts)
        # The reactant side alone is used as a pre-filter before running the reaction.
        pattern = Chem.MolFromSmarts(reaction_smarts.split(">>")[0])
        compiled.append((name, pattern, rxn))
    return compiled


def _copy_mol_props(source, target):
    """Copy the title and the public (SD data) properties from source to target."""
    if source.HasProp("_Name"):
        target.SetProp("_Name", source.GetProp("_Name"))
    for key in source.GetPropNames():
        target.SetProp(key, source.GetProp(key))


def apply_transformations(input_file, output_file, error_file):
    """Apply the functional-group rewrite rules to every molecule of an SDF file.

    Each readable molecule is sanitized, kekulized, and then run through the
    rules in ``_TRANSFORMATIONS`` in order. For every rule whose reactant
    pattern matches, the first product of the first product set replaces the
    molecule. Every written record carries an ``Error`` property: ``"None"``
    on success, otherwise a description of what went wrong.

    Args:
        input_file: Path of the SDF file to read.
        output_file: Path of the SDF file receiving successfully processed
            molecules.
        error_file: Path of the SDF file receiving molecules that could not be
            parsed or processed.
    """
    # Parsing the SMARTS is independent of the molecule, so do it once up front
    # (and before any output file is opened).
    compiled = _compile_transformations(_TRANSFORMATIONS)

    suppl = Chem.SDMolSupplier(input_file)
    writer = None
    error_writer = None
    try:
        writer = Chem.SDWriter(output_file)
        error_writer = Chem.SDWriter(error_file)

        for idx, mol in enumerate(suppl):
            if mol is None:
                # Unparseable record: emit an empty placeholder so the failure
                # is still visible in the error file.
                error_message = "Could not parse molecule from input file."
                print(f"Molecule {idx+1}: {error_message}")
                error_mol = Chem.Mol()
                error_mol.SetProp("_Name", f"Molecule {idx+1}")
                error_mol.SetProp("Error", error_message)
                error_writer.write(error_mol)
                continue

            error_message = "None"
            try:
                mol.UpdatePropertyCache(strict=False)
                Chem.SanitizeMol(mol)
                Chem.Kekulize(mol, clearAromaticFlags=True)
                # NOTE(review): products are re-sanitized below but not
                # re-kekulized, so later rules may see aromatic atoms again --
                # confirm this is intended.
                for name, pattern, rxn in compiled:
                    try:
                        if not mol.HasSubstructMatch(pattern):
                            continue
                        products = rxn.RunReactants((mol,))
                        if products:
                            product = products[0][0]
                            Chem.SanitizeMol(product)
                            # Reaction products are new molecules; carry the
                            # record's title and data fields over so they are
                            # not lost in the output.
                            _copy_mol_props(mol, product)
                            mol = product
                    except Exception as exc:
                        error_message = f"Error in reaction '{name}': {exc}"
                        raise
                mol.SetProp("Error", error_message)
                writer.write(mol)
            except Exception as exc:
                if error_message == "None":
                    # Failure outside a reaction step (sanitization,
                    # kekulization, writing): record the real cause instead of
                    # the misleading "None".
                    error_message = f"{type(exc).__name__}: {exc}"
                print(f"Error processing Molecule {idx+1}: {exc}")
                mol.SetProp("Error", error_message)
                error_writer.write(mol)
    finally:
        # Always flush and close whatever was opened, even on unexpected errors.
        for open_writer in (writer, error_writer):
            if open_writer is not None:
                open_writer.close()

    print(f"Transformation complete. Successfully processed molecules saved to {output_file}")
    print(f"Failed molecules saved to {error_file}")
if __name__ == "__main__":
    # Command-line entry point: three mandatory file paths, passed straight
    # through to apply_transformations.
    cli = argparse.ArgumentParser(
        description="Apply molecular transformations to an SDF file."
    )
    required_options = (
        ("-i", "--input", "Input SDF file"),
        ("-o", "--output", "Output SDF file (successful molecules)"),
        ("-e", "--error", "Error log file (failed molecules)"),
    )
    for short_flag, long_flag, help_text in required_options:
        cli.add_argument(short_flag, long_flag, required=True, help=help_text)
    options = cli.parse_args()
    apply_transformations(options.input, options.output, options.error)