diff --git a/.gitignore b/.gitignore
index cbc05b2..7a6dd93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -148,3 +148,4 @@ cookie.txt
*.csv
*.txt
*.sh
+.DS_Store
diff --git a/docs/API/available_datasets.md b/docs/API/available_datasets.md
new file mode 100644
index 0000000..fa630b8
--- /dev/null
+++ b/docs/API/available_datasets.md
@@ -0,0 +1,3 @@
+# Available Datasets
+
+::: openqdc.datasets
diff --git a/docs/API/isolated_atom_energies.md b/docs/API/isolated_atom_energies.md
new file mode 100644
index 0000000..966b6a8
--- /dev/null
+++ b/docs/API/isolated_atom_energies.md
@@ -0,0 +1,5 @@
+# Isolated atom energies
+
+This page contains the isolated atom energies.
+
+::: openqdc.utils.atomization_energies
diff --git a/docs/_overrides/main.html b/docs/_overrides/main.html
new file mode 100644
index 0000000..2eafd76
--- /dev/null
+++ b/docs/_overrides/main.html
@@ -0,0 +1,46 @@
+{% extends "base.html" %}
+
+{% block content %}
+{{ super() }}
+
+
+{% endblock content %}
diff --git a/docs/css/custom.css b/docs/css/custom.css
new file mode 100644
index 0000000..65db8ea
--- /dev/null
+++ b/docs/css/custom.css
@@ -0,0 +1,33 @@
+/* Indentation. */
+div.doc-contents:not(.first) {
+ padding-left: 25px;
+ border-left: 4px solid rgba(230, 230, 230);
+ margin-bottom: 80px;
+ }
+
+ /* Don't capitalize names. */
+ h5.doc-heading {
+ text-transform: none !important;
+ }
+
+ /* Don't use vertical space on hidden ToC entries. */
+ .hidden-toc::before {
+ margin-top: 0 !important;
+ padding-top: 0 !important;
+ }
+
+ /* Don't show permalink of hidden ToC entries. */
+ .hidden-toc a.headerlink {
+ display: none;
+ }
+
+ /* Avoid breaking parameters name, etc. in table cells. */
+ td code {
+ word-break: normal !important;
+ }
+
+ /* For pieces of Markdown rendered in table cells. */
+ td p {
+ margin-top: 0 !important;
+ margin-bottom: 0 !important;
+ }
diff --git a/docs/datasets.md b/docs/datasets.md
new file mode 100644
index 0000000..a2323fb
--- /dev/null
+++ b/docs/datasets.md
@@ -0,0 +1,27 @@
+# Overview of Datasets
+
+
+
+We provide support for the following publicly available QM Datasets.
+
+| Dataset | # Molecules | # Conformers | Average Conformers per Molecule | Force Labels | Atom Types | QM Level of Theory | Off-Equilibrium Conformations|
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| [GEOM](https://www.nature.com/articles/s41597-022-01288-4) | 450,000 | 37,000,000 | 82 | No | 18 | GFN2-xTB | No |
+| [Molecule3D](https://arxiv.org/abs/2110.01717) | 3,899,647 | 3,899,647 | 1 | No | 5 | B3LYP/6-31G* | No |
+| [NablaDFT](https://pubs.rsc.org/en/content/articlelanding/2022/CP/D2CP03966D) | 1,000,000 | 5,000,000 | 5 | No | 6 | ωB97X-D/def2-SVP | |
+| [QMugs](https://www.nature.com/articles/s41597-022-01390-7) | 665,000 | 2,000,000 | 3 | No | 10 | GFN2-xTB, ωB97X-D/def2-SVP | No |
+| [Spice](https://arxiv.org/abs/2209.10702) | 19,238 | 1,132,808 | 59 | Yes | 15 | ωB97M-D3(BJ)/def2-TZVPPD | Yes |
+| [ANI](https://pubs.rsc.org/en/content/articlelanding/2017/SC/C6SC05720A) | 57,462 | 20,000,000 | 348 | No | 4 | ωB97x:6-31G(d) | Yes |
+| [tmQM](https://pubs.acs.org/doi/10.1021/acs.jcim.0c01041) | 86,665 | | | No | | TPSSh-D3BJ/def2-SVP | |
+| [DES370K](https://www.nature.com/articles/s41597-021-00833-x) | 3,700 | 370,000 | 100 | No | 20 | CCSD(T) | Yes |
+| [DES5M](https://www.nature.com/articles/s41597-021-00833-x) | 3,700 | 5,000,000 | 1351 | No | 20 | SNS-MP2 | Yes |
+| [OrbNet Denali](https://arxiv.org/abs/2107.00299) | 212,905 | 2,300,000 | 11 | No | 16 | GFN1-xTB | Yes |
+| [SN2RXN](https://pubs.acs.org/doi/10.1021/acs.jctc.9b00181) | 39 | 452,709 | 11,600 | Yes | 6 | DSD-BLYP-D3(BJ)/def2-TZVP | |
+| [QM7X](https://www.nature.com/articles/s41597-021-00812-2) | 6,950 | 4,195,237 | 603 | Yes | 7 | PBE0+MBD | Yes |
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..264211f
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,30 @@
+# openQDC
+
+Open Quantum Data Commons
+
+## Setup Datasets
+
+Use the scripts in `setup/` to download the datasets. For more information, see the [README](setup/README.md) in the `setup/` directory.
+
+## Install the library in dev mode
+```bash
+# Install the deps
+mamba env create -n qdc -f env.yml
+
+# Activate the environment
+mamba activate qdc
+
+# Install the qdc library in dev mode
+pip install -e .
+
+```
+
+## Development lifecycle
+
+### Tests
+
+You can run tests locally with:
+
+```bash
+pytest .
+```
diff --git a/docs/tutorials/usage.ipynb b/docs/tutorials/usage.ipynb
new file mode 100644
index 0000000..b494396
--- /dev/null
+++ b/docs/tutorials/usage.ipynb
@@ -0,0 +1,424 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Really Hard example\n",
+ "\n",
+ "## Instantiate and GO!\n",
+ "\n",
+ "If you don't have the dataset downloaded it will be downloaded automatically and cached. You just instantiate the class and you are ready to go."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/homebrew/Caskroom/miniconda/base/envs/qdc/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n",
+ " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n",
+ "\u001b[32m2023-10-31 11:43:09.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mopenqdc.datasets.base\u001b[0m:\u001b[36mread_preprocess\u001b[0m:\u001b[36m236\u001b[0m - \u001b[1mReading preprocessed data\u001b[0m\n",
+ "\u001b[32m2023-10-31 11:43:09.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mopenqdc.datasets.base\u001b[0m:\u001b[36mread_preprocess\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mspice data with the following units:\n",
+ " Energy: hartree,\n",
+ " Distance: bohr,\n",
+ " Forces: hartree/bohr\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loaded atomic_inputs with shape (33175288, 5), dtype float32\n",
+ "Loaded position_idx_range with shape (1110165, 2), dtype int32\n",
+ "Loaded energies with shape (1110165, 1), dtype float32\n",
+ "Loaded forces with shape (33175288, 3, 1), dtype float32\n",
+ "Loaded name_uniques with shape (19155,), dtype =2.3.1
- - pytorch_sparse >=0.6.17
- - pytorch_cluster >=1.6
- - pytorch_scatter >=2.1
- - torch-ema
+ #- einops =0.6.0
+ - pytorch
+ - dscribe
# other stuffs
- h5py >=3.8.0
- - omegaconf #==2.3.0
- gdown #==4.6.4
- - hydra-core #==1.3.1
- - wandb #==0.13.10
# Viz
- matplotlib
@@ -63,7 +50,14 @@ dependencies:
- pre-commit
- ruff
- ipykernel
- - pydantic <= 2.0
-
- - pip:
- - torch-nl
+ - isort
+
+ # Doc
+ - mkdocs
+ - mkdocs-material
+ - mkdocs-material-extensions
+ - mkdocstrings
+ - mkdocs-click
+ - mkdocs-jupyter
+ - markdown-include
+ - mdx_truly_sane_lists
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..e174b70
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,76 @@
+site_name: "Open Quantum Data Commons (openQDC)"
+site_description: "I don't know... Something about data and Quantum stuff I guess :D"
+site_url: "https://github.com/OpenDrugDiscovery/openQDC"
+repo_url: "https://github.com/OpenDrugDiscovery/openQDC"
+repo_name: "openQDC"
+copyright: Copyright 2023 Valence Labs
+
+remote_branch: "privpage"
+use_directory_urls: false
+docs_dir: "docs"
+
+nav:
+ - Overview: index.md
+ - Available Datasets: datasets.md
+ - Tutorials:
+ - Really hard example: tutorials/usage.ipynb
+ - API:
+ - Datasets: API/available_datasets.md
+ - Isolated Atoms Energies: API/isolated_atom_energies.md
+theme:
+ name: material
+ custom_dir: docs/_overrides
+ palette:
+ primary: teal
+ accent: purple
+ features:
+ - navigation.tabs
+ - navigation.expand
+
+
+extra_css:
+ - css/custom.css
+
+extra_javascript:
+  - javascripts/config.js
+  # polyfill.io changed ownership and served malicious code; load the same
+  # polyfill bundle from the Cloudflare cdnjs mirror instead.
+  - https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6
+  - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
+
+markdown_extensions:
+ - admonition
+ - markdown_include.include
+ - pymdownx.emoji
+ - pymdownx.highlight
+ - pymdownx.magiclink
+ - pymdownx.superfences
+ - pymdownx.tabbed
+ - pymdownx.tasklist
+ # For `tab_length=2` in the markdown extension
+ # See https://github.com/mkdocs/mkdocs/issues/545
+ - mdx_truly_sane_lists
+ - mkdocs-click
+ - attr_list
+ - md_in_html
+ - toc:
+ permalink: true
+
+plugins:
+ - search
+ - mkdocstrings:
+ watch:
+ - src/
+ handlers:
+ python:
+ setup_commands:
+ - import sys
+ - sys.path.append("docs")
+ - sys.path.append("src")
+ selection:
+ new_path_syntax: yes
+ rendering:
+ show_root_heading: yes
+ heading_level: 3
+ show_if_no_docstring: true
+ - mkdocs-jupyter:
+ execute: False
+ # kernel_name: python3
diff --git a/src/openqdc/__init__.py b/src/openqdc/__init__.py
index e69de29..1432923 100644
--- a/src/openqdc/__init__.py
+++ b/src/openqdc/__init__.py
@@ -0,0 +1,41 @@
+import importlib
+import os
+from typing import TYPE_CHECKING # noqa F401
+
+# The lazy import logic below comes from openff-toolkit:
+# https://github.com/openforcefield/openff-toolkit/blob/b52879569a0344878c40248ceb3bd0f90348076a/openff/toolkit/__init__.py#L44
+
+# Dictionary of objects to lazily import; maps the object's name to its module path
+
+_lazy_imports_obj = {}
+
+_lazy_imports_mod = {"datasets": "openqdc.datamodule", "utils": "openqdc.utils"}
+
+
+def __getattr__(name):
+ """Lazily import objects from _lazy_imports_obj or _lazy_imports_mod
+
+ Note that this method is only called by Python if the name cannot be found
+ in the current module."""
+ obj_mod = _lazy_imports_obj.get(name)
+ if obj_mod is not None:
+ mod = importlib.import_module(obj_mod)
+ return mod.__dict__[name]
+
+ lazy_mod = _lazy_imports_mod.get(name)
+ if lazy_mod is not None:
+ return importlib.import_module(lazy_mod)
+
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def __dir__():
+ """Add _lazy_imports_obj and _lazy_imports_mod to dir()"""
+ keys = (*globals().keys(), *_lazy_imports_obj.keys(), *_lazy_imports_mod.keys())
+ return sorted(keys)
+
+
+if TYPE_CHECKING or os.environ.get("OPENQDC_DISABLE_LAZY_LOADING", "0") == "1":
+ # These types are imported lazily at runtime, but we need to tell type
+ # checkers what they are.
+ from .datasets import *
diff --git a/src/openqdc/datasets/__init__.py b/src/openqdc/datasets/__init__.py
index 5dfe6a1..d989935 100644
--- a/src/openqdc/datasets/__init__.py
+++ b/src/openqdc/datasets/__init__.py
@@ -1,42 +1,107 @@
-from .ani import ANI1, ANI1CCX, ANI1X
-from .comp6 import COMP6
-from .dess import DESS
-from .gdml import GDML
-from .geom import GEOM
-from .iso_17 import ISO17
-from .molecule3d import Molecule3D
-from .nabladft import NablaDFT
-from .orbnet_denali import OrbnetDenali
-from .pcqm import PCQM_B3LYP, PCQM_PM6
-from .qm7x import QM7X
-from .qmugs import QMugs
-from .sn2_rxn import SN2RXN
-from .solvated_peptides import SolvatedPeptides
-from .spice import Spice
-from .tmqm import TMQM
-from .transition1x import Transition1X
-from .waterclusters3_30 import WaterClusters
-
-__all__ = [
- "ANI1",
- "ANI1CCX",
- "ANI1X",
- "COMP6",
- "DESS",
- "GDML",
- "GEOM",
- "ISO17",
- "Molecule3D",
- "NablaDFT",
- "OrbnetDenali",
- "PCQM_B3LYP",
- "PCQM_PM6",
- "QM7X",
- "QMugs",
- "SN2RXN",
- "SolvatedPeptides",
- "Spice",
- "TMQM",
- "Transition1X",
- "WaterClusters",
-]
+import importlib
+import os
+from typing import TYPE_CHECKING # noqa F401
+
+# The lazy import logic below comes from openff-toolkit:
+# https://github.com/openforcefield/openff-toolkit/blob/b52879569a0344878c40248ceb3bd0f90348076a/openff/toolkit/__init__.py#L44
+
+# Dictionary of objects to lazily import; maps the object's name to its module path
+
+_lazy_imports_obj = {
+ "ANI1": "openqdc.datasets.ani",
+ "ANI1CCX": "openqdc.datasets.ani",
+ "ANI1X": "openqdc.datasets.ani",
+ "Spice": "openqdc.datasets.spice",
+ "GEOM": "openqdc.datasets.geom",
+ "QMugs": "openqdc.datasets.qmugs",
+ "ISO17": "openqdc.datasets.iso_17",
+ "COMP6": "openqdc.datasets.comp6",
+ "GDML": "openqdc.datasets.gdml",
+ "Molecule3D": "openqdc.datasets.molecule3d",
+ "OrbnetDenali": "openqdc.datasets.orbnet_denali",
+ "SN2RXN": "openqdc.datasets.sn2_rxn",
+ "QM7X": "openqdc.datasets.qm7x",
+ "DESS": "openqdc.datasets.dess",
+ "NablaDFT": "openqdc.datasets.nabladft",
+ "SolvatedPeptides": "openqdc.datasets.solvated_peptides",
+ "WaterClusters": "openqdc.datasets.waterclusters3_30",
+ "TMQM": "openqdc.datasets.tmqm",
+ "Dummy": "openqdc.datasets.dummy",
+ "PCQM_B3LYP": "openqdc.datasets.pcqm",
+ "PCQM_PM6": "openqdc.datasets.pcqm",
+ "Transition1X": "openqdc.datasets.transition1x",
+}
+
+_lazy_imports_mod = {}
+
+
+def __getattr__(name):
+ """Lazily import objects from _lazy_imports_obj or _lazy_imports_mod
+
+ Note that this method is only called by Python if the name cannot be found
+ in the current module."""
+ obj_mod = _lazy_imports_obj.get(name)
+ if obj_mod is not None:
+ mod = importlib.import_module(obj_mod)
+ return mod.__dict__[name]
+
+ lazy_mod = _lazy_imports_mod.get(name)
+ if lazy_mod is not None:
+ return importlib.import_module(lazy_mod)
+
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def __dir__():
+ """Add _lazy_imports_obj and _lazy_imports_mod to dir()"""
+ keys = (*globals().keys(), *_lazy_imports_obj.keys(), *_lazy_imports_mod.keys())
+ return sorted(keys)
+
+
+if TYPE_CHECKING or os.environ.get("OPENQDC_DISABLE_LAZY_LOADING", "0") == "1":
+ # These types are imported lazily at runtime, but we need to tell type
+ # checkers what they are.
+ from .ani import ANI1, ANI1CCX, ANI1X # noqa
+ from .comp6 import COMP6 # noqa
+ from .dess import DESS # noqa
+ from .dummy import Dummy # noqa
+ from .gdml import GDML # noqa
+ from .geom import GEOM # noqa
+ from .iso_17 import ISO17 # noqa
+ from .molecule3d import Molecule3D # noqa
+ from .nabladft import NablaDFT # noqa
+ from .orbnet_denali import OrbnetDenali # noqa
+ from .pcqm import PCQM_B3LYP, PCQM_PM6 # noqa
+ from .qm7x import QM7X # noqa
+ from .qmugs import QMugs # noqa
+ from .sn2_rxn import SN2RXN # noqa
+ from .solvated_peptides import SolvatedPeptides # noqa
+ from .spice import Spice # noqa
+ from .tmqm import TMQM # noqa
+ from .transition1x import Transition1X # noqa
+ from .waterclusters3_30 import WaterClusters # noqa
+
+ __all__ = [
+ "ANI1",
+ "ANI1X",
+ "ANI1CCX",
+ "Spice",
+ "GEOM",
+ "QMugs",
+ "ISO17",
+ "COMP6",
+ "GDML",
+ "Molecule3D",
+ "OrbnetDenali",
+ "SN2RXN",
+ "QM7X",
+ "DESS",
+ "NablaDFT",
+ "SolvatedPeptides",
+ "WaterClusters",
+ "TMQM",
+ "PCQM_B3LYP",
+ "PCQM_PM6",
+ "Transition1X",
+ "Dummy",
+ ]
diff --git a/src/openqdc/datasets/ani.py b/src/openqdc/datasets/ani.py
index 73a1ccb..913fb8a 100644
--- a/src/openqdc/datasets/ani.py
+++ b/src/openqdc/datasets/ani.py
@@ -32,11 +32,8 @@ class ANI1(BaseDataset):
"ωB97x:6-31G(d) Energy",
]
__energy_unit__ = "hartree"
- __distance_unit__ = "ang"
- __forces_unit__ = "hartree/ang"
-
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
+ __distance_unit__ = "bohr"
+ __forces_unit__ = "hartree/bohr"
@property
def root(self):
@@ -71,12 +68,15 @@ class ANI1CCX(ANI1):
"""
__name__ = "ani1ccx"
+ __energy_unit__ = "hartree"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "hartree/ang"
__energy_methods__ = [
- "ccsd(t)_cbs",
- "npno_ccsd(t)_dz",
- "npno_ccsd(t)_tz",
- "tpno_ccsd(t)_dz",
+ "ccsd(t)/cbs",
+ "ccsd(t)/cc-pvdz",
+ "ccsd(t)/cc-pvtz",
+ "tccsd(t)/cc-pvdz",
]
energy_target_names = [
@@ -89,9 +89,6 @@ class ANI1CCX(ANI1):
__force_methods__ = []
force_target_names = []
- def __init__(self) -> None:
- super().__init__()
-
class ANI1X(ANI1):
"""
@@ -110,16 +107,19 @@ class ANI1X(ANI1):
"""
__name__ = "ani1x"
+ __energy_unit__ = "hartree"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "hartree/ang"
__energy_methods__ = [
- "hf_dz",
- "hf_qz",
- "hf_tz",
- "mp2_dz",
- "mp2_qz",
- "mp2_tz",
- "wb97x_6-31g(d)",
- "wb97x_tz",
+ "hf/cc-pvdz",
+ "hf/cc-pvqz",
+ "hf/cc-pvtz",
+ "mp2/cc-pvdz",
+ "mp2/cc-pvqz",
+ "mp2/cc-pvtz",
+ "wb97x/6-31g(d)",
+ "wb97x/cc-pvtz",
]
energy_target_names = [
@@ -139,9 +139,9 @@ class ANI1X(ANI1):
]
__force_methods__ = [
- "wb97x_6-31g(d)",
- "wb97x_tz",
+ "wb97x/6-31g(d)",
+ "wb97x/cc-pvtz",
]
- def __init__(self) -> None:
- super().__init__()
+ def convert_forces(self, x):
+ return super().convert_forces(x) * 0.529177249 # correct the Dataset error
diff --git a/src/openqdc/datasets/base.py b/src/openqdc/datasets/base.py
index 598a0b3..1de6ff1 100644
--- a/src/openqdc/datasets/base.py
+++ b/src/openqdc/datasets/base.py
@@ -1,23 +1,31 @@
import os
from os.path import join as p_join
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
import numpy as np
import pandas as pd
import torch
+from ase.io.extxyz import write_extxyz
from loguru import logger
from sklearn.utils import Bunch
from tqdm import tqdm
+from openqdc.utils.atomization_energies import (
+ IsolatedAtomEnergyFactory,
+ chemical_symbols,
+)
from openqdc.utils.constants import NB_ATOMIC_FEATURES
from openqdc.utils.io import (
copy_exists,
+ dict_to_atoms,
get_local_cache,
load_hdf5_file,
pull_locally,
push_remote,
+ set_cache_dir,
)
from openqdc.utils.molecule import atom_table
+from openqdc.utils.package_utils import requires_package
from openqdc.utils.units import get_conversion
@@ -67,21 +75,44 @@ class BaseDataset(torch.utils.data.Dataset):
__force_methods__ = []
energy_target_names = []
force_target_names = []
+ __isolated_atom_energies__ = []
__energy_unit__ = "hartree"
- __distance_unit__ = "bohr"
- __forces_unit__ = "hartree/bohr"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "hartree/ang"
__fn_energy__ = lambda x: x
__fn_distance__ = lambda x: x
__fn_forces__ = lambda x: x
- def __init__(self, energy_unit=None, distance_unit=None, overwrite_local_cache=False) -> None:
+ def __init__(
+ self,
+ energy_unit: Optional[str] = None,
+ distance_unit: Optional[str] = None,
+ overwrite_local_cache: bool = False,
+ cache_dir: Optional[str] = None,
+ ) -> None:
+ set_cache_dir(cache_dir)
self.data = None
self._set_units(energy_unit, distance_unit)
if not self.is_preprocessed():
logger.info("This dataset not available. Please open an issue on Github for the team to look into it.")
+ # entries = self.read_raw_entries()
+ # res = self.collate_list(entries)
+ # self.save_preprocess(res)
else:
self.read_preprocess(overwrite_local_cache=overwrite_local_cache)
+ self._set_isolated_atom_energies()
+
+ @property
+ def numbers(self):
+ if hasattr(self, "_numbers"):
+ return self._numbers
+ self._numbers = np.array(list(set(self.data["atomic_inputs"][..., 0])), dtype=np.int32)
+ return self._numbers
+
+ @property
+ def chemical_species(self):
+ return [chemical_symbols[z] for z in self.numbers]
@property
def energy_unit(self):
@@ -140,6 +171,14 @@ def _set_units(self, en, ds):
self.__forces_unit__ = self.energy_unit + "/" + self.distance_unit
self.__class__.__fn_forces__ = get_conversion(old_en + "/" + old_ds, self.__forces_unit__)
+ def _set_isolated_atom_energies(self):
+ if self.__energy_methods__ is None:
+ logger.error("No energy methods defined for this dataset.")
+ f = get_conversion("hartree", self.__energy_unit__)
+ self.__isolated_atom_energies__ = f(
+ np.array([IsolatedAtomEnergyFactory.get_matrix(en_method) for en_method in self.__energy_methods__])
+ )
+
def convert_energy(self, x):
return self.__class__.__fn_energy__(x)
@@ -149,12 +188,18 @@ def convert_distance(self, x):
def convert_forces(self, x):
return self.__class__.__fn_forces__(x)
- def set_energy_unit(self, value):
+ def set_energy_unit(self, value: str):
+ """
+ Set a new energy unit for the dataset.
+ """
old_unit = self.energy_unit
self.__energy_unit__ = value
self.__class__.__fn_energy__ = get_conversion(old_unit, value)
- def set_distance_unit(self, value):
+ def set_distance_unit(self, value: str):
+ """
+ Set a new distance unit for the dataset.
+ """
old_unit = self.distance_unit
self.__distance_unit__ = value
self.__class__.__fn_distance__ = get_conversion(old_unit, value)
@@ -175,11 +220,6 @@ def collate_list(self, list_entries):
def save_preprocess(self, data_dict):
# save memmaps
logger.info("Preprocessing data and saving it to cache.")
- logger.info(
- f"Dataset {self.__name__} data with the following units:\n"
- f"Energy: {self.energy_unit}, Distance: {self.distance_unit}, "
- f"Forces: {self.force_unit if self.__force_methods__ else 'None'}"
- )
for key in self.data_keys:
local_path = p_join(self.preprocess_path, f"{key}.mmap")
out = np.memmap(local_path, mode="w+", dtype=data_dict[key].dtype, shape=data_dict[key].shape)
@@ -198,10 +238,10 @@ def save_preprocess(self, data_dict):
def read_preprocess(self, overwrite_local_cache=False):
logger.info("Reading preprocessed data")
logger.info(
- f"{self.__name__} data with the following units:\
- Energy: {self.energy_unit},\
- Distance: {self.distance_unit},\
- Forces: {self.force_unit}"
+ f"{self.__name__} data with the following units:\n\
+ Energy: {self.energy_unit},\n\
+ Distance: {self.distance_unit},\n\
+ Forces: {self.force_unit if self.__force_methods__ else 'None'}"
)
self.data = {}
for key in self.data_keys:
@@ -237,10 +277,123 @@ def preprocess(self):
res = self.collate_list(entries)
self.save_preprocess(res)
+ def save_xyz(self, idx: int, path: Optional[str] = None):
+ """
+ Save the entry at index idx as an extxyz file.
+ """
+ if path is None:
+ path = os.getcwd()
+ at = self.get_ase_atoms(idx, ext=True)
+ name = at.info["name"]
+ write_extxyz(p_join(path, f"{name}.xyz"), at)
+
+ def get_ase_atoms(self, idx: int, ext=True):
+ """
+ Get the ASE atoms object for the entry at index idx.
+
+ Parameters
+ ----------
+ idx : int
+ Index of the entry.
+ ext : bool, optional
+ Whether to include additional informations
+ """
+ entry = self[idx]
+ # _ = entry.pop("forces")
+ at = dict_to_atoms(entry, ext=ext)
+ return at
+
+ @requires_package("dscribe")
+ @requires_package("datamol")
+ def chemical_space(
+ self,
+ n_samples: Optional[Union[List[int], int]] = None,
+ return_idxs: bool = True,
+ progress: bool = True,
+ **soap_kwargs,
+ ) -> Dict[str, np.ndarray]:
+ """
+ Compute the SOAP descriptors for the dataset.
+
+ Parameters
+ ----------
+ n_samples : Optional[Union[List[int],int]], optional
+ Number of samples to use for the computation, by default None. If None, all the dataset is used.
+ If a list of integers is provided, the descriptors are computed for each of the specified idx of samples.
+ return_idxs : bool, optional
+ Whether to return the indices of the samples used, by default True.
+ progress : bool, optional
+ Whether to show a progress bar, by default True.
+ **soap_kwargs : dict
+ Keyword arguments to pass to the SOAP descriptor.
+ By defaut, the following values are used:
+ - r_cut : 5.0
+ - n_max : 8
+ - l_max : 6
+ - average : "inner"
+ - periodic : False
+ - compression : {"mode" : "mu1nu1"}
+
+ Returns
+ -------
+ Dict[str, np.ndarray]
+ Dictionary containing the following keys:
+ - soap : np.ndarray of shape (N, M) containing the SOAP descriptors for the dataset
+ - soap_kwargs : dict containing the keyword arguments used for the SOAP descriptor
+ - idxs : np.ndarray of shape (N,) containing the indices of the samples used
+
+ """
+ import datamol as dm
+ from dscribe.descriptors import SOAP
+
+ if n_samples is None:
+ idxs = list(range(len(self)))
+ elif isinstance(n_samples, int):
+ idxs = np.random.choice(len(self), size=n_samples, replace=False)
+ elif isinstance(n_samples, list):
+ idxs = n_samples
+ datum = {}
+ r_cut = soap_kwargs.pop("r_cut", 5.0)
+ n_max = soap_kwargs.pop("n_max", 8)
+ l_max = soap_kwargs.pop("l_max", 6)
+ average = soap_kwargs.pop("average", "inner")
+ periodic = soap_kwargs.pop("periodic", False)
+ compression = soap_kwargs.pop("compression", {"mode": "mu1nu1"})
+ soap = SOAP(
+ species=self.chemical_species,
+ periodic=periodic,
+ r_cut=r_cut,
+ n_max=n_max,
+ l_max=l_max,
+ average=average,
+ compression=compression,
+ )
+ datum["soap_kwargs"] = {
+ "r_cut": r_cut,
+ "n_max": n_max,
+ "l_max": l_max,
+ "average": average,
+ "compression": compression,
+ "species": self.chemical_species,
+ "periodic": periodic,
+ **soap_kwargs,
+ }
+
+ def wrapper(idx):
+ entry = self.get_ase_atoms(idx, ext=False)
+ return soap.create(entry, centers=entry.positions)
+
+ descr = dm.parallelized(wrapper, idxs, progress=progress, scheduler="threads")
+ datum["soap"] = np.vstack(descr)
+ if return_idxs:
+ datum["idxs"] = idxs
+ return datum
+
def __len__(self):
return self.data["energies"].shape[0]
def __getitem__(self, idx: int):
+ shift = IsolatedAtomEnergyFactory.max_charge
p_start, p_end = self.data["position_idx_range"][idx]
input = self.data["atomic_inputs"][p_start:p_end]
z, c, positions, energies = (
@@ -256,14 +409,19 @@ def __getitem__(self, idx: int):
forces = self.convert_forces(np.array(self.data["forces"][p_start:p_end], dtype=np.float32))
else:
forces = None
-
return Bunch(
positions=positions,
atomic_numbers=z,
charges=c,
- e0=self.convert_energy(self.atomic_energies[z]),
+ e0=self.__isolated_atom_energies__[..., z, c + shift].T,
energies=energies,
name=name,
subset=subset,
forces=forces,
)
+
+ def __str__(self):
+ return f"{self.__name__}"
+
+ def __repr__(self):
+ return f"{self.__name__}"
diff --git a/src/openqdc/datasets/comp6.py b/src/openqdc/datasets/comp6.py
index 16f43ca..c95ec17 100644
--- a/src/openqdc/datasets/comp6.py
+++ b/src/openqdc/datasets/comp6.py
@@ -23,20 +23,20 @@ class COMP6(BaseDataset):
__name__ = "comp6"
     # Watch out: forces are stored as -grad(E)
- __energy_unit__ = "hartree"
- __distance_unit__ = "ang"
- __forces_unit__ = "hartree/ang"
+ __energy_unit__ = "kcal/mol"
+ __distance_unit__ = "bohr" # bohr
+ __forces_unit__ = "kcal/mol/bohr"
__energy_methods__ = [
"wb97x/6-31g*",
- "b3lyp-d3m(bj)_tz",
- "b3lyp_tz",
- "hf_tz",
- "pbe-d3(bj)_dz",
- "pbe_tz",
- "svwm_tz",
- "wb97m-d3(bj)_tz",
- "wb97m_tz",
+ "b3lyp-d3mbj/def2-tzvp",
+ "b3lyp/def2-tzvp",
+ "hf/def2-tzvp",
+ "pbe-d3bj/def2-tzvp",
+ "pbe/def2-tzvp",
+ "svwn/def2-tzvp",
+ "wb97m-d3bj/def2-tzvp",
+ "wb97m/def2-tzvp",
]
energy_target_names = [
@@ -59,9 +59,6 @@ class COMP6(BaseDataset):
"Gradient",
]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
samples = []
for subset in ["ani_md", "drugbank", "gdb7_9", "gdb10_13", "s66x8", "tripeptides"]:
diff --git a/src/openqdc/datasets/dess.py b/src/openqdc/datasets/dess.py
index 7136ab1..80b1e1c 100644
--- a/src/openqdc/datasets/dess.py
+++ b/src/openqdc/datasets/dess.py
@@ -34,15 +34,18 @@ def read_mol(mol_path, smiles, subset, targets):
class DESS(BaseDataset):
__name__ = "dess"
+ __energy_unit__ = "hartree"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "hartree/ang"
__energy_methods__ = [
- "mp2_cc",
- "mp2_qz",
- "mp2_tz",
- "mp2_cbs",
- "ccsd(t)_cc",
- "ccsd(t)_cbs",
- "ccsd(t)_nn",
- "sapt",
+ "mp2/cc-pvdz",
+ "mp2/cc-pvqz",
+ "mp2/cc-pvtz",
+ "mp2/cbs",
+ "ccsd(t)/cc-pvdz",
+ "ccsd(t)/cbs", # cbs
+ "ccsd(t)/nn", # nn
+ "sapt0/aug-cc-pwcvxz",
]
energy_target_names = [
@@ -59,9 +62,6 @@ class DESS(BaseDataset):
partitions = ["DES370K", "DES5M"]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def _read_raw_(self, part):
df = pd.read_csv(p_join(self.root, f"{part}.csv"))
for col in self.energy_target_names:
diff --git a/src/openqdc/datasets/dummy.py b/src/openqdc/datasets/dummy.py
new file mode 100644
index 0000000..4e1ff17
--- /dev/null
+++ b/src/openqdc/datasets/dummy.py
@@ -0,0 +1,47 @@
+import numpy as np # noqa
+from sklearn.utils import Bunch
+
+from openqdc.datasets.base import BaseDataset
+
+
+class Dummy(BaseDataset):
+ """
+ Dummy dataset
+ """
+
+ __name__ = "dummy"
+ __energy_methods__ = ["I_solved_the_schrodinger_equation_by_hand"]
+ __force_methods__ = ["I_made_up_random_forces"]
+ __energy_unit__ = "kcal/mol"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "kcal/mol/ang"
+
+ energy_target_names = ["energy"]
+
+ force_target_names = ["forces"]
+
+ def __init__(self, energy_unit=None, distance_unit=None, cache_dir=None) -> None:
+ try:
+ super().__init__(energy_unit=energy_unit, distance_unit=distance_unit, cache_dir=cache_dir)
+ except: # noqa
+ pass
+
+ def read_raw_entries(self):
+ pass
+
+ def __len__(self):
+ return 999999999
+
+ def __getitem__(self, idx: int):
+ size = np.random.randint(1, 250)
+ z = np.random.randint(1, 100, size)
+ return Bunch(
+ positions=np.random.rand(size, 3) * 10,
+ atomic_numbers=z,
+ charges=np.random.randint(-1, 2, size),
+ e0=np.zeros(size),
+ energies=np.random.rand(1) * 100,
+ name="dummy_{}".format(idx),
+ subset="dummy",
+ forces=np.random.rand(size, 3) * 100,
+ )
diff --git a/src/openqdc/datasets/gdml.py b/src/openqdc/datasets/gdml.py
index 80ab0ba..789f84a 100644
--- a/src/openqdc/datasets/gdml.py
+++ b/src/openqdc/datasets/gdml.py
@@ -32,9 +32,9 @@ class GDML(BaseDataset):
__energy_methods__ = [
"ccsd/cc-pvdz",
"ccsd(t)/cc-pvdz",
- # "pbe+mbd/light", #MD22
+ "pbe/mbd", # MD22
# "pbe+mbd/tight", #MD22
- "pbe+vdw-ts", # MD17
+ "pbe/vdw-ts", # MD17
]
energy_target_names = [
@@ -46,9 +46,9 @@ class GDML(BaseDataset):
__force_methods__ = [
"ccsd/cc-pvdz",
"ccsd(t)/cc-pvdz",
- # "pbe+mbd/light", #MD22
+ "pbe/mbd", # MD22
# "pbe+mbd/tight", #MD22
- "pbe+vdw-ts", # MD17
+ "pbe/vdw-ts", # MD17
]
force_target_names = [
@@ -58,11 +58,8 @@ class GDML(BaseDataset):
]
__energy_unit__ = "kcal/mol"
- __distance_unit__ = "ang"
- __forces_unit__ = "kcal/mol/ang"
-
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
+ __distance_unit__ = "bohr"
+ __forces_unit__ = "kcal/mol/bohr"
def read_raw_entries(self):
raw_path = p_join(self.root, "gdml.h5")
diff --git a/src/openqdc/datasets/geom.py b/src/openqdc/datasets/geom.py
index 065606d..c016a9f 100644
--- a/src/openqdc/datasets/geom.py
+++ b/src/openqdc/datasets/geom.py
@@ -87,9 +87,6 @@ class GEOM(BaseDataset):
partitions = ["qm9", "drugs"]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def _read_raw_(self, partition):
raw_path = p_join(self.root, "rdkit_folder")
diff --git a/src/openqdc/datasets/iso_17.py b/src/openqdc/datasets/iso_17.py
index be811c4..735ae67 100644
--- a/src/openqdc/datasets/iso_17.py
+++ b/src/openqdc/datasets/iso_17.py
@@ -24,7 +24,7 @@ class ISO17(BaseDataset):
__name__ = "iso_17"
__energy_methods__ = [
- "pbe+vdw-ts",
+ "pbe/vdw-ts",
]
energy_target_names = [
@@ -32,7 +32,7 @@ class ISO17(BaseDataset):
]
__force_methods__ = [
- "pbe+vdw-ts",
+ "pbe/vdw-ts",
]
force_target_names = [
@@ -40,11 +40,8 @@ class ISO17(BaseDataset):
]
__energy_unit__ = "ev"
- __distance_unit__ = "ang"
- __forces_unit__ = "ev/ang"
-
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
+ __distance_unit__ = "bohr" # bohr
+ __forces_unit__ = "ev/bohr"
def read_raw_entries(self):
raw_path = p_join(self.root, "iso_17.h5")
diff --git a/src/openqdc/datasets/molecule3d.py b/src/openqdc/datasets/molecule3d.py
index 9a49445..dc47e53 100644
--- a/src/openqdc/datasets/molecule3d.py
+++ b/src/openqdc/datasets/molecule3d.py
@@ -84,15 +84,12 @@ class Molecule3D(BaseDataset):
__name__ = "molecule3d"
__energy_methods__ = ["b3lyp/6-31g*"]
# UNITS MOST LIKELY WRONG, MUST CHECK THEM MANUALLY
- __energy_unit__ = "hartree"
+ __energy_unit__ = "ev" # CALCULATED
__distance_unit__ = "ang"
- __forces_unit__ = "hartree/ang"
+ __forces_unit__ = "ev/ang"
energy_target_names = ["b3lyp/6-31g*.energy"]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
raw = p_join(self.root, "data", "raw")
sdf_paths = glob(p_join(raw, "*.sdf"))
diff --git a/src/openqdc/datasets/nabladft.py b/src/openqdc/datasets/nabladft.py
index 58f7839..e7d9eb8 100644
--- a/src/openqdc/datasets/nabladft.py
+++ b/src/openqdc/datasets/nabladft.py
@@ -4,10 +4,10 @@
import datamol as dm
import numpy as np
-from nablaDFT.dataset import HamiltonianDatabase
from tqdm import tqdm
from openqdc.datasets.base import BaseDataset
+from openqdc.utils.package_utils import requires_package
def to_mol(entry) -> Dict[str, np.ndarray]:
@@ -26,7 +26,10 @@ def to_mol(entry) -> Dict[str, np.ndarray]:
return res
+@requires_package("nablaDFT")
def read_chunk_from_db(raw_path, start_idx, stop_idx, step_size=1000):
+ from nablaDFT.dataset import HamiltonianDatabase
+
print(f"Loading from {start_idx} to {stop_idx}")
db = HamiltonianDatabase(raw_path)
idxs = list(np.arange(start_idx, stop_idx))
@@ -58,13 +61,13 @@ class NablaDFT(BaseDataset):
energy_target_names = ["wb97x-d/def2-svp"]
__energy_unit__ = "hartree"
- __distance_unit__ = "ang"
- __forces_unit__ = "hartree/ang"
-
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
+ __distance_unit__ = "bohr"
+ __forces_unit__ = "hartree/bohr"
+ @requires_package("nablaDFT")
def read_raw_entries(self):
+ from nablaDFT.dataset import HamiltonianDatabase
+
raw_path = p_join(self.root, "dataset_full.db")
train = HamiltonianDatabase(raw_path)
n, c = len(train), 20
diff --git a/src/openqdc/datasets/orbnet_denali.py b/src/openqdc/datasets/orbnet_denali.py
index a39933c..614e252 100644
--- a/src/openqdc/datasets/orbnet_denali.py
+++ b/src/openqdc/datasets/orbnet_denali.py
@@ -53,15 +53,11 @@ class OrbnetDenali(BaseDataset):
__name__ = "orbnet_denali"
__energy_methods__ = ["wb97x-d3/def2-tzvp", "gfn1_xtb"]
- # not sure probably Hartree ang -> must manually check
energy_target_names = ["dft_energy", "xtb1_energy"]
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
__forces_unit__ = "hartree/ang"
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
label_path = p_join(self.root, "denali_labels.csv")
df = pd.read_csv(label_path, usecols=["sample_id", "mol_id", "subset", "dft_energy", "xtb1_energy"])
diff --git a/src/openqdc/datasets/qm7x.py b/src/openqdc/datasets/qm7x.py
index be68794..eb8b015 100644
--- a/src/openqdc/datasets/qm7x.py
+++ b/src/openqdc/datasets/qm7x.py
@@ -35,11 +35,11 @@ def read_mol(mol_h5, mol_name, energy_target_names, force_target_names):
class QM7X(BaseDataset):
__name__ = "qm7x"
- __energy_methods__ = ["pbe0+mbd", "dft3b+mbd"]
+ __energy_methods__ = ["pbe0/mbd", "dft3b"]
energy_target_names = ["ePBE0", "eMBD"]
- __force_methods__ = ["pbe0+mbd", "dft3b+mbd"]
+ __force_methods__ = ["pbe0/mbd", "dft3b"]
force_target_names = ["pbe0FOR", "vdwFOR"]
diff --git a/src/openqdc/datasets/qmugs.py b/src/openqdc/datasets/qmugs.py
index e1ca2c2..c75f8b5 100644
--- a/src/openqdc/datasets/qmugs.py
+++ b/src/openqdc/datasets/qmugs.py
@@ -52,7 +52,7 @@ class QMugs(BaseDataset):
"""
__name__ = "qmugs"
- __energy_methods__ = ["gfn2_xtb", "b3lyp/6-31g*"]
+ __energy_methods__ = ["gfn2_xtb", "wb97x-d/def2-svp"]
__energy_unit__ = "hartree"
__distance_unit__ = "ang"
__forces_unit__ = "hartree/ang"
@@ -62,9 +62,6 @@ class QMugs(BaseDataset):
"DFT:TOTAL_ENERGY",
]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
raw_path = p_join(self.root, "structures")
mol_dirs = [p_join(raw_path, d) for d in os.listdir(raw_path)]
diff --git a/src/openqdc/datasets/sn2_rxn.py b/src/openqdc/datasets/sn2_rxn.py
index fcdcf24..3e75e91 100644
--- a/src/openqdc/datasets/sn2_rxn.py
+++ b/src/openqdc/datasets/sn2_rxn.py
@@ -7,27 +7,24 @@ class SN2RXN(BaseDataset):
__name__ = "sn2_rxn"
__energy_methods__ = [
- "dsd-blyp-d3(bj)_tz",
+ "dsd-blyp-d3(bj)/def2-tzvp",
]
__energy_unit__ = "ev"
- __distance_unit__ = "ang"
- __forces_unit__ = "ev/ang"
+ __distance_unit__ = "bohr"
+ __forces_unit__ = "ev/bohr"
energy_target_names = [
"DSD-BLYP-D3(BJ):def2-TZVP Atomization Energy",
]
__force_methods__ = [
- "dsd-blyp-d3(bj)_tz",
+ "dsd-blyp-d3(bj)/def2-tzvp",
]
force_target_names = [
"DSD-BLYP-D3(BJ):def2-TZVP Gradient",
]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
raw_path = p_join(self.root, "sn2_rxn.h5")
samples = read_qc_archive_h5(raw_path, "sn2_rxn", self.energy_target_names, self.force_target_names)
diff --git a/src/openqdc/datasets/solvated_peptides.py b/src/openqdc/datasets/solvated_peptides.py
index 9b44b76..9846bdf 100644
--- a/src/openqdc/datasets/solvated_peptides.py
+++ b/src/openqdc/datasets/solvated_peptides.py
@@ -27,9 +27,6 @@ class SolvatedPeptides(BaseDataset):
__distance_unit__ = "bohr"
__forces_unit__ = "hartree/bohr"
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
raw_path = p_join(self.root, "solvated_peptides.h5")
samples = read_qc_archive_h5(raw_path, "solvated_peptides", self.energy_target_names, self.force_target_names)
diff --git a/src/openqdc/datasets/spice.py b/src/openqdc/datasets/spice.py
index ace4ecc..974d45f 100644
--- a/src/openqdc/datasets/spice.py
+++ b/src/openqdc/datasets/spice.py
@@ -6,7 +6,6 @@
from openqdc.datasets.base import BaseDataset
from openqdc.utils import load_hdf5_file
-from openqdc.utils.constants import MAX_ATOMIC_NUMBER
from openqdc.utils.molecule import get_atomic_number_and_charge
@@ -21,7 +20,9 @@ def read_record(r):
name=np.array([smiles] * n_confs),
subset=np.array([Spice.subset_mapping[subset]] * n_confs),
energies=r[Spice.energy_target_names[0]][:][:, None].astype(np.float32),
- forces=r[Spice.force_target_names[0]][:].reshape(-1, 3, 1) * (-1.0), # forces -ve of energy gradient
+ forces=r[Spice.force_target_names[0]][:].reshape(
+ -1, 3, 1
+ ), # forces -ve of energy gradient but the -1.0 is done in the convert_forces method
atomic_inputs=np.concatenate(
(x[None, ...].repeat(n_confs, axis=0), positions), axis=-1, dtype=np.float32
).reshape(-1, 5),
@@ -49,38 +50,16 @@ class Spice(BaseDataset):
"""
__name__ = "spice"
- __energy_methods__ = ["wb97x/def2-tzvp"]
- __force_methods__ = ["wb97x/def2-tzvp"]
+ __energy_methods__ = ["wb97m-d3bj/def2-tzvppd"]
+ __force_methods__ = ["wb97m-d3bj/def2-tzvppd"]
__energy_unit__ = "hartree"
- __distance_unit__ = "ang"
- __forces_unit__ = "hartree/ang"
+ __distance_unit__ = "bohr"
+ __forces_unit__ = "hartree/bohr"
energy_target_names = ["dft_total_energy"]
force_target_names = ["dft_total_gradient"]
- # Energy in hartree, all zeros by default
- atomic_energies = np.zeros((MAX_ATOMIC_NUMBER,), dtype=np.float32)
- tmp = {
- 35: -2574.2451510945853,
- 6: -37.91424135791358,
- 20: -676.9528465198214,
- 17: -460.3350243496703,
- 9: -99.91298732343974,
- 1: -0.5027370838721259,
- 53: -297.8813829975981,
- 19: -599.8025677513111,
- 3: -7.285254714046546,
- 12: -199.2688420040449,
- 7: -54.62327513368922,
- 11: -162.11366478783253,
- 8: -75.17101657391741,
- 15: -341.3059197024934,
- 16: -398.2405387031612,
- }
- for key in tmp:
- atomic_energies[key] = tmp[key]
-
subset_mapping = {
"SPICE Solvated Amino Acids Single Points Dataset v1.1": "Solvated Amino Acids",
"SPICE Dipeptides Single Points Dataset v1.2": "Dipeptides",
@@ -96,8 +75,8 @@ class Spice(BaseDataset):
"SPICE Ion Pairs Single Points Dataset v1.1": "Ion Pairs",
}
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
+ def convert_forces(self, x):
+ return (-1.0) * super().convert_forces(x)  # raw values are energy gradients: negate the converted result to get forces
def read_raw_entries(self):
raw_path = p_join(self.root, "SPICE-1.1.4.hdf5")
diff --git a/src/openqdc/datasets/tmqm.py b/src/openqdc/datasets/tmqm.py
index 4fae561..8952aaa 100644
--- a/src/openqdc/datasets/tmqm.py
+++ b/src/openqdc/datasets/tmqm.py
@@ -47,12 +47,13 @@ def read_xyz(fname, e_map):
class TMQM(BaseDataset):
__name__ = "tmqm"
- __energy_methods__ = ["tpssh/def2tzvp"]
+ __energy_methods__ = ["tpssh/def2-tzvp"]
energy_target_names = ["TPSSh/def2TZVP level"]
- def __init__(self) -> None:
- super().__init__()
+ __energy_unit__ = "hartree"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "hartree/ang"
def read_raw_entries(self):
df = pd.read_csv(p_join(self.root, "tmQM_y.csv"), sep=";", usecols=["CSD_code", "Electronic_E"])
diff --git a/src/openqdc/datasets/transition1x.py b/src/openqdc/datasets/transition1x.py
index 6a6f844..56ae7e6 100644
--- a/src/openqdc/datasets/transition1x.py
+++ b/src/openqdc/datasets/transition1x.py
@@ -55,9 +55,6 @@ class Transition1X(BaseDataset):
"wB97x_6-31G(d).forces",
]
- def __init__(self) -> None:
- super().__init__()
-
def read_raw_entries(self):
raw_path = p_join(self.root, "Transition1x.h5")
f = load_hdf5_file(raw_path)["data"]
diff --git a/src/openqdc/datasets/waterclusters3_30.py b/src/openqdc/datasets/waterclusters3_30.py
index 1de2e14..6aa5748 100644
--- a/src/openqdc/datasets/waterclusters3_30.py
+++ b/src/openqdc/datasets/waterclusters3_30.py
@@ -5,6 +5,7 @@
from tqdm import tqdm
from openqdc.datasets.base import BaseDataset
+from openqdc.utils.constants import MAX_ATOMIC_NUMBER
from openqdc.utils.molecule import atom_table
# we could use ase.io.read to read extxyz files
@@ -50,14 +51,15 @@ def read_xyz(fname, n_waters):
class WaterClusters(BaseDataset):
__name__ = "waterclusters3_30"
- # need to know where to find the data
- __energy_methods__ = ["ttm2.1-f"]
+ # Isolated atom energies in hartree, all zeros by default (the dataset's own energy unit is kcal/mol)
+ atomic_energies = np.zeros((MAX_ATOMIC_NUMBER,), dtype=np.float32)
+ __energy_unit__ = "kcal/mol"
+ __distance_unit__ = "ang"
+ __forces_unit__ = "kcal/mol/ang"
+ __energy_methods__ = ["ttm2.1-f"]
energy_target_names = ["TTM2.1-F Potential"]
- def __init__(self, energy_unit=None, distance_unit=None) -> None:
- super().__init__(energy_unit=energy_unit, distance_unit=distance_unit)
-
def read_raw_entries(self):
samples = []
for i in range(3, 31):
diff --git a/src/openqdc/utils/__init__.py b/src/openqdc/utils/__init__.py
index 92eec25..aeb5321 100644
--- a/src/openqdc/utils/__init__.py
+++ b/src/openqdc/utils/__init__.py
@@ -1,13 +1,17 @@
from .io import (
check_file,
create_hdf5_file,
+ get_local_cache,
+ get_remote_cache,
load_hdf5_file,
load_json,
load_pkl,
load_torch,
makedirs,
save_pkl,
+ set_cache_dir,
)
+from .units import get_conversion
__all__ = [
"load_pkl",
@@ -18,4 +22,8 @@
"load_torch",
"create_hdf5_file",
"check_file",
+ "set_cache_dir",
+ "get_local_cache",
+ "get_remote_cache",
+ "get_conversion",
]
diff --git a/src/openqdc/utils/atomization_energies.py b/src/openqdc/utils/atomization_energies.py
new file mode 100644
index 0000000..40d0d13
--- /dev/null
+++ b/src/openqdc/utils/atomization_energies.py
@@ -0,0 +1,1877 @@
+from typing import Dict, Tuple, TypeAlias
+
+import numpy as np
+from loguru import logger
+
+from openqdc.utils.constants import MAX_ATOMIC_NUMBER
+
+__all__ = ["chemical_symbols", "atomic_numbers", "IsolatedAtomEnergyFactory"]
+
+EF_KEY: TypeAlias = Tuple[str, int]
+
+ATOM_SPECIES = "H", "Li", "B", "C", "N", "O", "F", "Na", "Mg", "Si", "P", "S", "Cl", "K", "Ca", "Br", "I"
+# Units: energies in atomic units (Hartree), lengths in Angstrom
+
+# Energies were not calculated for Pd, Pt, Mo, Ni, Fe, Cu (see DESS)
+atomic_numbers = {}
+chemical_symbols = [
+ "X",
+ "H",
+ "He",
+ "Li",
+ "Be",
+ "B",
+ "C",
+ "N",
+ "O",
+ "F",
+ "Ne",
+ "Na",
+ "Mg",
+ "Al",
+ "Si",
+ "P",
+ "S",
+ "Cl",
+ "Ar",
+ "K",
+ "Ca",
+ "Sc",
+ "Ti",
+ "V",
+ "Cr",
+ "Mn",
+ "Fe",
+ "Co",
+ "Ni",
+ "Cu",
+ "Zn",
+ "Ga",
+ "Ge",
+ "As",
+ "Se",
+ "Br",
+ "Kr",
+ "Rb",
+ "Sr",
+ "Y",
+ "Zr",
+ "Nb",
+ "Mo",
+ "Tc",
+ "Ru",
+ "Rh",
+ "Pd",
+ "Ag",
+ "Cd",
+ "In",
+ "Sn",
+ "Sb",
+ "Te",
+ "I",
+ "Xe",
+ "Cs",
+ "Ba",
+ "La",
+ "Ce",
+ "Pr",
+ "Nd",
+ "Pm",
+ "Sm",
+ "Eu",
+ "Gd",
+ "Tb",
+ "Dy",
+ "Ho",
+ "Er",
+ "Tm",
+ "Yb",
+ "Lu",
+ "Hf",
+ "Ta",
+ "W",
+ "Re",
+ "Os",
+ "Ir",
+ "Pt",
+ "Au",
+ "Hg",
+ "Tl",
+ "Pb",
+ "Bi",
+ "Po",
+ "At",
+ "Rn",
+ "Fr",
+ "Ra",
+ "Ac",
+ "Th",
+ "Pa",
+ "U",
+ "Np",
+ "Pu",
+ "Am",
+ "Cm",
+ "Bk",
+ "Cf",
+ "Es",
+ "Fm",
+ "Md",
+ "No",
+ "Lr",
+]
+
+
+for Z, symbol in enumerate(chemical_symbols):
+ atomic_numbers[symbol] = Z
+
+
+class IsolatedAtomEnergyFactory:
+ """
+ Look up the isolated atom energies for a given level of theory, either as a dict or as a matrix.
+ """
+
+ max_charge = 4  # largest absolute charge handled; get_matrix allocates 2 * max_charge + 1 charge columns
+
+ def __init__(self):
+ pass  # stateless: nothing to initialise
+
+ def __call__(self, level_of_theory: str):
+ """
+ Shortcut for the ``get`` method (see ``get`` for the accepted format and the returned dict)
+
+ Parameters
+ ----------
+ level_of_theory: str
+ """
+ return self.get(level_of_theory=level_of_theory)
+
+ @staticmethod
+ def get(level_of_theory: str) -> Dict[EF_KEY, float]:
+ """
+ Get the dict of isolated atom energies for a given level of theory (the lookup is case-insensitive)
+
+ Parameters
+ ----------
+ level_of_theory: str
+ Level of theory in the format "functional/basis" or "functional" if semi empirical
+
+ Returns
+ -------
+ dict[tuple[str, int], float]
+ Dictionary containing the isolated atom energies for each entry written as a tuple (atom, charge):
+
+ {("H", 1): 0.0, ...}
+ The all-zero ZEROS table is returned when the functional or the basis is not found.
+ """
+ level_of_theory = level_of_theory.lower()
+ is_dft = True
+ try:
+ func, basis = level_of_theory.split("/")  # raises ValueError unless there is exactly one "/"
+ except ValueError:
+ func = level_of_theory  # no "functional/basis" pair: use the whole string as the lookup key
+ is_dft = not is_dft  # i.e. False: skip the basis lookup below
+ functional_dict = ISOLATED_ATOM_ENERGIES.get(func, None)
+ if functional_dict is None:
+ logger.warning(f"Isolated atom energies not found for {level_of_theory}")
+ return ZEROS
+ if not is_dft:
+ return functional_dict  # returned as-is, without a basis lookup
+ return functional_dict.get(basis, ZEROS)  # unknown basis falls back to ZEROS (no warning is logged here)
+
+ @staticmethod
+ def get_matrix(level_of_theory: str) -> np.ndarray:
+ """
+ Get the matrix of isolated atom energies for a given level of theory
+
+ Parameters
+ ----------
+ level_of_theory: str
+ Level of theory in the format "functional/basis" or "functional" if semi empirical
+
+ Returns
+ -------
+ np.ndarray of shape (MAX_ATOMIC_NUMBER, 2 * max_charge + 1)
+ Matrix of isolated atom energies: row = atomic number, column = charge + max_charge. Sketch for charges -2..+2:
+
+ | | -2 | -1 | 0 | +1 | +2 | <- charges
+ |---|----|----|---|----|----|
+ | 0 | | | | | |
+ | 1 | | | | | |
+ | 2 | | | | | |
+ NOTE(review): some energy tables in this module hold None entries; confirm how they should be stored here.
+ """
+ shift = IsolatedAtomEnergyFactory.max_charge
+ matrix = np.zeros((MAX_ATOMIC_NUMBER, shift * 2 + 1))
+ tuple_hashmap = IsolatedAtomEnergyFactory.get(level_of_theory)
+ if tuple_hashmap is None:  # defensive: get() normally returns a dict (ZEROS on a miss)
+ return matrix
+ for key in tuple_hashmap.keys():
+ matrix[atomic_numbers[key[0]], key[1] + shift] = tuple_hashmap[key]  # key is (symbol, charge)
+ return matrix
+
+
+ZEROS = {
+ ("Br", -1): 0.0,
+ ("Br", 0): 0.0,
+ ("C", -1): 0.0,
+ ("C", 0): 0.0,
+ ("C", 1): 0.0,
+ ("Ca", 2): 0.0,
+ ("Cl", -1): 0.0,
+ ("Cl", 0): 0.0,
+ ("F", -1): 0.0,
+ ("F", 0): 0.0,
+ ("H", 0): 0.0,
+ ("I", -1): 0.0,
+ ("I", 0): 0.0,
+ ("K", 1): 0.0,
+ ("Li", 1): 0.0,
+ ("Mg", 2): 0.0,
+ ("N", -1): 0.0,
+ ("N", 0): 0.0,
+ ("N", 1): 0.0,
+ ("Na", 1): 0.0,
+ ("O", -1): 0.0,
+ ("O", 0): 0.0,
+ ("O", 1): 0.0,
+ ("P", 0): 0.0,
+ ("P", 1): 0.0,
+ ("S", -1): 0.0,
+ ("S", 0): 0.0,
+ ("S", 1): 0.0,
+}
+
+wb97m_d3bj_def2_tzvp = {
+ ("Br", -1): -2574.2451510945853,
+ ("Br", 0): -2574.1167240829964,
+ ("C", -1): -37.91424135791358,
+ ("C", 0): -37.87264507233593,
+ ("C", 1): -37.45349214963933,
+ ("Ca", 2): -676.9528465198214,
+ ("Cl", -2): -459.6072967078548,
+ ("Cl", -1): -460.3350243496703,
+ ("Cl", 0): -460.1988762285739,
+ ("Cl", 2): -458.7433813454319,
+ ("F", -1): -99.91298732343974,
+ ("F", 0): -99.78611622985483,
+ ("H", -1): -0.5027370838721212,
+ ("H", 0): -0.4987605100487341,
+ ("H", 1): 0.0,
+ ("I", -1): -297.8813829975981,
+ ("I", 0): -297.76228914445625,
+ ("K", 1): -599.8025677513111,
+ ("Li", 1): -7.285254714046546,
+ ("Mg", 2): -199.2688420040449,
+ ("N", -1): -54.602291095426494,
+ ("N", 0): -54.62327513368922,
+ ("N", 1): -54.08594142587869,
+ ("Na", 1): -162.11366478783253,
+ ("O", -1): -75.17101657391741,
+ ("O", 0): -75.11317840410095,
+ ("O", 1): -74.60241514396725,
+ ("P", 0): -341.3059197024934,
+ ("P", 1): -340.9258392474849,
+ ("S", -1): -398.2405387031612,
+ ("S", 0): -398.1599636677874,
+ ("S", 1): -397.7746615977658,
+}
+GFN1 = {
+ ("H", -1): -0.5678094489236601,
+ ("H", 0): -0.4014294744618301,
+ ("H", 1): 0.2350495,
+ ("Li", 1): 0.13691666666666666,
+ ("B", -3): -1.652343221335327,
+ ("B", -1): -1.3514075648859643,
+ ("B", 0): -1.1998696279038876,
+ ("B", 3): 2.7107996287190113,
+ ("C", -1): -1.9170116002810327,
+ ("C", 0): -1.7411359557542052,
+ ("C", 1): -1.1060742863488982,
+ ("N", -1): -3.128423313087365,
+ ("N", 0): -2.8988862104065958,
+ ("N", 1): -2.1782414865973068,
+ ("O", -1): -4.705386032968986,
+ ("O", 0): -4.352652340864803,
+ ("O", 1): -3.3929027848641797,
+ ("F", -1): -5.322297034311178,
+ ("F", 0): -4.9969448424630265,
+ ("Na", 1): 0.12295400000000001,
+ ("Mg", 2): 1.0016353333333334,
+ ("Si", 4): 5.448927240930351,
+ ("Si", 0): -1.625263132618416,
+ ("Si", -4): -4.503876330547808,
+ ("P", 0): -2.4250620380497385,
+ ("P", 1): -1.7319786163576927,
+ ("S", -1): -3.761566793286506,
+ ("S", 0): -3.535920743315634,
+ ("S", 1): -2.772567335542398,
+ ("Cl", -2): -4.177925186599567,
+ ("Cl", -1): -4.527948236258716,
+ ("Cl", 0): -4.166353944016668,
+ ("Cl", 2): -2.3809951798365505,
+ ("K", 1): 0.08160976666666667,
+ ("Ca", 2): 0.5662308,
+ ("Br", -1): -3.957113536482028,
+ ("Br", 0): -3.818039553459528,
+ ("I", -1): -4.043592677461303,
+ ("I", 0): -3.885757275227844,
+}
+GFN2 = {
+ ("H", -1): -0.6107466928548624,
+ ("H", 0): -0.3934827590437188,
+ ("H", 1): 0.22955216666666667,
+ ("Li", 1): 0.1659637,
+ ("B", -3): 0.4947743711421284,
+ ("B", -1): -0.8833252789733281,
+ ("B", 0): -0.9524366145568732,
+ ("B", 3): 2.886742362272,
+ ("C", -1): -1.9209221941523813,
+ ("C", 0): -1.7951105194038206,
+ ("C", 1): -1.7951105194038206,
+ ("N", -1): -2.8228473813671173,
+ ("N", 0): -2.609452454632062,
+ ("N", 1): -1.9127945803017519,
+ ("O", -1): -4.0689442489122944,
+ ("O", 0): -3.769421095414337,
+ ("O", 1): -2.948538063156781,
+ ("F", -1): -4.909635517185826,
+ ("F", 0): -4.619339955465996,
+ ("Na", 1): 0.19548556666666667,
+ ("Mg", 2): 1.3160877333333334,
+ ("Si", 4): 4.473259319583333,
+ ("Si", 0): -1.5714240856447492,
+ ("Si", -4): -1.0243162958137662,
+ ("P", 0): -2.377807088085606,
+ ("P", 1): -1.8635041144652795,
+ ("S", -1): -3.4046900452338025,
+ ("S", 0): -3.1482710158768508,
+ ("S", 1): -2.5869831371080387,
+ ("Cl", -2): -4.249780801412338,
+ ("Cl", -1): -4.785133953760966,
+ ("Cl", 2): -2.6084223252074965,
+ ("Cl", 0): -4.482525134292114,
+ ("K", 1): 0.19157049999999998,
+ ("Ca", 2): 1.1759288,
+ ("Br", -1): -4.332231166471951,
+ ("Br", 0): -4.048339370569741,
+ ("I", -1): -4.060355599036047,
+ ("I", 0): -3.7796302627467933,
+}
+DFTB = {
+ ("H", -1): -0.267450800,
+ ("H", 0): -0.2386004000,
+ ("H", 1): 0.2097500000,
+ ("Li", 1): 0.000000000,
+ ("B", -3): 0.1087536003,
+ ("B", -1): -0.8108828001,
+ ("B", 0): -0.8263560001,
+ ("B", 3): 1.3330350000,
+ ("C", -1): -1.4104987700,
+ ("C", 0): -1.3984936602,
+ ("C", 1): -1.0217885507,
+ ("N", -1): -2.1474619199,
+ ("N", 0): -2.1021839400,
+ ("N", 1): -1.6260059609,
+ ("O", -1): -3.1706232699,
+ ("O", 0): -3.0861916005,
+ ("O", 1): -2.5063599300,
+ ("F", -1): -4.3647240000,
+ ("F", 0): -4.2352190003,
+ ("Na", 1): 0.0825500000,
+ ("Mg", 2): 0.4492000000,
+ ("Si", 4): 0.2875390800,
+ ("Si", 0): -1.0920777201,
+ ("Si", -4): 1.9808720000,
+ ("P", 0): -1.6295741400,
+ ("P", 1): -1.2821088196,
+ ("S", -1): -2.3857500900,
+ ("S", 0): -2.2921235603,
+ ("S", 1): -1.8696970300,
+ ("Cl", -2): -3.31200000,
+ ("Cl", -1): -3.2238180000,
+ ("Cl", 0): -3.0908230002,
+ ("Cl", 2): -1.7244330000,
+ ("K", 1): 0.0678210000,
+ ("Ca", 2): 0.3528980000,
+ ("Br", -1): -3.0478250000,
+ ("Br", 0): -2.9228540002,
+ ("I", -1): -2.6981275000,
+ ("I", 0): -2.5796080002,
+}
+PM6 = {
+ ("H", -1): 0.20069130482,
+ ("H", 0): 0.08302988483033709,
+ ("H", 1): 0.49634827548,
+ ("Li", 1): 0.23429648020984556,
+ ("B", -3): 1.042845967149475,
+ ("B", -1): 0.2915413006028599,
+ ("B", 0): 0.2162518784591137,
+ ("B", 3): 2.036692812374006,
+ ("C", -1): 0.3702885058222273,
+ ("C", 0): 0.34355728762455995,
+ ("C", 1): 0.5942116527412356,
+ ("N", -1): 0.29851662685316066,
+ ("N", 0): 0.3266578327960236,
+ ("N", 1): 0.8167661499675701,
+ ("O", -1): 0.06245921572439598,
+ ("O", 0): 0.2760200570828466,
+ ("O", 1): 0.6881966155067099,
+ ("F", -1): -0.09819551592088718,
+ ("F", 0): 0.030103153898987902,
+ ("Na", 1): 0.20761332506784766,
+ ("Mg", 2): 0.8654790767941177,
+ ("Si", 4): 2.6874249452995893,
+ ("Si", 0): 0.19559781612694002,
+ ("Si", -4): 0.909424581958187,
+ ("P", 0): 0.1881765839215055,
+ ("P", 1): 0.5283679118546506,
+ ("S", -1): 0.00773920374050412,
+ ("S", 0): 0.15340740929612162,
+ ("S", 1): 0.5198027279290017,
+ ("Cl", -2): 3.87282505908,
+ ("Cl", -1): -0.09598933242391743,
+ ("Cl", 2): 1.6530454862,
+ ("Cl", 0): 0.04614458119325779,
+ ("K", 1): 0.17382321209735638,
+ ("Ca", 2): 0.6490542924483952,
+ ("Br", -1): -0.0878626123290662,
+ ("Br", 0): 0.04068832478896717,
+ ("I", -1): -0.06868953273976947,
+ ("I", 0): 0.038916541436059084,
+}
+
+
+# "tpssh/def2-tzvp"
+TMQM = {
+ ("H", -1): -0.5066148831768739,
+ ("H", 0): -0.4998936035891093,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.285942861425713,
+ ("B", -3): -24.011884397333016,
+ ("B", -1): -24.671478908940745,
+ ("B", 0): -24.66555991803692,
+ ("B", 3): -22.03729209090186,
+ ("C", -1): -37.902383828698945,
+ ("C", 0): -37.8619600939805,
+ ("C", 1): -37.44108173595555,
+ ("N", -1): -54.58878376740317,
+ ("N", 0): -54.61011499135528,
+ ("N", 1): -54.07150720832228,
+ ("O", -1): -75.12797596615384,
+ ("O", 0): -75.0993524949928,
+ ("O", 1): -74.58770047919643,
+ ("F", -1): -99.86387164958151,
+ ("F", 0): -99.76596802854195,
+ ("Na", 1): -162.0916076478938,
+ ("Mg", 2): -199.24528576913457,
+ ("Si", 4): -285.59703939232946,
+ ("Si", 0): -289.3842044105128,
+ ("Si", -4): -288.1798768489279,
+ ("P", 0): -341.2798907965112,
+ ("P", 1): -340.89320025019333,
+ ("S", -1): -398.19525449701325,
+ ("S", 0): -398.130358877624,
+ ("S", 1): -397.7467993687058,
+ ("Cl", -2): -459.4908872312368,
+ ("Cl", -1): -460.28412127843484,
+ ("Cl", 0): -460.1641720279233,
+ ("Cl", 2): -458.485405333257,
+ ("K", 1): -599.7644436257333,
+ ("Ca", 2): -676.9154959968483,
+ ("Br", -1): -2574.1448096288846,
+ ("Br", 0): -2574.0232838745055,
+ ("I", -1): -297.70580680306847,
+ ("I", 0): -297.5887657326151,
+}
+# "wb97m-d3bj/def2-TZVPPD"
+SPICE = {
+ ("H", -1): -0.5027370838426788,
+ ("H", 0): -0.4987605100487541,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.285254714046117,
+ ("B", -3): -24.191211616488623,
+ ("B", -1): -24.677421752607636,
+ ("B", 0): -24.671520535412856,
+ ("B", 3): -22.051237471894204,
+ ("C", -1): -37.914241357934024,
+ ("C", 0): -37.872645072317844,
+ ("C", 1): -37.45349214963851,
+ ("N", -1): -54.602291095940885,
+ ("N", 0): -54.62327513391132,
+ ("N", 1): -54.08594142612827,
+ ("O", -1): -75.17101657361833,
+ ("O", 0): -75.11317840403545,
+ ("O", 1): -74.6024151438455,
+ ("F", -1): -99.9129873233742,
+ ("F", 0): -99.78611622966918,
+ ("Na", 1): -162.11366478753402,
+ ("Mg", 2): -199.26884200420963,
+ ("Si", 4): -285.6283113353237,
+ ("Si", 0): -289.413135230185,
+ ("Si", -4): -288.27589059244787,
+ ("P", 0): -341.3059197004091,
+ ("P", 1): -340.92583924542475,
+ ("S", -1): -398.24053870171247,
+ ("S", 0): -398.15996366615616,
+ ("S", 1): -397.7746615960709,
+ ("Cl", -2): -460.08763805127313,
+ ("Cl", -1): -460.33502435018204,
+ ("Cl", 0): -460.1988762286936,
+ ("Cl", 2): -458.7438528011782,
+ ("K", 1): -599.8025677532396,
+ ("Ca", 2): -676.9528465165403,
+ ("Br", -1): -2574.2451510820465,
+ ("Br", 0): -2574.1167240800246,
+ ("I", -1): -297.88138299501395,
+ ("I", 0): -297.7622891423178,
+}
+# "revpbe-d3(bj)/def2-tzvp"
+SolvatedPeptides = {
+ ("H", -1): -0.4931715827683033,
+ ("H", 0): -0.5041476427597161,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.280731201437635,
+ ("B", -3): -24.006372610643076,
+ ("B", -1): -24.660992037766704,
+ ("B", 0): -24.652853868669744,
+ ("B", 3): -22.023688582481086,
+ ("C", -1): -37.88698396215454,
+ ("C", 0): -37.845600548516586,
+ ("C", 1): -37.42375720909004,
+ ("N", -1): -54.56844448819074,
+ ("N", 0): -54.58772405988695,
+ ("N", 1): -54.04957647943518,
+ ("O", -1): -75.10545816278959,
+ ("O", 0): -75.07120398742593,
+ ("O", 1): -74.55841255571633,
+ ("F", -1): -99.83653702337733,
+ ("F", 0): -99.7348800787186,
+ ("Na", 1): -162.04202541023028,
+ ("Mg", 2): -199.1857779742493,
+ ("Si", 4): -285.5196533711662,
+ ("Si", 0): -289.31537776907356,
+ ("Si", -4): -288.11458640061954,
+ ("P", 0): -341.20094262951534,
+ ("P", 1): -340.81665455610573,
+ ("S", -1): -398.10497764958086,
+ ("S", 0): -398.04159371790865,
+ ("S", 1): -397.6599146755941,
+ ("Cl", -2): -459.3527862471638,
+ ("Cl", -1): -460.1836953722962,
+ ("Cl", 0): -460.0661711540315,
+ ("Cl", 2): -458.51775405333257,
+ ("K", 1): -599.6472569880391,
+ ("Ca", 2): -676.7916386065199,
+ ("Br", -1): -2574.0081469191155,
+ ("Br", 0): -2573.890240418883,
+ ("I", -1): -297.8357436124949,
+ ("I", 0): -297.72268439613055,
+}
+# "DSD-BLYP-D3BJ/def2-TZVPPD" (NOTE(review): sn2_rxn.py declares "dsd-blyp-d3(bj)/def2-tzvp" - confirm the basis set)
+SN2RXN = {
+ ("H", -1): -0.4931715827683033,
+ ("H", 0): -0.4990585651127987,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.2751828330696995,
+ ("B", -3): -24.127790514752746,
+ ("B", -1): -24.62825292497449,
+ ("B", 0): -24.628518170377323,
+ ("B", 3): -22.01440439226537,
+ ("C", -1): -37.85187643574064,
+ ("C", 0): -37.81800653654633,
+ ("C", 1): -37.4026616247957,
+ ("N", -1): -54.529773519860626,
+ ("N", 0): -54.55929475542038,
+ ("N", 1): -54.02654716655024,
+ ("O", -1): -75.08730105751656,
+ ("O", 0): -75.03632370546934,
+ ("O", 1): -74.53620016366052,
+ ("F", -1): -99.82374475663487,
+ ("F", 0): -99.6990797359127,
+ ("Na", 1): -161.96633141740327,
+ ("Mg", 2): -199.1186151803418,
+ ("Si", 4): -285.4592439444118,
+ ("Si", 0): -289.2354767511652,
+ ("Si", -4): -288.12487758144147,
+ ("P", 0): -341.1278868392075,
+ ("P", 1): -340.7469511203367,
+ ("S", -1): -398.0441756257772,
+ ("S", 0): -397.9705195592595,
+ ("S", 1): -397.5944122508692,
+ ("Cl", -2): -459.3527862471638,
+ ("Cl", -1): -460.13181548141955,
+ ("Cl", 0): -460.0006937311494,
+ ("Cl", 2): -458.51775405333257,
+ ("K", 1): -599.4901238823808,
+ ("Ca", 2): -676.6456698988475,
+ ("Br", -1): -2573.604327011817,
+ ("Br", 0): -2573.477602568216,
+ ("I", -1): -297.5733470600828,
+ ("I", 0): -297.4541938789708,
+}
+# "b3lyp/6-31g*" (NOTE(review): qmugs.py now declares "wb97x-d/def2-svp" as its DFT method - confirm)
+QMUGS_DFT = {
+ ("H", -1): -0.4618190740256503,
+ ("H", 0): -0.5002733301377901,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.284546111273075,
+ ("B", -3): -23.577268753399462,
+ ("B", -1): -24.614577395156598,
+ ("B", 0): -24.65435524492553,
+ ("B", 3): -22.018169862974275,
+ ("C", -1): -37.844269871879376,
+ ("C", 0): -37.84628033285479,
+ ("C", 1): -37.42731164237431,
+ ("N", -1): -54.52864356359092,
+ ("N", 0): -54.584488815424095,
+ ("N", 1): -54.0458621835885,
+ ("O", -1): -75.05272792994404,
+ ("O", 0): -75.06062109946738,
+ ("O", 1): -74.54659271939704,
+ ("F", -1): -99.75408410035712,
+ ("F", 0): -99.71553471526475,
+ ("Na", 1): -162.081235395777,
+ ("Mg", 2): -199.22734695613283,
+ ("Si", 4): -285.5564410277949,
+ ("Si", 0): -289.3717359984153,
+ ("Si", -4): -288.02795351148654,
+ ("P", 0): -341.2580911838578,
+ ("P", 1): -340.8765976669208,
+ ("S", -1): -398.16568433994024,
+ ("S", 0): -398.1049932797066,
+ ("S", 1): -397.7199808615457,
+ ("Cl", -2): -459.5066184980746,
+ ("Cl", -1): -460.25223446009306,
+ ("Cl", 0): -460.13624346967765,
+ ("Cl", 2): -458.6740467177361,
+ ("K", 1): -599.7247062673807,
+ ("Ca", 2): -676.8667395990246,
+ ("Br", -1): -2573.824201570383,
+ ("Br", 0): -2573.705283744811,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# "wb97x-d3/def2-tzvp"
+ORBNET = {
+ ("H", -1): -0.5051390575292232,
+ ("H", 0): -0.5025865385814652,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.289728176048259,
+ ("B", -3): -23.984063702375366,
+ ("B", -1): -24.655892805089884,
+ ("B", 0): -24.652426319775287,
+ ("B", 3): -22.068923453406843,
+ ("C", -1): -37.88249635015094,
+ ("C", 0): -37.84495506623085,
+ ("C", 1): -37.42572594563294,
+ ("N", -1): -54.566013571722955,
+ ("N", 0): -54.58956332659741,
+ ("N", 1): -54.053510120855016,
+ ("O", -1): -75.10770262264376,
+ ("O", 0): -75.07371685344017,
+ ("O", 1): -74.56770852466894,
+ ("F", -1): -99.84730255807874,
+ ("F", 0): -99.74441357744517,
+ ("Na", 1): -162.08090997566165,
+ ("Mg", 2): -199.2423311291131,
+ ("Si", 4): -285.61307018231093,
+ ("Si", 0): -289.36007009205474,
+ ("Si", -4): -288.13938913442,
+ ("P", 0): -341.2535866489386,
+ ("P", 1): -340.8713081439191,
+ ("S", -1): -398.17523835330115,
+ ("S", 0): -398.1081144325829,
+ ("S", 1): -397.7235371215097,
+ ("Cl", -2): -459.55571935610567,
+ ("Cl", -1): -460.26962615981756,
+ ("Cl", 0): -460.1472726772528,
+ ("Cl", 2): -458.68793188715097,
+ ("K", 1): -599.7560426196044,
+ ("Ca", 2): -676.9122500284535,
+ ("Br", -1): -2574.293316484485,
+ ("Br", 0): -2574.1721188129304,
+ ("I", -1): -297.8647496186801,
+ ("I", 0): -297.7482461760336,
+}
+# "wb97x-d/def2-svp"
+NABLADFT = {
+ ("H", -1): -0.487196574630614,
+ ("H", 0): -0.5024927493280441,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.289461512680954,
+ ("B", -3): -23.76326340520956,
+ ("B", -1): -24.616565541453497,
+ ("B", 0): -24.62229041950939,
+ ("B", 3): -22.05799995059738,
+ ("C", -1): -37.819977678758974,
+ ("C", 0): -37.79809943233551,
+ ("C", 1): -37.37569908192604,
+ ("N", -1): -54.459277717462086,
+ ("N", 0): -54.522416758144296,
+ ("N", 1): -53.98339066860825,
+ ("O", -1): -74.96664546628877,
+ ("O", 0): -74.97667950172594,
+ ("O", 1): -74.47138898492452,
+ ("F", -1): -99.66683980036512,
+ ("F", 0): -99.61447206028255,
+ ("Na", 1): -162.0226698276339,
+ ("Mg", 2): -199.1739400418112,
+ ("Si", 4): -285.52441678317916,
+ ("Si", 0): -289.2630396380861,
+ ("Si", -4): -287.76522279776617,
+ ("P", 0): -341.13939934765074,
+ ("P", 1): -340.75715448577955,
+ ("S", -1): -398.0129589348639,
+ ("S", 0): -397.9719510287289,
+ ("S", 1): -397.58695970543334,
+ ("Cl", -2): -459.17907026002734,
+ ("Cl", -1): -460.0809386171713,
+ ("Cl", 0): -459.9885726673416,
+ ("Cl", 2): -458.52265869014025,
+ ("K", 1): -599.6772169304438,
+ ("Ca", 2): -676.8244048230532,
+ ("Br", -1): -2573.9600885084546,
+ ("Br", 0): -2573.856581446253,
+ ("I", -1): -297.8445820598362,
+ ("I", 0): -297.7376955031015,
+}
+# "wb97x/6-31g(d)"
+ANI1 = {
+ ("H", -1): -0.45658037701866955,
+ ("H", 0): -0.4993457316092281,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.2856300653219614,
+ ("B", -3): -23.575157416550805,
+ ("B", -1): -24.603134775026213,
+ ("B", 0): -24.642610267398982,
+ ("B", 3): -22.07124234970699,
+ ("C", -1): -37.834042127064706,
+ ("C", 0): -37.83384116353608,
+ ("C", 1): -37.41881056856161,
+ ("N", -1): -54.513028620185864,
+ ("N", 0): -54.573313922039716,
+ ("N", 1): -54.036340248157515,
+ ("O", -1): -75.03386211245754,
+ ("O", 0): -75.04249624495868,
+ ("O", 1): -74.53884510892807,
+ ("F", -1): -99.7350451879463,
+ ("F", 0): -99.69494212517318,
+ ("Na", 1): -162.0682250235374,
+ ("Mg", 2): -199.22919949102433,
+ ("Si", 4): -285.5967323489095,
+ ("Si", 0): -289.3398443488577,
+ ("Si", -4): -288.0053873657048,
+ ("P", 0): -341.2319240654614,
+ ("P", 1): -340.85012602930203,
+ ("S", -1): -398.14261145000256,
+ ("S", 0): -398.0814606242194,
+ ("S", 1): -397.6998359561112,
+ ("Cl", -2): -459.479319530353,
+ ("Cl", -1): -460.2341096421279,
+ ("Cl", 0): -460.1166957612669,
+ ("Cl", 2): -458.6588365149308,
+ ("K", 1): -599.7184666927276,
+ ("Ca", 2): -676.8704088358037,
+ ("Br", -1): -2573.8502718776604,
+ ("Br", 0): -2573.733913792756,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# "WB97X/6-31g*"
+COMP6_1 = {
+ ("H", -1): -0.4565803770186695,
+ ("H", 0): -0.4993457316092281,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.285630065321961,
+ ("B", -3): -23.5751574165508,
+ ("B", -1): -24.603134775026216,
+ ("B", 0): -24.64261026739898,
+ ("B", 3): -22.071242349706992,
+ ("C", -1): -37.834042127064706,
+ ("C", 0): -37.83384116353608,
+ ("C", 1): -37.4188105685616,
+ ("N", -1): -54.5130286201859,
+ ("N", 0): -54.57331392203972,
+ ("N", 1): -54.03634024815754,
+ ("O", -1): -75.03386211245756,
+ ("O", 0): -75.0424962449587,
+ ("O", 1): -74.5388451089281,
+ ("F", -1): -99.7350451879463,
+ ("F", 0): -99.69494212517317,
+ ("Na", 1): -162.06822502353745,
+ ("Mg", 2): -199.2291994910244,
+ ("Si", 4): -285.5967323489095,
+ ("Si", 0): -289.3398443488578,
+ ("Si", -4): -288.00538736570485,
+ ("P", 0): -341.2319240654613,
+ ("P", 1): -340.85012602930215,
+ ("S", -1): -398.14261145000256,
+ ("S", 0): -398.0814606242193,
+ ("S", 1): -397.6998359561114,
+ ("Cl", -2): -459.47931953035305,
+ ("Cl", -1): -460.23410964212803,
+ ("Cl", 0): -460.1166957612671,
+ ("Cl", 2): -458.65883651493084,
+ ("K", 1): -599.7184666927277,
+ ("Ca", 2): -676.8704088358036,
+ ("Br", -1): -2573.8502718776604,
+ ("Br", 0): -2573.7339137927547,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# "ccsd/aug-cc-pVDZ"
+ccsdaug = {
+ ("H", -1): -0.5240286252725133,
+ ("H", 0): -0.49933431543958506,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.23623079003172,
+ ("B", -3): -24.135298809957895,
+ ("B", -1): -24.595731151135812,
+ ("B", 0): -24.591070884515084,
+ ("B", 3): -21.985913735106703,
+ ("C", -1): -37.80520563794191,
+ ("C", 0): -37.76484921430014,
+ ("C", 1): -37.35862660518426,
+ ("N", -1): -54.46561904421205,
+ ("N", 0): -54.48723914213882,
+ ("N", 1): -53.959899854043286,
+ ("O", -1): -74.96558003564495,
+ ("O", 0): -74.9255348291028,
+ ("O", 1): -74.4432579985748,
+ ("F", -1): -99.66462266282274,
+ ("F", 0): -99.54960172383534,
+ ("Na", 1): -161.67194573263333,
+ ("Mg", 2): -198.8268633109654,
+ ("Si", 4): -285.1795420310209,
+ ("Si", 0): -288.9225171059681,
+ ("Si", -4): -288.13012523255236,
+ ("P", 0): -340.80119511758613,
+ ("P", 1): -340.42190068851625,
+ ("S", -1): -397.67826887815926,
+ ("S", 0): -397.6146112492681,
+ ("S", 1): -397.2542253763525,
+ ("Cl", -2): -459.42201473799554,
+ ("Cl", -1): -459.7398865093852,
+ ("Cl", 0): -459.6156482951034,
+ ("Cl", 2): -458.1975299396907,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): None, # not available with this basis set
+ ("Br", -1): -2572.6265539931533,
+ ("Br", 0): -2572.5063313966352,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# "ccsd(t)/aug-cc-pVDZ"
+ccsdtaug = {
+ ("H", -1): -0.489676276755859,
+ ("H", 0): -0.4993343154395853,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236230790031718,
+ ("B", -3): -24.14659676027675,
+ ("B", -1): -24.59834841644963,
+ ("B", 0): -24.592013924578307,
+ ("B", 3): -21.98591373510674,
+ ("C", -1): -37.80822234639533,
+ ("C", 0): -37.7661399495972,
+ ("C", 1): -37.3593489962868,
+ ("N", -1): -54.46970203317129,
+ ("N", 0): -54.488530163663306,
+ ("N", 1): -53.96079905255966,
+ ("O", -1): -74.97107484978555,
+ ("O", 0): -74.92736838177342,
+ ("O", 1): -74.44405741349318,
+ ("F", -1): -99.67058259815346,
+ ("F", 0): -99.55194323117622,
+ ("Na", 1): -161.67196199847683,
+ ("Mg", 2): -198.8269101640321,
+ ("Si", 4): -285.1796031904412,
+ ("Si", 0): -288.9239884021825,
+ ("Si", -4): -288.14250182593497,
+ ("P", 0): -340.80293105856066,
+ ("P", 1): -340.4231288782063,
+ ("S", -1): -397.68239119590464,
+ ("S", 0): -397.61679149962197,
+ ("S", 1): -397.2555638941634,
+ ("Cl", -1): -459.74421517568555,
+ ("Cl", 0): -459.6181191157645,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): None, # not available with this basis set
+ ("Br", -1): -2572.630606833861,
+ ("Br", 0): -2572.508930744571,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# "mp2/aug-cc-pVDZ"
+mp2aug = {
+ ("H", -1): -0.5118536127440081,
+ ("H", 0): -0.4993343154395852,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.2362434239942885,
+ ("B", -3): -24.11454063530035,
+ ("B", -1): -24.57403291869507,
+ ("B", 0): -24.568723938484855,
+ ("B", 3): -21.98592739023366,
+ ("C", -1): -37.78658968444089,
+ ("C", 0): -37.74289655875525,
+ ("C", 1): -37.33330128905729,
+ ("N", -1): -54.44347106000461,
+ ("N", 0): -54.46985977846849,
+ ("N", 1): -53.93770877612693,
+ ("O", -1): -74.95558042845218,
+ ("O", 0): -74.90882930239204,
+ ("O", 1): -74.42742702171483,
+ ("F", -1): -99.66810645703836,
+ ("F", 0): -99.5377379527871,
+ ("Na", 1): -161.67200581779124,
+ ("Mg", 2): -198.8269131203642,
+ ("Si", 4): -285.17950758651557,
+ ("Si", 0): -288.90336148257995,
+ ("Si", -4): -288.12382709478203,
+ ("P", 0): -340.78346939708916,
+ ("P", 1): -340.4015180393644,
+ ("S", -1): -397.6614469463811,
+ ("S", 0): -397.5953187556735,
+ ("S", 1): -397.236034450623,
+ ("Cl", -2): -459.4111711211486,
+ ("Cl", -1): -459.7293671162834,
+ ("Cl", 0): -459.5986332871817,
+ ("Cl", 2): -458.16109262813154,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): None, # not available with this basis set
+ ("Br", -1): -2571.9455214335435,
+ ("Br", 0): -2571.8203622687925,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# "mp2/def2-TZVP"
+mp2def2TZVP = {
+ ("H", -1): -0.48253121006249655,
+ ("H", 0): -0.4998098322318883,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.26625465274989,
+ ("B", -3): -23.89130329586724,
+ ("B", -1): -24.58967154224317,
+ ("B", 0): -24.59074548143485,
+ ("B", 3): -21.99943494200725,
+ ("C", -1): -37.81110910609783,
+ ("C", 0): -37.77471406753249,
+ ("C", 1): -37.36120515772786,
+ ("N", -1): -54.474221753525356,
+ ("N", 0): -54.51486367243164,
+ ("N", 1): -53.97922862858532,
+ ("O", -1): -75.00152176187984,
+ ("O", 0): -74.97513105465687,
+ ("O", 1): -74.48759502971161,
+ ("F", -1): -99.73457909250294,
+ ("F", 0): -99.62808382176112,
+ ("Na", 1): -161.83073450947992,
+ ("Mg", 2): -198.9798405609494,
+ ("Si", 4): -285.26774080524564,
+ ("Si", 0): -289.0086162111446,
+ ("Si", -4): -287.737519515362,
+ ("P", 0): -340.89251993087385,
+ ("P", 1): -340.5074615537276,
+ ("S", -1): -397.7717421040001,
+ ("S", 0): -397.71573728264894,
+ ("S", 1): -397.34975334831165,
+ ("Cl", -2): -459.09862455580026,
+ ("Cl", -1): -459.84969455647206,
+ ("Cl", 0): -459.7312731162239,
+ ("Cl", 2): -458.28486559837125,
+ ("K", 1): -599.1623610013563,
+ ("Ca", 2): -676.3191334447123,
+ ("Br", -1): -2572.8329868011315,
+ ("Br", 0): -2572.7140648042205,
+ ("I", -1): -297.32915651116025,
+ ("I", 0): -297.2135511448063,
+}
+# SVWN/def2-TZVP
+COMP6_7 = {
+ ("H", -1): -0.5173468733170209,
+ ("H", 0): -0.4961415246858913,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.182160595407815,
+ ("B", -3): -23.858154175760482,
+ ("B", -1): -24.477102446655582,
+ ("B", 0): -24.446672986035107,
+ ("B", 3): -21.78388674779827,
+ ("C", -1): -37.648803413486476,
+ ("C", 0): -37.57960202253736,
+ ("C", 1): -37.13377025356311,
+ ("N", -1): -54.268858501552714,
+ ("N", 0): -54.264236284313675,
+ ("N", 1): -53.69660297293359,
+ ("O", -1): -74.75021611814427,
+ ("O", 0): -74.68022879998783,
+ ("O", 1): -74.14595350398997,
+ ("F", -1): -99.4308126971536,
+ ("F", 0): -99.2855801211432,
+ ("Na", 1): -161.43940087938617,
+ ("Mg", 2): -198.482989208704,
+ ("Si", 4): -284.6095063412437,
+ ("Si", 0): None,
+ ("Si", -4): -287.36361152706985,
+ ("P", 0): -340.28781390909336,
+ ("P", 1): None,
+ ("S", -1): -396.74391290562517,
+ ("S", 0): -397.0472344910708,
+ ("S", 1): -396.6400428334645,
+ ("Cl", -2): None,
+ ("Cl", -1): -459.1427217366059,
+ ("Cl", 0): -457.029433121817,
+ ("Cl", 2): -457.5432679710133,
+ ("K", 1): -598.3826110301004,
+ ("Ca", 2): -675.4148005786843,
+ ("Br", -1): -2571.43279407191,
+ ("Br", 0): None,
+ ("I", -1): -297.89817894897124,
+ ("I", 0): None,
+}
+# "PBE-D3BJ2B/def2-TZVP"
+COMP6_5 = {
+ ("H", -1): -0.4984251407077053,
+ ("H", 0): -0.49963874688778964,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.256644236856915,
+ ("B", -3): -23.965651173919607,
+ ("B", -1): -24.61987718656591,
+ ("B", 0): -24.610084509857693,
+ ("B", 3): -21.981186468975643,
+ ("C", -1): -37.839839802893856,
+ ("C", 0): -37.79597394493031,
+ ("C", 1): -37.37216480722536,
+ ("N", -1): -54.51524854184836,
+ ("N", 0): -54.53214830302369,
+ ("N", 1): -53.99133373760564,
+ ("O", -1): -75.04792601078884,
+ ("O", 0): -75.00968214869428,
+ ("O", 1): -74.49434051926339,
+ ("F", -1): -99.77558183886408,
+ ("F", 0): -99.6691400940838,
+ ("Na", 1): -161.96413737180777,
+ ("Mg", 2): -199.10001096170987,
+ ("Si", 4): -285.4180171255296,
+ ("Si", 0): -289.2228701070572,
+ ("Si", -4): -288.0227167833236,
+ ("P", 0): -341.1030537066697,
+ ("P", 1): -340.7177213193741,
+ ("S", -1): -398.00391422389356,
+ ("S", 0): -397.93836821335026,
+ ("S", 1): -397.5554184472038,
+ ("Cl", -2): -459.386408262179,
+ ("Cl", -1): -460.0784728779802,
+ ("Cl", 0): -459.9584144179813,
+ ("Cl", 2): -458.5661867317756,
+ ("K", 1): -599.5277926006078,
+ ("Ca", 2): -676.665524794864,
+ ("Br", -1): -2573.8415230490864,
+ ("Br", 0): -2573.720729522128,
+ ("I", -1): -297.7815346863239,
+ ("I", 0): -297.66553802500096,
+}
+# "B3LYP-D3MBJ2B/def2-TZVP"
+COMP6_2 = {
+ ("H", -1): -0.5104276111528594,
+ ("H", 0): -0.5021763508982502,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.28605166725753,
+ ("B", -3): -24.00227248681287,
+ ("B", -1): -24.670150534162623,
+ ("B", 0): -24.66392221445664,
+ ("B", 3): -22.020454695632036,
+ ("C", -1): -37.89817823158867,
+ ("C", 0): -37.85948152785869,
+ ("C", 1): -37.43552078960403,
+ ("N", -1): -54.58873727556918,
+ ("N", 0): -54.60398141018468,
+ ("N", 1): -54.065523148633176,
+ ("O", -1): -75.13521710860505,
+ ("O", 0): -75.09628346877744,
+ ("O", 1): -74.57769937644677,
+ ("F", -1): -99.87634645410799,
+ ("F", 0): -99.77016379237457,
+ ("Na", 1): -162.09255440877646,
+ ("Mg", 2): -199.2394349246892,
+ ("Si", 4): -285.575845762374,
+ ("Si", 0): -289.3920722437195,
+ ("Si", -4): -288.17382798168956,
+ ("P", 0): -341.28064911053326,
+ ("P", 1): -340.89904032318145,
+ ("S", -1): -398.200223492228,
+ ("S", 0): -398.1324076067549,
+ ("S", 1): -397.7448455107872,
+ ("Cl", -2): -459.58678053070076,
+ ("Cl", -1): -460.2889124003806,
+ ("Cl", 0): -460.16699382696663,
+ ("Cl", 2): -458.70493083496865,
+ ("K", 1): -599.7602668684151,
+ ("Ca", 2): -676.9064118669689,
+ ("Br", -1): -2574.264312179195,
+ ("Br", 0): -2574.140975849301,
+ ("I", -1): -297.89704873064437,
+ ("I", 0): -297.7784640477503,
+}
+# "b3lyp/def2-TZVP"
+COMP6_3 = {
+ ("H", -1): -0.5104276111528594,
+ ("H", 0): -0.5021763508982502,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.2860516672575315,
+ ("B", -3): -24.002272486812885,
+ ("B", -1): -24.67015053416263,
+ ("B", 0): -24.663922214456655,
+ ("B", 3): -22.020454695632043,
+ ("C", -1): -37.89817823158866,
+ ("C", 0): -37.85948152785869,
+ ("C", 1): -37.435520789604034,
+ ("N", -1): -54.588737275569194,
+ ("N", 0): -54.603981410184666,
+ ("N", 1): -54.065523148633176,
+ ("O", -1): -75.13521710860508,
+ ("O", 0): -75.09628346877746,
+ ("O", 1): -74.57769937644687,
+ ("F", -1): -99.8763464541079,
+ ("F", 0): -99.7701637923746,
+ ("Na", 1): -162.0925544087764,
+ ("Mg", 2): -199.23943492468925,
+ ("Si", 4): -285.5758457623741,
+ ("Si", 0): -289.3920722437192,
+ ("Si", -4): -288.1738279816895,
+ ("P", 0): -341.28064911053326,
+ ("P", 1): -340.8990403231815,
+ ("S", -1): -398.2002234922283,
+ ("S", 0): -398.1324076067552,
+ ("S", 1): -397.744845510787,
+ ("Cl", -2): -459.58678053070065,
+ ("Cl", -1): -460.28891240038075,
+ ("Cl", 0): -460.1669938269668,
+ ("Cl", 2): -458.70493083496893,
+ ("K", 1): -599.7602668684153,
+ ("Ca", 2): -676.9064118669687,
+ ("Br", -1): -2574.264312179194,
+ ("Br", 0): -2574.140975849301,
+ ("I", -1): -297.8970487306444,
+ ("I", 0): -297.7784640477502,
+}
+
+# ccsd(t)/cc-pVDZ
+GDML_2 = {
+ ("H", -1): -0.489739656382323,
+ ("H", 0): -0.49927840341958285,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236223739656382,
+ ("B", -3): -23.61782373835322,
+ ("B", -1): -24.528388906235705,
+ ("B", 0): -24.590264050112527,
+ ("B", 3): -21.98588333987049,
+ ("C", -1): -37.688228871632006,
+ ("C", 0): -37.70277208656365,
+ ("C", 1): -37.3579597779074,
+ ("N", -1): -54.321974972075715,
+ ("N", 0): -54.373768477368074,
+ ("N", 1): -53.87510137954731,
+ ("O", -1): -74.87516352403559,
+ ("O", 0): -74.82827800838686,
+ ("O", 1): -74.30135465859384,
+ ("F", -1): -99.56030962418485,
+ ("F", 0): -99.52932183945009,
+ ("Na", 1): -161.67188329184694,
+ ("Mg", 2): -198.82669320079302,
+ ("Si", 4): -285.17919483395195,
+ ("Si", 0): -288.88085983569533,
+ ("Si", -4): -287.40461285633614,
+ ("P", 0): -340.7265584017754,
+ ("P", 1): -340.36984136674585,
+ ("S", -1): -397.63315120158666,
+ ("S", 0): -397.55317747510554,
+ ("S", 1): -397.1659426092399,
+ ("Cl", -1): -459.69470422539786,
+ ("Cl", 0): -459.60398876941906,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.2271898047749,
+ ("Br", -1): -2572.584907858833,
+ ("Br", 0): -2572.4941153123455,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# ccsd(t)/cc-pVTZ
+ANI1CCX_2 = {
+ ("H", -1): -0.4963122609799637,
+ ("H", 0): -0.49980981130184293,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.249353374937752,
+ ("B", -3): -23.793685421585884,
+ ("B", -1): -24.56648780776967,
+ ("B", 0): -24.605381789792233,
+ ("B", 3): -21.991368552278544,
+ ("C", -1): -37.747141724045164,
+ ("C", 0): -37.735863889731654,
+ ("C", 1): -37.37850843579137,
+ ("N", -1): -54.41337048412563,
+ ("N", 0): -54.42353049479941,
+ ("N", 1): -53.91625772121427,
+ ("O", -1): -74.99249367544891,
+ ("O", 0): -74.90337716789482,
+ ("O", 1): -74.36027901195692,
+ ("F", -1): -99.71046952902925,
+ ("F", 0): -99.63219230886922,
+ ("Na", 1): -161.68615285472157,
+ ("Mg", 2): -198.8436504300981,
+ ("Si", 4): -285.2290232109956,
+ ("Si", 0): -288.954195226872,
+ ("Si", -4): -287.62141587617776,
+ ("P", 0): -340.79678977311414,
+ ("P", 1): -340.432199862984,
+ ("S", -1): -397.7409199255247,
+ ("S", 0): -397.6361063083311,
+ ("S", 1): -397.2347675440139,
+ ("Cl", -2): -459.069378694994,
+ ("Cl", -1): -459.8163494320064,
+ ("Cl", 0): -459.70310084056786,
+ ("Cl", 2): -458.277524056067,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.3176100772968,
+ ("Br", -1): -2572.8167538662433,
+ ("Br", 0): -2572.702100151291,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# ccsd/cc-pVDZ isolated-atom energies; keys look like (element symbol, charge) pairs.
+# None marks an entry that could not be computed. Units are not stated here (presumably Hartree -- TODO confirm).
+GDML_1 = {
+ # NOTE(review): the next value is identical to ("H", 0), which no sibling table shows. CCSD and CCSD(T)
+ # should coincide for two-electron H- (compare CCSD_VTZ with ANI1CCX_2); GDML_2 lists -0.489739656382323 -- verify.
+ ("H", -1): -0.49927840341958285,
+ ("H", 0): -0.49927840341958285,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236223739656382,
+ ("B", -3): -23.613877846876942,
+ ("B", -1): -24.52547666267111,
+ ("B", 0): -24.589429443373188,
+ ("B", 3): -21.98588333987049,
+ ("C", -1): -37.68362301484667,
+ ("C", 0): -37.69937564411741,
+ ("C", 1): -37.35727461654343,
+ ("N", -1): -54.31612564560329,
+ ("N", 0): -54.3667355223191,
+ ("N", 1): -53.871756805827864,
+ ("O", -1): -74.87454456240714,
+ ("O", 0): -74.82074180638969,
+ ("O", 1): -74.29143146516834,
+ ("F", -1): -99.55969095436343,
+ ("F", 0): -99.5284215563597,
+ ("Na", 1): -161.67186865791962,
+ ("Mg", 2): -198.826650230425,
+ ("Si", 4): -285.17913845059644,
+ ("Si", 0): -288.87753485972564,
+ ("Si", -4): -287.40275985231415,
+ ("P", 0): -340.7210732625289,
+ ("P", 1): -340.3662836136086,
+ ("S", -1): -397.631810717651,
+ ("S", 0): -397.54760940641853,
+ ("S", 1): -397.15909131565013,
+ ("Cl", -2): -458.6471183178738,
+ ("Cl", -1): -459.6933866998589,
+ ("Cl", 0): -459.60268687745884,
+ ("Cl", 2): -458.1932998145885,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.2265307613668,
+ ("Br", -1): -2572.5834492880094,
+ ("Br", 0): -2572.492623348252,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# ccsd/cc-pVTZ
+CCSD_VTZ = {
+ ("H", -1): -0.49631226097996367,
+ ("H", 0): -0.49980981130184293,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.249353374937752,
+ ("B", -3): -23.78682468678494,
+ ("B", -1): -24.56193370904525,
+ ("B", 0): -24.60388179904298,
+ ("B", 3): -21.991368552278544,
+ ("C", -1): -37.74093800618891,
+ ("C", 0): -37.73042268826894,
+ ("C", 1): -37.377165803324715,
+ ("N", -1): -54.40441588438247,
+ ("N", 0): -54.4152043962678,
+ ("N", 1): -53.91038920924042,
+ ("O", -1): -74.98771409352835,
+ ("O", 0): -74.89293727915536,
+ ("O", 1): -74.34899994406153,
+ ("F", -1): -99.70481088713056,
+ ("F", 0): -99.62851668514091,
+ ("Na", 1): -161.68598877560345,
+ ("Mg", 2): -198.84332758531946,
+ ("Si", 4): -285.228514965889,
+ ("Si", 0): -288.9476846603088,
+ ("Si", -4): -287.6138873496766,
+ ("P", 0): -340.78870701737065,
+ ("P", 1): -340.42522678302885,
+ ("S", -1): -397.73415929387704,
+ ("S", 0): -397.62619555322124,
+ ("S", 1): -397.225460043223,
+ ("Cl", -2): -459.06087948746443,
+ ("Cl", -1): -459.80856103622415,
+ ("Cl", 0): -459.69693046874454,
+ ("Cl", 2): -458.26687876975234,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.3160445414744,
+ ("Br", -1): -2572.8073946290465,
+ ("Br", 0): -2572.694327605488,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# hf/cc-pVDZ
+ANI1X_1 = {
+ ("H", -1): -0.4488383380351602,
+ ("H", 0): -0.4992784034195828,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236120435571012,
+ ("B", -3): -23.517631518350836,
+ ("B", -1): -24.43849458753095,
+ ("B", 0): -24.52995828509406,
+ ("B", 3): -21.98542712791857,
+ ("C", -1): -37.57949842909864,
+ ("C", 0): -37.59598618627132,
+ ("C", 1): -37.28952528470851,
+ ("N", -1): -54.170756777551894,
+ ("N", 0): -54.251655645342815,
+ ("N", 1): -53.75577765594358,
+ ("O", -1): -74.72122641123744,
+ ("O", 0): -74.66528700138886,
+ ("O", 1): -74.16935785917661,
+ ("F", -1): -99.3660232395006,
+ ("F", 0): -99.37525020985224,
+ ("Na", 1): -161.67106997000676,
+ ("Mg", 2): -198.82420265081305,
+ ("Si", 4): -285.17413886038224,
+ ("Si", 0): -288.7869064370983,
+ ("Si", -4): -287.3055013422455,
+ ("P", 0): -340.6188035921855,
+ ("P", 1): -340.26328028589194,
+ ("S", -1): -397.506997287547,
+ ("S", 0): -397.4131194811572,
+ ("S", 1): -397.04821663752654,
+ ("Cl", -2): -458.49341773983207,
+ ("Cl", -1): -459.54222556583767,
+ ("Cl", 0): -459.4711432886898,
+ ("Cl", 2): -458.07541032143655,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.1457625057777,
+ ("Br", -1): -2571.766685524917,
+ ("Br", 0): -2571.6943737649776,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# hf/cc-pVTZ
+ANI1X_3 = {
+ ("H", -1): -0.4668418892599132,
+ ("H", 0): -0.49980981130184304,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236381928884647,
+ ("B", -3): -23.654030528094694,
+ ("B", -1): -24.45440782122731,
+ ("B", 0): -24.532065412570418,
+ ("B", 3): -21.985654326745827,
+ ("C", -1): -37.6036322232934,
+ ("C", 0): -37.602187116127666,
+ ("C", 1): -37.294742506720475,
+ ("N", -1): -54.20897619252452,
+ ("N", 0): -54.263903101255586,
+ ("N", 1): -53.765473796977965,
+ ("O", -1): -74.76618798136187,
+ ("O", 0): -74.6842428689006,
+ ("O", 1): -74.18751432538998,
+ ("F", -1): -99.42428986904464,
+ ("F", 0): -99.40551931536073,
+ ("Na", 1): -161.67601880318512,
+ ("Mg", 2): -198.82947207595663,
+ ("Si", 4): -285.1793556127226,
+ ("Si", 0): -288.7945961163259,
+ ("Si", -4): -287.41256067563575,
+ ("P", 0): -340.6294583289231,
+ ("P", 1): -340.2717794204319,
+ ("S", -1): -397.5319459632172,
+ ("S", 0): -397.4249161291449,
+ ("S", 1): -397.06067984991046,
+ ("Cl", -2): -458.80494925757927,
+ ("Cl", -1): -459.5646668064105,
+ ("Cl", 0): -459.4854291853036,
+ ("Cl", 2): -458.09232019709674,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.1540716436532,
+ ("Br", -1): -2572.528468875192,
+ ("Br", 0): -2572.445069318686,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+
+# mp2/cc-pVDZ
+DES1 = {
+ ("H", -1): -0.46472136044848017,
+ ("H", 0): -0.4992784034195828,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236236031279599,
+ ("B", -3): -23.59075634654498,
+ ("B", -1): -24.496049160245956,
+ ("B", 0): -24.56749154944109,
+ ("B", 3): -21.985897030619704,
+ ("C", -1): -37.65666509987848,
+ ("C", 0): -37.66302875884139,
+ ("C", 1): -37.3321238689667,
+ ("N", -1): -54.28620525567718,
+ ("N", 0): -54.334987200983385,
+ ("N", 1): -53.827357208281775,
+ ("O", -1): -74.86327217217499,
+ ("O", 0): -74.78617322485147,
+ ("O", 1): -74.25332362507456,
+ ("F", -1): -99.55668287878551,
+ ("F", 0): -99.51775797009576,
+ ("Na", 1): -161.67192521516694,
+ ("Mg", 2): -198.82669914019823,
+ ("Si", 4): -285.1791105165065,
+ ("Si", 0): -288.8472784365606,
+ ("Si", -4): -287.3919999801635,
+ ("P", 0): -340.6925553040255,
+ ("P", 1): -340.33066918694686,
+ ("S", -1): -397.61602048346754,
+ ("S", 0): -397.5157894668129,
+ ("S", 1): -397.126843359414,
+ ("Cl", -2): -458.63292301888237,
+ ("Cl", -1): -459.68240407270594,
+ ("Cl", 0): -459.5865928328137,
+ ("Cl", 2): -458.1568260632668,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.2188060975801,
+ ("Br", -1): -2571.903217203978,
+ ("Br", 0): -2571.8074873037867,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+
+# mp2/cc-pVQZ
+DES2 = {
+ ("H", -1): -0.49885469416811784,
+ ("H", 0): -0.4999455685829884,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.250250946178424,
+ ("B", -3): -23.881056379140478,
+ ("B", -1): -24.562769033198762,
+ ("B", 0): -24.601332055304802,
+ ("B", 3): -22.00384581220691,
+ ("C", -1): -37.78757616460555,
+ ("C", 0): -37.72055375923268,
+ ("C", 1): -37.374641050923756,
+ ("N", -1): -54.42675509155296,
+ ("N", 0): -54.41599555658964,
+ ("N", 1): -53.89571949369111,
+ ("O", -1): -75.03532831936059,
+ ("O", 0): -74.89960636766679,
+ ("O", 1): -74.42732171580235,
+ ("F", -1): -99.77773243315134,
+ ("F", 0): -99.66592682518191,
+ ("Na", 1): -161.68639387893282,
+ ("Mg", 2): -198.85342876070732,
+ ("Si", 4): -285.21266596906895,
+ ("Si", 0): -288.9153023940409,
+ ("Si", -4): -287.84995588475664,
+ ("P", 0): -340.78254912688595,
+ ("P", 1): -340.41137033923945,
+ ("S", -1): -397.764457176497,
+ ("S", 0): -397.63328479696963,
+ ("S", 1): -397.2291889048987,
+ ("Cl", -2): -459.276002809114,
+ ("Cl", -1): -459.85575358503627,
+ ("Cl", 0): -459.725756402736,
+ ("Cl", 2): -458.27234841921444,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.353471955094,
+ ("Br", -1): -2572.9216392833405,
+ ("Br", 0): -2572.79376070567,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+# pbe/def2-tzvp
+ISO17 = {
+ ("H", -1): -0.4984251407077052,
+ ("H", 0): -0.4996387468896132,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.256644236856955,
+ ("B", -3): -23.935382459402287,
+ ("B", -1): -24.585965866081416,
+ ("B", 0): -24.610084509908482,
+ ("B", 3): -21.98118646897415,
+ ("C", -1): -37.77594560897306,
+ ("C", 0): -37.732895049756756,
+ ("C", 1): -37.38238697233679,
+ ("N", -1): -54.441487575279545,
+ ("N", 0): -54.43218609912527,
+ ("N", 1): -53.89863329199101,
+ ("O", -1): -75.04792601076215,
+ ("O", 0): -74.9084975444151,
+ ("O", 1): -74.35740906502845,
+ ("F", -1): -99.77558183886431,
+ ("F", 0): -99.66914009406862,
+ ("Na", 1): -161.9641373718238,
+ ("Mg", 2): -199.1000109617099,
+ ("Si", 4): -285.4180171255296,
+ ("Si", 0): -289.2015108290971,
+ ("Si", -4): -288.02271678330254,
+ ("P", 0): -341.06484223053843,
+ ("P", 1): -340.68322234698707,
+ ("S", -1): -398.00391422392744,
+ ("S", 0): -397.9053091661701,
+ ("S", 1): -397.5008759502245,
+ ("Cl", -2): -459.38640826217886,
+ ("Cl", -1): -460.0784728780043,
+ ("Cl", 0): -459.95841441797796,
+ ("Cl", 2): -458.566186731762,
+ ("K", 1): -599.5277926006352,
+ ("Ca", 2): -676.6655247948639,
+ ("Br", -1): -2573.8415230488945,
+ ("Br", 0): -2573.720729522105,
+ ("I", -1): -297.7815346863186,
+ ("I", 0): -297.66553802494457,
+}
+
+
+# hf/cc-pVQZ
+ANI1X_2 = {
+ ("H", -1): -0.47386028485392406,
+ ("H", 0): -0.49994556858298844,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236386237851972,
+ ("B", -3): -23.74309031828107,
+ ("B", -1): -24.46286773184739,
+ ("B", 0): -24.5329645824744,
+ ("B", 3): -21.986158801102064,
+ ("C", -1): -37.66896328779905,
+ ("C", 0): -37.604262031495196,
+ ("C", 1): -37.29646463702154,
+ ("N", -1): -54.22426108804101,
+ ("N", 0): -54.26750374803837,
+ ("N", 1): -53.76849831230501,
+ ("O", -1): -74.78286297582162,
+ ("O", 0): -74.68967002333635,
+ ("O", 1): -74.19286214550267,
+ ("F", -1): -99.44462949539432,
+ ("F", 0): -99.41376829607128,
+ ("Na", 1): -161.67672032176134,
+ ("Mg", 2): -198.83037897754207,
+ ("Si", 4): -285.1803724364078,
+ ("Si", 0): -288.79743501319945,
+ ("Si", -4): -287.65204471889274,
+ ("P", 0): -340.63262408709096,
+ ("P", 1): -340.27442412596326,
+ ("S", -1): -397.54055244875906,
+ ("S", 0): -397.42820343953593,
+ ("S", 1): -397.06412575498064,
+ ("Cl", -2): -458.978571599394,
+ ("Cl", -1): -459.57282279413744,
+ ("Cl", 0): -459.4890928627921,
+ ("Cl", 2): -458.0963453990511,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.1542980250254,
+ ("Br", -1): -2572.5345236382864,
+ ("Br", 0): -2572.448003418184,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+
+
+# mp2/cc-pVTZ
+DES3 = {
+ ("H", -1): -0.4891625462679369,
+ ("H", 0): -0.49980981130184304,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.24726155786237,
+ ("B", -3): -23.763643794842856,
+ ("B", -1): -24.53409654753541,
+ ("B", 0): -24.583383154203396,
+ ("B", 3): -21.991094434286477,
+ ("C", -1): -37.71496709817741,
+ ("C", 0): -37.69583488009523,
+ ("C", 1): -37.35364857976649,
+ ("N", -1): -54.37687246581612,
+ ("N", 0): -54.38498928095387,
+ ("N", 1): -53.86758718077272,
+ ("O", -1): -74.97696880669871,
+ ("O", 0): -74.85981462857248,
+ ("O", 1): -74.3128417784704,
+ ("F", -1): -99.70562180844765,
+ ("F", 0): -99.61731492045887,
+ ("Na", 1): -161.68534038705675,
+ ("Mg", 2): -198.84302024453982,
+ ("Si", 4): -285.22727858476895,
+ ("Si", 0): -288.9183509250862,
+ ("Si", -4): -287.5995448051336,
+ ("P", 0): -340.75961526664724,
+ ("P", 1): -340.3904498977919,
+ ("S", -1): -397.7141036332652,
+ ("S", 0): -397.5920220310466,
+ ("S", 1): -397.19206598949114,
+ ("Cl", -2): -459.0459580553311,
+ ("Cl", -1): -459.79402765207186,
+ ("Cl", 0): -459.67567575694216,
+ ("Cl", 2): -458.22960655909685,
+ ("K", 1): None, # not available with this basis set
+ ("Ca", 2): -676.3023664599882,
+ ("Br", -1): -2572.801814668155,
+ ("Br", 0): -2572.6834739695705,
+ ("I", -1): None, # not available with this basis set
+ ("I", 0): None, # not available with this basis set
+}
+
+# pbe0/def2-tzvp
+QM7X_DFT = {
+ ("H", -1): -0.5000012696776297,
+ ("H", 0): -0.5010619187567116,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.262402336780465,
+ ("B", -3): -23.93538245940231,
+ ("B", -1): -24.58596586608141,
+ ("B", 0): -24.618279526937158,
+ ("B", 3): -21.993880405036222,
+ ("C", -1): -37.775945608973075,
+ ("C", 0): -37.73289504975675,
+ ("C", 1): -37.38238697233677,
+ ("N", -1): -54.4414875752795,
+ ("N", 0): -54.43218609912527,
+ ("N", 1): -53.898633291991025,
+ ("O", -1): -75.04858314388663,
+ ("O", 0): -74.9084975444151,
+ ("O", 1): -74.35740906502848,
+ ("F", -1): -99.77378866090523,
+ ("F", 0): -99.67618937527747,
+ ("Na", 1): -161.98136849490916,
+ ("Mg", 2): -199.1241396537923,
+ ("Si", 4): -285.4539026316095,
+ ("Si", 0): -289.20151082909706,
+ ("Si", -4): -288.04650100943854,
+ ("P", 0): -341.06484223053843,
+ ("P", 1): -340.6832223469869,
+ ("S", -1): -398.03842612700186,
+ ("S", 0): -397.90530916617007,
+ ("S", 1): -397.5008759502245,
+ ("Cl", -2): -459.4152688089829,
+ ("Cl", -1): -460.11739716845636,
+ ("Cl", 0): -459.9974100829532,
+ ("Cl", 2): -458.6052342125039,
+ ("K", 1): -599.5783201878277,
+ ("Ca", 2): -676.7194481655977,
+ ("Br", -1): -2573.9328383617813,
+ ("Br", 0): -2573.8118913577364,
+ ("I", -1): -297.8097622358941,
+ ("I", 0): -297.6931741613416,
+}
+
+# LEVEL OF THEORY: WB97M-V/def2-tzvp
+COMP6_9 = {
+ ("H", -1): -0.5043034149209957,
+ ("H", 0): -0.4942304316867456,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.275845986964876,
+ ("B", -3): -23.944386486890433,
+ ("B", -1): -24.620648350767315,
+ ("B", 0): -24.649626180737634,
+ ("B", 3): -22.041679002146115,
+ ("C", -1): -37.81902657653025,
+ ("C", 0): -37.78784557278033,
+ ("C", 1): -37.43099787866309,
+ ("N", -1): -54.50330209852381,
+ ("N", 0): -54.48942541262065,
+ ("N", 1): -53.97039551980893,
+ ("O", -1): -75.10937339867125,
+ ("O", 0): -74.98274472768641,
+ ("O", 1): -74.42816465620183,
+ ("F", -1): -99.8448159370651,
+ ("F", 0): -99.74528654206127,
+ ("Na", 1): -162.06872009995914,
+ ("Mg", 2): -199.22338375053474,
+ ("Si", 4): -285.5821192636676,
+ ("Si", 0): -289.31658008917617,
+ ("Si", -4): -288.11126408870666,
+ ("P", 0): -341.2109132073535,
+ ("P", 1): -340.8136624526414,
+ ("S", -1): -398.1550625555495,
+ ("S", 0): -398.0362575878335,
+ ("S", 1): -397.63036775088466,
+ ("Cl", -2): -459.52873734619544,
+ ("Cl", -1): -460.24520403058557,
+ ("Cl", 0): -460.12503955811985,
+ ("Cl", 2): -458.6770781144964,
+ ("K", 1): -599.7242257909018,
+ ("Ca", 2): -676.8737360488551,
+ ("Br", -1): -2574.0859799330883,
+ ("Br", 0): -2573.967555604986,
+ ("I", -1): -297.7777930229968,
+ ("I", 0): -297.66455265533017,
+}
+
+# hf/def2-tzvp
+HF_DEF2 = {
+ ("H", -1): -0.4668133747908114,
+ ("H", 0): -0.4998098322318885,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.236374246714073,
+ ("B", -3): -23.74140302512685,
+ ("B", -1): -24.462195925378662,
+ ("B", 0): -24.53233202503875,
+ ("B", 3): -21.985926089783565,
+ ("C", -1): -37.613473799868544,
+ ("C", 0): -37.603219252494,
+ ("C", 1): -37.295541183753926,
+ ("N", -1): -54.223174834464814,
+ ("N", 0): -54.266099796938654,
+ ("N", 1): -53.76717547003795,
+ ("O", -1): -74.78142147694243,
+ ("O", 0): -74.68804805190297,
+ ("O", 1): -74.19115875887655,
+ ("F", -1): -99.44317910914634,
+ ("F", 0): -99.41179977280933,
+ ("Na", 1): -161.67025708598274,
+ ("Mg", 2): -198.82300763311338,
+ ("Si", 4): -285.17360760657004,
+ ("Si", 0): -288.7894100524365,
+ ("Si", -4): -287.5042786445288,
+ ("P", 0): -340.6233882863439,
+ ("P", 1): -340.26541318034015,
+ ("S", -1): -397.5252097143351,
+ ("S", 0): -397.4176274212401,
+ ("S", 1): -397.0534456500219,
+ ("Cl", -2): -458.7948759929542,
+ ("Cl", -1): -459.55564984013716,
+ ("Cl", 0): -459.47680800709793,
+ ("Cl", 2): -458.0838125597828,
+ ("K", 1): -599.0060338509219,
+ ("Ca", 2): -676.1418445564589,
+ ("Br", -1): -2572.4811033491237,
+ ("Br", 0): -2572.398074528429,
+ ("I", -1): -296.7409981252531,
+ ("I", 0): -296.6585948224954,
+}
+# wb97x/cc-pvtz -- label taken from the slot this table fills in ISOLATED_ATOM_ENERGIES.
+# Keys look like (element symbol, charge) pairs; None marks an entry with no value.
+ANI1X_8 = {
+ # NOTE(review): the next value is digit-for-digit the COMP6_9 (WB97M-V/def2-tzvp) entry -- verify it is not a copy.
+ ("H", -1): -0.5043034149209957,
+ ("H", 0): -0.5013136410415637,
+ ("H", 1): 0.0,
+ ("Li", 1): -7.286464366413948,
+ ("B", -3): -23.86534129296109,
+ ("B", -1): -24.613473886395223,
+ ("B", 0): -24.65142963156562,
+ ("B", 3): -22.073004626190233,
+ # NOTE(review): unlike the sibling tables there is no ("C", -1) or ("C", 1) entry here -- confirm this is intended.
+ ("C", 0): -37.780134440896255,
+ ("N", -1): -54.481657808873116,
+ ("N", 0): -54.48280823582692,
+ ("N", 1): -53.95708783281901,
+ ("O", -1): -75.09104966465256,
+ ("O", 0): -74.97131697424727,
+ ("O", 1): -74.41885693671637,
+ ("F", -1): -99.82474743242214,
+ ("F", 0): -99.73990054006921,
+ ("Na", 1): -162.08501075159776,
+ ("Mg", 2): -199.24620625842113,
+ ("Si", 4): -285.6197527177925,
+ ("Si", 0): -289.323387632431,
+ ("Si", -4): -288.04657476482333,
+ ("P", 0): -341.1958015245573,
+ ("P", 1): -340.8193558685238,
+ ("S", -1): -398.1805976553139,
+ ("S", 0): -398.0529588010547,
+ ("S", 1): -397.69734443410385,
+ ("Cl", -2): -459.5595393232076,
+ ("Cl", -1): -460.2768559014631,
+ ("Cl", 0): -460.1543938788908,
+ ("Cl", 2): -458.6962780587144,
+ # The None entries below match the ones the other cc-pVTZ tables mark as
+ # "not available with this basis set"; presumably the same reason applies here.
+ ("K", 1): None,
+ ("Ca", 2): -676.921587688464,
+ ("Br", -1): -2574.3069571951482,
+ ("Br", 0): -2574.1862987794157,
+ ("I", -1): None,
+ ("I", 0): None,
+}
+# FF ttm2.1-f, calculated with ttm3-f f90 routine
+# Link: https://www.pnnl.gov/science/ttm3f.asp
+# The energy of an isolated atom is always 0 with this force field, so these values never change;
+# they are written out explicitly for clarity.
+TTM2 = {
+ ("H", 0): 0.0,
+ ("O", 0): 0.0,
+}
+
+
+# Registry of the isolated-atom energy tables defined above.
+# Most entries are nested as method/functional name -> basis-set (or correction) name -> table;
+# the semi-empirical and force-field entries at the bottom map a method name straight to its table.
+# A None value means no table is provided for that combination.
+# NOTE(review): ccsdaug and mp2def2TZVP are defined above but are not referenced in this mapping.
+ISOLATED_ATOM_ENERGIES = {
+ # DFT
+ "wb97x": {
+ "6-31g*": COMP6_1,
+ "6-31g(d)": ANI1,  # same basis as "6-31g*" in the other notation; the two tables differ only in the last digits
+ "cc-pvtz": ANI1X_8,
+ },
+ "wb97x-d": {"def2-svp": NABLADFT},
+ "wb97x-d3": {"def2-tzvp": ORBNET},
+ "wb97m": {
+ "def2-tzvp": COMP6_9,  # COMP6_9 is labelled WB97M-V/def2-tzvp
+ },
+ "wb97m-d3bj": {"def2-tzvp": wb97m_d3bj_def2_tzvp, "def2-tzvppd": SPICE},
+ "tpssh": {"def2-tzvp": TMQM},
+ "revpbe-d3(bj)": {"def2-tzvp": SolvatedPeptides},
+ "dsd-blyp-d3(bj)": {"def2-tzvp": SN2RXN},
+ "b3lyp": {
+ "6-31g*": QMUGS_DFT,
+ "def2-tzv": COMP6_3,  # NOTE(review): COMP6_3 is labelled b3lyp/def2-TZVP; confirm "def2-tzv" is the intended key
+ },
+ "b3lyp-d3mbj": {"def2-tzvp": COMP6_2},
+ "pbe-d3bj": {
+ "def2-tzvp": COMP6_5,
+ },
+ "hf": {
+ "def2-tzvp": HF_DEF2,
+ "cc-pvdz": ANI1X_1,
+ "cc-pvqz": ANI1X_2,
+ "cc-pvtz": ANI1X_3,
+ },
+ "svwn": {
+ "def2-tzv": COMP6_7,  # NOTE(review): COMP6_7 is labelled SVWN/def2-TZVP; confirm "def2-tzv" is the intended key
+ },
+ # PAW
+ "pbe0": {
+ "mbd": QM7X_DFT,  # QM7X_DFT is labelled pbe0/def2-tzvp
+ },
+ "pbe": {
+ # All three keys share the single pbe/def2-tzvp table.
+ "vdw-ts": ISO17,
+ "mbd": ISO17,
+ "def2-tzvp": ISO17,
+ },
+ # HIGHER LEVEL OF THEORY
+ "ccsd": {
+ "cc-pvdz": GDML_1,
+ "cc-pvtz": CCSD_VTZ,
+ },
+ "tccsd(t)": {
+ "cc-pvdz": ANI1CCX_2,  # NOTE(review): ANI1CCX_2 is labelled ccsd(t)/cc-pVTZ, not cc-pVDZ -- confirm
+ },
+ "ccsd(t)": {
+ "cc-pvdz": GDML_2,
+ "cc-pvtz": ANI1CCX_2,
+ "cbs": ccsdtaug,  # ccsdtaug is labelled ccsd(t)/aug-cc-pVDZ, i.e. not an extrapolated CBS value
+ "nn": None, # ML Calculated
+ },
+ "mp2": {
+ "cc-pvdz": DES1,
+ "cc-pvqz": DES2,
+ "cc-pvtz": DES3,
+ "cbs": mp2aug,  # mp2aug is labelled mp2/aug-cc-pVDZ, i.e. not an extrapolated CBS value
+ },
+ # SAPT0
+ "sapt0": {
+ "aug-cc-pwcvxz": None, # DOESNT MAKE SENSE
+ },
+ # SEMI EMPIRICAL
+ # These keys map directly to a table, with no basis-set level in between.
+ "gfn2_xtb": GFN2,
+ "gfn1_xtb": GFN1,
+ "dft3b": DFTB,  # NOTE(review): key is spelled "dft3b" while the table is DFTB -- confirm against callers
+ "pm6": PM6,
+ # FF
+ "ttm2.1-f": TTM2,
+}
+
+# TODO: Talk with ivan about cbs extrapolation from av[TQ]z. For now this should be ok
diff --git a/src/openqdc/utils/io.py b/src/openqdc/utils/io.py
index f0853dd..6105d93 100644
--- a/src/openqdc/utils/io.py
+++ b/src/openqdc/utils/io.py
@@ -6,6 +6,7 @@
import fsspec
import h5py
import torch
+from ase.atoms import Atoms
from fsspec.implementations.local import LocalFileSystem
from gcsfs import GCSFileSystem
from rdkit.Chem import MolFromXYZFile
@@ -13,14 +14,35 @@
gcp_filesys = fsspec.filesystem("gs")
local_filesys = LocalFileSystem()
+# Location of the local cache. set_cache_dir() rebinds it; get_local_cache() expands and creates it.
+_OPENQDC_CACHE_DIR = "~/.cache/openqdc"
-def get_local_cache():
- cache_dir = os.path.expanduser(os.path.expandvars("~/.cache/openqdc"))
+
+def set_cache_dir(d):
+    r"""
+    Override the directory stored in _OPENQDC_CACHE_DIR.
+
+    Args:
+        d (str): path to a local folder; a leading ``~`` is expanded.
+            Passing ``None`` keeps the current setting.
+    """
+    global _OPENQDC_CACHE_DIR
+    if d is not None:
+        _OPENQDC_CACHE_DIR = os.path.expanduser(d)
+
+
+def get_local_cache() -> str:
+ """
+ Returns the local cache directory. It creates it if it does not exist.
+
+ Environment variables and a leading ``~`` in _OPENQDC_CACHE_DIR are expanded first.
+
+ Returns:
+ str: path to the local cache directory
+ """
+ cache_dir = os.path.expanduser(os.path.expandvars(_OPENQDC_CACHE_DIR))
os.makedirs(cache_dir, exist_ok=True)
return cache_dir
-def get_remote_cache():
+def get_remote_cache() -> str:
+ """
+ Returns the remote cache location, the fixed URL ``gs://opendatasets/openqdc``.
+ """
remote_cache = "gs://opendatasets/openqdc"
return remote_cache
@@ -152,6 +174,22 @@ def load_xyz(path):
return MolFromXYZFile(path)
+def dict_to_atoms(d: dict, ext: bool = False) -> Atoms:
+    """
+    Converts dictionary to ase atoms object
+
+    The input dictionary is not modified: the required fields are taken from a shallow copy.
+
+    Args:
+        d (dict): dictionary containing keys: positions, atomic_numbers, charges
+        ext (bool, optional): Whether to include all the rest of the dictionary in the atoms object info field.
+            Defaults to False.
+
+    Returns:
+        Atoms: ase atoms object built from the positions, atomic numbers and charges
+
+    Raises:
+        KeyError: if one of the required keys is missing from ``d``
+    """
+    # Pop from a shallow copy so the caller's dictionary keeps its required keys
+    # and can be converted (or otherwise reused) again afterwards.
+    extras = dict(d)
+    pos, atomic_numbers, charges = extras.pop("positions"), extras.pop("atomic_numbers"), extras.pop("charges")
+    at = Atoms(positions=pos, numbers=atomic_numbers, charges=charges)
+    if ext:
+        # Whatever is left after removing the three required keys.
+        at.info = extras
+    return at
+
+
def print_h5_tree(val, pre=""):
items = len(val)
for key, val in val.items():
diff --git a/src/openqdc/utils/package_utils.py b/src/openqdc/utils/package_utils.py
new file mode 100644
index 0000000..c7b8aac
--- /dev/null
+++ b/src/openqdc/utils/package_utils.py
@@ -0,0 +1,130 @@
+import importlib
+from functools import wraps
+from typing import Any, Callable, TypeVar
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+class MissingOptionalDependencyError(Exception):
+    """
+    An exception raised when an optional dependency is required
+    but cannot be found.
+
+    It derives from ``Exception`` (not ``BaseException``) so that ordinary
+    ``except Exception`` handlers can catch a missing dependency.
+
+    Attributes
+    ----------
+    library_name
+        The name of the missing library.
+    """
+
+    def __init__(self, library_name: str):
+        """
+        Build the error message from the name of the missing library.
+
+        Parameters
+        ----------
+        library_name
+            The name of the missing library.
+        """
+
+        message = f"The required {library_name} module could not be imported."
+
+        super().__init__(message)
+
+        self.library_name = library_name
+
+
+def has_package(package_name: str) -> bool:
+    """
+    Report whether a Python package can be imported.
+    Meant for probing optional dependencies before using them.
+
+    Parameters
+    ----------
+    package_name : str
+        The name of the Python package to look for
+
+    Returns
+    -------
+    package_available : bool
+        True when importing the package succeeds, False when the
+        module cannot be found
+
+    Examples
+    --------
+    >>> has_package('importlib')
+    True
+    >>> has_package('other_non_installed_package')
+    False
+    """
+    try:
+        importlib.import_module(package_name)
+    except ModuleNotFoundError:
+        package_available = False
+    else:
+        package_available = True
+    return package_available
+
+
+def requires_package(package_name: str) -> Callable[..., Any]:
+    """
+    Helper function to denote that a function requires some optional
+    dependency. A function decorated with this decorator will raise
+    `MissingOptionalDependencyError` if the package is not found by
+    `importlib.import_module()`.
+
+    The check runs every time the decorated function is called, not when
+    the decorator is applied.
+
+    Parameters
+    ----------
+    package_name : str
+        The name of the module to be imported.
+
+    Returns
+    -------
+    Callable
+        A decorator that wraps a function with the import check.
+
+    Raises
+    ------
+    MissingOptionalDependencyError
+        When the decorated function is called and the import fails
+        with an ImportError.
+    """
+
+    def inner_decorator(function: F) -> F:
+        @wraps(function)
+        def wrapper(*args, **kwargs):
+            try:
+                importlib.import_module(package_name)
+            except ImportError as err:
+                # Chain the original error so the real import failure stays visible.
+                raise MissingOptionalDependencyError(library_name=package_name) from err
+
+            return function(*args, **kwargs)
+
+        return wrapper
+
+    return inner_decorator
+
+
+def get_dir():
+    r"""
+    Get the directory stored in the module-level ``_hub_dir`` variable.
+
+    NOTE(review): this helper appears to be adapted from ``torch.hub.get_dir``;
+    the fallback to a default path used there is not implemented here.
+
+    Returns:
+        The value of ``_hub_dir`` when it has been defined, otherwise ``None``.
+    """
+    # ``_hub_dir`` is not initialised at module level, so look it up
+    # defensively instead of raising NameError on the first call.
+    return globals().get("_hub_dir")
+
+
+def set_dir(d):
+    r"""
+    Set the directory stored in the module-level ``_hub_dir`` variable.
+
+    Args:
+        d (str): path to a local folder; a leading ``~`` is expanded.
+            Passing ``None`` leaves the current value untouched.
+    """
+    import os  # local import: this module does not import ``os`` at the top level
+
+    global _hub_dir
+    if d is None:
+        return
+    _hub_dir = os.path.expanduser(d)
diff --git a/src/openqdc/utils/units.py b/src/openqdc/utils/units.py
index a810f1f..fb895ce 100644
--- a/src/openqdc/utils/units.py
+++ b/src/openqdc/utils/units.py
@@ -72,3 +72,4 @@ def get_conversion(in_unit: str, out_unit: str):
)
Conversion("hartree/ang", "kcal/mol/ang", lambda x: get_conversion("hartree", "kcal/mol")(x))
Conversion("hartree/ang", "hartree/bohr", lambda x: get_conversion("bohr", "ang")(x))
+# Reverse of the "hartree/ang" -> "hartree/bohr" entry above, so it uses the opposite
+# length conversion (ang -> bohr): the length unit sits in the denominator.
+Conversion("hartree/bohr", "hartree/ang", lambda x: get_conversion("ang", "bohr")(x))
diff --git a/tests/test_dummy.py b/tests/test_dummy.py
new file mode 100644
index 0000000..65fe9b6
--- /dev/null
+++ b/tests/test_dummy.py
@@ -0,0 +1,21 @@
+"""Smoke tests for the Dummy dataset and the isolated atom energy factory."""
+
+from openqdc.datasets.dummy import Dummy # noqa: E402
+from openqdc.utils.atomization_energies import (
+ ISOLATED_ATOM_ENERGIES,
+ IsolatedAtomEnergyFactory,
+)
+
+
+def test_dummy():
+    """The dummy dataset has more than 10 entries and item 100 is truthy."""
+    dataset = Dummy()
+    assert len(dataset) > 10
+    assert dataset[100]
+
+
+def test_is_at_factory():
+    """The factory returns tables matching ISOLATED_ATOM_ENERGIES for both key layouts."""
+    # "method/basis" string resolving to a nested entry
+    mp2_energies = IsolatedAtomEnergyFactory.get("mp2/cc-pvdz")
+    assert len(mp2_energies) == len(ISOLATED_ATOM_ENERGIES["mp2"]["cc-pvdz"])
+
+    # Upper-case method name resolving to a lower-case top-level entry
+    pm6_energies = IsolatedAtomEnergyFactory.get("PM6")
+    assert len(pm6_energies) == len(ISOLATED_ATOM_ENERGIES["pm6"])
+    assert isinstance(pm6_energies[("H", 0)], float)