Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emulator descriptors #100

Merged
merged 7 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@
import sys
import subprocess
import pkg_resources
import datetime

sys.path.insert(0, os.path.abspath("../.."))


# -- Project information -----------------------------------------------------

project = "Summit"
copyright = "2020, Kobi Felton, Jan Rittig"
author = "Kobi Felton, Jan Rittig"
dt = datetime.datetime.today()
year = dt.year
copyright = f"{year}, Summit Authors"
author = "Kobi Felton and Summit Authors"

# The full version, including alpha/beta/rc tags
release = pkg_resources.get_distribution("summit").version
Expand All @@ -48,6 +51,11 @@
"sphinx.ext.intersphinx",
# read the docs theme
"sphinx_rtd_theme",
# show plots
"matplotlib.sphinxext.mathmpl",
"matplotlib.sphinxext.plot_directive",
# Doctest
"sphinx.ext.doctest",
]

# Add any paths that contain templates here, relative to this directory.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,16 @@ SnAr Benchmark
Cross-Coupling Emulator Benchmarks
----------------------------------

.. autofunction:: summit.benchmarks.get_pretrained_reizman_suzuki_emulator


.. autoclass:: summit.benchmarks.ReizmanSuzukiEmulator
:inherited-members:
:members:

.. autofunction:: summit.benchmarks.get_pretrained_baumgartner_cc_emulator

.. autoclass:: summit.benchmarks.BaumgartnerCrossCouplingEmulator
:inherited-members:
:members:
:members:

3 changes: 2 additions & 1 deletion experiments/snar_benchmark/slurm_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re
from socket import timeout as SocketTimeout
import types
import time


class SlurmRunner(NeptuneRunner):
Expand Down Expand Up @@ -691,4 +692,4 @@ def get(transport, remote_path, local_path="", recursive=False, preserve_times=F
@type preserve_times: bool
"""
with SCPClient(transport) as client:
client.get(remote_path, local_path, recursive, preserve_times)
client.get(remote_path, local_path, recursive, preserve_times)
3 changes: 1 addition & 2 deletions experiments/snar_benchmark/test_snar_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_no_transform(strategy):


# Run experiments
@pytest.mark.parametrize("strategy", [DRO, NelderMead, SNOBFIT, SOBO, GRYFFIN])
@pytest.mark.parametrize("strategy", [DRO, NelderMead, SNOBFIT, SOBO])
@pytest.mark.parametrize("transform", transforms)
def test_snar_experiment(strategy, transform):
warnings.filterwarnings("ignore", category=RuntimeWarning)
Expand Down Expand Up @@ -131,4 +131,3 @@ def test_snar_experiment(strategy, transform):
hypervolume_ref=[-2957, 10.7],
)
r.run(save_at_end=True)

15 changes: 8 additions & 7 deletions scripts/train_emulators/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ The `train_emulators.py` script will train emulators and create this report.
This is the data from training the Reizman Suzuki benchmark for 1000 epochs with 5 cross-validation folds.
| case | avg_fit_time | avg_val_r2 | avg_val_RMSE | avg_test_r2 | avg_test_RMSE |
|:-------|---------------:|-------------:|---------------:|--------------:|----------------:|
| case_1 | 10.35 | 0.83 | 10.82 | 0.93 | 7.5 |
| case_2 | 8.93 | 0.62 | 5.4 | 0.67 | 4.91 |
| case_3 | 10.12 | 0.74 | 13.79 | 0.84 | 12.07 |
| case_4 | 9.6 | 0.7 | 15.9 | 0.74 | 13.98 |
| case_1 | 8.63 | 0.82 | 11.14 | 0.93 | 7.54 |
| case_2 | 8.8 | 0.61 | 5.38 | 0.66 | 4.99 |
| case_3 | 8.24 | 0.78 | 12.91 | 0.84 | 11.9 |
| case_4 | 8.31 | 0.7 | 15.67 | 0.73 | 14.06 |
## Baumgartner C-N Cross-Coupling
This is the data from training of the Baumgartner C-N aniline cross-coupling benchmark for 1000 epochs with 5 cross-validation folds.
| case | avg_fit_time | avg_val_r2 | avg_val_RMSE | avg_test_r2 | avg_test_RMSE |
|:--------|---------------:|-------------:|---------------:|--------------:|----------------:|
| one-hot | 8.43 | 0.81 | 0.17 | 0.88 | 0.13 |
| case | avg_fit_time | avg_val_r2 | avg_val_RMSE | avg_test_r2 | avg_test_RMSE |
|:------------|---------------:|-------------:|---------------:|--------------:|----------------:|
| one-hot | 8.17 | 0.8 | 0.18 | 0.89 | 0.13 |
| descriptors | 8.19 | 0.86 | 0.15 | 0.91 | 0.11 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
case,avg_fit_time,avg_score_time,avg_val_r2,avg_val_neg_root_mean_squared_error,avg_test_r2,avg_test_neg_root_mean_squared_error
one-hot,8.43237853050232,0.0061893463134765625,0.8130802170821865,-0.17386471927165986,0.8788791513272004,-0.13320153332761886
one-hot,8.166110754013062,0.005637454986572266,0.801623251767853,-0.17928863167762757,0.8913133689375095,-0.12660206639121113
descriptors,8.191894769668579,0.006843852996826172,0.8564572592750457,-0.15195560306310654,0.9149636380225289,-0.11228418010873668
Binary file modified scripts/train_emulators/results/reizman_suzuki_case_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified scripts/train_emulators/results/reizman_suzuki_case_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified scripts/train_emulators/results/reizman_suzuki_case_3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified scripts/train_emulators/results/reizman_suzuki_case_4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 4 additions & 4 deletions scripts/train_emulators/results/reizman_suzuki_scores.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
case,avg_fit_time,avg_score_time,avg_val_r2,avg_val_neg_root_mean_squared_error,avg_test_r2,avg_test_neg_root_mean_squared_error
case_1,10.348453617095947,0.00654149055480957,0.8295508810399357,-10.816744422912597,0.9307952086976204,-7.498236728819775
case_2,8.928261852264404,0.005949831008911133,0.6182039897632764,-5.401305246353149,0.6746129777462376,-4.910729931080706
case_3,10.123983812332153,0.005974340438842774,0.7380188495373521,-13.788355827331543,0.8379006941956488,-12.066490239891527
case_4,9.60025577545166,0.007497835159301758,0.7013988693185367,-15.903024101257325,0.736780456428538,-13.980203005904974
case_1,8.6258864402771,0.005854988098144531,0.8212155526088756,-11.137054443359375,0.9295815380048114,-7.543151896248529
case_2,8.79561619758606,0.00565180778503418,0.605790245122981,-5.384605264663696,0.6605449615937999,-4.99218215483449
case_3,8.240145397186279,0.00576167106628418,0.7760751544224982,-12.907710647583007,0.842104321811395,-11.899007938905271
case_4,8.307093811035156,0.005579137802124023,0.7043271376927565,-15.66837387084961,0.7326339105287581,-14.055312980735687
17 changes: 11 additions & 6 deletions scripts/train_emulators/train_emulators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import pkg_resources
import pathlib
from tqdm import trange
from tqdm import trange, tqdm
import argparse

DATA_PATH = pathlib.Path(pkg_resources.resource_filename("summit", "benchmarks/data"))
Expand Down Expand Up @@ -74,37 +74,42 @@ def train_one_reizman(case, show_plots=False, save_plots=True):

def train_baumgartner(show_plots=False):
# Train models using one-hot encoding and then descriptors for the categorical variables
print("Training Baumgartner model")
result = train_baumgartner_no_descriptors()
results = [
_train_baumgartner(use_descriptors=include)
for include in tqdm([False, True], desc="Baumgartner")
]
results_average = [
{f"avg_{score_name}": scores.mean() for score_name, scores in result.items()}
for result in results
]

index = ["one-hot"]
index = ["one-hot", "descriptors"]
results_df = pd.DataFrame.from_records(results_average, index=index)
results_df.index.rename("case", inplace=True)
results_df.to_csv(f"results/baumgartner_aniline_cn_crosscoupling_scores.csv")


def train_baumgartner_no_descriptors(show_plots=False, save_plots=True):
def _train_baumgartner(use_descriptors=False, show_plots=False, save_plots=True):
# Setup
model_name = f"baumgartner_aniline_cn_crosscoupling"
domain = BaumgartnerCrossCouplingEmulator.setup_domain()
ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv")

# Create emulator and train
model_name += "_descriptors" if use_descriptors else ""
exp = ExperimentalEmulator(
model_name,
domain,
dataset=ds,
regressor=ANNRegressor,
output_variable_names=["yield"],
descriptors_features=["catalyst", "base"] if use_descriptors else [],
)
res = exp.train(
max_epochs=MAX_EPOCHS, cv_folds=CV_FOLDS, random_state=100, test_size=0.2
)

# # Run test
# Run test
res_test = exp.test()
res.update(res_test)

Expand Down
Loading