sustainable-processes · marcosfelt · Mar 5, 2021 · Mar 1, 2021 · Mar 1, 2021 · Mar 2, 2021
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -14,15 +14,18 @@
 import sys
 import subprocess
 import pkg_resources
+import datetime
 
 sys.path.insert(0, os.path.abspath("../.."))
 
 
 # -- Project information -----------------------------------------------------
 
 project = "Summit"
-copyright = "2020, Kobi Felton, Jan Rittig"
-author = "Kobi Felton, Jan Rittig"
+dt = datetime.datetime.today()
+year = dt.year
+copyright = f"{year}, Summit Authors"
+author = "Kobi Felton and Summit Authors"
 
 # The full version, including alpha/beta/rc tags
 release = pkg_resources.get_distribution("summit").version
@@ -48,6 +51,11 @@
     "sphinx.ext.intersphinx",
     # read the docs theme
     "sphinx_rtd_theme",
+    # matplotlib extensions: math rendering (mathmpl) and the plot directive
+    "matplotlib.sphinxext.mathmpl",
+    "matplotlib.sphinxext.plot_directive",
+    # test code snippets embedded in the documentation
+    "sphinx.ext.doctest",
 ]
 
 # Add any paths that contain templates here, relative to this directory.

diff --git a/docs/source/experiments_benchmarks/implemented_benchmarks.rst b/docs/source/experiments_benchmarks/implemented_benchmarks.rst
@@ -15,11 +15,16 @@ SnAr Benchmark
 Cross-Coupling Emulator Benchmarks
 ----------------------------------
 
+.. autofunction:: summit.benchmarks.get_pretrained_reizman_suzuki_emulator
+
+
 .. autoclass:: summit.benchmarks.ReizmanSuzukiEmulator
    :inherited-members:
    :members:
 
+.. autofunction:: summit.benchmarks.get_pretrained_baumgartner_cc_emulator
 
 .. autoclass:: summit.benchmarks.BaumgartnerCrossCouplingEmulator
    :inherited-members:
-   :members:
+   :members:
+
diff --git a/experiments/snar_benchmark/slurm_runner.py b/experiments/snar_benchmark/slurm_runner.py
@@ -10,6 +10,7 @@
 import re
 from socket import timeout as SocketTimeout
 import types
+import time
 
 
 class SlurmRunner(NeptuneRunner):
@@ -691,4 +692,4 @@ def get(transport, remote_path, local_path="", recursive=False, preserve_times=F
     @type preserve_times: bool
     """
     with SCPClient(transport) as client:
-        client.get(remote_path, local_path, recursive, preserve_times)
+        client.get(remote_path, local_path, recursive, preserve_times)
diff --git a/experiments/snar_benchmark/test_snar_experiment.py b/experiments/snar_benchmark/test_snar_experiment.py
@@ -84,7 +84,7 @@ def test_no_transform(strategy):
 
 
 # Run experiments
-@pytest.mark.parametrize("strategy", [DRO, NelderMead, SNOBFIT, SOBO, GRYFFIN])
+@pytest.mark.parametrize("strategy", [DRO, NelderMead, SNOBFIT, SOBO])
 @pytest.mark.parametrize("transform", transforms)
 def test_snar_experiment(strategy, transform):
     warnings.filterwarnings("ignore", category=RuntimeWarning)
@@ -131,4 +131,3 @@ def test_snar_experiment(strategy, transform):
             hypervolume_ref=[-2957, 10.7],
         )
         r.run(save_at_end=True)
-
diff --git a/scripts/train_emulators/README.md b/scripts/train_emulators/README.md
@@ -5,12 +5,13 @@ The `train_emulators.py` script will train emulators and create this report.
 This is the data from training of the reizman suzuki benchmark for 1000 epochs with 5 cross-validation folds.
 | case   |   avg_fit_time |   avg_val_r2 |   avg_val_RMSE |   avg_test_r2 |   avg_test_RMSE |
 |:-------|---------------:|-------------:|---------------:|--------------:|----------------:|
-| case_1 |          10.35 |         0.83 |          10.82 |          0.93 |            7.5  |
-| case_2 |           8.93 |         0.62 |           5.4  |          0.67 |            4.91 |
-| case_3 |          10.12 |         0.74 |          13.79 |          0.84 |           12.07 |
-| case_4 |           9.6  |         0.7  |          15.9  |          0.74 |           13.98 |
+| case_1 |           8.63 |         0.82 |          11.14 |          0.93 |            7.54 |
+| case_2 |           8.8  |         0.61 |           5.38 |          0.66 |            4.99 |
+| case_3 |           8.24 |         0.78 |          12.91 |          0.84 |           11.9  |
+| case_4 |           8.31 |         0.7  |          15.67 |          0.73 |           14.06 |
 ## Baumgartner C-N Cross Cross Coupling 
 This is the data from training of the Baumgartner C-N aniline cross-coupling benchmark for 1000 epochs with 5 cross-validation folds.
-| case    |   avg_fit_time |   avg_val_r2 |   avg_val_RMSE |   avg_test_r2 |   avg_test_RMSE |
-|:--------|---------------:|-------------:|---------------:|--------------:|----------------:|
-| one-hot |           8.43 |         0.81 |           0.17 |          0.88 |            0.13 |
+| case        |   avg_fit_time |   avg_val_r2 |   avg_val_RMSE |   avg_test_r2 |   avg_test_RMSE |
+|:------------|---------------:|-------------:|---------------:|--------------:|----------------:|
+| one-hot     |           8.17 |         0.8  |           0.18 |          0.89 |            0.13 |
+| descriptors |           8.19 |         0.86 |           0.15 |          0.91 |            0.11 |
diff --git a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling.png b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling.png
diff --git a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_scores.csv b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_scores.csv
@@ -1,2 +1,3 @@
 case,avg_fit_time,avg_score_time,avg_val_r2,avg_val_neg_root_mean_squared_error,avg_test_r2,avg_test_neg_root_mean_squared_error
-one-hot,8.43237853050232,0.0061893463134765625,0.8130802170821865,-0.17386471927165986,0.8788791513272004,-0.13320153332761886
+one-hot,8.166110754013062,0.005637454986572266,0.801623251767853,-0.17928863167762757,0.8913133689375095,-0.12660206639121113
+descriptors,8.191894769668579,0.006843852996826172,0.8564572592750457,-0.15195560306310654,0.9149636380225289,-0.11228418010873668
diff --git a/scripts/train_emulators/results/reizman_suzuki_case_1.png b/scripts/train_emulators/results/reizman_suzuki_case_1.png
diff --git a/scripts/train_emulators/results/reizman_suzuki_case_2.png b/scripts/train_emulators/results/reizman_suzuki_case_2.png
diff --git a/scripts/train_emulators/results/reizman_suzuki_case_3.png b/scripts/train_emulators/results/reizman_suzuki_case_3.png
diff --git a/scripts/train_emulators/results/reizman_suzuki_case_4.png b/scripts/train_emulators/results/reizman_suzuki_case_4.png
diff --git a/scripts/train_emulators/results/reizman_suzuki_scores.csv b/scripts/train_emulators/results/reizman_suzuki_scores.csv
@@ -1,5 +1,5 @@
 case,avg_fit_time,avg_score_time,avg_val_r2,avg_val_neg_root_mean_squared_error,avg_test_r2,avg_test_neg_root_mean_squared_error
-case_1,10.348453617095947,0.00654149055480957,0.8295508810399357,-10.816744422912597,0.9307952086976204,-7.498236728819775
-case_2,8.928261852264404,0.005949831008911133,0.6182039897632764,-5.401305246353149,0.6746129777462376,-4.910729931080706
-case_3,10.123983812332153,0.005974340438842774,0.7380188495373521,-13.788355827331543,0.8379006941956488,-12.066490239891527
-case_4,9.60025577545166,0.007497835159301758,0.7013988693185367,-15.903024101257325,0.736780456428538,-13.980203005904974
+case_1,8.6258864402771,0.005854988098144531,0.8212155526088756,-11.137054443359375,0.9295815380048114,-7.543151896248529
+case_2,8.79561619758606,0.00565180778503418,0.605790245122981,-5.384605264663696,0.6605449615937999,-4.99218215483449
+case_3,8.240145397186279,0.00576167106628418,0.7760751544224982,-12.907710647583007,0.842104321811395,-11.899007938905271
+case_4,8.307093811035156,0.005579137802124023,0.7043271376927565,-15.66837387084961,0.7326339105287581,-14.055312980735687
diff --git a/scripts/train_emulators/train_emulators.py b/scripts/train_emulators/train_emulators.py
@@ -6,7 +6,7 @@
 import logging
 import pkg_resources
 import pathlib
-from tqdm import trange
+from tqdm import trange, tqdm
 import argparse
 
 DATA_PATH = pathlib.Path(pkg_resources.resource_filename("summit", "benchmarks/data"))
@@ -74,37 +74,42 @@ def train_one_reizman(case, show_plots=False, save_plots=True):
 
 def train_baumgartner(show_plots=False):
     # Train model using one-hot encoding for categorical
-    print("Training Baumgartner model")
-    result = train_baumgartner_no_descriptors()
+    results = [
+        _train_baumgartner(use_descriptors=include)
+        for include in tqdm([False, True], desc="Baumgartner")
+    ]
     results_average = [
         {f"avg_{score_name}": scores.mean() for score_name, scores in result.items()}
+        for result in results
     ]
 
-    index = ["one-hot"]
+    index = ["one-hot", "descriptors"]
     results_df = pd.DataFrame.from_records(results_average, index=index)
     results_df.index.rename("case", inplace=True)
     results_df.to_csv(f"results/baumgartner_aniline_cn_crosscoupling_scores.csv")
 
 
-def train_baumgartner_no_descriptors(show_plots=False, save_plots=True):
+def _train_baumgartner(use_descriptors=False, show_plots=False, save_plots=True):
     # Setup
     model_name = f"baumgartner_aniline_cn_crosscoupling"
     domain = BaumgartnerCrossCouplingEmulator.setup_domain()
     ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv")
 
     # Create emulator and train
+    model_name += "_descriptors" if use_descriptors else ""
     exp = ExperimentalEmulator(
         model_name,
         domain,
         dataset=ds,
         regressor=ANNRegressor,
         output_variable_names=["yield"],
+        descriptors_features=["catalyst", "base"] if use_descriptors else [],
     )
     res = exp.train(
         max_epochs=MAX_EPOCHS, cv_folds=CV_FOLDS, random_state=100, test_size=0.2
     )
 
-    # # Run test
+    # Run test
     res_test = exp.test()
     res.update(res_test)