diff --git a/docs/source/conf.py b/docs/source/conf.py index c47cb8d6..99e989f6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,6 +14,7 @@ import sys import subprocess import pkg_resources +import datetime sys.path.insert(0, os.path.abspath("../..")) @@ -21,8 +22,10 @@ # -- Project information ----------------------------------------------------- project = "Summit" -copyright = "2020, Kobi Felton, Jan Rittig" -author = "Kobi Felton, Jan Rittig" +dt = datetime.datetime.today() +year = dt.year +copyright = f"{year}, Summit Authors" +author = "Kobi Felton and Summit Authors" # The full version, including alpha/beta/rc tags release = pkg_resources.get_distribution("summit").version @@ -48,6 +51,11 @@ "sphinx.ext.intersphinx", # read the docs theme "sphinx_rtd_theme", + # show plots + "matplotlib.sphinxext.mathmpl", + "matplotlib.sphinxext.plot_directive", + # Doctest + "sphinx.ext.doctest", ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/experiments_benchmarks/implemented_benchmarks.rst b/docs/source/experiments_benchmarks/implemented_benchmarks.rst index 65ed06db..b4e995f5 100644 --- a/docs/source/experiments_benchmarks/implemented_benchmarks.rst +++ b/docs/source/experiments_benchmarks/implemented_benchmarks.rst @@ -15,11 +15,16 @@ SnAr Benchmark Cross-Coupling Emulator Benchmarks ---------------------------------- +.. autofunction:: summit.benchmarks.get_pretrained_reizman_suzuki_emulator + + .. autoclass:: summit.benchmarks.ReizmanSuzukiEmulator :inherited-members: :members: +.. autofunction:: summit.benchmarks.get_pretrained_baumgartner_cc_emulator .. 
autoclass:: summit.benchmarks.BaumgartnerCrossCouplingEmulator :inherited-members: - :members: \ No newline at end of file + :members: + diff --git a/experiments/snar_benchmark/slurm_runner.py b/experiments/snar_benchmark/slurm_runner.py index de58c512..8a04aaf1 100644 --- a/experiments/snar_benchmark/slurm_runner.py +++ b/experiments/snar_benchmark/slurm_runner.py @@ -10,6 +10,7 @@ import re from socket import timeout as SocketTimeout import types +import time class SlurmRunner(NeptuneRunner): @@ -691,4 +692,4 @@ def get(transport, remote_path, local_path="", recursive=False, preserve_times=F @type preserve_times: bool """ with SCPClient(transport) as client: - client.get(remote_path, local_path, recursive, preserve_times) \ No newline at end of file + client.get(remote_path, local_path, recursive, preserve_times) diff --git a/experiments/snar_benchmark/test_snar_experiment.py b/experiments/snar_benchmark/test_snar_experiment.py index cf2d6082..f91a859c 100644 --- a/experiments/snar_benchmark/test_snar_experiment.py +++ b/experiments/snar_benchmark/test_snar_experiment.py @@ -84,7 +84,7 @@ def test_no_transform(strategy): # Run experiments -@pytest.mark.parametrize("strategy", [DRO, NelderMead, SNOBFIT, SOBO, GRYFFIN]) +@pytest.mark.parametrize("strategy", [DRO, NelderMead, SNOBFIT, SOBO]) @pytest.mark.parametrize("transform", transforms) def test_snar_experiment(strategy, transform): warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -131,4 +131,3 @@ def test_snar_experiment(strategy, transform): hypervolume_ref=[-2957, 10.7], ) r.run(save_at_end=True) - diff --git a/scripts/train_emulators/README.md b/scripts/train_emulators/README.md index e320f0ca..1b274e62 100644 --- a/scripts/train_emulators/README.md +++ b/scripts/train_emulators/README.md @@ -5,12 +5,13 @@ The `train_emulators.py` script will train emulators and create this report. This is the data from training of the reizman suzuki benchmark for 1000 epochs with 5 cross-validation folds. 
| case | avg_fit_time | avg_val_r2 | avg_val_RMSE | avg_test_r2 | avg_test_RMSE | |:-------|---------------:|-------------:|---------------:|--------------:|----------------:| -| case_1 | 10.35 | 0.83 | 10.82 | 0.93 | 7.5 | -| case_2 | 8.93 | 0.62 | 5.4 | 0.67 | 4.91 | -| case_3 | 10.12 | 0.74 | 13.79 | 0.84 | 12.07 | -| case_4 | 9.6 | 0.7 | 15.9 | 0.74 | 13.98 | +| case_1 | 8.63 | 0.82 | 11.14 | 0.93 | 7.54 | +| case_2 | 8.8 | 0.61 | 5.38 | 0.66 | 4.99 | +| case_3 | 8.24 | 0.78 | 12.91 | 0.84 | 11.9 | +| case_4 | 8.31 | 0.7 | 15.67 | 0.73 | 14.06 | ## Baumgartner C-N Cross Cross Coupling This is the data from training of the Baumgartner C-N aniline cross-coupling benchmark for 1000 epochs with 5 cross-validation folds. -| case | avg_fit_time | avg_val_r2 | avg_val_RMSE | avg_test_r2 | avg_test_RMSE | -|:--------|---------------:|-------------:|---------------:|--------------:|----------------:| -| one-hot | 8.43 | 0.81 | 0.17 | 0.88 | 0.13 | +| case | avg_fit_time | avg_val_r2 | avg_val_RMSE | avg_test_r2 | avg_test_RMSE | +|:------------|---------------:|-------------:|---------------:|--------------:|----------------:| +| one-hot | 8.17 | 0.8 | 0.18 | 0.89 | 0.13 | +| descriptors | 8.19 | 0.86 | 0.15 | 0.91 | 0.11 | diff --git a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling.png b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling.png index acb90128..022780cc 100644 Binary files a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling.png and b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling.png differ diff --git a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_scores.csv b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_scores.csv index 9fd6bccf..ca09c72c 100644 --- a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_scores.csv +++ b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_scores.csv @@ -1,2 +1,3 
@@ case,avg_fit_time,avg_score_time,avg_val_r2,avg_val_neg_root_mean_squared_error,avg_test_r2,avg_test_neg_root_mean_squared_error -one-hot,8.43237853050232,0.0061893463134765625,0.8130802170821865,-0.17386471927165986,0.8788791513272004,-0.13320153332761886 +one-hot,8.166110754013062,0.005637454986572266,0.801623251767853,-0.17928863167762757,0.8913133689375095,-0.12660206639121113 +descriptors,8.191894769668579,0.006843852996826172,0.8564572592750457,-0.15195560306310654,0.9149636380225289,-0.11228418010873668 diff --git a/scripts/train_emulators/results/reizman_suzuki_case_1.png b/scripts/train_emulators/results/reizman_suzuki_case_1.png index fc25a806..1d4f3d9d 100644 Binary files a/scripts/train_emulators/results/reizman_suzuki_case_1.png and b/scripts/train_emulators/results/reizman_suzuki_case_1.png differ diff --git a/scripts/train_emulators/results/reizman_suzuki_case_2.png b/scripts/train_emulators/results/reizman_suzuki_case_2.png index 2acd0cde..a788fb78 100644 Binary files a/scripts/train_emulators/results/reizman_suzuki_case_2.png and b/scripts/train_emulators/results/reizman_suzuki_case_2.png differ diff --git a/scripts/train_emulators/results/reizman_suzuki_case_3.png b/scripts/train_emulators/results/reizman_suzuki_case_3.png index 6e5b0864..8c8e9962 100644 Binary files a/scripts/train_emulators/results/reizman_suzuki_case_3.png and b/scripts/train_emulators/results/reizman_suzuki_case_3.png differ diff --git a/scripts/train_emulators/results/reizman_suzuki_case_4.png b/scripts/train_emulators/results/reizman_suzuki_case_4.png index f9a862ff..e94996a5 100644 Binary files a/scripts/train_emulators/results/reizman_suzuki_case_4.png and b/scripts/train_emulators/results/reizman_suzuki_case_4.png differ diff --git a/scripts/train_emulators/results/reizman_suzuki_scores.csv b/scripts/train_emulators/results/reizman_suzuki_scores.csv index da869f97..559a6e4e 100644 --- a/scripts/train_emulators/results/reizman_suzuki_scores.csv +++ 
b/scripts/train_emulators/results/reizman_suzuki_scores.csv @@ -1,5 +1,5 @@ case,avg_fit_time,avg_score_time,avg_val_r2,avg_val_neg_root_mean_squared_error,avg_test_r2,avg_test_neg_root_mean_squared_error -case_1,10.348453617095947,0.00654149055480957,0.8295508810399357,-10.816744422912597,0.9307952086976204,-7.498236728819775 -case_2,8.928261852264404,0.005949831008911133,0.6182039897632764,-5.401305246353149,0.6746129777462376,-4.910729931080706 -case_3,10.123983812332153,0.005974340438842774,0.7380188495373521,-13.788355827331543,0.8379006941956488,-12.066490239891527 -case_4,9.60025577545166,0.007497835159301758,0.7013988693185367,-15.903024101257325,0.736780456428538,-13.980203005904974 +case_1,8.6258864402771,0.005854988098144531,0.8212155526088756,-11.137054443359375,0.9295815380048114,-7.543151896248529 +case_2,8.79561619758606,0.00565180778503418,0.605790245122981,-5.384605264663696,0.6605449615937999,-4.99218215483449 +case_3,8.240145397186279,0.00576167106628418,0.7760751544224982,-12.907710647583007,0.842104321811395,-11.899007938905271 +case_4,8.307093811035156,0.005579137802124023,0.7043271376927565,-15.66837387084961,0.7326339105287581,-14.055312980735687 diff --git a/scripts/train_emulators/train_emulators.py b/scripts/train_emulators/train_emulators.py index c3f62e7d..8099c7d0 100644 --- a/scripts/train_emulators/train_emulators.py +++ b/scripts/train_emulators/train_emulators.py @@ -6,7 +6,7 @@ import logging import pkg_resources import pathlib -from tqdm import trange +from tqdm import trange, tqdm import argparse DATA_PATH = pathlib.Path(pkg_resources.resource_filename("summit", "benchmarks/data")) @@ -74,37 +74,42 @@ def train_one_reizman(case, show_plots=False, save_plots=True): def train_baumgartner(show_plots=False): # Train model using one-hot encoding for categorical - print("Training Baumgartner model") - result = train_baumgartner_no_descriptors() + results = [ + _train_baumgartner(use_descriptors=include) + for include in tqdm([False, 
True], desc="Baumgartner") + ] results_average = [ {f"avg_{score_name}": scores.mean() for score_name, scores in result.items()} + for result in results ] - index = ["one-hot"] + index = ["one-hot", "descriptors"] results_df = pd.DataFrame.from_records(results_average, index=index) results_df.index.rename("case", inplace=True) results_df.to_csv(f"results/baumgartner_aniline_cn_crosscoupling_scores.csv") -def train_baumgartner_no_descriptors(show_plots=False, save_plots=True): +def _train_baumgartner(use_descriptors=False, show_plots=False, save_plots=True): # Setup model_name = f"baumgartner_aniline_cn_crosscoupling" domain = BaumgartnerCrossCouplingEmulator.setup_domain() ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv") # Create emulator and train + model_name += "_descriptors" if use_descriptors else "" exp = ExperimentalEmulator( model_name, domain, dataset=ds, regressor=ANNRegressor, output_variable_names=["yield"], + descriptors_features=["catalyst", "base"] if use_descriptors else [], ) res = exp.train( max_epochs=MAX_EPOCHS, cv_folds=CV_FOLDS, random_state=100, test_size=0.2 ) - # # Run test + # Run test res_test = exp.test() res.update(res_test) diff --git a/summit/benchmarks/experimental_emulator.py b/summit/benchmarks/experimental_emulator.py index 11416de2..781d5555 100644 --- a/summit/benchmarks/experimental_emulator.py +++ b/summit/benchmarks/experimental_emulator.py @@ -19,7 +19,13 @@ ParameterGrid, ) from sklearn.model_selection._search import BaseSearchCV, _check_param_grid -from sklearn.base import BaseEstimator, RegressorMixin, is_classifier, clone +from sklearn.base import ( + BaseEstimator, + RegressorMixin, + is_classifier, + clone, + TransformerMixin, +) from sklearn.model_selection._split import check_cv from sklearn.model_selection._validation import ( _fit_and_score, @@ -37,6 +43,8 @@ from sklearn.utils.fixes import delayed from sklearn.metrics._scorer import _check_multimetric_scoring +from scipy.sparse import issparse + from 
tqdm.auto import tqdm from joblib import Parallel import pathlib @@ -85,13 +93,41 @@ class ExperimentalEmulator(Experiment): output_variable_names : str or list, optional The names of the variables that should be trained by the predictor. Defaults to all objectives in the domain. - clip : bool or list + descriptors_features : list, optional + A list of input categorical variable names that should be transformed + into their descriptors instead of using one-hot encoding. + clip : bool or list, optional Whether to clip predictions to the limits of the objectives in the domain. True (default) means clipping is activated for all outputs and False means it is not activated at all. A list of specific outputs to clip can also be passed. + Notes + ----- + By default, categorical features are pre-processed using one-hot encoding. + If descriptors are available, they can be used on a feature-by-feature basis + by specifying names of categorical variables in the descriptors_features keyword + argument. 
+ + Examples + -------- + >>> from summit.benchmarks import ExperimentalEmulator, ReizmanSuzukiEmulator + >>> from summit.utils.dataset import DataSet + >>> import matplotlib.pyplot as plt + >>> import pathlib + >>> import pkg_resources + >>> # Steal domain and data from Reizman example + >>> DATA_PATH = pathlib.Path(pkg_resources.resource_filename("summit", "benchmarks/data")) + >>> model_name = f"reizman_suzuki_case_1" + >>> domain = ReizmanSuzukiEmulator.setup_domain() + >>> ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv") + >>> # Create emulator and train + >>> exp = ExperimentalEmulator(model_name,domain,dataset=ds) + >>> res = exp.train(max_epochs=1000, cv_folds=2, random_state=100, test_size=0.2) + >>> fig, ax = exp.parity_plot(include_test=True) + >>> plt.show() + """ def __init__(self, model_name, domain, **kwargs): @@ -100,8 +136,11 @@ def __init__(self, model_name, domain, **kwargs): # Data self.ds = kwargs.get("dataset") + self.descriptors_features = kwargs.get("descriptors_features", []) if self.ds is not None: - self.n_features = self._caclulate_input_dimensions(self.domain) + self.n_features = self._caclulate_input_dimensions( + self.domain, self.descriptors_features + ) self.n_examples = self.ds.shape[0] self.output_variable_names = kwargs.get( @@ -199,6 +238,7 @@ def train(self, **kwargs): self.n_features, self.n_examples, output_variable_names=self.output_variable_names, + descriptors_features=self.descriptors_features, **kwargs, ) @@ -289,7 +329,7 @@ def _create_predictor( output_variable_names = kwargs.get( "output_variable_names", [v.name for v in domain.output_variables] ) - X_preprocessor = cls._create_input_preprocessor(domain) + X_preprocessor = cls._create_input_preprocessor(domain, **kwargs) y_preprocessor = cls._create_output_preprocessor(output_variable_names) # Create network @@ -322,26 +362,34 @@ def _create_predictor( ] ) - # output_pipeline = Pipeline( - # steps=[("scaler", StandardScaler()), ("dst", ds_to_tensor)] - # ) - 
return UpdatedTransformedTargetRegressor( regressor=pipe, transformer=StandardScaler(), check_inverse=False ) @staticmethod - def _caclulate_input_dimensions(domain): + def _caclulate_input_dimensions(domain: Domain, descriptors_features): num_dimensions = 0 for v in domain.input_variables: if v.variable_type == "continuous": num_dimensions += 1 elif v.variable_type == "categorical": - num_dimensions += len(v.levels) + if v.name in descriptors_features: + if v.ds is not None: + num_dimensions += len(v.ds.data_columns) + else: + raise DomainError( + ( + f"Descriptors not available for {v.name})," + f" but it is list in descriptors_features." + "Make sure descriptors is set on the categorical variable." + ) + ) + else: + num_dimensions += len(v.levels) return num_dimensions @staticmethod - def _create_input_preprocessor(domain): + def _create_input_preprocessor(domain, **kwargs): """Create feature preprocessors """ transformers = [] # Numeric transforms @@ -352,23 +400,45 @@ def _create_input_preprocessor(domain): transformers.append(("num", StandardScaler(), numeric_features)) # Categorical transforms + descriptors_features = kwargs.get("descriptors_features", []) categorical_features = [ - v.name for v in domain.input_variables if v.variable_type == "categorical" + v.name + for v in domain.input_variables + if (v.variable_type == "categorical") + and (v.name not in descriptors_features) ] categories = [ - v.levels for v in domain.input_variables if v.variable_type == "categorical" + v.levels + for v in domain.input_variables + if (v.variable_type == "categorical") + and (v.name not in descriptors_features) ] if len(categorical_features) > 0: transformers.append( ("cat", OneHotEncoder(categories=categories), categorical_features) ) + if len(descriptors_features) > 0: + datasets = [ + v.ds for v in domain.input_variables if v.name in descriptors_features + ] + transformers.append( + ( + "des", + DescriptorEncoder(datasets=datasets), + descriptors_features, + ) + ) # 
Create preprocessor if len(numeric_features) == 0 and len(categorical_features) > 0: raise DomainError( "With only categorical features, you can do a simple lookup." ) - elif len(numeric_features) > 0 or len(categorical_features) > 0: + elif ( + len(numeric_features) > 0 + or len(categorical_features) > 0 + or len(descriptors_features) > 0 + ): preprocessor = ColumnTransformer(transformers=transformers) else: raise DomainError( @@ -406,6 +476,7 @@ def to_dict(self, **experiment_params): "regressor_name": str(self.regressor.__name__), "n_features": self.n_features, "n_examples": self.n_examples, + "descriptors_features": self.descriptors_features, "output_variable_names": self.output_variable_names, "predictors": predictors, } @@ -415,7 +486,6 @@ def to_dict(self, **experiment_params): @staticmethod def _create_predictor_dict(predictor): num = predictor.regressor_.named_steps.preprocessor.named_transformers_.num - cat = predictor.regressor_.named_steps.preprocessor.named_transformers_.cat input_preprocessor = { # Numerical "num": { @@ -424,7 +494,7 @@ def _create_predictor_dict(predictor): "scale_": num.scale_, "n_samples_seen_": num.n_samples_seen_, } - # Categorical is automatic from the domain + # Categorical and descriptors is automatic from the domain / kwargs } out = predictor.transformer_ output_preprocessor = { @@ -467,6 +537,7 @@ def from_dict(cls, d, **kwargs): params["n_features"], params["n_examples"], output_variable_names=params["output_variable_names"], + descriptors_features=params["descriptors_features"], ) for predictor_params in predictors_params ] @@ -498,7 +569,6 @@ def from_dict(cls, d, **kwargs): def set_predictor_params(predictor, predictor_params): # Input transforms num = predictor.regressor_.named_steps.preprocessor.named_transformers_.num - cat = predictor.regressor_.named_steps.preprocessor.named_transformers_.cat input_preprocessor = RecursiveNamespace( **predictor_params["input_preprocessor"] ) @@ -683,6 +753,7 @@ def make_parity_plot( 
min = np.min(np.concatenate([y_train, y_train_pred])) max = np.max(np.concatenate([y_train, y_train_pred])) ax.plot([min, max], [min, max], c="#747378") + # Scores handles = [] r2_train = r2_score(y_train, y_train_pred) @@ -711,9 +782,109 @@ def make_parity_plot( def numpy_to_tensor(X): """Convert datasets into """ + if issparse(X): + X = X.todense() return torch.tensor(X).float() +class DescriptorEncoder(StandardScaler): + """ + Convert categorical variables to descriptors. + + Parameters + ----------- + datasets : list of DataSet + The dataset in datasets[i] should contain an index + matching the label in column i of X. + copy : bool, default=True + If False, try to avoid a copy and do inplace scaling instead. + This is not guaranteed to always work inplace; e.g. if the data is + not a NumPy array or scipy.sparse CSR matrix, a copy may still be + returned. + with_mean : bool, default=True + If True, center the data before scaling. + This does not work (and will raise an exception) when attempted on + sparse matrices, because centering them entails building a dense + matrix which in common use cases is likely to be too large to fit in + memory. + with_std : bool, default=True + If True, scale the data to unit variance (or equivalently, + unit standard deviation). + + Attributes + ---------- + scale_ : ndarray of shape (n_features,) or None + Per feature relative scaling of the data to achieve zero mean and unit + variance. Generally this is calculated using `np.sqrt(var_)`. If a + variance is zero, we can't achieve unit variance, and the data is left + as-is, giving a scaling factor of 1. `scale_` is equal to `None` + when `with_std=False`. + .. versionadded:: 0.17 + *scale_* + mean_ : ndarray of shape (n_features,) or None + The mean value for each feature in the training set. + Equal to ``None`` when ``with_mean=False``. + var_ : ndarray of shape (n_features,) or None + The variance for each feature in the training set. Used to compute + `scale_`. 
Equal to ``None`` when ``with_std=False``. + n_samples_seen_ : int or ndarray of shape (n_features,) + The number of samples processed by the estimator for each feature. + If there are no missing samples, the ``n_samples_seen`` will be an + integer, otherwise it will be an array of dtype int. If + `sample_weights` are used it will be a float (if no missing data) + or an array of dtype float that sums the weights seen so far. + Will be reset on new calls to fit, but increments across + ``partial_fit`` calls. + + + """ + + @_deprecate_positional_args + def __init__(self, datasets, *, copy=True, with_mean=True, with_std=True): + self.datasets = datasets + super().__init__(copy=copy, with_mean=with_mean, with_std=with_std) + + def fit(self, X, y=None, sample_weight=None): + X_new = self._cat_to_descriptor(X, self.datasets) + return super().fit(X_new, y=y, sample_weight=sample_weight) + + def transform(self, X, copy=None): + X_new = self._cat_to_descriptor(X, self.datasets) + return super().transform(X_new, copy=copy) + + def inverse_transform(self, X, copy=None): + raise NotImplementedError( + "Inverse transform not implemented for DescriptorsEncoder" + ) + + @staticmethod + def _cat_to_descriptor(X, datasets): + """Convert categorical variables into descriptors + + Parameters + ---------- + X : np.ndarray + An array of labels to be converted to descriptors + datasets : list of DataSet + The dataset in datasets[i] should contain an index + matching the label in column i of X. 
+ """ + + n_descriptors = sum([len(ds.data_columns) for ds in datasets]) + X_new = np.zeros([X.shape[0], n_descriptors]) + col = 0 + for i, ds in enumerate(datasets): + if type(X) == pd.DataFrame: + labels = X.iloc[:, i] + else: + labels = X[:, i] + descriptors = ds.loc[labels, :].data_to_numpy() + n_descriptors = descriptors.shape[1] + X_new[:, col : col + n_descriptors] = descriptors + col += n_descriptors + return X_new + + class UpdatedTransformedTargetRegressor(TransformedTargetRegressor): def fit(self, X, y, **fit_params): """Fit the model according to the given training data. @@ -1192,6 +1363,22 @@ def get_model_path(): def get_pretrained_reizman_suzuki_emulator(case=1): + """Get the pretrained Reizman Suzuki Emulator + + Parameters + ---------- + case: int, optional, default=1 + Reizman et al. (2016) reported experimental data for 4 different + cases. Each case has a different set of substrates but the + same possible catalysts. Please see their paper for more information on the cases. + + + Examples + --------- + + >>> exp = get_pretrained_reizman_suzuki_emulator(case=1) + + """ model_name = f"reizman_suzuki_case_{case}" model_path = get_model_path() / model_name if not model_path.exists(): @@ -1209,6 +1396,8 @@ class ReizmanSuzukiEmulator(ExperimentalEmulator): similar to Reizman et al. (2016). Experimental outcomes are based on an emulator that is trained on the experimental data published by Reizman et al. + You should use get_pretrained_reizman_suzuki_emulator to get a pretrained version. + Parameters ---------- case: int, optional, default=1 @@ -1224,6 +1413,7 @@ class ReizmanSuzukiEmulator(ExperimentalEmulator): ----- This benchmark is based on data from [Reizman]_ et al. + References ---------- .. [Reizman] B. J. Reizman et al., React. Chem. Eng., 2016, 1, 658–666. 
@@ -1310,15 +1500,34 @@ def to_dict(self): return super().to_dict(**experiment_params) -def get_pretrained_baumgartner_cc_emulator(include_cost=False): +def get_pretrained_baumgartner_cc_emulator(include_cost=False, use_descriptors=False): + """Get a pretrained BaumgartnerCrossCouplingEmulator + + Parameters + ---------- + include_cost : bool, optional + Include minimization of cost as an extra objective. Cost is calculated + as a deterministic function of the inputs (i.e., no model is trained). + Defaults to False. + use_descriptors : bool, optional + Use descriptors for the catalyst and base instead of one-hot encoding (defaults to False). + The descriptors have been pre-calculated using COSMO-RS. To only use descriptors with + a single feature, pass descriptors_features a list where + the only item is the name of the desired categorical variable. + + """ model_name = "baumgartner_aniline_cn_crosscoupling" model_path = get_model_path() / model_name if not model_path.exists(): raise NotADirectoryError("Could not initialize from expected path.") data_path = get_data_path() ds = DataSet.read_csv(data_path / f"{model_name}.csv") - exp = BaumgartnerCrossCouplingEmulator.load(model_path, dataset=ds) - + exp = BaumgartnerCrossCouplingEmulator.load( + model_path, + dataset=ds, + include_cost=include_cost, + use_descriptors=use_descriptors, + ) return exp @@ -1335,12 +1544,19 @@ class BaumgartnerCrossCouplingEmulator(ExperimentalEmulator): The categorical variables (catalyst and base) contain descriptors calculated using COSMO-RS. Specifically, the descriptors are the first two sigma moments. + To use the pretrained version, call get_pretrained_baumgartner_cc_emulator + Parameters ---------- include_cost : bool, optional Include minimization of cost as an extra objective. Cost is calculated as a deterministic function of the inputs (i.e., no model is trained). Defaults to False. 
+ use_descriptors : bool, optional + Use descriptors for the catalyst and base instead of one-hot encoding (defaults to False). + The descriptors have been pre-calculated using COSMO-RS. To only use descriptors with + a single feature, pass descriptors_features a list where + the only item is the name of the desired categorical variable. Examples -------- @@ -1358,12 +1574,14 @@ class BaumgartnerCrossCouplingEmulator(ExperimentalEmulator): """ - def __init__(self, include_cost=False, **kwargs): + def __init__(self, include_cost=False, use_descriptors=False, **kwargs): # TODO: make it possible to select model based on one-hot encoding or descriptors model_name = kwargs.pop("model_name", "baumgartner_aniline_cn_crosscoupling") self.include_cost = include_cost + if use_descriptors: + descriptors_features = ["catalyst", "base"] + kwargs["descriptors_features"] = descriptors_features domain = kwargs.pop("domain", self.setup_domain(self.include_cost)) - data_path = get_data_path() super().__init__(model_name, domain, **kwargs) @staticmethod @@ -1446,7 +1664,7 @@ def setup_domain(include_cost=False): return domain @classmethod - def load(cls, save_dir, **kwargs): + def load(cls, save_dir, include_cost=False, use_descriptors=False, **kwargs): """Load all the essential parameters of the BaumgartnerCrossCouplingEmulator from disc @@ -1457,7 +1675,13 @@ def load(cls, save_dir, **kwargs): """ model_name = "baumgartner_aniline_cn_crosscoupling" - return super().load(model_name, save_dir, **kwargs) + save_dir = pathlib.Path(save_dir) + with open(save_dir / f"{model_name}.json", "r") as f: + d = json.load(f) + d["experiment_params"]["include_cost"] = include_cost + exp = ExperimentalEmulator.from_dict(d, **kwargs) + exp.load_regressor(save_dir) + return exp def _run(self, conditions, **kwargs): conditions, _ = super()._run(conditions=conditions, **kwargs) diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling.json 
b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling.json index 9a0c7f2f..bff08445 100644 --- a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling.json +++ b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling.json @@ -1 +1,366 @@ -{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type", "units": null, "levels": ["tBuXPhos", "tBuBrettPhos", "AlPhos"], "ds": {"index": ["tBuXPhos", "tBuBrettPhos", "AlPhos"], "columns": [["area_cat", "DATA"], ["M2_cat", "DATA"]], "data": [[460.7543, 67.2057], [518.8408, 89.8738], [819.933, 129.0808]]}}, {"type": "CategoricalVariable", "is_objective": false, "name": "base", "description": "Base", "units": null, "levels": ["DBU", "BTMG", "TMG", "TEA"], "ds": {"index": ["TEA", "TMG", "BTMG", "DBU"], "columns": [["area", "DATA"], ["M2", "DATA"]], "data": [[162.2992, 25.8165], [165.5447, 81.4847], [227.3523, 30.554], [192.4693, 59.8367]]}}, {"type": "ContinuousVariable", "is_objective": false, "name": "base_equivalents", "description": "Base equivalents", "units": null, "bounds": [1.0, 2.5]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 100.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "residence time in seconds (s)", "units": null, "bounds": [60.0, 1800.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 1.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["base", "DATA"], ["base_equivalents", "DATA"], ["temperature", "DATA"], ["t_res", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], 
"data": []}, "experiment_params": {"model_name": "baumgartner_aniline_cn_crosscoupling", "regressor_name": "ANNRegressor", "n_features": 10, "n_examples": 96, "output_variable_names": ["yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [1.6559957171333333, 69.63333333333334, 675.2387380961666], "var_": [0.24093575016750415, 906.7308888888889, 253625.04696145264], "scale_": [0.4908520654611776, 30.11197251740392, 503.6120004144586], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [0.5805532822851092], "var_": [0.1785550681951766], "scale_": [0.42255776906261777], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [1.683371605967213, 74.62295081967213, 715.8182047760656], "var_": [0.24120375329778332, 866.5532437516797, 223573.2988868711], "scale_": [0.49112498745002103, 29.43727643230059, 472.83538243967223], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.6254036210660563], "var_": [0.16832977557768586], "scale_": [0.4102801184284779], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [1.6615540930327868, 69.30163934426228, 681.7872910763934], "var_": [0.24927125858022928, 924.5657350174685, 269346.5438305566], "scale_": [0.49927072674074263, 30.40667254103067, 518.9860728676219], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.5703947171110844], "var_": [0.18538124205880477], "scale_": [0.43055922015305254], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [1.628605248, 72.78196721311477, 735.1206038822951], "var_": [0.24907596673949953, 907.5027895726955, 244282.01121683227], "scale_": [0.4990751113204299, 30.124786963108892, 494.24893648528194], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.6131021287750269], "var_": [0.17856050759065348], "scale_": [0.4225642052879698], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [1.6073154240819671, 73.0311475409836, 743.0736161352461], "var_": [0.2525966705755136, 848.4237839290512, 
251362.03663802444], "scale_": [0.5025899626688873, 29.12771504819853, 501.3601865306263], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.564398883872467], "var_": [0.18049929808199283], "scale_": [0.4248520896523787], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file +{ + "domain" : [ + { + "type" : "CategoricalVariable", + "is_objective": false, + "name" : "catalyst", + "description" : "Catalyst type", + "units" : null, + "levels" : [ + "tBuXPhos", + "tBuBrettPhos", + "AlPhos" + ], + "ds" : { + "index" : [ + "tBuXPhos", + "tBuBrettPhos", + "AlPhos" + ], + "columns": [ + [ + "area_cat", + "DATA" + ], + [ + "M2_cat", + "DATA" + ] + ], + "data" : [ + [ + 460.7543, + 67.2057 + ], + [ + 518.8408, + 89.8738 + ], + [ + 819.933, + 129.0808 + ] + ] + } + }, + { + "type" : "CategoricalVariable", + "is_objective": false, + "name" : "base", + "description" : "Base", + "units" : null, + "levels" : [ + "DBU", + "BTMG", + "TMG", + "TEA" + ], + "ds" : { + "index" : [ + "TEA", + "TMG", + "BTMG", + "DBU" + ], + "columns": [ + [ + "area", + "DATA" + ], + [ + "M2", + "DATA" + ] + ], + "data" : [ + [ + 162.2992, + 25.8165 + ], + [ + 165.5447, + 81.4847 + ], + [ + 227.3523, + 30.554 + ], + [ + 192.4693, + 59.8367 + ] + ] + } + }, + { + "type" : "ContinuousVariable", + "is_objective": false, + "name" : "base_equivalents", + "description" : "Base equivalents", + "units" : null, + "bounds" : [ + 1.0, + 2.5 + ] + }, + { + "type" : "ContinuousVariable", + "is_objective": false, + "name" : "temperature", + "description" : "Temperature in degrees Celsius (\u00baC)", + "units" : null, + "bounds" : [ + 30.0, + 100.0 + ] + }, + { + "type" : "ContinuousVariable", + "is_objective": false, + "name" : "t_res", + "description" : "residence time in seconds (s)", + "units" : null, + "bounds" : [ + 60.0, + 1800.0 + ] + }, + { + "type" : "ContinuousVariable", + "is_objective": true, + "name" : "yield", + "description" : "Yield", + "units" : null, + "bounds" : [ + 0.0, + 
1.0 + ] + } + ], + "name" : "ExperimentalEmulator", + "data" : { + "index" : [], + "columns": [ + [ + "catalyst", + "DATA" + ], + [ + "base", + "DATA" + ], + [ + "base_equivalents", + "DATA" + ], + [ + "temperature", + "DATA" + ], + [ + "t_res", + "DATA" + ], + [ + "yield", + "DATA" + ], + [ + "computation_t", + "METADATA" + ], + [ + "experiment_t", + "METADATA" + ], + [ + "strategy", + "METADATA" + ] + ], + "data" : [] + }, + "experiment_params": { + "model_name" : "baumgartner_aniline_cn_crosscoupling", + "regressor_name" : "ANNRegressor", + "n_features" : 10, + "n_examples" : 96, + "descriptors_features" : [], + "output_variable_names": [ + "yield" + ], + "predictors" : [ + { + "input_preprocessor" : { + "num": { + "mean_" : [ + 1.6559957171333333, + 69.63333333333334, + 675.2387380961666 + ], + "var_" : [ + 0.24093575016750415, + 906.7308888888889, + 253625.04696145264 + ], + "scale_" : [ + 0.4908520654611776, + 30.11197251740392, + 503.6120004144586 + ], + "n_samples_seen_": 60 + } + }, + "output_preprocessor": { + "mean_" : [ + 0.5805532822851092 + ], + "var_" : [ + 0.1785550681951766 + ], + "scale_" : [ + 0.42255776906261777 + ], + "n_samples_seen_": 60 + } + }, + { + "input_preprocessor" : { + "num": { + "mean_" : [ + 1.683371605967213, + 74.62295081967213, + 715.8182047760656 + ], + "var_" : [ + 0.24120375329778332, + 866.5532437516797, + 223573.2988868711 + ], + "scale_" : [ + 0.49112498745002103, + 29.43727643230059, + 472.83538243967223 + ], + "n_samples_seen_": 61 + } + }, + "output_preprocessor": { + "mean_" : [ + 0.6254036210660563 + ], + "var_" : [ + 0.16832977557768586 + ], + "scale_" : [ + 0.4102801184284779 + ], + "n_samples_seen_": 61 + } + }, + { + "input_preprocessor" : { + "num": { + "mean_" : [ + 1.6615540930327868, + 69.30163934426228, + 681.7872910763934 + ], + "var_" : [ + 0.24927125858022928, + 924.5657350174685, + 269346.5438305566 + ], + "scale_" : [ + 0.49927072674074263, + 30.40667254103067, + 518.9860728676219 + ], + 
"n_samples_seen_": 61 + } + }, + "output_preprocessor": { + "mean_" : [ + 0.5703947171110844 + ], + "var_" : [ + 0.18538124205880477 + ], + "scale_" : [ + 0.43055922015305254 + ], + "n_samples_seen_": 61 + } + }, + { + "input_preprocessor" : { + "num": { + "mean_" : [ + 1.628605248, + 72.78196721311477, + 735.1206038822951 + ], + "var_" : [ + 0.24907596673949953, + 907.5027895726955, + 244282.01121683227 + ], + "scale_" : [ + 0.4990751113204299, + 30.124786963108892, + 494.24893648528194 + ], + "n_samples_seen_": 61 + } + }, + "output_preprocessor": { + "mean_" : [ + 0.6131021287750269 + ], + "var_" : [ + 0.17856050759065348 + ], + "scale_" : [ + 0.4225642052879698 + ], + "n_samples_seen_": 61 + } + }, + { + "input_preprocessor" : { + "num": { + "mean_" : [ + 1.6073154240819671, + 73.0311475409836, + 743.0736161352461 + ], + "var_" : [ + 0.2525966705755136, + 848.4237839290512, + 251362.03663802444 + ], + "scale_" : [ + 0.5025899626688873, + 29.12771504819853, + 501.3601865306263 + ], + "n_samples_seen_": 61 + } + }, + "output_preprocessor": { + "mean_" : [ + 0.564398883872467 + ], + "var_" : [ + 0.18049929808199283 + ], + "scale_" : [ + 0.4248520896523787 + ], + "n_samples_seen_": 61 + } + } + ] + }, + "extras" : [] +} \ No newline at end of file diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_0.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_0.pt index 241fd8e0..eeff0217 100644 Binary files a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_0.pt and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_0.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_1.pt 
b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_1.pt index 8cc71c18..295ec4e6 100644 Binary files a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_1.pt and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_1.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_2.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_2.pt index deca7a13..02b3d93b 100644 Binary files a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_2.pt and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_2.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_3.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_3.pt index 8136278e..3149d6b9 100644 Binary files a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_3.pt and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_3.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_4.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_4.pt index 3f2577e5..7fc4cd66 100644 Binary files a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_4.pt and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling/baumgartner_aniline_cn_crosscoupling_predictor_4.pt differ 
diff --git a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1.json b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1.json index e3ee6557..1ba0ff44 100644 --- a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1.json +++ b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1.json @@ -1 +1 @@ -{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_1", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 96, "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": 
[354.02166666666665, 93.35666666666665, 1.5370833333333331], "var_": [62364.78669722223, 790.510788888889, 0.630418109722222], "scale_": [249.72942697492067, 28.116023703377564, 0.7939887340020776], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [30.31333336258928, 41.88166637221972], "var_": [635.5938398516427, 1069.4477598033263], "scale_": [25.210986491044785, 32.70241214044197], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [326.60819672131146, 92.32622950819672, 1.5993606557377045], "var_": [61304.89124428917, 841.8547218489653, 0.6307227223864553], "scale_": [247.5982456405723, 29.014732841247483, 0.7941805351344587], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [28.045901633676934, 42.237704714790716], "var_": [608.6952533265118, 1152.2032867646853], "scale_": [24.671750106680957, 33.94412006172329], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [331.3311475409836, 90.84918032786884, 1.631934426229508], "var_": [63160.69001343723, 853.8743026068263, 0.6774574383230314], "scale_": [251.3179062729857, 29.22112767513989, 0.8230780268741423], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [26.267213071223164, 38.83934418881526], "var_": [548.3861460177625, 1099.3515893069962], "scale_": [23.41764603921074, 33.156471303608235], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [346.4393442622951, 95.5344262295082, 1.616327868852459], "var_": [60173.75156678312, 662.3563558183281, 0.6452992039774254], "scale_": [245.30338678212968, 25.736284809939605, 0.8033051748728035], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [30.268852538016976, 46.137704603007585], "var_": [631.0283785378102, 1125.974178376745], "scale_": [25.120278233686232, 33.55553871385088], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [348.5508196721312, 90.172131147541, 1.6511147540983604], "var_": [62814.376597688795, 855.4459446385379, 0.6407999048642837], "scale_": 
[250.6279645165096, 29.24800753279679, 0.8004997844248828], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [27.61475420022597, 39.5245899352871], "var_": [627.0747155828524, 1018.971371868839], "scale_": [25.041459933135936, 31.921330985233666], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file +{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_1", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 96, "descriptors_features": [], "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [354.02166666666665, 
93.35666666666665, 1.5370833333333331], "var_": [62364.78669722223, 790.510788888889, 0.630418109722222], "scale_": [249.72942697492067, 28.116023703377564, 0.7939887340020776], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [30.31333336258928, 41.88166637221972], "var_": [635.5938398516427, 1069.4477598033263], "scale_": [25.210986491044785, 32.70241214044197], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [326.60819672131146, 92.32622950819672, 1.5993606557377045], "var_": [61304.89124428917, 841.8547218489653, 0.6307227223864553], "scale_": [247.5982456405723, 29.014732841247483, 0.7941805351344587], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [28.045901633676934, 42.237704714790716], "var_": [608.6952533265118, 1152.2032867646853], "scale_": [24.671750106680957, 33.94412006172329], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [331.3311475409836, 90.84918032786884, 1.631934426229508], "var_": [63160.69001343723, 853.8743026068263, 0.6774574383230314], "scale_": [251.3179062729857, 29.22112767513989, 0.8230780268741423], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [26.267213071223164, 38.83934418881526], "var_": [548.3861460177625, 1099.3515893069962], "scale_": [23.41764603921074, 33.156471303608235], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [346.4393442622951, 95.5344262295082, 1.616327868852459], "var_": [60173.75156678312, 662.3563558183281, 0.6452992039774254], "scale_": [245.30338678212968, 25.736284809939605, 0.8033051748728035], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [30.268852538016976, 46.137704603007585], "var_": [631.0283785378102, 1125.974178376745], "scale_": [25.120278233686232, 33.55553871385088], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [348.5508196721312, 90.172131147541, 1.6511147540983604], "var_": [62814.376597688795, 855.4459446385379, 0.6407999048642837], "scale_": [250.6279645165096, 
29.24800753279679, 0.8004997844248828], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [27.61475420022597, 39.5245899352871], "var_": [627.0747155828524, 1018.971371868839], "scale_": [25.041459933135936, 31.921330985233666], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file diff --git a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_0.pt b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_0.pt index 29132ecb..7596423e 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_0.pt and b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_0.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_1.pt b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_1.pt index ac196fd1..60089e32 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_1.pt and b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_1.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_2.pt b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_2.pt index c0a4d249..69d01501 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_2.pt and b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_2.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_3.pt b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_3.pt index 8d74b0b9..b3b6433f 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_3.pt and b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_3.pt differ diff --git 
a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_4.pt b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_4.pt index dfad7dbb..406e5de5 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_4.pt and b/summit/benchmarks/models/reizman_suzuki_case_1/reizman_suzuki_case_1_predictor_4.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2.json b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2.json index 9eef1605..3b7ded58 100644 --- a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2.json +++ b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2.json @@ -1 +1 @@ -{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], 
["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_2", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 96, "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [388.87166666666667, 85.14333333333335, 2.1326333333333336], "var_": [51629.748030555544, 1059.4557888888892, 0.4159331655555555], "scale_": [227.22180359850051, 32.54928246350277, 0.6449288065791103], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [6.178333353375395, 13.89833338521421], "var_": [38.187364444819586, 204.10316777823803], "scale_": [6.179592579193193, 14.286467995212744], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [376.4754098360656, 85.4, 2.011622950819673], "var_": [53202.64152647137, 1135.808524590164, 0.5176740053748993], "scale_": [230.65697805718207, 33.70175847919755, 0.7194956604281219], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [5.442622941659122, 11.96721318461856], "var_": [34.401789025483716, 181.47925989085533], "scale_": [5.865303830619835, 13.471423825670964], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [370.2770491803279, 87.06885245901641, 2.053114754098361], "var_": [52169.36930932545, 1016.805751142166, 0.513469937651169], "scale_": [228.40614989383593, 31.88739172685916, 0.7165681667860839], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [5.681967221689029, 12.68852458210265], "var_": [34.88410335211236, 185.96397087625303], "scale_": [5.9062766064681025, 13.636860741250276], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [408.0852459016394, 83.91147540983607, 2.1295245901639346], "var_": [49406.179946251, 1074.2885568395593, 0.4012853969363074], "scale_": [222.27500972050592, 32.77634141937686, 0.6334709124626855], "n_samples_seen_": 61}}, 
"output_preprocessor": {"mean_": [6.513114772004182, 14.72131149825014], "var_": [38.77622134464442, 216.8098759612099], "scale_": [6.227055591902518, 14.724465218173796], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [365.5311475409836, 89.45245901639345, 2.0956065573770495], "var_": [51459.27230851923, 890.1831496909433, 0.4506949599570008], "scale_": [226.84636278441678, 29.835937218243092, 0.6713381859815519], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [5.616393469396185, 12.532786916758193], "var_": [32.921700561709216, 176.46679211664394], "scale_": [5.737743507835568, 13.284080401617718], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file +{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], 
["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_2", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 96, "descriptors_features": [], "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [388.87166666666667, 85.14333333333335, 2.1326333333333336], "var_": [51629.748030555544, 1059.4557888888892, 0.4159331655555555], "scale_": [227.22180359850051, 32.54928246350277, 0.6449288065791103], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [6.178333353375395, 13.89833338521421], "var_": [38.187364444819586, 204.10316777823803], "scale_": [6.179592579193193, 14.286467995212744], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [376.4754098360656, 85.4, 2.011622950819673], "var_": [53202.64152647137, 1135.808524590164, 0.5176740053748993], "scale_": [230.65697805718207, 33.70175847919755, 0.7194956604281219], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [5.442622941659122, 11.96721318461856], "var_": [34.401789025483716, 181.47925989085533], "scale_": [5.865303830619835, 13.471423825670964], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [370.2770491803279, 87.06885245901641, 2.053114754098361], "var_": [52169.36930932545, 1016.805751142166, 0.513469937651169], "scale_": [228.40614989383593, 31.88739172685916, 0.7165681667860839], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [5.681967221689029, 12.68852458210265], "var_": [34.88410335211236, 185.96397087625303], "scale_": [5.9062766064681025, 13.636860741250276], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [408.0852459016394, 83.91147540983607, 2.1295245901639346], "var_": [49406.179946251, 1074.2885568395593, 0.4012853969363074], "scale_": [222.27500972050592, 32.77634141937686, 0.6334709124626855], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": 
[6.513114772004182, 14.72131149825014], "var_": [38.77622134464442, 216.8098759612099], "scale_": [6.227055591902518, 14.724465218173796], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [365.5311475409836, 89.45245901639345, 2.0956065573770495], "var_": [51459.27230851923, 890.1831496909433, 0.4506949599570008], "scale_": [226.84636278441678, 29.835937218243092, 0.6713381859815519], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [5.616393469396185, 12.532786916758193], "var_": [32.921700561709216, 176.46679211664394], "scale_": [5.737743507835568, 13.284080401617718], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file diff --git a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_0.pt b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_0.pt index cf80d5b8..4eceaf8d 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_0.pt and b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_0.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_1.pt b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_1.pt index 1ac629e7..a2595c71 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_1.pt and b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_1.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_2.pt b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_2.pt index a487776d..da4a0965 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_2.pt and b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_2.pt differ diff --git 
a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_3.pt b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_3.pt index 192c9f4c..65f25a09 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_3.pt and b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_3.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_4.pt b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_4.pt index ca42c899..79c04d6a 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_4.pt and b/summit/benchmarks/models/reizman_suzuki_case_2/reizman_suzuki_case_2_predictor_4.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3.json b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3.json index 4dc09f72..06e29487 100644 --- a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3.json +++ b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3.json @@ -1 +1 @@ -{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": 
"ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_3", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 96, "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [251.72833333333335, 97.14833333333334, 1.566916666666667], "var_": [47050.376030555555, 657.3611638888889, 0.4318778097222222], "scale_": [216.91098642197807, 25.63905544065321, 0.6571741091386835], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [35.49833326935768, 56.09333371246854], "var_": [681.0345249872655, 1400.3602732022603], "scale_": [26.096638193209206, 37.42138791122345], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [242.61147540983606, 96.20655737704918, 1.6528688524590167], "var_": [48523.75183552809, 745.9760225745767, 0.4709996221445848], "scale_": [220.28107461951444, 27.312561626009686, 0.6862941221842023], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [34.87868853548511, 57.49016414607158], "var_": [584.9813694724904, 1257.7133901861848], "scale_": [24.186388103073398, 35.46425510547465], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [235.5672131147541, 93.20819672131147, 1.593377049180328], "var_": [46069.8372856759, 826.8020639613006, 0.47456813652244023], "scale_": [214.63885315961764, 28.754166027921947, 0.6888890596623235], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": 
[30.559016146254343, 48.531147868662586], "var_": [645.0237283999803, 1387.2529703124624], "scale_": [25.39731734652265, 37.2458450073624], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [233.6032786885246, 96.62622950819672, 1.6115737704918034], "var_": [43002.469497446924, 662.940295619457, 0.4058188019349637], "scale_": [207.3703679348786, 25.747626990063704, 0.6370390898013746], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [35.72950815079642, 55.47704955007209], "var_": [706.310614824295, 1308.7831021605946], "scale_": [26.57650493997085, 36.17710743219522], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [236.55737704918036, 94.43606557377048, 1.6498032786885244], "var_": [45848.09654393981, 819.1524697661919, 0.4757863875302337], "scale_": [214.12168629996313, 28.6208397809392, 0.6897727071508656], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [33.09672119395166, 53.89836108489115], "var_": [608.3327577677784, 1302.892620258644], "scale_": [24.6644026436437, 36.09560389103698], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file +{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles 
product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_3", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 96, "descriptors_features": [], "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [251.72833333333335, 97.14833333333334, 1.566916666666667], "var_": [47050.376030555555, 657.3611638888889, 0.4318778097222222], "scale_": [216.91098642197807, 25.63905544065321, 0.6571741091386835], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [35.49833326935768, 56.09333371246854], "var_": [681.0345249872655, 1400.3602732022603], "scale_": [26.096638193209206, 37.42138791122345], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [242.61147540983606, 96.20655737704918, 1.6528688524590167], "var_": [48523.75183552809, 745.9760225745767, 0.4709996221445848], "scale_": [220.28107461951444, 27.312561626009686, 0.6862941221842023], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [34.87868853548511, 57.49016414607158], "var_": [584.9813694724904, 1257.7133901861848], "scale_": [24.186388103073398, 35.46425510547465], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [235.5672131147541, 93.20819672131147, 1.593377049180328], "var_": [46069.8372856759, 826.8020639613006, 0.47456813652244023], "scale_": [214.63885315961764, 28.754166027921947, 0.6888890596623235], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [30.559016146254343, 
48.531147868662586], "var_": [645.0237283999803, 1387.2529703124624], "scale_": [25.39731734652265, 37.2458450073624], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [233.6032786885246, 96.62622950819672, 1.6115737704918034], "var_": [43002.469497446924, 662.940295619457, 0.4058188019349637], "scale_": [207.3703679348786, 25.747626990063704, 0.6370390898013746], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [35.72950815079642, 55.47704955007209], "var_": [706.310614824295, 1308.7831021605946], "scale_": [26.57650493997085, 36.17710743219522], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [236.55737704918036, 94.43606557377048, 1.6498032786885244], "var_": [45848.09654393981, 819.1524697661919, 0.4757863875302337], "scale_": [214.12168629996313, 28.6208397809392, 0.6897727071508656], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [33.09672119395166, 53.89836108489115], "var_": [608.3327577677784, 1302.892620258644], "scale_": [24.6644026436437, 36.09560389103698], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file diff --git a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_0.pt b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_0.pt index ce0e655e..eed8dfce 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_0.pt and b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_0.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_1.pt b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_1.pt index bc464d4c..eea459a6 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_1.pt and b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_1.pt differ diff --git 
a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_2.pt b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_2.pt index fd785742..c6a62e0d 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_2.pt and b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_2.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_3.pt b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_3.pt index d9334bea..f44d0431 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_3.pt and b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_3.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_4.pt b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_4.pt index 0e886ed9..c755a6cf 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_4.pt and b/summit/benchmarks/models/reizman_suzuki_case_3/reizman_suzuki_case_3_predictor_4.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4.json b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4.json index 7acfc8cf..072b8f28 100644 --- a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4.json +++ b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4.json @@ -1 +1 @@ -{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 
600.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_4", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 97, "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [361.3918032786886, 83.57868852459015, 1.6783114754098363], "var_": [51894.334195108844, 538.3793818865896, 0.5491226079011019], "scale_": [227.80327959691195, 23.203003725522038, 0.7410280749749647], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [46.508196917469384, 68.03934463113546], "var_": [965.1315826947083, 914.6086297925623], "scale_": [31.066566960234088, 30.24249708262468], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [328.1016393442623, 85.09344262295082, 1.6505737704918035], "var_": [48394.58081698467, 625.0373340499864, 0.5491268675087343], "scale_": [219.98768333019163, 25.00074666984941, 0.7410309490896682], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [46.657377005111975, 65.44754103485678], "var_": 
[1047.2663762907757, 1006.2438135523288], "scale_": [32.361495272789476, 31.721346338898176], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [347.89193548387095, 83.9274193548387, 1.7433870967741936], "var_": [49269.6665478668, 637.4981191467223, 0.5718076566077004], "scale_": [221.96771510259504, 25.24872509943269, 0.7561796457242819], "n_samples_seen_": 62}}, "output_preprocessor": {"mean_": [41.432258085618095, 63.55322592477164], "var_": [939.911529020917, 1141.7537632219253], "scale_": [30.65797659697908, 33.78984704348224], "n_samples_seen_": 62}}, {"input_preprocessor": {"num": {"mean_": [354.1322580645161, 83.4241935483871, 1.6983064516129034], "var_": [49819.123798126966, 571.1295759625391, 0.5281285996357961], "scale_": [223.2019798257331, 23.8983174295292, 0.7267245693079298], "n_samples_seen_": 62}}, "output_preprocessor": {"mean_": [44.380645274635285, 65.95000013060147], "var_": [961.0954115775323, 1026.6679600015764], "scale_": [31.0015388582169, 32.041659757284364], "n_samples_seen_": 62}}, {"input_preprocessor": {"num": {"mean_": [348.53225806451616, 86.98064516129034, 1.7053709677419355], "var_": [55211.68863683663, 514.798657648283, 0.5502617172216441], "scale_": [234.97167624383292, 22.68917490012105, 0.7417962774385188], "n_samples_seen_": 62}}, "output_preprocessor": {"mean_": [46.59516116447987, 67.53709707745621], "var_": [961.811094079407, 888.484573507381], "scale_": [31.013079403364753, 29.807458353696997], "n_samples_seen_": 62}}]}, "extras": []} \ No newline at end of file +{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type - different ligands", "units": null, "levels": ["P1-L1", "P2-L1", "P1-L2", "P1-L3", "P1-L4", "P1-L5", "P1-L6", "P1-L7"], "ds": null}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "Residence time in seconds (s)", "units": null, "bounds": [60.0, 600.0]}, {"type": "ContinuousVariable", 
"is_objective": false, "name": "temperature", "description": "Reactor temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 110.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "catalyst_loading", "description": "Catalyst loading in mol%", "units": null, "bounds": [0.5, 2.5]}, {"type": "ContinuousVariable", "is_objective": true, "name": "ton", "description": "Turnover number - moles product generated divided by moles catalyst used", "units": null, "bounds": [0.0, 200.0]}, {"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 100.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["t_res", "DATA"], ["temperature", "DATA"], ["catalyst_loading", "DATA"], ["ton", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "reizman_suzuki_case_4", "regressor_name": "ANNRegressor", "n_features": 11, "n_examples": 97, "descriptors_features": [], "output_variable_names": ["ton", "yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [361.3918032786886, 83.57868852459015, 1.6783114754098363], "var_": [51894.334195108844, 538.3793818865896, 0.5491226079011019], "scale_": [227.80327959691195, 23.203003725522038, 0.7410280749749647], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [46.508196917469384, 68.03934463113546], "var_": [965.1315826947083, 914.6086297925623], "scale_": [31.066566960234088, 30.24249708262468], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [328.1016393442623, 85.09344262295082, 1.6505737704918035], "var_": [48394.58081698467, 625.0373340499864, 0.5491268675087343], "scale_": [219.98768333019163, 25.00074666984941, 0.7410309490896682], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [46.657377005111975, 65.44754103485678], "var_": [1047.2663762907757, 
1006.2438135523288], "scale_": [32.361495272789476, 31.721346338898176], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [347.89193548387095, 83.9274193548387, 1.7433870967741936], "var_": [49269.6665478668, 637.4981191467223, 0.5718076566077004], "scale_": [221.96771510259504, 25.24872509943269, 0.7561796457242819], "n_samples_seen_": 62}}, "output_preprocessor": {"mean_": [41.432258085618095, 63.55322592477164], "var_": [939.911529020917, 1141.7537632219253], "scale_": [30.65797659697908, 33.78984704348224], "n_samples_seen_": 62}}, {"input_preprocessor": {"num": {"mean_": [354.1322580645161, 83.4241935483871, 1.6983064516129034], "var_": [49819.123798126966, 571.1295759625391, 0.5281285996357961], "scale_": [223.2019798257331, 23.8983174295292, 0.7267245693079298], "n_samples_seen_": 62}}, "output_preprocessor": {"mean_": [44.380645274635285, 65.95000013060147], "var_": [961.0954115775323, 1026.6679600015764], "scale_": [31.0015388582169, 32.041659757284364], "n_samples_seen_": 62}}, {"input_preprocessor": {"num": {"mean_": [348.53225806451616, 86.98064516129034, 1.7053709677419355], "var_": [55211.68863683663, 514.798657648283, 0.5502617172216441], "scale_": [234.97167624383292, 22.68917490012105, 0.7417962774385188], "n_samples_seen_": 62}}, "output_preprocessor": {"mean_": [46.59516116447987, 67.53709707745621], "var_": [961.811094079407, 888.484573507381], "scale_": [31.013079403364753, 29.807458353696997], "n_samples_seen_": 62}}]}, "extras": []} \ No newline at end of file diff --git a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_0.pt b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_0.pt index 5b2076ec..4ad51272 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_0.pt and b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_0.pt differ diff --git 
a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_1.pt b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_1.pt index 3e04d13f..71e0dbc6 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_1.pt and b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_1.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_2.pt b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_2.pt index f7a09768..948e3ecc 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_2.pt and b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_2.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_3.pt b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_3.pt index cf9f98c5..c9ef64bb 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_3.pt and b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_3.pt differ diff --git a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_4.pt b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_4.pt index baefd629..b4cb6928 100644 Binary files a/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_4.pt and b/summit/benchmarks/models/reizman_suzuki_case_4/reizman_suzuki_case_4_predictor_4.pt differ diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index de541fef..3a3d7262 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -99,9 +99,11 @@ def test_reizman_emulator(show_plots=False): return results -def test_baumgartner_CC_emulator(show_plots=False): +@pytest.mark.parametrize("use_descriptors", [True, False]) 
+@pytest.mark.parametrize("include_cost", [True, False]) +def test_baumgartner_CC_emulator(use_descriptors, include_cost, show_plots=False): """ Test the Baumgartner Cross Coupling emulator""" - b = get_pretrained_baumgartner_cc_emulator() + b = get_pretrained_baumgartner_cc_emulator(use_descriptors) b.parity_plot(include_test=True) if show_plots: plt.show()