From 49a09e4e8d466882e5e2dcdb080b3019509573fa Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Fri, 21 Jun 2024 14:41:16 -0400 Subject: [PATCH 1/9] Hyperparameter optimization example. First version. --- docs/Project.toml | 1 + docs/make.jl | 7 + examples/Opt-ACE-aHfO2/Project.toml | 16 +++ examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 144 ++++++++++++++++++++ 4 files changed, 168 insertions(+) create mode 100644 examples/Opt-ACE-aHfO2/Project.toml create mode 100644 examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl diff --git a/docs/Project.toml b/docs/Project.toml index 603cc361..9c07b862 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -7,6 +7,7 @@ DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" +Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" diff --git a/docs/make.jl b/docs/make.jl index ae25211e..3c29f630 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -48,6 +48,12 @@ examples = [ ] ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) +# Optimization examples +examples = [ + "1 - Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl", +] +opt_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) + # Dimension reduction examples examples = [ "1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl", @@ -72,6 +78,7 @@ makedocs( "How to run the examples" => "how-to-run-the-examples.md", "Basic examples" => basic_examples, "Optimize atomistic data via intelligent subsampling" => ss_examples, + "Optimize interatomic potential models via hyper-paramter optimization" => opt_examples, "Optimize interatomic 
potential models via dimension reduction" => dr_examples, "API" => "api.md"], format = Documenter.HTML(; diff --git a/examples/Opt-ACE-aHfO2/Project.toml b/examples/Opt-ACE-aHfO2/Project.toml new file mode 100644 index 00000000..14b25567 --- /dev/null +++ b/examples/Opt-ACE-aHfO2/Project.toml @@ -0,0 +1,16 @@ +[deps] +AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a" +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6" +Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" +InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04" +IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PotentialLearning = "82b0a93c-c2e3-44bc-a418-f0f89b0ae5c2" +ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" +UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a" diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl new file mode 100644 index 00000000..d84900f1 --- /dev/null +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -0,0 +1,144 @@ +# # Optimize ACE hyper-parameters: minimize force time and fitting error. + +# ## a. Load packages, define paths, and create experiment folder. + +# Load packages. +using AtomsBase, InteratomicPotentials, PotentialLearning +using Unitful, UnitfulAtomic +using LinearAlgebra, Random, DisplayAs +using DataFrames, Hyperopt + +# Define paths. +path = joinpath(dirname(pathof(PotentialLearning)), "../examples/Opt-ACE-aHfO2") +ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz" +res_path = "$path/results/"; + +# Load utility functions. +include("$path/../utils/utils.jl") + +# Create experiment folder. 
+run(`mkdir -p $res_path`); + +# ## b. Load atomistic dataset and split it into training and test. + +# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) +ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example. + +# Split atomistic dataset into training and test +n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. +conf_train, conf_test = split(ds, n_train, n_test) + + +# NEW: utilizty functions ####################################################### + +function estimate_time(confs, iap; batch_size = 30) + if length(confs) < batch_size + batch_size = length(confs) + end + random_selector = RandomSelector(length(confs), batch_size) + inds = PotentialLearning.get_random_subset(random_selector) + time = @elapsed begin + f_descr = compute_force_descriptors(confs[inds], + iap.basis, + pbar = false) + ds = DataSet(confs[inds] .+ f_descr) + f_pred = get_all_forces(ds, iap) + end + n_atoms = sum(length(get_system(c)) for c in confs[inds]) + return time / n_atoms +end + +function get_results(ho) + column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...)) + rows = [[values(r[2])..., p...] 
for (r, p) in zip(ho.results, ho.history)] + results = DataFrame([Any[] for _ in 1:length(column_names)], column_names) + [push!(results, r) for r in rows] + return sort!(results) +end + +function plot_err_time(ho) + error = [r[2][:error] for r in ho.results] + times = [r[2][:time_us] for r in ho.results] + scatter(times, + error, + label = "", + xaxis = "Time per force per atom | µs", + yaxis = "we MSE(E, E') + wf MSE(F, F')") +end + + +# Hyperparamter optimization ################################################### +e_mae_max = 0.05 +f_mae_max = 0.05 +weights = [1.0, 1.0] +intercept = true + +ho = Hyperoptimizer(10, + species = [[:Hf, :O]], + body_order = [2, 3, 4], + polynomial_degree = [3, 4, 5], + rcutoff = [4.5, 5.0, 5.5], + wL = [0.5, 1.0, 1.5], + csp = [0.5, 1.0, 1.5], + r0 = [0.5, 1.0, 1.5]) + +for (i, species, body_order, polynomial_degree, rcutoff, wL, csp, r0) in ho + basis = ACE(species = species, + body_order = body_order, + polynomial_degree = polynomial_degree, + rcutoff = rcutoff, + wL = wL, + csp = csp, + r0 = r0) + iap = LBasisPotential(basis) + e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) + f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false) + ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new) + learn!(iap, ds_cur, weights, intercept) + e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap) + f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap) + e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e) + f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) + time_us = estimate_time(conf_train, iap) * 10^6 + error = weights[1] * e_rmse^2 + weights[2] * f_rmse^2 + metrics = OrderedDict( :error => error, + :e_mae => e_mae, + :e_rmse => e_rmse, + :e_rsq => e_rsq, + :f_mae => f_mae, + :f_rmse => f_rmse, + :f_rsq => f_rsq, + :time_us => time_us) + if e_mae < e_mae_max && + f_mae < f_mae_max + loss = time_us + else + loss = time_us + error * 10^3 + end + println("") + 
print("E_MAE:$(round(e_mae; digits=3)), ") + print("F_MAE:$(round(f_mae; digits=3)), ") + println("Time per force per atom | µs:$(round(time_us; digits=3))") + flush(stdout) + push!(ho.history, (species, body_order, polynomial_degree, rcutoff, wL, csp, r0)) + push!(ho.results, (loss, metrics, iap)) +end + +# Post-process output: calculate metrics, create plots, and save results ####### + +# Prnt and save optimization results +results = get_results(ho) +println(results) +@save_dataframe path results + +# Optimal IAP +opt_iap = ho.minimum[3] +@save_var res_path opt_iap.β +@save_var res_path opt_iap.β0 +@save_var res_path opt_iap.basis + +# Plot loss vs time +err_time = plot_err_time(ho) +@save_fig res_path err_time +DisplayAs.PNG(err_time) + From d525197712aa77a1e842d0f6c4b08bce616fd95a Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Fri, 21 Jun 2024 17:45:53 -0400 Subject: [PATCH 2/9] Improvements in opt-example and co --- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index d84900f1..7106bf25 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -31,7 +31,7 @@ conf_train, conf_test = split(ds, n_train, n_test) # NEW: utilizty functions ####################################################### -function estimate_time(confs, iap; batch_size = 30) +function estimate_time(confs, iap; batch_size = 50) if length(confs) < batch_size batch_size = length(confs) end @@ -91,12 +91,16 @@ for (i, species, body_order, polynomial_degree, rcutoff, wL, csp, r0) in ho csp = csp, r0 = r0) iap = LBasisPotential(basis) + ## Compute energy and force descriptors e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false) ds_cur = DataSet(conf_train .+ e_descr_new .+ 
f_descr_new) + ## Learn learn!(iap, ds_cur, weights, intercept) + ## Get true and predicted values e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap) f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap) + ## Compute metrics e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e) f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) time_us = estimate_time(conf_train, iap) * 10^6 @@ -109,17 +113,20 @@ for (i, species, body_order, polynomial_degree, rcutoff, wL, csp, r0) in ho :f_rmse => f_rmse, :f_rsq => f_rsq, :time_us => time_us) + ## Compute multi-objetive loss based on error and time if e_mae < e_mae_max && f_mae < f_mae_max loss = time_us else loss = time_us + error * 10^3 end + ## Print results println("") print("E_MAE:$(round(e_mae; digits=3)), ") print("F_MAE:$(round(f_mae; digits=3)), ") println("Time per force per atom | µs:$(round(time_us; digits=3))") flush(stdout) + ## Return loss push!(ho.history, (species, body_order, polynomial_degree, rcutoff, wL, csp, r0)) push!(ho.results, (loss, metrics, iap)) end @@ -128,8 +135,8 @@ end # Prnt and save optimization results results = get_results(ho) -println(results) @save_dataframe path results +results # Optimal IAP opt_iap = ho.minimum[3] @@ -137,7 +144,7 @@ opt_iap = ho.minimum[3] @save_var res_path opt_iap.β0 @save_var res_path opt_iap.basis -# Plot loss vs time +# Plot error vs time err_time = plot_err_time(ho) @save_fig res_path err_time DisplayAs.PNG(err_time) From 6e375be69111b2c6af19b45c04e6ed2b53bab847 Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Wed, 26 Jun 2024 14:02:55 -0400 Subject: [PATCH 3/9] Improvements in hyperparameter optimization --- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 130 ++++---------------- examples/Opt-ACE-aHfO2/utils.jl | 96 +++++++++++++++ 2 files changed, 117 insertions(+), 109 deletions(-) create mode 100644 examples/Opt-ACE-aHfO2/utils.jl diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index 
7106bf25..c759aae2 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -15,6 +15,7 @@ res_path = "$path/results/"; # Load utility functions. include("$path/../utils/utils.jl") +include("$path/utils.jl") # Create experiment folder. run(`mkdir -p $res_path`); @@ -22,127 +23,38 @@ run(`mkdir -p $res_path`); # ## b. Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) -ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example. +ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Split atomistic dataset into training and test n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. conf_train, conf_test = split(ds, n_train, n_test) -# NEW: utilizty functions ####################################################### - -function estimate_time(confs, iap; batch_size = 50) - if length(confs) < batch_size - batch_size = length(confs) - end - random_selector = RandomSelector(length(confs), batch_size) - inds = PotentialLearning.get_random_subset(random_selector) - time = @elapsed begin - f_descr = compute_force_descriptors(confs[inds], - iap.basis, - pbar = false) - ds = DataSet(confs[inds] .+ f_descr) - f_pred = get_all_forces(ds, iap) - end - n_atoms = sum(length(get_system(c)) for c in confs[inds]) - return time / n_atoms -end - -function get_results(ho) - column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...)) - rows = [[values(r[2])..., p...] 
for (r, p) in zip(ho.results, ho.history)] - results = DataFrame([Any[] for _ in 1:length(column_names)], column_names) - [push!(results, r) for r in rows] - return sort!(results) -end - -function plot_err_time(ho) - error = [r[2][:error] for r in ho.results] - times = [r[2][:time_us] for r in ho.results] - scatter(times, - error, - label = "", - xaxis = "Time per force per atom | µs", - yaxis = "we MSE(E, E') + wf MSE(F, F')") -end - - -# Hyperparamter optimization ################################################### -e_mae_max = 0.05 -f_mae_max = 0.05 -weights = [1.0, 1.0] -intercept = true - -ho = Hyperoptimizer(10, - species = [[:Hf, :O]], - body_order = [2, 3, 4], - polynomial_degree = [3, 4, 5], - rcutoff = [4.5, 5.0, 5.5], - wL = [0.5, 1.0, 1.5], - csp = [0.5, 1.0, 1.5], - r0 = [0.5, 1.0, 1.5]) - -for (i, species, body_order, polynomial_degree, rcutoff, wL, csp, r0) in ho - basis = ACE(species = species, - body_order = body_order, - polynomial_degree = polynomial_degree, - rcutoff = rcutoff, - wL = wL, - csp = csp, - r0 = r0) - iap = LBasisPotential(basis) - ## Compute energy and force descriptors - e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) - f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false) - ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new) - ## Learn - learn!(iap, ds_cur, weights, intercept) - ## Get true and predicted values - e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap) - f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap) - ## Compute metrics - e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e) - f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) - time_us = estimate_time(conf_train, iap) * 10^6 - error = weights[1] * e_rmse^2 + weights[2] * f_rmse^2 - metrics = OrderedDict( :error => error, - :e_mae => e_mae, - :e_rmse => e_rmse, - :e_rsq => e_rsq, - :f_mae => f_mae, - :f_rmse => f_rmse, - :f_rsq => f_rsq, - :time_us => time_us) - ## Compute multi-objetive 
loss based on error and time - if e_mae < e_mae_max && - f_mae < f_mae_max - loss = time_us - else - loss = time_us + error * 10^3 - end - ## Print results - println("") - print("E_MAE:$(round(e_mae; digits=3)), ") - print("F_MAE:$(round(f_mae; digits=3)), ") - println("Time per force per atom | µs:$(round(time_us; digits=3))") - flush(stdout) - ## Return loss - push!(ho.history, (species, body_order, polynomial_degree, rcutoff, wL, csp, r0)) - push!(ho.results, (loss, metrics, iap)) -end +# ## Hyper-parameter optimization +n_samples = 10 +model1 = ACE +pars = OrderedDict( :species => [[:Hf, :O]], + :body_order => [2, 3, 4], + :polynomial_degree => [3, 4, 5], + :rcutoff => [4.5, 5.0, 5.5], + :wL => [0.5, 1.0, 1.5], + :csp => [0.5, 1.0, 1.5], + :r0 => [0.5, 1.0, 1.5]) +ws, int = [1.0, 1.0], true +iap, res = hyperlearn!(n_samples, model1, pars, conf_train; ws = ws, int = int) + # Post-process output: calculate metrics, create plots, and save results ####### # Prnt and save optimization results -results = get_results(ho) -@save_dataframe path results -results +#results = get_results(ho) +@save_dataframe path res +res # Optimal IAP -opt_iap = ho.minimum[3] -@save_var res_path opt_iap.β -@save_var res_path opt_iap.β0 -@save_var res_path opt_iap.basis +@save_var res_path iap.β +@save_var res_path iap.β0 +@save_var res_path iap.basis # Plot error vs time err_time = plot_err_time(ho) diff --git a/examples/Opt-ACE-aHfO2/utils.jl b/examples/Opt-ACE-aHfO2/utils.jl new file mode 100644 index 00000000..27177d51 --- /dev/null +++ b/examples/Opt-ACE-aHfO2/utils.jl @@ -0,0 +1,96 @@ + +# Estimate force calculation time +function estimate_time(confs, iap; batch_size = 50) + if length(confs) < batch_size + batch_size = length(confs) + end + random_selector = RandomSelector(length(confs), batch_size) + inds = PotentialLearning.get_random_subset(random_selector) + time = @elapsed begin + f_descr = compute_force_descriptors(confs[inds], + iap.basis, + pbar = false) + ds = 
DataSet(confs[inds] .+ f_descr) + f_pred = get_all_forces(ds, iap) + end + n_atoms = sum(length(get_system(c)) for c in confs[inds]) + return time / n_atoms +end + +# Get results from the hyperoptimizer +function get_results(ho) + column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...)) + rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)] + results = DataFrame([Any[] for _ in 1:length(column_names)], column_names) + [push!(results, r) for r in rows] + return sort!(results) +end + +# Plot fitting error vs force time (Pareto front) +function plot_err_time(ho) + error = [r[2][:error] for r in ho.results] + times = [r[2][:time_us] for r in ho.results] + scatter(times, + error, + label = "", + xaxis = "Time per force per atom | µs", + yaxis = "we MSE(E, E') + wf MSE(F, F')") +end + +function hyper_loss(p) + err, e_mae, f_mae, time_us = p[1], p[2], p[3], p[4] + e_mae_max, f_mae_max = 0.05, 0.05 + if e_mae < e_mae_max && f_mae < f_mae_max + loss = time_us + else + loss = time_us + err * 10^3 + end + return loss +end + +# hyperlearn! +function hyperlearn!(n_samples, model, pars, conf_train; + ws = [1.0, 1.0], int = true, loss = hyper_loss) + + s = "ho = Hyperoptimizer($n_samples," * join("$k = $v, " for (k, v) in pars) * ")" + eval(Meta.parse(s)) + for (i, state...) in ho + basis = model(; state...) 
+ iap = LBasisPotential(basis) + ## Compute energy and force descriptors + e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) + f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false) + ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new) + ## Learn + learn!(iap, ds_cur, ws, int) + ## Get true and predicted values + e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap) + f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap) + ## Compute metrics + e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e) + f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) + time_us = estimate_time(conf_train, iap) * 10^6 + err = ws[1] * e_rmse^2 + ws[2] * f_rmse^2 + metrics = OrderedDict( :e_mae => e_mae, + :e_rmse => e_rmse, + :e_rsq => e_rsq, + :f_mae => f_mae, + :f_rmse => f_rmse, + :f_rsq => f_rsq, + :time_us => time_us) + ## Compute multi-objetive loss based on error and time + l = loss([err, e_mae, f_mae, time_us]) + ## Print results + print("E_MAE:$(round(e_mae; digits=3)), ") + print("F_MAE:$(round(f_mae; digits=3)), ") + println("Time per force per atom | µs:$(round(time_us; digits=3))") + flush(stdout) + ## Return loss + push!(ho.history, [v for v in state]) + push!(ho.results, (l, metrics, iap)) + end + iap = ho.minimum[3] + res = get_results(ho) + return iap, res +end + From b4420fc981322e4af9f73148e5f450c72d4f70bc Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Wed, 26 Jun 2024 23:21:27 -0400 Subject: [PATCH 4/9] Improvements in hyperparameter optimization. 
--- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 41 ++++++++++++++------- examples/Opt-ACE-aHfO2/utils.jl | 33 ++++++++++++----- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index c759aae2..88da3b5b 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -30,34 +30,47 @@ n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. conf_train, conf_test = split(ds, n_train, n_test) -# ## Hyper-parameter optimization -n_samples = 10 -model1 = ACE -pars = OrderedDict( :species => [[:Hf, :O]], - :body_order => [2, 3, 4], +# ## c. Hyper-parameter optimization + +# Define the model and hyper-parameter value ranges to be optimized. +model = ACE +pars = OrderedDict( :body_order => [2, 3, 4], :polynomial_degree => [3, 4, 5], :rcutoff => [4.5, 5.0, 5.5], :wL => [0.5, 1.0, 1.5], :csp => [0.5, 1.0, 1.5], :r0 => [0.5, 1.0, 1.5]) -ws, int = [1.0, 1.0], true -iap, res = hyperlearn!(n_samples, model1, pars, conf_train; ws = ws, int = int) +# Use random sampling to find the optimal hyper-parameters. +iap, res = hyperlearn!(model, pars, conf_train; + n_samples = 10, sampler = RandomSampler(), + ws = [1.0, 1.0], int = true) + +@save_var res_path iap.β +@save_var res_path iap.β0 +@save_var res_path iap.basis +@save_dataframe res_path res +res +# Plot error vs time +err_time = plot_err_time(res) +@save_fig res_path err_time +DisplayAs.PNG(err_time) -# Post-process output: calculate metrics, create plots, and save results ####### -# Prnt and save optimization results -#results = get_results(ho) -@save_dataframe path res -res +# Alternatively, use latin hypercube sampling to find the optimal hyper-parameters. 
+iap, res = hyperlearn!(model, pars, conf_train; + n_samples = 3, sampler = LHSampler(), + ws = [1.0, 1.0], int = true) -# Optimal IAP @save_var res_path iap.β @save_var res_path iap.β0 @save_var res_path iap.basis +@save_dataframe res_path res +res # Plot error vs time -err_time = plot_err_time(ho) +err_time = plot_err_time(res) @save_fig res_path err_time DisplayAs.PNG(err_time) + diff --git a/examples/Opt-ACE-aHfO2/utils.jl b/examples/Opt-ACE-aHfO2/utils.jl index 27177d51..6012d56b 100644 --- a/examples/Opt-ACE-aHfO2/utils.jl +++ b/examples/Opt-ACE-aHfO2/utils.jl @@ -27,9 +27,9 @@ function get_results(ho) end # Plot fitting error vs force time (Pareto front) -function plot_err_time(ho) - error = [r[2][:error] for r in ho.results] - times = [r[2][:time_us] for r in ho.results] +function plot_err_time(res) + error = res[!, :error] + times = res[!, :time_us] scatter(times, error, label = "", @@ -37,7 +37,8 @@ function plot_err_time(ho) yaxis = "we MSE(E, E') + wf MSE(F, F')") end -function hyper_loss(p) + +function loss(p) err, e_mae, f_mae, time_us = p[1], p[2], p[3], p[4] e_mae_max, f_mae_max = 0.05, 0.05 if e_mae < e_mae_max && f_mae < f_mae_max @@ -48,14 +49,27 @@ function hyper_loss(p) return loss end +function get_species(confs) + return unique(vcat(unique.(atomic_symbol.(get_system.(confs)))...)) +end + +create_ho(x) = Hyperoptimizer(1) + # hyperlearn! 
-function hyperlearn!(n_samples, model, pars, conf_train; - ws = [1.0, 1.0], int = true, loss = hyper_loss) +function hyperlearn!(model, pars, conf_train; + n_samples = 5, sampler = RandomSampler(), loss = loss, + ws = [1.0, 1.0], int = true) - s = "ho = Hyperoptimizer($n_samples," * join("$k = $v, " for (k, v) in pars) * ")" + s = "create_ho(sampler) = Hyperoptimizer($n_samples, sampler, " * + join("$k = $v, " for (k, v) in pars) * ")" eval(Meta.parse(s)) + ho = Base.invokelatest(create_ho, sampler) + if (ho.sampler isa LHSampler) || (ho.sampler isa CLHSampler) + Hyperopt.init!(ho.sampler, ho) + end + species = get_species(conf_train) for (i, state...) in ho - basis = model(; state...) + basis = model(; species = species, state...) iap = LBasisPotential(basis) ## Compute energy and force descriptors e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) @@ -71,7 +85,8 @@ function hyperlearn!(n_samples, model, pars, conf_train; f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) time_us = estimate_time(conf_train, iap) * 10^6 err = ws[1] * e_rmse^2 + ws[2] * f_rmse^2 - metrics = OrderedDict( :e_mae => e_mae, + metrics = OrderedDict( :error => err, + :e_mae => e_mae, :e_rmse => e_rmse, :e_rsq => e_rsq, :f_mae => f_mae, From 0883a8d18a5ccede92cdfdf883049ec8a4d97500 Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Thu, 27 Jun 2024 15:44:55 -0400 Subject: [PATCH 5/9] Improving plot of hyperparameter optimization. --- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 4 +-- examples/Opt-ACE-aHfO2/utils.jl | 34 ++++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index 88da3b5b..88b9c269 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -30,7 +30,7 @@ n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. 
conf_train, conf_test = split(ds, n_train, n_test) -# ## c. Hyper-parameter optimization +# ## c. Hyper-parameter optimization. # Define the model and hyper-parameter value ranges to be optimized. model = ACE @@ -39,7 +39,7 @@ pars = OrderedDict( :body_order => [2, 3, 4], :rcutoff => [4.5, 5.0, 5.5], :wL => [0.5, 1.0, 1.5], :csp => [0.5, 1.0, 1.5], - :r0 => [0.5, 1.0, 1.5]) + :r0 => [0.5, 1.0, 1.5]); # Use random sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 10, sampler = RandomSampler(), diff --git a/examples/Opt-ACE-aHfO2/utils.jl b/examples/Opt-ACE-aHfO2/utils.jl index 6012d56b..cc2d2236 100644 --- a/examples/Opt-ACE-aHfO2/utils.jl +++ b/examples/Opt-ACE-aHfO2/utils.jl @@ -34,7 +34,39 @@ function plot_err_time(res) error, label = "", xaxis = "Time per force per atom | µs", - yaxis = "we MSE(E, E') + wf MSE(F, F')") + yaxis = "we MAE(E, E') + wf MAE(F, F')") +end + +function plot_err_time(res) + e_mae = res[!, :e_mae] + f_mae = res[!, :f_mae] + times = res[!, :time_us] + plot(times, + e_mae, + seriestype = :scatter, + alpha = 0.55, + thickness_scaling = 1.35, + markersize = 3, + markerstrokewidth = 1, + markerstrokecolor = :black, + markershape = :circle, + markercolor = :gray, + label = "MAE(E_Pred, E_DFT)") + plot!(times, + f_mae, + seriestype = :scatter, + alpha = 0.55, + thickness_scaling = 1.35, + markersize = 3, + markerstrokewidth = 1, + markerstrokecolor = :red, + markershape = :utriangle, + markercolor = :red2, + label = "MAE(F_Pred, F_DFT)") + plot!(dpi = 300, + label = "", + xlabel = "Time per force per atom | µs", + ylabel = "MAE") end From b85d7841941aa8a696089355c3941c834f6ee721 Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Thu, 27 Jun 2024 17:00:36 -0400 Subject: [PATCH 6/9] Improvements in loss function in hyperparamter optiimization. 
--- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 25 ++++++++++++++++++--- examples/Opt-ACE-aHfO2/utils.jl | 23 +------------------ 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index 88b9c269..34aa10e6 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -32,7 +32,23 @@ conf_train, conf_test = split(ds, n_train, n_test) # ## c. Hyper-parameter optimization. -# Define the model and hyper-parameter value ranges to be optimized. +# Define loss function. Here, we minimize error and time. +function loss(metrics) + e_mae_max = 0.05 + f_mae_max = 0.05 + err = metrics[:error] # weighted error: w_e * e_mae + w_f * f_mae + e_mae = metrics[:e_mae] + f_mae = metrics[:f_mae] + time_us = metrics[:time_us] + if e_mae < e_mae_max && f_mae < f_mae_max + loss = time_us + else + loss = time_us + err * 10^3 + end + return loss +end + +# Define model and hyper-parameter value ranges to be optimized. model = ACE pars = OrderedDict( :body_order => [2, 3, 4], :polynomial_degree => [3, 4, 5], @@ -40,18 +56,20 @@ pars = OrderedDict( :body_order => [2, 3, 4], :wL => [0.5, 1.0, 1.5], :csp => [0.5, 1.0, 1.5], :r0 => [0.5, 1.0, 1.5]); + # Use random sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 10, sampler = RandomSampler(), ws = [1.0, 1.0], int = true) +# Save and show results. @save_var res_path iap.β @save_var res_path iap.β0 @save_var res_path iap.basis @save_dataframe res_path res res -# Plot error vs time +# Plot error vs time. err_time = plot_err_time(res) @save_fig res_path err_time DisplayAs.PNG(err_time) @@ -62,13 +80,14 @@ iap, res = hyperlearn!(model, pars, conf_train; n_samples = 3, sampler = LHSampler(), ws = [1.0, 1.0], int = true) +# Save and show results. 
@save_var res_path iap.β @save_var res_path iap.β0 @save_var res_path iap.basis @save_dataframe res_path res res -# Plot error vs time +# Plot error vs time. err_time = plot_err_time(res) @save_fig res_path err_time DisplayAs.PNG(err_time) diff --git a/examples/Opt-ACE-aHfO2/utils.jl b/examples/Opt-ACE-aHfO2/utils.jl index cc2d2236..112c52d7 100644 --- a/examples/Opt-ACE-aHfO2/utils.jl +++ b/examples/Opt-ACE-aHfO2/utils.jl @@ -27,16 +27,6 @@ function get_results(ho) end # Plot fitting error vs force time (Pareto front) -function plot_err_time(res) - error = res[!, :error] - times = res[!, :time_us] - scatter(times, - error, - label = "", - xaxis = "Time per force per atom | µs", - yaxis = "we MAE(E, E') + wf MAE(F, F')") -end - function plot_err_time(res) e_mae = res[!, :e_mae] f_mae = res[!, :f_mae] @@ -70,17 +60,6 @@ function plot_err_time(res) end -function loss(p) - err, e_mae, f_mae, time_us = p[1], p[2], p[3], p[4] - e_mae_max, f_mae_max = 0.05, 0.05 - if e_mae < e_mae_max && f_mae < f_mae_max - loss = time_us - else - loss = time_us + err * 10^3 - end - return loss -end - function get_species(confs) return unique(vcat(unique.(atomic_symbol.(get_system.(confs)))...)) end @@ -126,7 +105,7 @@ function hyperlearn!(model, pars, conf_train; :f_rsq => f_rsq, :time_us => time_us) ## Compute multi-objetive loss based on error and time - l = loss([err, e_mae, f_mae, time_us]) + l = loss(metrics) ## Print results print("E_MAE:$(round(e_mae; digits=3)), ") print("F_MAE:$(round(f_mae; digits=3)), ") From 1bdc6bef421bf403f71e44444b4460e4e0d95577 Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Thu, 27 Jun 2024 17:09:09 -0400 Subject: [PATCH 7/9] Improvements in loss function and plots in hyperparameter optimization. 
--- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 2 +- examples/Opt-ACE-aHfO2/utils.jl | 37 +----------------- examples/utils/plots.jl | 43 +++++++++++++++++++++ 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index 34aa10e6..fca907df 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -60,7 +60,7 @@ pars = OrderedDict( :body_order => [2, 3, 4], # Use random sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 10, sampler = RandomSampler(), - ws = [1.0, 1.0], int = true) + loss = loss, ws = [1.0, 1.0], int = true) # Save and show results. @save_var res_path iap.β diff --git a/examples/Opt-ACE-aHfO2/utils.jl b/examples/Opt-ACE-aHfO2/utils.jl index 112c52d7..4672b8e4 100644 --- a/examples/Opt-ACE-aHfO2/utils.jl +++ b/examples/Opt-ACE-aHfO2/utils.jl @@ -26,39 +26,6 @@ function get_results(ho) return sort!(results) end -# Plot fitting error vs force time (Pareto front) -function plot_err_time(res) - e_mae = res[!, :e_mae] - f_mae = res[!, :f_mae] - times = res[!, :time_us] - plot(times, - e_mae, - seriestype = :scatter, - alpha = 0.55, - thickness_scaling = 1.35, - markersize = 3, - markerstrokewidth = 1, - markerstrokecolor = :black, - markershape = :circle, - markercolor = :gray, - label = "MAE(E_Pred, E_DFT)") - plot!(times, - f_mae, - seriestype = :scatter, - alpha = 0.55, - thickness_scaling = 1.35, - markersize = 3, - markerstrokewidth = 1, - markerstrokecolor = :red, - markershape = :utriangle, - markercolor = :red2, - label = "MAE(F_Pred, F_DFT)") - plot!(dpi = 300, - label = "", - xlabel = "Time per force per atom | µs", - ylabel = "MAE") -end - function get_species(confs) return unique(vcat(unique.(atomic_symbol.(get_system.(confs)))...)) @@ -68,8 +35,8 @@ create_ho(x) = Hyperoptimizer(1) # hyperlearn! 
function hyperlearn!(model, pars, conf_train; - n_samples = 5, sampler = RandomSampler(), loss = loss, - ws = [1.0, 1.0], int = true) + n_samples = 5, sampler = RandomSampler(), + loss = loss, ws = [1.0, 1.0], int = true) s = "create_ho(sampler) = Hyperoptimizer($n_samples, sampler, " * join("$k = $v, " for (k, v) in pars) * ")" diff --git a/examples/utils/plots.jl b/examples/utils/plots.jl index 356c3156..e29efe31 100644 --- a/examples/utils/plots.jl +++ b/examples/utils/plots.jl @@ -238,3 +238,46 @@ function plot_cos( return p end + +""" + plot_err_time( + res + ) + +`res`: Dataframe with fitting error and force time information. + +Returns a plot with fitting errors vs force times. Required in hyper-parameter optimization. + +""" +function plot_err_time(res) + e_mae = res[!, :e_mae] + f_mae = res[!, :f_mae] + times = res[!, :time_us] + plot(times, + e_mae, + seriestype = :scatter, + alpha = 0.55, + thickness_scaling = 1.35, + markersize = 3, + markerstrokewidth = 1, + markerstrokecolor = :black, + markershape = :circle, + markercolor = :gray, + label = "MAE(E_Pred, E_DFT) | eV/atom") + plot!(times, + f_mae, + seriestype = :scatter, + alpha = 0.55, + thickness_scaling = 1.35, + markersize = 3, + markerstrokewidth = 1, + markerstrokecolor = :red, + markershape = :utriangle, + markercolor = :red2, + label = "MAE(F_Pred, F_DFT) | eV/Å") + plot!(dpi = 300, + label = "", + xlabel = "Time per force per atom | µs", + ylabel = "MAE") +end + From b99bb1970b6ba41f4ff61e0745ebb7fe562d9fea Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Fri, 28 Jun 2024 15:52:22 -0400 Subject: [PATCH 8/9] Integrating HyperLearning to main source code. 
--- Project.toml | 4 +- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 28 ++--- src/Data/data.jl | 1 + src/Data/utils.jl | 14 +++ src/HyperLearning/hyperlearning.jl | 4 + src/HyperLearning/linear-hyperlearn.jl | 112 ++++++++++++++++++++ src/HyperLearning/utils.jl | 28 +++++ src/PotentialLearning.jl | 5 + src/interface.jl | 24 ----- 9 files changed, 181 insertions(+), 39 deletions(-) create mode 100644 src/HyperLearning/hyperlearning.jl create mode 100644 src/HyperLearning/linear-hyperlearn.jl create mode 100644 src/HyperLearning/utils.jl delete mode 100644 src/interface.jl diff --git a/Project.toml b/Project.toml index c2fe1a8e..dc074572 100644 --- a/Project.toml +++ b/Project.toml @@ -6,9 +6,11 @@ version = "0.2.3" [deps] AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a" Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Determinantal = "2673d5e8-682c-11e9-2dfd-471b09c6c819" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -26,5 +28,5 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] -julia = ">= 1.10" InteratomicPotentials = ">= 0.2.9" +julia = ">= 1.10" diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index fca907df..8c3fd7f8 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -15,7 +15,6 @@ res_path = "$path/results/"; # Load utility functions. include("$path/../utils/utils.jl") -include("$path/utils.jl") # Create experiment folder. run(`mkdir -p $res_path`); @@ -32,21 +31,23 @@ conf_train, conf_test = split(ds, n_train, n_test) # ## c. Hyper-parameter optimization. 
-# Define loss function. Here, we minimize error and time. -function loss(metrics) - e_mae_max = 0.05 +# Define a custom loss function. Here, we minimize fitting error and force calculation time. +function hyperloss( + metrics::OrderedDict; + w_e = 1.0, + w_f = 1.0, + w_t = 1.0E-3, + e_mae_max = 0.05, f_mae_max = 0.05 - err = metrics[:error] # weighted error: w_e * e_mae + w_f * f_mae +) e_mae = metrics[:e_mae] f_mae = metrics[:f_mae] time_us = metrics[:time_us] - if e_mae < e_mae_max && f_mae < f_mae_max - loss = time_us - else - loss = time_us + err * 10^3 - end + w_e = w_e * e_mae/e_mae_max + w_f = w_f * f_mae/f_mae_max + loss = w_e * e_mae + w_f * e_mae + w_t * time_us return loss -end +end; # Define model and hyper-parameter value ranges to be optimized. model = ACE @@ -60,7 +61,7 @@ pars = OrderedDict( :body_order => [2, 3, 4], # Use random sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 10, sampler = RandomSampler(), - loss = loss, ws = [1.0, 1.0], int = true) + loss = hyperloss, ws = [1.0, 1.0], int = true); # Save and show results. @save_var res_path iap.β @@ -78,7 +79,7 @@ DisplayAs.PNG(err_time) # Alternatively, use latin hypercube sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 3, sampler = LHSampler(), - ws = [1.0, 1.0], int = true) + loss = hyperloss, ws = [1.0, 1.0], int = true); # Save and show results. 
@save_var res_path iap.β @@ -92,4 +93,3 @@ err_time = plot_err_time(res) @save_fig res_path err_time DisplayAs.PNG(err_time) - diff --git a/src/Data/data.jl b/src/Data/data.jl index d8a6e552..f8f1b18f 100644 --- a/src/Data/data.jl +++ b/src/Data/data.jl @@ -21,6 +21,7 @@ export ConfigurationDataSet, get_system, get_positions, get_energy, + get_species, get_descriptors, get_forces, get_force_descriptors, diff --git a/src/Data/utils.jl b/src/Data/utils.jl index f626b841..57ebff0a 100644 --- a/src/Data/utils.jl +++ b/src/Data/utils.jl @@ -200,3 +200,17 @@ function compute_force_descriptors( return f_des end +# Get species from a dataset ################################################### +""" +function get_species( + ds::DataSet +) + +Get species from a dataset. +""" +function get_species( + ds::DataSet +) + return unique(vcat(unique.(atomic_symbol.(get_system.(ds)))...)) +end + diff --git a/src/HyperLearning/hyperlearning.jl b/src/HyperLearning/hyperlearning.jl new file mode 100644 index 00000000..ce8679b5 --- /dev/null +++ b/src/HyperLearning/hyperlearning.jl @@ -0,0 +1,4 @@ +export hyperlearn! + +include("linear-hyperlearn.jl") +include("utils.jl") diff --git a/src/HyperLearning/linear-hyperlearn.jl b/src/HyperLearning/linear-hyperlearn.jl new file mode 100644 index 00000000..5230c16a --- /dev/null +++ b/src/HyperLearning/linear-hyperlearn.jl @@ -0,0 +1,112 @@ +create_ho(x) = Hyperoptimizer(1) + +""" +function hyperloss( + metrics::OrderedDict: + w_e = 1.0, + w_f = 1.0, + w_t = 1.0E-3, + e_mae_max = 0.05, + f_mae_max = 0.05 +) + +`metrics`: OrderedDict object with metrics of the fitting process. + - Mean absolute error of energies: e_mae. + - Mean absolute error of forces: f_mae. + - Time per force per atom: time_us. +`w_e`: energy weight. +`w_f`: force weight. +`w_t`: time weight. +`e_mae_max`: maximum mean absolute error for energies. +`f_mae_max`: maximum mean absolute error for forces. 
+
+Loss function for hyper-parameter optimization: minimizes fitting error and time.
+"""
+function hyperloss(
+    metrics::OrderedDict;
+    w_e = 1.0,
+    w_f = 1.0,
+    w_t = 1.0E-3,
+    e_mae_max = 0.05,
+    f_mae_max = 0.05
+)
+    e_mae = metrics[:e_mae]
+    f_mae = metrics[:f_mae]
+    time_us = metrics[:time_us]
+    w_e = w_e * e_mae/e_mae_max
+    w_f = w_f * f_mae/f_mae_max
+    loss = w_e * e_mae + w_f * f_mae + w_t * time_us
+    return loss
+end;
+
+"""
+function hyperlearn!(
+    model::DataType,
+    pars::OrderedDict,
+    conf_train::DataSet;
+    n_samples = 5,
+    sampler = RandomSampler(),
+    loss = hyperloss,
+    ws = [1.0, 1.0],
+    int = true
+)
+
+Hyper-parameter optimization of linear interatomic potentials.
+"""
+function hyperlearn!(
+    model::DataType,
+    pars::OrderedDict,
+    conf_train::DataSet;
+    n_samples = 5,
+    sampler = RandomSampler(),
+    loss = hyperloss,
+    ws = [1.0, 1.0],
+    int = true
+)
+    s = "create_ho(sampler) = Hyperoptimizer($n_samples, sampler, " *
+        join("$k = $v, " for (k, v) in pars) * ")"
+    eval(Meta.parse(s))
+    ho = Base.invokelatest(create_ho, sampler)
+    if (ho.sampler isa LHSampler) || (ho.sampler isa CLHSampler)
+        Hyperopt.init!(ho.sampler, ho)
+    end
+    species = get_species(conf_train)
+    for (i, state...) in ho
+        basis = model(; species = species, state...)
+        iap = LBasisPotential(basis)
+        ## Compute energy and force descriptors
+        e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false)
+        f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false)
+        ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new)
+        ## Learn
+        learn!(iap, ds_cur, ws, int)
+        ## Get true and predicted values
+        e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap)
+        f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap)
+        ## Compute metrics
+        e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e)
+        f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f)
+        time_us = estimate_time(conf_train, iap) * 10^6
+        metrics = OrderedDict( :e_mae => e_mae,
+                               :e_rmse => e_rmse,
+                               :e_rsq => e_rsq,
+                               :f_mae => f_mae,
+                               :f_rmse => f_rmse,
+                               :f_rsq => f_rsq,
+                               :time_us => time_us)
+        ## Compute multi-objective loss based on error and time
+        l = loss(metrics)
+        ## Print results
+        print("E_MAE:$(round(e_mae; digits=3)) eV/atom, ")
+        print("F_MAE:$(round(f_mae; digits=3)) eV/Å, ")
+        println("Time per force per atom:$(round(time_us; digits=3)) µs")
+        flush(stdout)
+        ## Return loss
+        push!(ho.history, [v for v in state])
+        push!(ho.results, (l, metrics, iap))
+    end
+    iap = ho.minimum[3]
+    res = get_results(ho)
+    return iap, res
+end
+
diff --git a/src/HyperLearning/utils.jl b/src/HyperLearning/utils.jl
new file mode 100644
index 00000000..c29cce7d
--- /dev/null
+++ b/src/HyperLearning/utils.jl
@@ -0,0 +1,28 @@
+
+# Estimate force calculation time
+function estimate_time(confs, iap; batch_size = 50)
+    if length(confs) < batch_size
+        batch_size = length(confs)
+    end
+    random_selector = RandomSelector(length(confs), batch_size)
+    inds = PotentialLearning.get_random_subset(random_selector)
+    time = @elapsed begin
+        f_descr = compute_force_descriptors(confs[inds],
+                                            iap.basis,
+                                            pbar = false)
+        ds = DataSet(confs[inds] .+ f_descr)
+        f_pred = get_all_forces(ds, iap)
+    end
+    n_atoms = sum(length(get_system(c)) for c in confs[inds])
+    return time / n_atoms
+end
+
+# Get results from the hyperoptimizer
+function get_results(ho)
+    column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...))
+    rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)]
+    results = DataFrame([Any[] for _ in 1:length(column_names)], column_names)
+    [push!(results, r) for r in rows]
+    return sort!(results)
+end
+
diff --git a/src/PotentialLearning.jl b/src/PotentialLearning.jl
index 3c2fd79e..d332190e 100644
--- a/src/PotentialLearning.jl
+++ b/src/PotentialLearning.jl
@@ -6,6 +6,8 @@ using Unitful, UnitfulAtomic
 using LinearAlgebra, Statistics, Random, Distributions
 using StaticArrays
 using OrderedCollections
+using DataFrames
+using Hyperopt
 using Flux
 using Zygote
 using Optimization
@@ -33,6 +35,9 @@ include("DimensionReduction/dimension_reduction.jl")
 # Learning problems
 include("Learning/learning.jl")
 
+# Hyper-parameter learning
+include("HyperLearning/hyperlearning.jl")
+
 # Metrics
 include("Metrics/metrics.jl")
 
diff --git a/src/interface.jl b/src/interface.jl
deleted file mode 100644
index f0c37a89..00000000
--- a/src/interface.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-################################################################################
-#
-# Interface.jl
-#
-################################################################################
-
-using AtomsBase
-using InteratomicPotentials
-using OrderedCollections
-using IterTools
-using LinearAlgebra
-using StaticArrays
-using Statistics
-using Optimization
-using OptimizationOptimisers
-using UnitfulAtomic
-using Unitful
-using Flux
-using Flux.Data: DataLoader
-using Random
-using CSV
-using Plots
-
-# TODO

From 1086cd11f94f8b299ce976fa236820a0015de1f0 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Fri, 28 Jun 2024 17:01:24 -0400
Subject: [PATCH 9/9] Improvements in hyperoptimization example and remove
 unnecessary file.
--- examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl | 21 +++-- examples/Opt-ACE-aHfO2/utils.jl | 89 --------------------- 2 files changed, 10 insertions(+), 100 deletions(-) delete mode 100644 examples/Opt-ACE-aHfO2/utils.jl diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl index 8c3fd7f8..92cd1263 100644 --- a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -32,19 +32,18 @@ conf_train, conf_test = split(ds, n_train, n_test) # ## c. Hyper-parameter optimization. # Define a custom loss function. Here, we minimize fitting error and force calculation time. -function hyperloss( - metrics::OrderedDict; - w_e = 1.0, - w_f = 1.0, - w_t = 1.0E-3, - e_mae_max = 0.05, - f_mae_max = 0.05 +# Possible metrics are `e_mae`, `e_rmse`, `e_rsq`, `f_mae`, `f_rmse`, `f_rsq`, and `time_us`. +function custom_loss( + metrics::OrderedDict ) e_mae = metrics[:e_mae] f_mae = metrics[:f_mae] time_us = metrics[:time_us] - w_e = w_e * e_mae/e_mae_max - w_f = w_f * f_mae/f_mae_max + e_mae_max = 0.05 # eV/atom + f_mae_max = 0.05 # eV/Å + w_e = e_mae/e_mae_max + w_f = f_mae/f_mae_max + w_t = 1.0E-3 loss = w_e * e_mae + w_f * e_mae + w_t * time_us return loss end; @@ -61,7 +60,7 @@ pars = OrderedDict( :body_order => [2, 3, 4], # Use random sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 10, sampler = RandomSampler(), - loss = hyperloss, ws = [1.0, 1.0], int = true); + loss = custom_loss, ws = [1.0, 1.0], int = true); # Save and show results. @save_var res_path iap.β @@ -79,7 +78,7 @@ DisplayAs.PNG(err_time) # Alternatively, use latin hypercube sampling to find the optimal hyper-parameters. iap, res = hyperlearn!(model, pars, conf_train; n_samples = 3, sampler = LHSampler(), - loss = hyperloss, ws = [1.0, 1.0], int = true); + loss = custom_loss, ws = [1.0, 1.0], int = true); # Save and show results. 
@save_var res_path iap.β diff --git a/examples/Opt-ACE-aHfO2/utils.jl b/examples/Opt-ACE-aHfO2/utils.jl deleted file mode 100644 index 4672b8e4..00000000 --- a/examples/Opt-ACE-aHfO2/utils.jl +++ /dev/null @@ -1,89 +0,0 @@ - -# Estimate force calculation time -function estimate_time(confs, iap; batch_size = 50) - if length(confs) < batch_size - batch_size = length(confs) - end - random_selector = RandomSelector(length(confs), batch_size) - inds = PotentialLearning.get_random_subset(random_selector) - time = @elapsed begin - f_descr = compute_force_descriptors(confs[inds], - iap.basis, - pbar = false) - ds = DataSet(confs[inds] .+ f_descr) - f_pred = get_all_forces(ds, iap) - end - n_atoms = sum(length(get_system(c)) for c in confs[inds]) - return time / n_atoms -end - -# Get results from the hyperoptimizer -function get_results(ho) - column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...)) - rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)] - results = DataFrame([Any[] for _ in 1:length(column_names)], column_names) - [push!(results, r) for r in rows] - return sort!(results) -end - - -function get_species(confs) - return unique(vcat(unique.(atomic_symbol.(get_system.(confs)))...)) -end - -create_ho(x) = Hyperoptimizer(1) - -# hyperlearn! -function hyperlearn!(model, pars, conf_train; - n_samples = 5, sampler = RandomSampler(), - loss = loss, ws = [1.0, 1.0], int = true) - - s = "create_ho(sampler) = Hyperoptimizer($n_samples, sampler, " * - join("$k = $v, " for (k, v) in pars) * ")" - eval(Meta.parse(s)) - ho = Base.invokelatest(create_ho, sampler) - if (ho.sampler isa LHSampler) || (ho.sampler isa CLHSampler) - Hyperopt.init!(ho.sampler, ho) - end - species = get_species(conf_train) - for (i, state...) in ho - basis = model(; species = species, state...) 
- iap = LBasisPotential(basis) - ## Compute energy and force descriptors - e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) - f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false) - ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new) - ## Learn - learn!(iap, ds_cur, ws, int) - ## Get true and predicted values - e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap) - f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap) - ## Compute metrics - e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e) - f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) - time_us = estimate_time(conf_train, iap) * 10^6 - err = ws[1] * e_rmse^2 + ws[2] * f_rmse^2 - metrics = OrderedDict( :error => err, - :e_mae => e_mae, - :e_rmse => e_rmse, - :e_rsq => e_rsq, - :f_mae => f_mae, - :f_rmse => f_rmse, - :f_rsq => f_rsq, - :time_us => time_us) - ## Compute multi-objetive loss based on error and time - l = loss(metrics) - ## Print results - print("E_MAE:$(round(e_mae; digits=3)), ") - print("F_MAE:$(round(f_mae; digits=3)), ") - println("Time per force per atom | µs:$(round(time_us; digits=3))") - flush(stdout) - ## Return loss - push!(ho.history, [v for v in state]) - push!(ho.results, (l, metrics, iap)) - end - iap = ho.minimum[3] - res = get_results(ho) - return iap, res -end -