From 407bdcfe7c1971e29a90cf2c7d86a52ea4a0b18e Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan <lujan.emmanuel@gmail.com>
Date: Tue, 18 Jun 2024 10:22:03 -0400
Subject: [PATCH 1/4] New documentation sections.

---
 docs/make.jl | 48 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index e0f1c4a2..22685c10 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -12,35 +12,50 @@ DocMeta.setdocmeta!(
     recursive = true,
 )
 
+# Citations ####################################################################
 
 bib = CitationBibliography(joinpath(@__DIR__, "citation.bib"))
 
-# Generate examples
+# Generate examples ############################################################
 
 const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
 
+function create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
+    for (_, example_path) in examples
+        s = split(example_path, "/")
+        sub_path, file_name = string(s[1:end-1]...), s[end]
+        example_filepath = joinpath(EXAMPLES_DIR, example_path)
+        Literate.markdown(example_filepath,
+                          joinpath(OUTPUT_DIR, sub_path),
+                          documenter = true)
+    end
+    examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md"))
+                for (title, example_path) in examples]
+end
+
+# Basic examples
 examples = [
     "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-aHfO2.jl",
+    "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
+]
+basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
+
+# Subsampling examples
+examples = [
     "Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
     "Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
     "Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
-    "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
-    "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
 ]
+ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
-for (_, example_path) in examples
-    s = split(example_path, "/")
-    sub_path, file_name = string(s[1:end-1]...), s[end]
-    example_filepath = joinpath(EXAMPLES_DIR, example_path)
-    Literate.markdown(example_filepath,
-                      joinpath(OUTPUT_DIR, sub_path),
-                      documenter = true)
-end
-
-examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md"))
-            for (title, example_path) in examples]
+# Dimension reduction examples
+examples = [
+    "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
+]
+dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
+# Make and deploy docs #########################################################
 
 makedocs(
       root    =  joinpath(dirname(pathof(PotentialLearning)), "..", "docs"),
@@ -56,7 +71,9 @@ makedocs(
       draft = false,
       pages = ["Home" => "index.md",
                "How to run the examples" => "how-to-run-the-examples.md",
-               "Examples" => examples,
+               "Basic examples" => basic_examples,
+               "Subsampling examples" => ss_examples,
+               "Dimension reduction examples" => dr_examples,
                "API" => "api.md"],
       format = Documenter.HTML(;
         prettyurls = get(ENV, "CI", "false") == "true",
@@ -71,3 +88,4 @@ deploydocs(;
     devbranch = "main",
     push_preview = true,
 )
+

From 560f4edaa7c9f65b3e5fcc20b72b0be10eaefb28 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan <lujan.emmanuel@gmail.com>
Date: Tue, 18 Jun 2024 12:12:57 -0400
Subject: [PATCH 2/4] Small improvements in examples and documentation.

---
 docs/make.jl                                  |  5 ++--
 docs/src/how-to-run-the-examples.md           |  4 +--
 .../{fit-ace-aHfO2.jl => fit-ace-ahfo2.jl}    | 30 +++++++++----------
 examples/DPP-ACE-Na/fit-dpp-ace-na.jl         |  4 +--
 examples/DPP-ACE-Si/fit-dpp-ace-si.jl         | 14 ++++-----
 examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl | 24 +++++++--------
 examples/LJ-Ar/lennard-jones-ar.jl            | 16 +++++-----
 examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl   | 20 ++++++-------
 examples/README.md                            | 19 +-----------
 9 files changed, 60 insertions(+), 76 deletions(-)
 rename examples/ACE-aHfO2/{fit-ace-aHfO2.jl => fit-ace-ahfo2.jl} (89%)

diff --git a/docs/make.jl b/docs/make.jl
index 22685c10..441d1f44 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,4 +1,4 @@
-pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack
+#pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack
 
 using PotentialLearning
 using Documenter
@@ -32,11 +32,12 @@ function create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
     end
     examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md"))
                 for (title, example_path) in examples]
+    return examples
 end
 
 # Basic examples
 examples = [
-    "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-aHfO2.jl",
+    "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
     "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
 ]
 basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
diff --git a/docs/src/how-to-run-the-examples.md b/docs/src/how-to-run-the-examples.md
index a063835a..09c3dbfa 100644
--- a/docs/src/how-to-run-the-examples.md
+++ b/docs/src/how-to-run-the-examples.md
@@ -16,7 +16,7 @@ Clone `PotentialLearning.jl` repository in your working directory.
 ```
 Access to any folder within `PotentialLearning.jl/examples`. E.g.
 ```shell
-    $ cd PotentialLearning.jl/examples/Na
+    $ cd PotentialLearning.jl/examples/DPP-ACE-aHfO2-1
 ```
 
 ## Run example
@@ -30,6 +30,6 @@ Type `]` to enter the Pkg REPL and instantiate.
 ```
 Finally, include the example file.
 ```julia
-    julia> include("fit-dpp-ace-na.jl")
+    julia> include("fit-dpp-ace-ahfo2.jl")
 ```
 
diff --git a/examples/ACE-aHfO2/fit-ace-aHfO2.jl b/examples/ACE-aHfO2/fit-ace-ahfo2.jl
similarity index 89%
rename from examples/ACE-aHfO2/fit-ace-aHfO2.jl
rename to examples/ACE-aHfO2/fit-ace-ahfo2.jl
index 08528e95..9fa09b38 100644
--- a/examples/ACE-aHfO2/fit-ace-aHfO2.jl
+++ b/examples/ACE-aHfO2/fit-ace-ahfo2.jl
@@ -2,7 +2,7 @@
 
 # ## Load packages, define paths, and create experiment folder.
 
-# Load packages
+# Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using Unitful, UnitfulAtomic
 using LinearAlgebra, Random, DisplayAs
@@ -10,22 +10,22 @@ using LinearAlgebra, Random, DisplayAs
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/ACE-aHfO2")
 ds_path =  "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
-res_path = "$path/results/"
+res_path = "$path/results/";
 
 # Load utility functions.
 include("$path/../utils/utils.jl")
 
 # Create experiment folder.
-run(`mkdir -p $res_path`)
+run(`mkdir -p $res_path`);
 
 # ## Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
-ds = load_data(ds_path, uparse("eV"), uparse("Å"))
+ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example.
 
 # Split atomistic dataset into training and test
-n_train, n_test = 50, 50 # only 50 samples per dataset are used in this example.
-conf_train, conf_test = split(ds[1:1000], n_train, n_test)
+n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
+conf_train, conf_test = split(ds, n_train, n_test)
 
 # ## Create ACE basis, compute descriptors and add them to the dataset.
 
@@ -37,7 +37,7 @@ basis = ACE(species           = [:Hf, :O],
             wL                = 1.0,
             csp               = 1.0,
             r0                = 1.0)
-@save_var res_path basis
+@save_var res_path basis;
 
 # Compute ACE descriptors for energy and forces based on the atomistic training configurations.
 println("Computing energy descriptors of training dataset...")
@@ -67,7 +67,7 @@ e_descr_test = compute_local_descriptors(conf_test, basis;
                                          pbar = false)
 println("Computing force descriptors of test dataset...")
 f_descr_test = compute_force_descriptors(conf_test, basis;
-                                         pbar = false)
+                                         pbar = false);
 
 # Update test dataset by adding energy and force descriptors.
 ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test)
@@ -92,9 +92,9 @@ f_test, f_test_pred = get_all_forces(ds_test),
 @save_var res_path e_test
 @save_var res_path e_test_pred
 @save_var res_path f_test
-@save_var res_path f_test_pred
+@save_var res_path f_test_pred;
 
-# Compute training metrics
+# Compute training metrics.
 e_train_metrics = get_metrics(e_train, e_train_pred,
                               metrics = [mae, rmse, rsq],
                               label = "e_train")
@@ -105,7 +105,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics)
 @save_dict res_path train_metrics
 train_metrics
 
-# Compute test metrics
+# Compute test metrics.
 e_test_metrics = get_metrics(e_test, e_test_pred,
                              metrics = [mae, rmse, rsq],
                              label = "e_test")
@@ -116,19 +116,19 @@ test_metrics = merge(e_test_metrics, f_test_metrics)
 @save_dict res_path test_metrics
 test_metrics
 
-# Plot and save energy results
+# Plot and save energy results.
 e_plot = plot_energy(e_train, e_train_pred,
                      e_test, e_test_pred)
 @save_fig res_path e_plot
 DisplayAs.PNG(e_plot)
 
-# Plot and save force results
+# Plot and save force results.
 f_plot = plot_forces(f_train, f_train_pred,
                      f_test, f_test_pred)
 @save_fig res_path f_plot
 DisplayAs.PNG(f_plot)
 
-# Plot and save training force cosine
+# Plot and save training force cosine.
 e_train_plot = plot_energy(e_train, e_train_pred)
 f_train_plot = plot_forces(f_train, f_train_pred)
 f_train_cos  = plot_cos(f_train, f_train_pred)
@@ -137,7 +137,7 @@ f_train_cos  = plot_cos(f_train, f_train_pred)
 @save_fig res_path f_train_cos
 DisplayAs.PNG(f_train_cos)
 
-# Plot and save test force cosine
+# Plot and save test force cosine.
 e_test_plot = plot_energy(e_test, e_test_pred)
 f_test_plot = plot_forces(f_test, f_test_pred)
 f_test_cos  = plot_cos(f_test, f_test_pred)
diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
index 0e7276e7..53f3bdb1 100644
--- a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
+++ b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
@@ -2,14 +2,14 @@
 
 # ## Load packages and define paths.
 
-# Load packages
+# Load packages.
 using Unitful, UnitfulAtomic
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using LinearAlgebra, Plots
 
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na")
-ds_path = "$path/../data/Na/liquify_sodium.yaml"
+ds_path = "$path/../data/Na/liquify_sodium.yaml";
 
 # ## Load atomistic dataset and split it into training and test.
 
diff --git a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
index 19d16339..b82bf8fd 100644
--- a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
+++ b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
@@ -2,14 +2,14 @@
 
 # ## Load packages, define paths, and create experiment folder.
 
-# Load packages
+# Load packages.
 using LinearAlgebra, Random, InvertedIndices
 using Statistics, StatsBase, Distributions, Determinantal
 using Unitful, UnitfulAtomic
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using CSV, JLD, DataFrames
 
-# Define atomic type information
+# Define atomic type information.
 elname, elspec = "Si", [:Si] 
 
 # Define paths.
@@ -20,7 +20,7 @@ outpath = "$path/output/$elname/"
 # Load utility functions.
 include("$path/subsampling_utils.jl")
 
-# ## Load atomistic datasets
+# ## Load atomistic datasets.
 
 # Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
 file_arr = readext(inpath, "xyz")
@@ -28,7 +28,7 @@ nfile = length(file_arr)
 confs_arr = [load_data(inpath*file, ExtXYZ(u"eV", u"Å")) for file in file_arr]
 confs = concat_dataset(confs_arr)
 
-# Id of configurations per file
+# Id of configurations per file.
 n = 0
 confs_id = Vector{Vector{Int64}}(undef, nfile)
 for k = 1:nfile
@@ -37,9 +37,9 @@ for k = 1:nfile
     n += length(confs_arr[k])
 end
 
-# ## Subsampling by DPP
+# ## Subsampling by DPP.
 
-# Create ACE basis
+# Create ACE basis.
 nbody = 4
 deg = 5
 ace = ACE(species = elspec,             # species
@@ -61,7 +61,7 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)
 ds = DataSet(confs .+ e_descr .+ f_descr)
 ndata = length(ds)
 
-# ## Compute cross validation error from training dataset
+# ## Compute cross validation error from training dataset.
 batch_size = [80, 40]
 sel_ind = Dict{Int64, Vector}()
 cond_num = Dict{Int64, Vector}()
diff --git a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
index 6b8b8665..b391e6c9 100644
--- a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
+++ b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
@@ -2,7 +2,7 @@
 
 # ## Load packages, define paths, and create experiment folder.
 
-# Load packages
+# Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using Unitful, UnitfulAtomic
 using LinearAlgebra, Random, DisplayAs
@@ -10,13 +10,13 @@ using LinearAlgebra, Random, DisplayAs
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-aHfO2-1")
 ds_path =  "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
-res_path = "$path/results/"
+res_path = "$path/results/";
 
 # Load utility functions.
 include("$path/../utils/utils.jl")
 
 # Create experiment folder.
-run(`mkdir -p $res_path`)
+run(`mkdir -p $res_path`);
 
 # ## Load atomistic dataset and split it into training and test.
 
@@ -66,7 +66,7 @@ basis = ACE(species           = [:Hf, :O],
             wL                = 1.0,
             csp               = 1.0,
             r0                = 1.0)
-@save_var res_path basis
+@save_var res_path basis;
 
 # Compute ACE descriptors for energy and forces based on the atomistic training configurations.
 println("Computing energy descriptors of training dataset...")
@@ -96,7 +96,7 @@ e_descr_test = compute_local_descriptors(conf_test, basis;
                                          pbar = false)
 println("Computing force descriptors of test dataset...")
 f_descr_test = compute_force_descriptors(conf_test, basis;
-                                         pbar = false)
+                                         pbar = false);
 
 # Update test dataset by adding energy and force descriptors.
 ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test)
@@ -121,9 +121,9 @@ f_test, f_test_pred = get_all_forces(ds_test),
 @save_var res_path e_test
 @save_var res_path e_test_pred
 @save_var res_path f_test
-@save_var res_path f_test_pred
+@save_var res_path f_test_pred;
 
-# Compute training metrics
+# Compute training metrics.
 e_train_metrics = get_metrics(e_train, e_train_pred,
                               metrics = [mae, rmse, rsq],
                               label = "e_train")
@@ -134,7 +134,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics)
 @save_dict res_path train_metrics
 train_metrics
 
-# Compute test metrics
+# Compute test metrics.
 e_test_metrics = get_metrics(e_test, e_test_pred,
                              metrics = [mae, rmse, rsq],
                              label = "e_test")
@@ -145,19 +145,19 @@ test_metrics = merge(e_test_metrics, f_test_metrics)
 @save_dict res_path test_metrics
 test_metrics
 
-# Plot and save energy results
+# Plot and save energy results.
 e_plot = plot_energy(e_train, e_train_pred,
                      e_test, e_test_pred)
 @save_fig res_path e_plot
 DisplayAs.PNG(e_plot)
 
-# Plot and save force results
+# Plot and save force results.
 f_plot = plot_forces(f_train, f_train_pred,
                      f_test, f_test_pred)
 @save_fig res_path f_plot
 DisplayAs.PNG(f_plot)
 
-# Plot and save training force cosine
+# Plot and save training force cosine.
 e_train_plot = plot_energy(e_train, e_train_pred)
 f_train_plot = plot_forces(f_train, f_train_pred)
 f_train_cos  = plot_cos(f_train, f_train_pred)
@@ -166,7 +166,7 @@ f_train_cos  = plot_cos(f_train, f_train_pred)
 @save_fig res_path f_train_cos
 DisplayAs.PNG(f_train_cos)
 
-# Plot and save test force cosine
+# Plot and save test force cosine.
 e_test_plot = plot_energy(e_test, e_test_pred)
 f_test_plot = plot_forces(f_test, f_test_pred)
 f_test_cos  = plot_cos(f_test, f_test_pred)
diff --git a/examples/LJ-Ar/lennard-jones-ar.jl b/examples/LJ-Ar/lennard-jones-ar.jl
index 7dbfd7d6..c0b509df 100644
--- a/examples/LJ-Ar/lennard-jones-ar.jl
+++ b/examples/LJ-Ar/lennard-jones-ar.jl
@@ -1,17 +1,17 @@
 # # Load Ar dataset with energies computed by Lennard-Jones and postprocess
 
-# ## Load packages and define paths
+# ## Load packages and define paths.
 
-# Load packages
+# Load packages.
 using Unitful, UnitfulAtomic
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using LinearAlgebra, Plots, DisplayAs
 
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/LJ-Ar")
-ds_path = "$path/../data/LJ-AR/lj-ar.yaml"
+ds_path = "$path/../data/LJ-AR/lj-ar.yaml";
 
-# ## Load atomistic dataset
+# ## Load atomistic dataset.
 ds, thermo = load_data(ds_path, YAML(:Ar, u"eV", u"Å"))
 ds = @views ds[2:end] # Filter first configuration (zero energy)
 
@@ -23,15 +23,15 @@ n_atoms = length(first(systems)) # Note: in this dataset all systems contain the
 positions = position.(systems)
 dists_origin = map(x->ustrip.(norm.(x)), positions)
 
-# Extract LJ energies from dataset
+# Extract LJ energies from dataset.
 energies = get_values.(get_energy.(ds))
 
-# Define time range
+# Define time range.
 time_range = 0.5:0.5:5000
 
 # ## Post-process data.
 
-# Plot distance from origin vs time
+# Plot distance from origin vs time.
 p = plot(xlabel = "τ | ps",
          ylabel = "Distance from origin | Å", 
          dpi = 300, fontsize = 12)
@@ -40,7 +40,7 @@ for i = 1:n_atoms
 end
 DisplayAs.PNG(p)
 
-# Plot LJ energies vs time
+# Plot LJ energies vs time.
 p = plot(time_range, energies,
          xlabel = "τ | ps",
          ylabel = "Lennard Jones energy | eV",
diff --git a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl
index 6c6254af..342f3a6d 100644
--- a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl
+++ b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl
@@ -2,7 +2,7 @@
 
 # ## Load packages, define paths, and create experiment folder.
 
-# Load packages
+# Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using Unitful, UnitfulAtomic
 using LinearAlgebra, Random, DisplayAs
@@ -10,26 +10,26 @@ using LinearAlgebra, Random, DisplayAs
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/PCA-ACE-aHfO2")
 ds_path =  "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
-res_path = "$path/results/"
+res_path = "$path/results/";
 
 # Load utility functions.
 include("$path/../utils/utils.jl")
 
 # Create experiment folder.
-run(`mkdir -p $res_path`)
+run(`mkdir -p $res_path`);
 
 # ## Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
-ds = load_data(ds_path, uparse("eV"), uparse("Å"))
+ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only first 1K samples are used in this example.
 
-# Split atomistic dataset into training and test
-n_train, n_test = 50, 50 # only 50 samples per dataset are used in this example.
-conf_train, conf_test = split(ds[1:1000], n_train, n_test)
+# Split atomistic dataset into training and test.
+n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
+conf_train, conf_test = split(ds, n_train, n_test)
 
 # ## Create ACE basis, compute descriptors and add them to the dataset.
 
-# Create ACE basis
+# Create ACE basis.
 basis = ACE(species           = [:Hf, :O],
             body_order        = 3,
             polynomial_degree = 4,
@@ -37,7 +37,7 @@ basis = ACE(species           = [:Hf, :O],
             wL                = 1.0,
             csp               = 1.0,
             r0                = 1.0)
-@save_var res_path basis
+@save_var res_path basis;
 
 # Compute ACE descriptors for energy and forces based on the atomistic training configurations.
 println("Computing energy descriptors of training dataset...")
@@ -101,7 +101,7 @@ f_test, f_test_pred = get_all_forces(ds_test),
 @save_var res_path e_test
 @save_var res_path e_test_pred
 @save_var res_path f_test
-@save_var res_path f_test_pred
+@save_var res_path f_test_pred;
 
 # Compute training metrics.
 e_train_metrics = get_metrics(e_train, e_train_pred,
diff --git a/examples/README.md b/examples/README.md
index 5f8c0da6..8fe758f4 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,21 +1,4 @@
 # How to run an example
 
-Change the directory to the desired example folder. E.g.
-```bash
-$ cd PotentialLearning.jl/examples/DPP-ACE-Na
-```
+See the instructions in the [PotentialLearning.jl documentation](https://cesmix-mit.github.io/PotentialLearning.jl/dev).
 
-Open Julia REPL, activate ```Project.toml``` file in folder ```examples```, and chose the number of threads. E.g.
-```bash
-$ julia --project=. --threads=4
-```
-
-Type ```]``` in Julia REPL, and then run ```instantiate```.
-```julia
-    pkg> instantiate
-```
-
-Include example script. E.g.
-```julia
-    julia> include("fit-dpp-ace-na.jl")
-```

From 04e15e0b66f62a25dc487d83f93241b933b6342b Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan <lujan.emmanuel@gmail.com>
Date: Tue, 18 Jun 2024 14:51:20 -0400
Subject: [PATCH 3/4] Small improvements in documentation.

---
 docs/make.jl                                  | 18 ++++++++----------
 examples/ACE-aHfO2/fit-ace-ahfo2.jl           | 10 +++++-----
 examples/DPP-ACE-Na/fit-dpp-ace-na.jl         |  8 ++++----
 examples/DPP-ACE-Si/fit-dpp-ace-si.jl         |  8 ++++----
 examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl | 12 ++++++------
 examples/LJ-Ar/lennard-jones-ar.jl            |  8 ++++----
 examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl   | 12 ++++++------
 7 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 441d1f44..ae25211e 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,5 +1,3 @@
-#pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack
-
 using PotentialLearning
 using Documenter
 using DocumenterCitations
@@ -37,22 +35,22 @@ end
 
 # Basic examples
 examples = [
-    "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
-    "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
+    "1 - Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
+    "2 - Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
 ]
 basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
 # Subsampling examples
 examples = [
-    "Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
-    "Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
-    "Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
+    "1 - Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
+    "2 - Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
+    "3 - Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
 ]
 ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
 # Dimension reduction examples
 examples = [
-    "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
+    "1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
 ]
 dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
@@ -73,8 +71,8 @@ makedocs(
       pages = ["Home" => "index.md",
                "How to run the examples" => "how-to-run-the-examples.md",
                "Basic examples" => basic_examples,
-               "Subsampling examples" => ss_examples,
-               "Dimension reduction examples" => dr_examples,
+               "Optimize atomistic data via intelligent subsampling" => ss_examples,
+               "Optimize interatomic potential models via dimension reduction" => dr_examples,
                "API" => "api.md"],
       format = Documenter.HTML(;
         prettyurls = get(ENV, "CI", "false") == "true",
diff --git a/examples/ACE-aHfO2/fit-ace-ahfo2.jl b/examples/ACE-aHfO2/fit-ace-ahfo2.jl
index 9fa09b38..67d740b0 100644
--- a/examples/ACE-aHfO2/fit-ace-ahfo2.jl
+++ b/examples/ACE-aHfO2/fit-ace-ahfo2.jl
@@ -1,6 +1,6 @@
 # # Fit a-HfO2 dataset with ACE
 
-# ## Load packages, define paths, and create experiment folder.
+# ## a. Load packages, define paths, and create experiment folder.
 
 # Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
@@ -18,7 +18,7 @@ include("$path/../utils/utils.jl")
 # Create experiment folder.
 run(`mkdir -p $res_path`);
 
-# ## Load atomistic dataset and split it into training and test.
+# ## b. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
 ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example.
@@ -27,7 +27,7 @@ ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K
 n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
 conf_train, conf_test = split(ds, n_train, n_test)
 
-# ## Create ACE basis, compute descriptors and add them to the dataset.
+# ## c. Create ACE basis, compute descriptors and add them to the dataset.
 
 # Create ACE basis
 basis = ACE(species           = [:Hf, :O],
@@ -50,7 +50,7 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
 # Update training dataset by adding energy and force descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)
 
-# ## Learn ACE coefficients based on ACE descriptors and DFT data.
+# ## d. Learn ACE coefficients based on ACE descriptors and DFT data.
 println("Learning energies and forces...")
 lb = LBasisPotential(basis)
 ws, int = [1.0, 1.0], false
@@ -59,7 +59,7 @@ learn!(lb, ds_train, ws, int)
 @save_var res_path lb.β0
 lb.β, lb.β0
 
-# ## Post-process output: calculate metrics, create plots, and save results.
+# ## e. Post-process output: calculate metrics, create plots, and save results.
 
 # Compute ACE descriptors for energy and forces based on the atomistic test configurations.
 println("Computing energy descriptors of test dataset...")
diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
index 53f3bdb1..23ee68c0 100644
--- a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
+++ b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
@@ -11,7 +11,7 @@ using LinearAlgebra, Plots
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na")
 ds_path = "$path/../data/Na/liquify_sodium.yaml";
 
-# ## Load atomistic dataset and split it into training and test.
+# ## a. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.).
 confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u"Å"))
@@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end]
 # Split atomistic dataset into training and test.
 conf_train, conf_test = confs[1:1000], confs[1001:end]
 
-# ## Create ACE basis, compute energy descriptors and add them to the dataset.
+# ## b. Create ACE basis, compute energy descriptors and add them to the dataset.
 
 # Create ACE basis.
 ace = ACE(species = [:Na],         # species
@@ -38,7 +38,7 @@ e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodi
 # Update training dataset by adding energy and force descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train)
 
-# ## Subsampling via DPP.
+# ## c. Subsampling via DPP.
 
 # Create DPP subselector.
 dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200)
@@ -51,7 +51,7 @@ lb = LBasisPotential(ace)
 α = 1e-8
 Σ = learn!(lb, ds_train[dpp_inds], α)
 
-# ## Post-process output: calculate metrics, create plots, and save results.
+# ## d. Post-process output: calculate metrics, create plots, and save results.
 
 # Update test dataset by adding energy descriptors.
 println("Computing local descriptors of test dataset")
diff --git a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
index b82bf8fd..ed087c55 100644
--- a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
+++ b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
@@ -1,6 +1,6 @@
 # # Subsample Si dataset and fit with ACE
 
-# ## Load packages, define paths, and create experiment folder.
+# ## a. Load packages, define paths, and create experiment folder.
 
 # Load packages.
 using LinearAlgebra, Random, InvertedIndices
@@ -20,7 +20,7 @@ outpath = "$path/output/$elname/"
 # Load utility functions.
 include("$path/subsampling_utils.jl")
 
-# ## Load atomistic datasets.
+# ## b. Load atomistic datasets.
 
 # Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
 file_arr = readext(inpath, "xyz")
@@ -37,7 +37,7 @@ for k = 1:nfile
     n += length(confs_arr[k])
 end
 
-# ## Subsampling by DPP.
+# ## c. Subsampling by DPP.
 
 # Create ACE basis.
 nbody = 4
@@ -61,7 +61,7 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)
 ds = DataSet(confs .+ e_descr .+ f_descr)
 ndata = length(ds)
 
-# ## Compute cross validation error from training dataset.
+# ## d. Compute cross validation error from training dataset.
 batch_size = [80, 40]
 sel_ind = Dict{Int64, Vector}()
 cond_num = Dict{Int64, Vector}()
diff --git a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
index b391e6c9..8b6f99dd 100644
--- a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
+++ b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
@@ -1,6 +1,6 @@
 # # Subsample a-HfO2 dataset and fit with ACE
 
-# ## Load packages, define paths, and create experiment folder.
+# ## a. Load packages, define paths, and create experiment folder.
 
 # Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
@@ -18,7 +18,7 @@ include("$path/../utils/utils.jl")
 # Create experiment folder.
 run(`mkdir -p $res_path`);
 
-# ## Load atomistic dataset and split it into training and test.
+# ## b. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
 ds = load_data(ds_path, uparse("eV"), uparse("Å"))
@@ -28,7 +28,7 @@ n_train, n_test = 100, 50 # Few samples per dataset are used in this example.
 conf_train, conf_test = split(ds[1:1000], n_train, n_test)
 
 
-# ## Subsampling
+# ## c. Subsampling
 
 # Compute ACE descriptors for energies as subsampling input.
 basis = ACE(species           = [:Hf, :O],
@@ -56,7 +56,7 @@ inds = get_random_subset(dataset_selector)
 conf_train = @views conf_train[inds]
 
 
-# ## Create ACE basis, compute descriptors and add them to the dataset.
+# ## d. Create ACE basis, compute descriptors, and add them to the dataset.
 
 # Create ACE basis
 basis = ACE(species           = [:Hf, :O],
@@ -79,7 +79,7 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
 # Update training dataset by adding energy and force descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)
 
-# ## Learn ACE coefficients based on ACE descriptors and DFT data.
+# ## e. Learn ACE coefficients based on ACE descriptors and DFT data.
 println("Learning energies and forces...")
 lb = LBasisPotential(basis)
 ws, int = [1.0, 1.0], false
@@ -88,7 +88,7 @@ learn!(lb, ds_train, ws, int)
 @save_var res_path lb.β0
 lb.β, lb.β0
 
-# ## Post-process output: calculate metrics, create plots, and save results.
+# ## f. Post-process output: calculate metrics, create plots, and save results.
 
 # Compute ACE descriptors for energy and forces based on the atomistic test configurations.
 println("Computing energy descriptors of test dataset...")
diff --git a/examples/LJ-Ar/lennard-jones-ar.jl b/examples/LJ-Ar/lennard-jones-ar.jl
index c0b509df..7e5b52ec 100644
--- a/examples/LJ-Ar/lennard-jones-ar.jl
+++ b/examples/LJ-Ar/lennard-jones-ar.jl
@@ -1,6 +1,6 @@
 # # Load Ar dataset with energies computed by Lennard-Jones and postprocess
 
-# ## Load packages and define paths.
+# ## a. Load packages and define paths.
 
 # Load packages.
 using Unitful, UnitfulAtomic
@@ -11,11 +11,11 @@ using LinearAlgebra, Plots, DisplayAs
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/LJ-Ar")
 ds_path = "$path/../data/LJ-AR/lj-ar.yaml";
 
-# ## Load atomistic dataset.
+# ## b. Load atomistic dataset.
 ds, thermo = load_data(ds_path, YAML(:Ar, u"eV", u"Å"))
 ds = @views ds[2:end] # Filter first configuration (zero energy)
 
-# ## Compute distance from origin, extract LJ energies, and define time range.
+# ## c. Compute distance from origin, extract LJ energies, and define time range.
 
 # Get atom positions and compute distance from origin.
 systems = get_system.(ds)
@@ -29,7 +29,7 @@ energies = get_values.(get_energy.(ds))
 # Define time range.
 time_range = 0.5:0.5:5000
 
-# ## Post-process data.
+# ## d. Post-process data.
 
 # Plot distance from origin vs time.
 p = plot(xlabel = "τ | ps",
diff --git a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl
index 342f3a6d..43eafca9 100644
--- a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl
+++ b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl
@@ -1,6 +1,6 @@
 # # Reduce ACE descriptors with PCA and fit a-HfO2 dataset
 
-# ## Load packages, define paths, and create experiment folder.
+# ## a. Load packages, define paths, and create experiment folder.
 
 # Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
@@ -18,7 +18,7 @@ include("$path/../utils/utils.jl")
 # Create experiment folder.
 run(`mkdir -p $res_path`);
 
-# ## Load atomistic dataset and split it into training and test.
+# ## b. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
 ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only first 1K samples are used in this example.
@@ -27,7 +27,7 @@ ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only first 1K sam
 n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
 conf_train, conf_test = split(ds, n_train, n_test)
 
-# ## Create ACE basis, compute descriptors and add them to the dataset.
+# ## c. Create ACE basis, compute descriptors, and add them to the dataset.
 
 # Create ACE basis.
 basis = ACE(species           = [:Hf, :O],
@@ -50,13 +50,13 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
 # Update training dataset by adding energy and force descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)
 
-# ## Dimension reduction of energy and force descriptors of training dataset.
+# ## d. Dimension reduction of energy and force descriptors of training dataset.
 n_desc = 20
 pca = PCAState(tol = n_desc)
 fit!(ds_train, pca)
 transform!(ds_train, pca)
 
-# ## Learn ACE coefficients based on ACE descriptors and DFT data.
+# ## e. Learn ACE coefficients based on ACE descriptors and DFT data.
 println("Learning energies and forces...")
 lb = LBasisPotential(basis)
 ws, int = [1.0, 1.0], true
@@ -65,7 +65,7 @@ learn!(lb, ds_train, ws, int)
 @save_var res_path lb.β0
 lb.β, lb.β0
 
-# ## Post-process output: calculate metrics, create plots, and save results.
+# ## f. Post-process output: calculate metrics, create plots, and save results.
 
 # Compute ACE descriptors for energy and forces based on the atomistic test configurations.
 println("Computing energy descriptors of test dataset...")

From 8bc12cd571cf9c99161275ce66ac767e67a803ea Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan <lujan.emmanuel@gmail.com>
Date: Tue, 18 Jun 2024 15:13:13 -0400
Subject: [PATCH 4/4] Small fix in documentation

---
 examples/DPP-ACE-Na/fit-dpp-ace-na.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
index 23ee68c0..db7b70b6 100644
--- a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
+++ b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
@@ -1,6 +1,6 @@
 # # Subsample Na dataset with DPP and fit energies with ACE
 
-# ## Load packages and define paths.
+# ## a. Load packages and define paths.
 
 # Load packages.
 using Unitful, UnitfulAtomic
@@ -11,7 +11,7 @@ using LinearAlgebra, Plots
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na")
 ds_path = "$path/../data/Na/liquify_sodium.yaml";
 
-# ## a. Load atomistic dataset and split it into training and test.
+# ## b. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.).
 confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u"Å"))
@@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end]
 # Split atomistic dataset into training and test.
 conf_train, conf_test = confs[1:1000], confs[1001:end]
 
-# ## b. Create ACE basis, compute energy descriptors and add them to the dataset.
+# ## c. Create ACE basis, compute energy descriptors, and add them to the dataset.
 
 # Create ACE basis.
 ace = ACE(species = [:Na],         # species
@@ -38,7 +38,7 @@ e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodi
 # Update training dataset by adding energy and force descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train)
 
-# ## c. Subsampling via DPP.
+# ## d. Subsampling via DPP.
 
 # Create DPP subselector.
 dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200)
@@ -51,7 +51,7 @@ lb = LBasisPotential(ace)
 α = 1e-8
 Σ = learn!(lb, ds_train[dpp_inds], α)
 
-# ## d. Post-process output: calculate metrics, create plots, and save results.
+# ## e. Post-process output: calculate metrics, create plots, and save results.
 
 # Update test dataset by adding energy descriptors.
 println("Computing local descriptors of test dataset")