Merge pull request #75 from cesmix-mit/doc

New documentation sections + Small improvements in examples and documentation.
cesmix-mit · Jun 18, 2024 · d16e181 · d16e181
2 parents ce274f2 + 8bc12cd
commit d16e181
Show file tree

Hide file tree

Showing 9 changed files with 119 additions and 119 deletions.
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,5 +1,3 @@
-pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack
-
 using PotentialLearning
 using Documenter
 using DocumenterCitations
@@ -12,35 +10,51 @@ DocMeta.setdocmeta!(
     recursive = true,
 )
 
+# Citations ####################################################################
 
 bib = CitationBibliography(joinpath(@__DIR__, "citation.bib"))
 
-# Generate examples
+# Generate examples ############################################################
 
 const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
 
+function create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
+    for (_, example_path) in examples
+        s = split(example_path, "/")
+        sub_path, file_name = string(s[1:end-1]...), s[end]
+        example_filepath = joinpath(EXAMPLES_DIR, example_path)
+        Literate.markdown(example_filepath,
+                          joinpath(OUTPUT_DIR, sub_path),
+                          documenter = true)
+    end
+    examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md"))
+                for (title, example_path) in examples]
+    return examples
+end
+
+# Basic examples
 examples = [
-    "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-aHfO2.jl",
-    "Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
-    "Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
-    "Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
-    "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
-    "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
+    "1 - Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
+    "2 - Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
 ]
+basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
-for (_, example_path) in examples
-    s = split(example_path, "/")
-    sub_path, file_name = string(s[1:end-1]...), s[end]
-    example_filepath = joinpath(EXAMPLES_DIR, example_path)
-    Literate.markdown(example_filepath,
-                      joinpath(OUTPUT_DIR, sub_path),
-                      documenter = true)
-end
+# Subsampling examples
+examples = [
+    "1 - Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
+    "2 - Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
+    "3 - Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
+]
+ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
-examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md"))
-            for (title, example_path) in examples]
+# Dimension reduction examples
+examples = [
+    "1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
+]
+dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
+# Make and deploy docs #########################################################
 
 makedocs(
       root    =  joinpath(dirname(pathof(PotentialLearning)), "..", "docs"),
@@ -56,7 +70,9 @@ makedocs(
       draft = false,
       pages = ["Home" => "index.md",
                "How to run the examples" => "how-to-run-the-examples.md",
-               "Examples" => examples,
+               "Basic examples" => basic_examples,
+               "Optimize atomistic data via intelligent subsampling" => ss_examples,
+               "Optimize interatomic potential models via dimension reduction" => dr_examples,
                "API" => "api.md"],
       format = Documenter.HTML(;
         prettyurls = get(ENV, "CI", "false") == "true",
@@ -71,3 +87,4 @@ deploydocs(;
     devbranch = "main",
     push_preview = true,
 )
+
diff --git a/docs/src/how-to-run-the-examples.md b/docs/src/how-to-run-the-examples.md
@@ -16,7 +16,7 @@ Clone `PotentialLearning.jl` repository in your working directory.
 ```
 Navigate to any folder within `PotentialLearning.jl/examples`. E.g.
 ```shell
-    $ cd PotentialLearning.jl/examples/Na
+    $ cd PotentialLearning.jl/examples/DPP-ACE-aHfO2-1
 ```
 
 ## Run example
@@ -30,6 +30,6 @@ Type `]` to enter the Pkg REPL and instantiate.
 ```
 Finally, include the example file.
 ```julia
-    julia> include("fit-dpp-ace-na.jl")
+    julia> include("fit-dpp-ace-ahfo2.jl")
 ```
 
diff --git a/examples/ACE-aHfO2/fit-ace-aHfO2.jl → examples/ACE-aHfO2/fit-ace-ahfo2.jl b/examples/ACE-aHfO2/fit-ace-aHfO2.jl → examples/ACE-aHfO2/fit-ace-ahfo2.jl
@@ -1,33 +1,33 @@
 # # Fit a-HfO2 dataset with ACE
 
-# ## Load packages, define paths, and create experiment folder.
+# ## a. Load packages, define paths, and create experiment folder.
 
-# Load packages
+# Load packages.
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using Unitful, UnitfulAtomic
 using LinearAlgebra, Random, DisplayAs
 
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/ACE-aHfO2")
 ds_path =  "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
-res_path = "$path/results/"
+res_path = "$path/results/";
 
 # Load utility functions.
 include("$path/../utils/utils.jl")
 
 # Create experiment folder.
-run(`mkdir -p $res_path`)
+run(`mkdir -p $res_path`);
 
-# ## Load atomistic dataset and split it into training and test.
+# ## b. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
-ds = load_data(ds_path, uparse("eV"), uparse("Å"))
+ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example.
 
 # Split atomistic dataset into training and test
-n_train, n_test = 50, 50 # only 50 samples per dataset are used in this example.
-conf_train, conf_test = split(ds[1:1000], n_train, n_test)
+n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
+conf_train, conf_test = split(ds, n_train, n_test)
 
-# ## Create ACE basis, compute descriptors and add them to the dataset.
+# ## c. Create ACE basis, compute descriptors and add them to the dataset.
 
 # Create ACE basis
 basis = ACE(species           = [:Hf, :O],
@@ -37,7 +37,7 @@ basis = ACE(species           = [:Hf, :O],
             wL                = 1.0,
             csp               = 1.0,
             r0                = 1.0)
-@save_var res_path basis
+@save_var res_path basis;
 
 # Compute ACE descriptors for energy and forces based on the atomistic training configurations.
 println("Computing energy descriptors of training dataset...")
@@ -50,7 +50,7 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
 # Update training dataset by adding energy and force descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)
 
-# ## Learn ACE coefficients based on ACE descriptors and DFT data.
+# ## d. Learn ACE coefficients based on ACE descriptors and DFT data.
 println("Learning energies and forces...")
 lb = LBasisPotential(basis)
 ws, int = [1.0, 1.0], false
@@ -59,15 +59,15 @@ learn!(lb, ds_train, ws, int)
 @save_var res_path lb.β0
 lb.β, lb.β0
 
-# ## Post-process output: calculate metrics, create plots, and save results.
+# ## e. Post-process output: calculate metrics, create plots, and save results.
 
 # Compute ACE descriptors for energy and forces based on the atomistic test configurations.
 println("Computing energy descriptors of test dataset...")
 e_descr_test = compute_local_descriptors(conf_test, basis;
                                          pbar = false)
 println("Computing force descriptors of test dataset...")
 f_descr_test = compute_force_descriptors(conf_test, basis;
-                                         pbar = false)
+                                         pbar = false);
 
 # Update test dataset by adding energy and force descriptors.
 ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test)
@@ -92,9 +92,9 @@ f_test, f_test_pred = get_all_forces(ds_test),
 @save_var res_path e_test
 @save_var res_path e_test_pred
 @save_var res_path f_test
-@save_var res_path f_test_pred
+@save_var res_path f_test_pred;
 
-# Compute training metrics
+# Compute training metrics.
 e_train_metrics = get_metrics(e_train, e_train_pred,
                               metrics = [mae, rmse, rsq],
                               label = "e_train")
@@ -105,7 +105,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics)
 @save_dict res_path train_metrics
 train_metrics
 
-# Compute test metrics
+# Compute test metrics.
 e_test_metrics = get_metrics(e_test, e_test_pred,
                              metrics = [mae, rmse, rsq],
                              label = "e_test")
@@ -116,19 +116,19 @@ test_metrics = merge(e_test_metrics, f_test_metrics)
 @save_dict res_path test_metrics
 test_metrics
 
-# Plot and save energy results
+# Plot and save energy results.
 e_plot = plot_energy(e_train, e_train_pred,
                      e_test, e_test_pred)
 @save_fig res_path e_plot
 DisplayAs.PNG(e_plot)
 
-# Plot and save force results
+# Plot and save force results.
 f_plot = plot_forces(f_train, f_train_pred,
                      f_test, f_test_pred)
 @save_fig res_path f_plot
 DisplayAs.PNG(f_plot)
 
-# Plot and save training force cosine
+# Plot and save training force cosine.
 e_train_plot = plot_energy(e_train, e_train_pred)
 f_train_plot = plot_forces(f_train, f_train_pred)
 f_train_cos  = plot_cos(f_train, f_train_pred)
@@ -137,7 +137,7 @@ f_train_cos  = plot_cos(f_train, f_train_pred)
 @save_fig res_path f_train_cos
 DisplayAs.PNG(f_train_cos)
 
-# Plot and save test force cosine
+# Plot and save test force cosine.
 e_test_plot = plot_energy(e_test, e_test_pred)
 f_test_plot = plot_forces(f_test, f_test_pred)
 f_test_cos  = plot_cos(f_test, f_test_pred)

diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl
@@ -1,17 +1,17 @@
 # # Subsample Na dataset with DPP and fit energies with ACE
 
-# ## Load packages and define paths.
+# ## a. Load packages and define paths.
 
-# Load packages
+# Load packages.
 using Unitful, UnitfulAtomic
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using LinearAlgebra, Plots
 
 # Define paths.
 path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na")
-ds_path = "$path/../data/Na/liquify_sodium.yaml"
+ds_path = "$path/../data/Na/liquify_sodium.yaml";
 
-# ## Load atomistic dataset and split it into training and test.
+# ## b. Load atomistic dataset and split it into training and test.
 
 # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.).
 confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u"Å"))
@@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end]
 # Split atomistic dataset into training and test.
 conf_train, conf_test = confs[1:1000], confs[1001:end]
 
-# ## Create ACE basis, compute energy descriptors and add them to the dataset.
+# ## c. Create ACE basis, compute energy descriptors and add them to the dataset.
 
 # Create ACE basis.
 ace = ACE(species = [:Na],         # species
@@ -38,7 +38,7 @@ e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodi
 # Update training dataset by adding energy descriptors.
 ds_train = DataSet(conf_train .+ e_descr_train)
 
-# ## Subsampling via DPP.
+# ## d. Subsampling via DPP.
 
 # Create DPP subselector.
 dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200)
@@ -51,7 +51,7 @@ lb = LBasisPotential(ace)
 α = 1e-8
 Σ = learn!(lb, ds_train[dpp_inds], α)
 
-# ## Post-process output: calculate metrics, create plots, and save results.
+# ## e. Post-process output: calculate metrics, create plots, and save results.
 
 # Update test dataset by adding energy descriptors.
 println("Computing local descriptors of test dataset")

diff --git a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl
@@ -1,15 +1,15 @@
 # # Subsample Si dataset and fit with ACE
 
-# ## Load packages, define paths, and create experiment folder.
+# ## a. Load packages, define paths, and create experiment folder.
 
-# Load packages
+# Load packages.
 using LinearAlgebra, Random, InvertedIndices
 using Statistics, StatsBase, Distributions, Determinantal
 using Unitful, UnitfulAtomic
 using AtomsBase, InteratomicPotentials, PotentialLearning
 using CSV, JLD, DataFrames
 
-# Define atomic type information
+# Define atomic type information.
 elname, elspec = "Si", [:Si] 
 
 # Define paths.
@@ -20,15 +20,15 @@ outpath = "$path/output/$elname/"
 # Load utility functions.
 include("$path/subsampling_utils.jl")
 
-# ## Load atomistic datasets
+# ## b. Load atomistic datasets.
 
 # Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
 file_arr = readext(inpath, "xyz")
 nfile = length(file_arr)
 confs_arr = [load_data(inpath*file, ExtXYZ(u"eV", u"Å")) for file in file_arr]
 confs = concat_dataset(confs_arr)
 
-# Id of configurations per file
+# Id of configurations per file.
 n = 0
 confs_id = Vector{Vector{Int64}}(undef, nfile)
 for k = 1:nfile
@@ -37,9 +37,9 @@ for k = 1:nfile
     n += length(confs_arr[k])
 end
 
-# ## Subsampling by DPP
+# ## c. Subsampling by DPP.
 
-# Create ACE basis
+# Create ACE basis.
 nbody = 4
 deg = 5
 ace = ACE(species = elspec,             # species
@@ -61,7 +61,7 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)
 ds = DataSet(confs .+ e_descr .+ f_descr)
 ndata = length(ds)
 
-# ## Compute cross validation error from training dataset
+# ## d. Compute cross validation error from training dataset.
 batch_size = [80, 40]
 sel_ind = Dict{Int64, Vector}()
 cond_num = Dict{Int64, Vector}()