From ee70d68265945c2325a2903fc21c35368705c488 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan <lujan.emmanuel@gmail.com>
Date: Fri, 5 Jul 2024 18:55:35 -0400
Subject: [PATCH] Some documentation fixes and improvements.

---
 README.md         |  3 +++
 docs/make.jl      | 14 +++++++-------
 docs/src/index.md | 27 +++++++++++++++++++--------
 3 files changed, 29 insertions(+), 15 deletions(-)
diff --git a/README.md b/README.md
index d39dbbb..d61f7b5 100644
--- a/README.md
+++ b/README.md
@@ -53,11 +53,14 @@ See [example](https://cesmix-mit.github.io/PotentialLearning.jl/dev/generated/Op
 The models are compatible with the interfaces of our sister package [InteratomicPotentials.jl](https://github.com/cesmix-mit/InteratomicPotentials.jl). In particular, we are interested in maintaining compatibility with [ACESuit](https://github.com/ACEsuit), as well as integrating [LAMMPS](https://www.lammps.org/) based potentials such as [ML-POD](https://docs.lammps.org/Packages_details.html#pkg-ml-pod) and [ML-PACE](https://docs.lammps.org/Packages_details.html#ml-pace-package). We are also working to provide neural network potential architecture optimization.
 
 <ins>**Compress your interatomic potential data and model**</ins> using dimensionality reduction of energy and force descriptors:
+1) Define a PCA state, fit PCA with the energy and force descriptors of your dataset, and transform all dataset descriptors.
 ```julia
 pca = PCAState(tol = n_desc)
 fit!(ds_train, pca)
 transform!(ds_train, pca)
 ```
+2) Export PCA fitted data to be used in your workflow.
+
 See [example](https://cesmix-mit.github.io/PotentialLearning.jl/dev/generated/PCA-ACE-aHfO2/fit-pca-ace-ahfo2/).
 
 We are working to provide feature selection of energy and force descriptors based on [CUR](https://github.com/JuliaLinearAlgebra/LowRankApprox.jl).
diff --git a/docs/make.jl b/docs/make.jl
index 2814cfd..f508f6e 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -35,32 +35,32 @@ end
 
 # Basic examples
 examples = [
-    "Example 1 - Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
+    "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
 ]
 basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
 # Subsampling examples
 examples = [
-    "Example 1 - Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
-    "Example 2 - Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
-    "Example 3 - Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
+    "Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
+    "Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
+    "Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
 ]
 ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
 # Optimization examples
 examples = [
-    "Example 1 - Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl",
+    "Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl",
 ]
 opt_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
 # Dimension reduction examples
 examples = [
-    "Example 1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
+    "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
 ]
 dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
 examples = [
-    "Example 1 - Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
+    "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
 ]
 misc_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
 
diff --git a/docs/src/index.md b/docs/src/index.md
index 701170c..7539098 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -3,24 +3,29 @@
 Optimize your atomistic data and interatomic potential models in your molecular dynamic workflows.
 
 
-<ins>**Reduce expensive ***Density functional theory*** calculations**</ins>  while maintaining training accuracy by intelligently subsampling your atomistic dataset:
+### Reduce expensive Density functional theory calculations
 
-1) Subsample your [atomistic configurations](https://github.com/JuliaMolSim/AtomsBase.jl) using a Determinantal Point Process ([DPP](https://github.com/dahtah/Determinantal.jl)) based algorithm that compares energy descriptors computed with the Atomic Cluster Expansion ([ACE](https://github.com/ACEsuit)).
+Reduce expensive Density functional theory calculations while maintaining training accuracy by intelligently subsampling your atomistic dataset:
+
+1 - Subsample your [atomistic configurations](https://github.com/JuliaMolSim/AtomsBase.jl) using a Determinantal Point Process ([DPP](https://github.com/dahtah/Determinantal.jl)) based algorithm that compares energy descriptors computed with the Atomic Cluster Expansion ([ACE](https://github.com/ACEsuit)).
 ```julia
 ds = DataSet(conf_train .+ e_descr)
 dataset_selector = kDPP(ds, GlobalMean(), DotProduct())
 inds = get_random_subset(dataset_selector)
 conf_train = @views conf_train[inds]
 ```
-2) Export the reduced dataset, use Density functional theory ([DFT](https://docs.dftk.org/stable/)) on it, and fit your model.
+2 - Export the reduced dataset, use Density functional theory ([DFT](https://docs.dftk.org/stable/)) on it, and fit your model.
 
 See [example](https://cesmix-mit.github.io/PotentialLearning.jl/dev/generated/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2/).
 
 We are working to provide different intelligent subsampling algorithms based on [DPP](https://github.com/dahtah/Determinantal.jl), [DBSCAN](https://docs.google.com/document/d/1SWAanEWQkpsbr2lqetMO3uvdX_QK-Z7dwrgPaM1Dl0o/edit), and [CUR](https://github.com/JuliaLinearAlgebra/LowRankApprox.jl); highly scalable parallel subsampling via hierarchical subsampling and [distributed parallelism](https://github.com/JuliaParallel/Dagger.jl); and optimal subsampler selection.
 
-<ins>**Get fast and accurate interatomic potential models**</ins>  through parallel multi-objective hyper-parameter optimization:
 
-1) Define the interatomic potential model, hyper-parameter value ranges, and custom loss function. Then, [optimize](https://github.com/baggepinnen/Hyperopt.jl) your model.
+### Get fast and accurate interatomic potential models
+
+Get fast and accurate interatomic potential models through parallel multi-objective hyper-parameter optimization:
+
+1 - Define the interatomic potential model, hyper-parameter value ranges, and custom loss function. Then, [optimize](https://github.com/baggepinnen/Hyperopt.jl) your model.
 ```julia
 model = ACE
 pars = OrderedDict( :body_order        => [2, 3, 4],
@@ -31,22 +36,28 @@ function custom_loss(metrics::OrderedDict)
 end
 iap, res = hyperlearn!(model, pars, conf_train; loss = custom_loss);
 ```
-2) Export optimal values to your molecular dynamic workflow.
+2 - Export optimal values to your molecular dynamics workflow.
 
 See [example](https://cesmix-mit.github.io/PotentialLearning.jl/dev/generated/Opt-ACE-aHfO2/fit-opt-ace-ahfo2/).
 
 The models are compatible with the interfaces of our sister package [InteratomicPotentials.jl](https://github.com/cesmix-mit/InteratomicPotentials.jl). In particular, we are interested in maintaining compatibility with [ACESuit](https://github.com/ACEsuit), as well as integrating [LAMMPS](https://www.lammps.org/) based potentials such as [ML-POD](https://docs.lammps.org/Packages_details.html#pkg-ml-pod) and [ML-PACE](https://docs.lammps.org/Packages_details.html#ml-pace-package). We are also working to provide neural network potential architecture optimization.
 
-<ins>**Compress your interatomic potential data and model**</ins> using dimensionality reduction of energy and force descriptors:
+### Compress your interatomic potential data and model
+
+Compress your interatomic potential data and model using dimensionality reduction of energy and force descriptors:
+
+1 - Define a PCA state, fit PCA with the energy and force descriptors of your dataset, and transform all dataset descriptors.
 ```julia
 pca = PCAState(tol = n_desc)
 fit!(ds_train, pca)
 transform!(ds_train, pca)
 ```
+2 - Export PCA fitted data to be used in your workflow.
+
 See [example](https://cesmix-mit.github.io/PotentialLearning.jl/dev/generated/PCA-ACE-aHfO2/fit-pca-ace-ahfo2/).
 
 We are working to provide feature selection of energy and force descriptors based on [CUR](https://github.com/JuliaLinearAlgebra/LowRankApprox.jl).
 
 Additionally, this package includes utilities for loading input data (such as XYZ files), computing various metrics (including MAE, MSE, RSQ, and COV), exporting results, and generating plots.
 
-**Acknowledgment:** Center for the Exascale Simulation of Materials in Extreme Environments ([CESMIX](https://computing.mit.edu/cesmix/)). Massachusetts Institute of Technology ([MIT](https://www.mit.edu/)).
\ No newline at end of file
+**Acknowledgment:** Center for the Exascale Simulation of Materials in Extreme Environments ([CESMIX](https://computing.mit.edu/cesmix/)). Massachusetts Institute of Technology ([MIT](https://www.mit.edu/)).