Skip to content

Commit

Permalink
Merge pull request #75 from cesmix-mit/doc
Browse files Browse the repository at this point in the history
New documentation sections + Small improvements in examples and documentation.
  • Loading branch information
emmanuellujan authored Jun 18, 2024
2 parents ce274f2 + 8bc12cd commit d16e181
Show file tree
Hide file tree
Showing 9 changed files with 119 additions and 119 deletions.
57 changes: 37 additions & 20 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack

using PotentialLearning
using Documenter
using DocumenterCitations
Expand All @@ -12,35 +10,51 @@ DocMeta.setdocmeta!(
recursive = true,
)

# Citations ####################################################################

bib = CitationBibliography(joinpath(@__DIR__, "citation.bib"))

# Generate examples
# Generate examples ############################################################

const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
const OUTPUT_DIR = joinpath(@__DIR__, "src/generated")

"""
    create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

Run `Literate.markdown` on every example script and return the page entries
that point at the generated markdown files.

# Arguments
- `examples`: collection of `title => example_path` pairs, where `example_path`
  is the script path relative to `EXAMPLES_DIR` (e.g. `"LJ-Ar/lennard-jones-ar.jl"`).
- `EXAMPLES_DIR`: directory the example paths are resolved against.
- `OUTPUT_DIR`: base output directory handed to `Literate.markdown`; the
  directory part of each `example_path` is appended to it.

# Returns
A vector of `title => joinpath("generated", <example_path with ".md" extension>)`
pairs, in the same order as `examples`.
"""
function create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)
    for (_, example_path) in examples
        # `dirname` keeps the separators of nested sub-directories; concatenating
        # the split components with `string` would turn "a/b/c.jl" into "ab".
        sub_path = dirname(example_path)
        example_filepath = joinpath(EXAMPLES_DIR, example_path)
        Literate.markdown(example_filepath,
                          joinpath(OUTPUT_DIR, sub_path),
                          documenter = true)
    end
    # Swap only the file extension, so a ".jl" occurring inside a directory name
    # (e.g. "Pkg.jl/example.jl") is left untouched.
    return [title => joinpath("generated", first(splitext(example_path)) * ".md")
            for (title, example_path) in examples]
end

# Basic examples
examples = [
"Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-aHfO2.jl",
"Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
"Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
"Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
"Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
"Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
"1 - Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl",
"2 - Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl"
]
basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

for (_, example_path) in examples
s = split(example_path, "/")
sub_path, file_name = string(s[1:end-1]...), s[end]
example_filepath = joinpath(EXAMPLES_DIR, example_path)
Literate.markdown(example_filepath,
joinpath(OUTPUT_DIR, sub_path),
documenter = true)
end
# Subsampling examples.
# Each entry maps the page title shown in the docs to the example script path,
# given relative to EXAMPLES_DIR. `create_examples` runs Literate.markdown on
# every script and returns `title => generated markdown path` pairs, which are
# listed in the `pages` argument of `makedocs` below.
examples = [
"1 - Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl",
"2 - Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl",
"3 - Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl",
]
ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md"))
for (title, example_path) in examples]
# Dimension reduction examples.
# Same `title => script path` format as the other example lists; the path is
# relative to EXAMPLES_DIR. The pairs returned by `create_examples` are listed
# in the `pages` argument of `makedocs` below.
examples = [
"1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
]
dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Make and deploy docs #########################################################

makedocs(
root = joinpath(dirname(pathof(PotentialLearning)), "..", "docs"),
Expand All @@ -56,7 +70,9 @@ makedocs(
draft = false,
pages = ["Home" => "index.md",
"How to run the examples" => "how-to-run-the-examples.md",
"Examples" => examples,
"Basic examples" => basic_examples,
"Optimize atomistic data via intelligent subsampling" => ss_examples,
"Optimize interatomic potential models via dimension reduction" => dr_examples,
"API" => "api.md"],
format = Documenter.HTML(;
prettyurls = get(ENV, "CI", "false") == "true",
Expand All @@ -71,3 +87,4 @@ deploydocs(;
devbranch = "main",
push_preview = true,
)

4 changes: 2 additions & 2 deletions docs/src/how-to-run-the-examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clone `PotentialLearning.jl` repository in your working directory.
```
Navigate to any folder within `PotentialLearning.jl/examples`, e.g.:
```shell
$ cd PotentialLearning.jl/examples/Na
$ cd PotentialLearning.jl/examples/DPP-ACE-aHfO2-1
```

## Run example
Expand All @@ -30,6 +30,6 @@ Type `]` to enter the Pkg REPL and instantiate.
```
Finally, include the example file.
```julia
julia> include("fit-dpp-ace-na.jl")
julia> include("fit-dpp-ace-ahfo2.jl")
```

Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
# # Fit a-HfO2 dataset with ACE

# ## Load packages, define paths, and create experiment folder.
# ## a. Load packages, define paths, and create experiment folder.

# Load packages
# Load packages.
using AtomsBase, InteratomicPotentials, PotentialLearning
using Unitful, UnitfulAtomic
using LinearAlgebra, Random, DisplayAs

# Define paths.
path = joinpath(dirname(pathof(PotentialLearning)), "../examples/ACE-aHfO2")
ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path = "$path/results/"
res_path = "$path/results/";

# Load utility functions.
include("$path/../utils/utils.jl")

# Create experiment folder.
run(`mkdir -p $res_path`)
run(`mkdir -p $res_path`);

# ## Load atomistic dataset and split it into training and test.
# ## b. Load atomistic dataset and split it into training and test.

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
ds = load_data(ds_path, uparse("eV"), uparse(""))
ds = load_data(ds_path, uparse("eV"), uparse(""))[1:1000] # Only the first 1K samples are used in this example.

# Split atomistic dataset into training and test
n_train, n_test = 50, 50 # only 50 samples per dataset are used in this example.
conf_train, conf_test = split(ds[1:1000], n_train, n_test)
n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
conf_train, conf_test = split(ds, n_train, n_test)

# ## Create ACE basis, compute descriptors and add them to the dataset.
# ## c. Create ACE basis, compute descriptors and add them to the dataset.

# Create ACE basis
basis = ACE(species = [:Hf, :O],
Expand All @@ -37,7 +37,7 @@ basis = ACE(species = [:Hf, :O],
wL = 1.0,
csp = 1.0,
r0 = 1.0)
@save_var res_path basis
@save_var res_path basis;

# Compute ACE descriptors for energy and forces based on the atomistic training configurations.
println("Computing energy descriptors of training dataset...")
Expand All @@ -50,7 +50,7 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
# Update training dataset by adding energy and force descriptors.
ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)

# ## Learn ACE coefficients based on ACE descriptors and DFT data.
# ## d. Learn ACE coefficients based on ACE descriptors and DFT data.
println("Learning energies and forces...")
lb = LBasisPotential(basis)
ws, int = [1.0, 1.0], false
Expand All @@ -59,15 +59,15 @@ learn!(lb, ds_train, ws, int)
@save_var res_path lb.β0
lb.β, lb.β0

# ## Post-process output: calculate metrics, create plots, and save results.
# ## e. Post-process output: calculate metrics, create plots, and save results.

# Compute ACE descriptors for energy and forces based on the atomistic test configurations.
println("Computing energy descriptors of test dataset...")
e_descr_test = compute_local_descriptors(conf_test, basis;
pbar = false)
println("Computing force descriptors of test dataset...")
f_descr_test = compute_force_descriptors(conf_test, basis;
pbar = false)
pbar = false);

# Update test dataset by adding energy and force descriptors.
ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test)
Expand All @@ -92,9 +92,9 @@ f_test, f_test_pred = get_all_forces(ds_test),
@save_var res_path e_test
@save_var res_path e_test_pred
@save_var res_path f_test
@save_var res_path f_test_pred
@save_var res_path f_test_pred;

# Compute training metrics
# Compute training metrics.
e_train_metrics = get_metrics(e_train, e_train_pred,
metrics = [mae, rmse, rsq],
label = "e_train")
Expand All @@ -105,7 +105,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics)
@save_dict res_path train_metrics
train_metrics

# Compute test metrics
# Compute test metrics.
e_test_metrics = get_metrics(e_test, e_test_pred,
metrics = [mae, rmse, rsq],
label = "e_test")
Expand All @@ -116,19 +116,19 @@ test_metrics = merge(e_test_metrics, f_test_metrics)
@save_dict res_path test_metrics
test_metrics

# Plot and save energy results
# Plot and save energy results.
e_plot = plot_energy(e_train, e_train_pred,
e_test, e_test_pred)
@save_fig res_path e_plot
DisplayAs.PNG(e_plot)

# Plot and save force results
# Plot and save force results.
f_plot = plot_forces(f_train, f_train_pred,
f_test, f_test_pred)
@save_fig res_path f_plot
DisplayAs.PNG(f_plot)

# Plot and save training force cosine
# Plot and save training force cosine.
e_train_plot = plot_energy(e_train, e_train_pred)
f_train_plot = plot_forces(f_train, f_train_pred)
f_train_cos = plot_cos(f_train, f_train_pred)
Expand All @@ -137,7 +137,7 @@ f_train_cos = plot_cos(f_train, f_train_pred)
@save_fig res_path f_train_cos
DisplayAs.PNG(f_train_cos)

# Plot and save test force cosine
# Plot and save test force cosine.
e_test_plot = plot_energy(e_test, e_test_pred)
f_test_plot = plot_forces(f_test, f_test_pred)
f_test_cos = plot_cos(f_test, f_test_pred)
Expand Down
14 changes: 7 additions & 7 deletions examples/DPP-ACE-Na/fit-dpp-ace-na.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# # Subsample Na dataset with DPP and fit energies with ACE

# ## Load packages and define paths.
# ## a. Load packages and define paths.

# Load packages
# Load packages.
using Unitful, UnitfulAtomic
using AtomsBase, InteratomicPotentials, PotentialLearning
using LinearAlgebra, Plots

# Define paths.
path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na")
ds_path = "$path/../data/Na/liquify_sodium.yaml"
ds_path = "$path/../data/Na/liquify_sodium.yaml";

# ## Load atomistic dataset and split it into training and test.
# ## b. Load atomistic dataset and split it into training and test.

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.).
confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u""))
Expand All @@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end]
# Split atomistic dataset into training and test.
conf_train, conf_test = confs[1:1000], confs[1001:end]

# ## Create ACE basis, compute energy descriptors and add them to the dataset.
# ## c. Create ACE basis, compute energy descriptors and add them to the dataset.

# Create ACE basis.
ace = ACE(species = [:Na], # species
Expand All @@ -38,7 +38,7 @@ e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodi
# Update training dataset by adding energy descriptors.
ds_train = DataSet(conf_train .+ e_descr_train)

# ## Subsampling via DPP.
# ## d. Subsampling via DPP.

# Create DPP subselector.
dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200)
Expand All @@ -51,7 +51,7 @@ lb = LBasisPotential(ace)
α = 1e-8
Σ = learn!(lb, ds_train[dpp_inds], α)

# ## Post-process output: calculate metrics, create plots, and save results.
# ## e. Post-process output: calculate metrics, create plots, and save results.

# Update test dataset by adding energy descriptors.
println("Computing local descriptors of test dataset")
Expand Down
16 changes: 8 additions & 8 deletions examples/DPP-ACE-Si/fit-dpp-ace-si.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# # Subsample Si dataset and fit with ACE

# ## Load packages, define paths, and create experiment folder.
# ## a. Load packages, define paths, and create experiment folder.

# Load packages
# Load packages.
using LinearAlgebra, Random, InvertedIndices
using Statistics, StatsBase, Distributions, Determinantal
using Unitful, UnitfulAtomic
using AtomsBase, InteratomicPotentials, PotentialLearning
using CSV, JLD, DataFrames

# Define atomic type information
# Define atomic type information.
elname, elspec = "Si", [:Si]

# Define paths.
Expand All @@ -20,15 +20,15 @@ outpath = "$path/output/$elname/"
# Load utility functions.
include("$path/subsampling_utils.jl")

# ## Load atomistic datasets
# ## b. Load atomistic datasets.

# Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
file_arr = readext(inpath, "xyz")
nfile = length(file_arr)
confs_arr = [load_data(inpath*file, ExtXYZ(u"eV", u"")) for file in file_arr]
confs = concat_dataset(confs_arr)

# Id of configurations per file
# Id of configurations per file.
n = 0
confs_id = Vector{Vector{Int64}}(undef, nfile)
for k = 1:nfile
Expand All @@ -37,9 +37,9 @@ for k = 1:nfile
n += length(confs_arr[k])
end

# ## Subsampling by DPP
# ## c. Subsampling by DPP.

# Create ACE basis
# Create ACE basis.
nbody = 4
deg = 5
ace = ACE(species = elspec, # species
Expand All @@ -61,7 +61,7 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)
ds = DataSet(confs .+ e_descr .+ f_descr)
ndata = length(ds)

# ## Compute cross validation error from training dataset
# ## d. Compute cross validation error from training dataset.
batch_size = [80, 40]
sel_ind = Dict{Int64, Vector}()
cond_num = Dict{Int64, Vector}()
Expand Down
Loading

0 comments on commit d16e181

Please sign in to comment.