From 407bdcfe7c1971e29a90cf2c7d86a52ea4a0b18e Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Tue, 18 Jun 2024 10:22:03 -0400 Subject: [PATCH 1/4] New documentation sections. --- docs/make.jl | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index e0f1c4a2..22685c10 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,35 +12,50 @@ DocMeta.setdocmeta!( recursive = true, ) +# Citations #################################################################### bib = CitationBibliography(joinpath(@__DIR__, "citation.bib")) -# Generate examples +# Generate examples ############################################################ const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples") const OUTPUT_DIR = joinpath(@__DIR__, "src/generated") +function create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) + for (_, example_path) in examples + s = split(example_path, "/") + sub_path, file_name = string(s[1:end-1]...), s[end] + example_filepath = joinpath(EXAMPLES_DIR, example_path) + Literate.markdown(example_filepath, + joinpath(OUTPUT_DIR, sub_path), + documenter = true) + end + examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md")) + for (title, example_path) in examples] +end + +# Basic examples examples = [ "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-aHfO2.jl", + "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl" +] +basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) + +# Subsampling examples +examples = [ "Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl", "Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl", "Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl", - "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl", - "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl" ] +ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) -for (_, example_path) in examples - s = split(example_path, "/") - sub_path, file_name = string(s[1:end-1]...), s[end] - example_filepath = joinpath(EXAMPLES_DIR, example_path) - Literate.markdown(example_filepath, - joinpath(OUTPUT_DIR, sub_path), - documenter = true) -end - -examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md")) - for (title, example_path) in examples] +# Dimension reduction examples +examples = [ + "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl", +] +dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) +# Make and deploy docs ######################################################### makedocs( root = joinpath(dirname(pathof(PotentialLearning)), "..", "docs"), @@ -56,7 +71,9 @@ makedocs( draft = false, pages = ["Home" => "index.md", "How to run the examples" => "how-to-run-the-examples.md", - "Examples" => examples, + "Basic examples" => basic_examples, + "Subsampling examples" => ss_examples, + "Dimension reduction examples" => dr_examples, "API" => "api.md"], format = Documenter.HTML(; prettyurls = get(ENV, "CI", "false") == "true", @@ -71,3 +88,4 @@ deploydocs(; devbranch = "main", push_preview = true, ) + From 560f4edaa7c9f65b3e5fcc20b72b0be10eaefb28 Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Tue, 18 Jun 2024 12:12:57 -0400 Subject: [PATCH 2/4] Small improvements in examples and documentation. --- docs/make.jl | 5 ++-- docs/src/how-to-run-the-examples.md | 4 +-- .../{fit-ace-aHfO2.jl => fit-ace-ahfo2.jl} | 30 +++++++++---------- examples/DPP-ACE-Na/fit-dpp-ace-na.jl | 4 +-- examples/DPP-ACE-Si/fit-dpp-ace-si.jl | 14 ++++----- examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl | 24 +++++++-------- examples/LJ-Ar/lennard-jones-ar.jl | 16 +++++----- examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl | 20 ++++++------- examples/README.md | 19 +----------- 9 files changed, 60 insertions(+), 76 deletions(-) rename examples/ACE-aHfO2/{fit-ace-aHfO2.jl => fit-ace-ahfo2.jl} (89%) diff --git a/docs/make.jl b/docs/make.jl index 22685c10..441d1f44 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack +#pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack using PotentialLearning using Documenter @@ -32,11 +32,12 @@ function create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) end examples = [title => joinpath("generated", replace(example_path, ".jl" => ".md")) for (title, example_path) in examples] + return examples end # Basic examples examples = [ - "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-aHfO2.jl", + "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl", "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl" ] basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) diff --git a/docs/src/how-to-run-the-examples.md b/docs/src/how-to-run-the-examples.md index a063835a..09c3dbfa 100644 --- a/docs/src/how-to-run-the-examples.md +++ b/docs/src/how-to-run-the-examples.md @@ -16,7 +16,7 @@ Clone `PotentialLearning.jl` repository in your working directory. ``` Access to any folder within `PotentialLearning.jl/examples`. E.g. ```shell - $ cd PotentialLearning.jl/examples/Na + $ cd PotentialLearning.jl/examples/DPP-ACE-aHfO2-1 ``` ## Run example @@ -30,6 +30,6 @@ Type `]` to enter the Pkg REPL and instantiate. ``` Finally, include the example file. ```julia - julia> include("fit-dpp-ace-na.jl") + julia> include("fit-dpp-ace-ahfo2.jl") ``` diff --git a/examples/ACE-aHfO2/fit-ace-aHfO2.jl b/examples/ACE-aHfO2/fit-ace-ahfo2.jl similarity index 89% rename from examples/ACE-aHfO2/fit-ace-aHfO2.jl rename to examples/ACE-aHfO2/fit-ace-ahfo2.jl index 08528e95..9fa09b38 100644 --- a/examples/ACE-aHfO2/fit-ace-aHfO2.jl +++ b/examples/ACE-aHfO2/fit-ace-ahfo2.jl @@ -2,7 +2,7 @@ # ## Load packages, define paths, and create experiment folder. -# Load packages +# Load packages. using AtomsBase, InteratomicPotentials, PotentialLearning using Unitful, UnitfulAtomic using LinearAlgebra, Random, DisplayAs @@ -10,22 +10,22 @@ using LinearAlgebra, Random, DisplayAs # Define paths. path = joinpath(dirname(pathof(PotentialLearning)), "../examples/ACE-aHfO2") ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz" -res_path = "$path/results/" +res_path = "$path/results/"; # Load utility functions. include("$path/../utils/utils.jl") # Create experiment folder. -run(`mkdir -p $res_path`) +run(`mkdir -p $res_path`); # ## Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) -ds = load_data(ds_path, uparse("eV"), uparse("Å")) +ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example. # Split atomistic dataset into training and test -n_train, n_test = 50, 50 # only 50 samples per dataset are used in this example. -conf_train, conf_test = split(ds[1:1000], n_train, n_test) +n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. +conf_train, conf_test = split(ds, n_train, n_test) # ## Create ACE basis, compute descriptors and add them to the dataset. @@ -37,7 +37,7 @@ basis = ACE(species = [:Hf, :O], wL = 1.0, csp = 1.0, r0 = 1.0) -@save_var res_path basis +@save_var res_path basis; # Compute ACE descriptors for energy and forces based on the atomistic training configurations. println("Computing energy descriptors of training dataset...") @@ -67,7 +67,7 @@ e_descr_test = compute_local_descriptors(conf_test, basis; pbar = false) println("Computing force descriptors of test dataset...") f_descr_test = compute_force_descriptors(conf_test, basis; - pbar = false) + pbar = false); # Update test dataset by adding energy and force descriptors. ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test) @@ -92,9 +92,9 @@ f_test, f_test_pred = get_all_forces(ds_test), @save_var res_path e_test @save_var res_path e_test_pred @save_var res_path f_test -@save_var res_path f_test_pred +@save_var res_path f_test_pred; -# Compute training metrics +# Compute training metrics. e_train_metrics = get_metrics(e_train, e_train_pred, metrics = [mae, rmse, rsq], label = "e_train") @@ -105,7 +105,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics) @save_dict res_path train_metrics train_metrics -# Compute test metrics +# Compute test metrics. e_test_metrics = get_metrics(e_test, e_test_pred, metrics = [mae, rmse, rsq], label = "e_test") @@ -116,19 +116,19 @@ test_metrics = merge(e_test_metrics, f_test_metrics) @save_dict res_path test_metrics test_metrics -# Plot and save energy results +# Plot and save energy results. e_plot = plot_energy(e_train, e_train_pred, e_test, e_test_pred) @save_fig res_path e_plot DisplayAs.PNG(e_plot) -# Plot and save force results +# Plot and save force results. f_plot = plot_forces(f_train, f_train_pred, f_test, f_test_pred) @save_fig res_path f_plot DisplayAs.PNG(f_plot) -# Plot and save training force cosine +# Plot and save training force cosine. e_train_plot = plot_energy(e_train, e_train_pred) f_train_plot = plot_forces(f_train, f_train_pred) f_train_cos = plot_cos(f_train, f_train_pred) @@ -137,7 +137,7 @@ f_train_cos = plot_cos(f_train, f_train_pred) @save_fig res_path f_train_cos DisplayAs.PNG(f_train_cos) -# Plot and save test force cosine +# Plot and save test force cosine. e_test_plot = plot_energy(e_test, e_test_pred) f_test_plot = plot_forces(f_test, f_test_pred) f_test_cos = plot_cos(f_test, f_test_pred) diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl index 0e7276e7..53f3bdb1 100644 --- a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl +++ b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl @@ -2,14 +2,14 @@ # ## Load packages and define paths. -# Load packages +# Load packages. using Unitful, UnitfulAtomic using AtomsBase, InteratomicPotentials, PotentialLearning using LinearAlgebra, Plots # Define paths. path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na") -ds_path = "$path/../data/Na/liquify_sodium.yaml" +ds_path = "$path/../data/Na/liquify_sodium.yaml"; # ## Load atomistic dataset and split it into training and test. diff --git a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl index 19d16339..b82bf8fd 100644 --- a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl +++ b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl @@ -2,14 +2,14 @@ # ## Load packages, define paths, and create experiment folder. -# Load packages +# Load packages. using LinearAlgebra, Random, InvertedIndices using Statistics, StatsBase, Distributions, Determinantal using Unitful, UnitfulAtomic using AtomsBase, InteratomicPotentials, PotentialLearning using CSV, JLD, DataFrames -# Define atomic type information +# Define atomic type information. elname, elspec = "Si", [:Si] # Define paths. @@ -20,7 +20,7 @@ outpath = "$path/output/$elname/" # Load utility functions. include("$path/subsampling_utils.jl") -# ## Load atomistic datasets +# ## Load atomistic datasets. # Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) file_arr = readext(inpath, "xyz") @@ -28,7 +28,7 @@ nfile = length(file_arr) confs_arr = [load_data(inpath*file, ExtXYZ(u"eV", u"Å")) for file in file_arr] confs = concat_dataset(confs_arr) -# Id of configurations per file +# Id of configurations per file. n = 0 confs_id = Vector{Vector{Int64}}(undef, nfile) for k = 1:nfile @@ -37,9 +37,9 @@ for k = 1:nfile n += length(confs_arr[k]) end -# ## Subsampling by DPP +# ## Subsampling by DPP. -# Create ACE basis +# Create ACE basis. nbody = 4 deg = 5 ace = ACE(species = elspec, # species @@ -61,7 +61,7 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr) ds = DataSet(confs .+ e_descr .+ f_descr) ndata = length(ds) -# ## Compute cross validation error from training dataset +# ## Compute cross validation error from training dataset. batch_size = [80, 40] sel_ind = Dict{Int64, Vector}() cond_num = Dict{Int64, Vector}() diff --git a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl index 6b8b8665..b391e6c9 100644 --- a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl +++ b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl @@ -2,7 +2,7 @@ # ## Load packages, define paths, and create experiment folder. -# Load packages +# Load packages. using AtomsBase, InteratomicPotentials, PotentialLearning using Unitful, UnitfulAtomic using LinearAlgebra, Random, DisplayAs @@ -10,13 +10,13 @@ using LinearAlgebra, Random, DisplayAs # Define paths. path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-aHfO2-1") ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz" -res_path = "$path/results/" +res_path = "$path/results/"; # Load utility functions. include("$path/../utils/utils.jl") # Create experiment folder. -run(`mkdir -p $res_path`) +run(`mkdir -p $res_path`); # ## Load atomistic dataset and split it into training and test. @@ -66,7 +66,7 @@ basis = ACE(species = [:Hf, :O], wL = 1.0, csp = 1.0, r0 = 1.0) -@save_var res_path basis +@save_var res_path basis; # Compute ACE descriptors for energy and forces based on the atomistic training configurations. println("Computing energy descriptors of training dataset...") @@ -96,7 +96,7 @@ e_descr_test = compute_local_descriptors(conf_test, basis; pbar = false) println("Computing force descriptors of test dataset...") f_descr_test = compute_force_descriptors(conf_test, basis; - pbar = false) + pbar = false); # Update test dataset by adding energy and force descriptors. ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test) @@ -121,9 +121,9 @@ f_test, f_test_pred = get_all_forces(ds_test), @save_var res_path e_test @save_var res_path e_test_pred @save_var res_path f_test -@save_var res_path f_test_pred +@save_var res_path f_test_pred; -# Compute training metrics +# Compute training metrics. e_train_metrics = get_metrics(e_train, e_train_pred, metrics = [mae, rmse, rsq], label = "e_train") @@ -134,7 +134,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics) @save_dict res_path train_metrics train_metrics -# Compute test metrics +# Compute test metrics. e_test_metrics = get_metrics(e_test, e_test_pred, metrics = [mae, rmse, rsq], label = "e_test") @@ -145,19 +145,19 @@ test_metrics = merge(e_test_metrics, f_test_metrics) @save_dict res_path test_metrics test_metrics -# Plot and save energy results +# Plot and save energy results. e_plot = plot_energy(e_train, e_train_pred, e_test, e_test_pred) @save_fig res_path e_plot DisplayAs.PNG(e_plot) -# Plot and save force results +# Plot and save force results. f_plot = plot_forces(f_train, f_train_pred, f_test, f_test_pred) @save_fig res_path f_plot DisplayAs.PNG(f_plot) -# Plot and save training force cosine +# Plot and save training force cosine. e_train_plot = plot_energy(e_train, e_train_pred) f_train_plot = plot_forces(f_train, f_train_pred) f_train_cos = plot_cos(f_train, f_train_pred) @@ -166,7 +166,7 @@ f_train_cos = plot_cos(f_train, f_train_pred) @save_fig res_path f_train_cos DisplayAs.PNG(f_train_cos) -# Plot and save test force cosine +# Plot and save test force cosine. e_test_plot = plot_energy(e_test, e_test_pred) f_test_plot = plot_forces(f_test, f_test_pred) f_test_cos = plot_cos(f_test, f_test_pred) diff --git a/examples/LJ-Ar/lennard-jones-ar.jl b/examples/LJ-Ar/lennard-jones-ar.jl index 7dbfd7d6..c0b509df 100644 --- a/examples/LJ-Ar/lennard-jones-ar.jl +++ b/examples/LJ-Ar/lennard-jones-ar.jl @@ -1,17 +1,17 @@ # # Load Ar dataset with energies computed by Lennard-Jones and postprocess -# ## Load packages and define paths +# ## Load packages and define paths. -# Load packages +# Load packages. using Unitful, UnitfulAtomic using AtomsBase, InteratomicPotentials, PotentialLearning using LinearAlgebra, Plots, DisplayAs # Define paths. path = joinpath(dirname(pathof(PotentialLearning)), "../examples/LJ-Ar") -ds_path = "$path/../data/LJ-AR/lj-ar.yaml" +ds_path = "$path/../data/LJ-AR/lj-ar.yaml"; -# ## Load atomistic dataset +# ## Load atomistic dataset. ds, thermo = load_data(ds_path, YAML(:Ar, u"eV", u"Å")) ds = @views ds[2:end] # Filter first configuration (zero energy) @@ -23,15 +23,15 @@ n_atoms = length(first(systems)) # Note: in this dataset all systems contain the positions = position.(systems) dists_origin = map(x->ustrip.(norm.(x)), positions) -# Extract LJ energies from dataset +# Extract LJ energies from dataset. energies = get_values.(get_energy.(ds)) -# Define time range +# Define time range. time_range = 0.5:0.5:5000 # ## Post-process data. -# Plot distance from origin vs time +# Plot distance from origin vs time. p = plot(xlabel = "τ | ps", ylabel = "Distance from origin | Å", dpi = 300, fontsize = 12) @@ -40,7 +40,7 @@ for i = 1:n_atoms end DisplayAs.PNG(p) -# Plot LJ energies vs time +# Plot LJ energies vs time. p = plot(time_range, energies, xlabel = "τ | ps", ylabel = "Lennard Jones energy | eV", diff --git a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl index 6c6254af..342f3a6d 100644 --- a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl +++ b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl @@ -2,7 +2,7 @@ # ## Load packages, define paths, and create experiment folder. -# Load packages +# Load packages. using AtomsBase, InteratomicPotentials, PotentialLearning using Unitful, UnitfulAtomic using LinearAlgebra, Random, DisplayAs @@ -10,26 +10,26 @@ using LinearAlgebra, Random, DisplayAs # Define paths. path = joinpath(dirname(pathof(PotentialLearning)), "../examples/PCA-ACE-aHfO2") ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz" -res_path = "$path/results/" +res_path = "$path/results/"; # Load utility functions. include("$path/../utils/utils.jl") # Create experiment folder. -run(`mkdir -p $res_path`) +run(`mkdir -p $res_path`); # ## Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) -ds = load_data(ds_path, uparse("eV"), uparse("Å")) +ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only first 1K samples are used in this example. -# Split atomistic dataset into training and test -n_train, n_test = 50, 50 # only 50 samples per dataset are used in this example. -conf_train, conf_test = split(ds[1:1000], n_train, n_test) +# Split atomistic dataset into training and test. +n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. +conf_train, conf_test = split(ds, n_train, n_test) # ## Create ACE basis, compute descriptors and add them to the dataset. -# Create ACE basis +# Create ACE basis. basis = ACE(species = [:Hf, :O], body_order = 3, polynomial_degree = 4, @@ -37,7 +37,7 @@ basis = ACE(species = [:Hf, :O], wL = 1.0, csp = 1.0, r0 = 1.0) -@save_var res_path basis +@save_var res_path basis; # Compute ACE descriptors for energy and forces based on the atomistic training configurations. println("Computing energy descriptors of training dataset...") @@ -101,7 +101,7 @@ f_test, f_test_pred = get_all_forces(ds_test), @save_var res_path e_test @save_var res_path e_test_pred @save_var res_path f_test -@save_var res_path f_test_pred +@save_var res_path f_test_pred; # Compute training metrics. e_train_metrics = get_metrics(e_train, e_train_pred, diff --git a/examples/README.md b/examples/README.md index 5f8c0da6..8fe758f4 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,21 +1,4 @@ # How to run an example -Change the directory to the desired example folder. E.g. -```bash -$ cd PotentialLearning.jl/examples/DPP-ACE-Na -``` +See instructions [here](https://cesmix-mit.github.io/PotentialLearning.jl/dev) :-) -Open Julia REPL, activate ```Project.toml``` file in folder ```examples```, and chose the number of threads. E.g. -```bash -$ julia --project=. --threads=4 -``` - -Type ```]``` in Julia REPL, and then run ```instantiate```. -```julia - pkg> instantiate -``` - -Include example script. E.g. -```julia - julia> include("fit-dpp-ace-na.jl") -``` From 04e15e0b66f62a25dc487d83f93241b933b6342b Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Tue, 18 Jun 2024 14:51:20 -0400 Subject: [PATCH 3/4] Small improvements in documentation. --- docs/make.jl | 18 ++++++++---------- examples/ACE-aHfO2/fit-ace-ahfo2.jl | 10 +++++----- examples/DPP-ACE-Na/fit-dpp-ace-na.jl | 8 ++++---- examples/DPP-ACE-Si/fit-dpp-ace-si.jl | 8 ++++---- examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl | 12 ++++++------ examples/LJ-Ar/lennard-jones-ar.jl | 8 ++++---- examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl | 12 ++++++------ 7 files changed, 37 insertions(+), 39 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 441d1f44..ae25211e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,5 +1,3 @@ -#pushfirst!(LOAD_PATH, joinpath(@__DIR__, "..")) # add PotentialLearning to environment stack - using PotentialLearning using Documenter using DocumenterCitations @@ -37,22 +35,22 @@ end # Basic examples examples = [ - "Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl", - "Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl" + "1 - Fit a-HfO2 dataset with ACE" => "ACE-aHfO2/fit-ace-ahfo2.jl", + "2 - Load Ar+Lennard-Jones dataset and postprocess" => "LJ-Ar/lennard-jones-ar.jl" ] basic_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) # Subsampling examples examples = [ - "Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl", - "Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl", - "Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl", + "1 - Subsample a-HfO2 dataset with DPP and fit with ACE" => "DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl", + "2 - Subsample Na dataset with DPP and fit with ACE" => "DPP-ACE-Na/fit-dpp-ace-na.jl", + "3 - Subsample Si dataset with DPP, fit with ACE, and cross validate" => "DPP-ACE-Si/fit-dpp-ace-si.jl", ] ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) # Dimension reduction examples examples = [ - "Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl", + "1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl", ] dr_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) @@ -73,8 +71,8 @@ makedocs( pages = ["Home" => "index.md", "How to run the examples" => "how-to-run-the-examples.md", "Basic examples" => basic_examples, - "Subsampling examples" => ss_examples, - "Dimension reduction examples" => dr_examples, + "Optimize atomistic data via intelligent subsampling" => ss_examples, + "Optimize interatomic potential models via dimension reduction" => dr_examples, "API" => "api.md"], format = Documenter.HTML(; prettyurls = get(ENV, "CI", "false") == "true", diff --git a/examples/ACE-aHfO2/fit-ace-ahfo2.jl b/examples/ACE-aHfO2/fit-ace-ahfo2.jl index 9fa09b38..67d740b0 100644 --- a/examples/ACE-aHfO2/fit-ace-ahfo2.jl +++ b/examples/ACE-aHfO2/fit-ace-ahfo2.jl @@ -1,6 +1,6 @@ # # Fit a-HfO2 dataset with ACE -# ## Load packages, define paths, and create experiment folder. +# ## a. Load packages, define paths, and create experiment folder. # Load packages. using AtomsBase, InteratomicPotentials, PotentialLearning @@ -18,7 +18,7 @@ include("$path/../utils/utils.jl") # Create experiment folder. run(`mkdir -p $res_path`); -# ## Load atomistic dataset and split it into training and test. +# ## b. Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example. @@ -27,7 +27,7 @@ ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. conf_train, conf_test = split(ds, n_train, n_test) -# ## Create ACE basis, compute descriptors and add them to the dataset. +# ## c. Create ACE basis, compute descriptors and add them to the dataset. # Create ACE basis basis = ACE(species = [:Hf, :O], @@ -50,7 +50,7 @@ f_descr_train = compute_force_descriptors(conf_train, basis; # Update training dataset by adding energy and force descriptors. ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train) -# ## Learn ACE coefficients based on ACE descriptors and DFT data. +# ## d. Learn ACE coefficients based on ACE descriptors and DFT data. println("Learning energies and forces...") lb = LBasisPotential(basis) ws, int = [1.0, 1.0], false @@ -59,7 +59,7 @@ learn!(lb, ds_train, ws, int) @save_var res_path lb.β0 lb.β, lb.β0 -# ## Post-process output: calculate metrics, create plots, and save results. +# ## e. Post-process output: calculate metrics, create plots, and save results. # Compute ACE descriptors for energy and forces based on the atomistic test configurations. println("Computing energy descriptors of test dataset...") diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl index 53f3bdb1..23ee68c0 100644 --- a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl +++ b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl @@ -11,7 +11,7 @@ using LinearAlgebra, Plots path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na") ds_path = "$path/../data/Na/liquify_sodium.yaml"; -# ## Load atomistic dataset and split it into training and test. +# ## a. Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.). confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u"Å")) @@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end] # Split atomistic dataset into training and test. conf_train, conf_test = confs[1:1000], confs[1001:end] -# ## Create ACE basis, compute energy descriptors and add them to the dataset. +# ## b. Create ACE basis, compute energy descriptors and add them to the dataset. # Create ACE basis. ace = ACE(species = [:Na], # species @@ -38,7 +38,7 @@ e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodi # Update training dataset by adding energy and force descriptors. ds_train = DataSet(conf_train .+ e_descr_train) -# ## Subsampling via DPP. +# ## c. Subsampling via DPP. # Create DPP subselector. dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200) @@ -51,7 +51,7 @@ lb = LBasisPotential(ace) α = 1e-8 Σ = learn!(lb, ds_train[dpp_inds], α) -# ## Post-process output: calculate metrics, create plots, and save results. +# ## d. Post-process output: calculate metrics, create plots, and save results. # Update test dataset by adding energy descriptors. println("Computing local descriptors of test dataset") diff --git a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl index b82bf8fd..ed087c55 100644 --- a/examples/DPP-ACE-Si/fit-dpp-ace-si.jl +++ b/examples/DPP-ACE-Si/fit-dpp-ace-si.jl @@ -1,6 +1,6 @@ # # Subsample Si dataset and fit with ACE -# ## Load packages, define paths, and create experiment folder. +# ## a. Load packages, define paths, and create experiment folder. # Load packages. using LinearAlgebra, Random, InvertedIndices @@ -20,7 +20,7 @@ outpath = "$path/output/$elname/" # Load utility functions. include("$path/subsampling_utils.jl") -# ## Load atomistic datasets. +# ## b. Load atomistic datasets. # Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) file_arr = readext(inpath, "xyz") @@ -37,7 +37,7 @@ for k = 1:nfile n += length(confs_arr[k]) end -# ## Subsampling by DPP. +# ## c. Subsampling by DPP. # Create ACE basis. nbody = 4 @@ -61,7 +61,7 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr) ds = DataSet(confs .+ e_descr .+ f_descr) ndata = length(ds) -# ## Compute cross validation error from training dataset. +# ## d. Compute cross validation error from training dataset. batch_size = [80, 40] sel_ind = Dict{Int64, Vector}() cond_num = Dict{Int64, Vector}() diff --git a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl index b391e6c9..8b6f99dd 100644 --- a/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl +++ b/examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl @@ -1,6 +1,6 @@ # # Subsample a-HfO2 dataset and fit with ACE -# ## Load packages, define paths, and create experiment folder. +# ## a. Load packages, define paths, and create experiment folder. # Load packages. using AtomsBase, InteratomicPotentials, PotentialLearning @@ -18,7 +18,7 @@ include("$path/../utils/utils.jl") # Create experiment folder. run(`mkdir -p $res_path`); -# ## Load atomistic dataset and split it into training and test. +# ## b. Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) ds = load_data(ds_path, uparse("eV"), uparse("Å")) @@ -28,7 +28,7 @@ n_train, n_test = 100, 50 # Few samples per dataset are used in this example. conf_train, conf_test = split(ds[1:1000], n_train, n_test) -# ## Subsampling +# ## c. Subsampling # Compute ACE descriptors for energies as subsampling input. basis = ACE(species = [:Hf, :O], @@ -56,7 +56,7 @@ inds = get_random_subset(dataset_selector) conf_train = @views conf_train[inds] -# ## Create ACE basis, compute descriptors and add them to the dataset. +# ## d. Create ACE basis, compute descriptors and add them to the dataset. # Create ACE basis basis = ACE(species = [:Hf, :O], @@ -79,7 +79,7 @@ f_descr_train = compute_force_descriptors(conf_train, basis; # Update training dataset by adding energy and force descriptors. ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train) -# ## Learn ACE coefficients based on ACE descriptors and DFT data. +# ## e. Learn ACE coefficients based on ACE descriptors and DFT data. println("Learning energies and forces...") lb = LBasisPotential(basis) ws, int = [1.0, 1.0], false @@ -88,7 +88,7 @@ learn!(lb, ds_train, ws, int) @save_var res_path lb.β0 lb.β, lb.β0 -# ## Post-process output: calculate metrics, create plots, and save results. +# ## f. Post-process output: calculate metrics, create plots, and save results. # Compute ACE descriptors for energy and forces based on the atomistic test configurations. println("Computing energy descriptors of test dataset...") diff --git a/examples/LJ-Ar/lennard-jones-ar.jl b/examples/LJ-Ar/lennard-jones-ar.jl index c0b509df..7e5b52ec 100644 --- a/examples/LJ-Ar/lennard-jones-ar.jl +++ b/examples/LJ-Ar/lennard-jones-ar.jl @@ -1,6 +1,6 @@ # # Load Ar dataset with energies computed by Lennard-Jones and postprocess -# ## Load packages and define paths. +# ## a. Load packages and define paths. # Load packages. using Unitful, UnitfulAtomic @@ -11,11 +11,11 @@ using LinearAlgebra, Plots, DisplayAs path = joinpath(dirname(pathof(PotentialLearning)), "../examples/LJ-Ar") ds_path = "$path/../data/LJ-AR/lj-ar.yaml"; -# ## Load atomistic dataset. +# ## b. Load atomistic dataset. ds, thermo = load_data(ds_path, YAML(:Ar, u"eV", u"Å")) ds = @views ds[2:end] # Filter first configuration (zero energy) -# ## Compute distance from origin, extract LJ energies, and define time range. +# ## c. Compute distance from origin, extract LJ energies, and define time range. # Get atom positions and compute distance from origin. systems = get_system.(ds) @@ -29,7 +29,7 @@ energies = get_values.(get_energy.(ds)) # Define time range. time_range = 0.5:0.5:5000 -# ## Post-process data. +# ## d. Post-process data. # Plot distance from origin vs time. p = plot(xlabel = "τ | ps", diff --git a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl index 342f3a6d..43eafca9 100644 --- a/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl +++ b/examples/PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl @@ -1,6 +1,6 @@ # # Reduce ACE descriptors with PCA and fit a-HfO2 dataset -# ## Load packages, define paths, and create experiment folder. +# ## a. Load packages, define paths, and create experiment folder. # Load packages. using AtomsBase, InteratomicPotentials, PotentialLearning @@ -18,7 +18,7 @@ include("$path/../utils/utils.jl") # Create experiment folder. run(`mkdir -p $res_path`); -# ## Load atomistic dataset and split it into training and test. +# ## b. Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only first 1K samples are used in this example. @@ -27,7 +27,7 @@ ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only first 1K sam n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. conf_train, conf_test = split(ds, n_train, n_test) -# ## Create ACE basis, compute descriptors and add them to the dataset. +# ## c. Create ACE basis, compute descriptors and add them to the dataset. # Create ACE basis. basis = ACE(species = [:Hf, :O], @@ -50,13 +50,13 @@ f_descr_train = compute_force_descriptors(conf_train, basis; # Update training dataset by adding energy and force descriptors. ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train) -# ## Dimension reduction of energy and force descriptors of training dataset. +# ## d. Dimension reduction of energy and force descriptors of training dataset. n_desc = 20 pca = PCAState(tol = n_desc) fit!(ds_train, pca) transform!(ds_train, pca) -# ## Learn ACE coefficients based on ACE descriptors and DFT data. +# ## e. Learn ACE coefficients based on ACE descriptors and DFT data. println("Learning energies and forces...") lb = LBasisPotential(basis) ws, int = [1.0, 1.0], true @@ -65,7 +65,7 @@ learn!(lb, ds_train, ws, int) @save_var res_path lb.β0 lb.β, lb.β0 -# ## Post-process output: calculate metrics, create plots, and save results. +# ## f. Post-process output: calculate metrics, create plots, and save results. # Compute ACE descriptors for energy and forces based on the atomistic test configurations. println("Computing energy descriptors of test dataset...") From 8bc12cd571cf9c99161275ce66ac767e67a803ea Mon Sep 17 00:00:00 2001 From: Emmanuel Lujan Date: Tue, 18 Jun 2024 15:13:13 -0400 Subject: [PATCH 4/4] Small fix in documentation --- examples/DPP-ACE-Na/fit-dpp-ace-na.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl index 23ee68c0..db7b70b6 100644 --- a/examples/DPP-ACE-Na/fit-dpp-ace-na.jl +++ b/examples/DPP-ACE-Na/fit-dpp-ace-na.jl @@ -1,6 +1,6 @@ # # Subsample Na dataset with DPP and fit energies with ACE -# ## Load packages and define paths. +# ## a. Load packages and define paths. # Load packages. using Unitful, UnitfulAtomic @@ -11,7 +11,7 @@ using LinearAlgebra, Plots path = joinpath(dirname(pathof(PotentialLearning)), "../examples/DPP-ACE-Na") ds_path = "$path/../data/Na/liquify_sodium.yaml"; -# ## a. Load atomistic dataset and split it into training and test. +# ## b. Load atomistic dataset and split it into training and test. # Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.). confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u"Å")) @@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end] # Split atomistic dataset into training and test. conf_train, conf_test = confs[1:1000], confs[1001:end] -# ## b. Create ACE basis, compute energy descriptors and add them to the dataset. +# ## c. Create ACE basis, compute energy descriptors and add them to the dataset. # Create ACE basis. ace = ACE(species = [:Na], # species @@ -38,7 +38,7 @@ e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodi # Update training dataset by adding energy and force descriptors. ds_train = DataSet(conf_train .+ e_descr_train) -# ## c. Subsampling via DPP. +# ## d. Subsampling via DPP. # Create DPP subselector. dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200) @@ -51,7 +51,7 @@ lb = LBasisPotential(ace) α = 1e-8 Σ = learn!(lb, ds_train[dpp_inds], α) -# ## d. Post-process output: calculate metrics, create plots, and save results. +# ## e. Post-process output: calculate metrics, create plots, and save results. # Update test dataset by adding energy descriptors. println("Computing local descriptors of test dataset")