Skip to content

Commit

Permalink
Merge cc6624f into 1e7fae5
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuellujan authored Jul 14, 2024
2 parents 1e7fae5 + cc6624f commit bc1ec36
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 97 deletions.
15 changes: 5 additions & 10 deletions docs/src/install-and-run-examples.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Install package and run examples
## Install and run examples

## Add registries

## Add registries and package
Open a Julia REPL (`$ julia`), type `]` to enter the Pkg REPL, and add the following registries:
```julia
pkg> registry add https://github.com/JuliaRegistries/General
Expand All @@ -9,16 +10,10 @@ Open a Julia REPL (`$ julia`), type `]` to enter the Pkg REPL, and add the follo
pkg> registry add https://github.com/ACEsuit/ACEregistry
```

Then, add PotentialLearning:
```julia
pkg> add PotentialLearning

```

## Clone repository and access an example folder
## Clone repository to access example folders
Clone the `PotentialLearning.jl` repository into your working directory.
```shell
$ git clone git@github.com:cesmix-mit/PotentialLearning.jl.git
$ git clone https://github.com/cesmix-mit/PotentialLearning.jl.git
```
Access any folder within `PotentialLearning.jl/examples`. E.g.
```shell
Expand Down
29 changes: 15 additions & 14 deletions examples/ACE-aHfO2/fit-ace-ahfo2.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# # Fit a-HfO2 dataset with ACE

# ## a. Load packages, define paths, and create experiment folder.
# ## Setup experiment

# Load packages.
using AtomsBase, InteratomicPotentials, PotentialLearning
Expand All @@ -13,24 +13,23 @@ ds_path = "$base_path/examples/data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path = "$base_path/examples/ACE-aHfO2/results/";

# Load utility functions.

include("$base_path/examples/utils/utils.jl")

# Create experiment folder.
run(`mkdir -p $res_path`);

# ## b. Load atomistic dataset and split it into training and test.
# ## Load datasets

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
ds = load_data(ds_path, uparse("eV"), uparse(""))[1:1000] # Only the first 1K samples are used in this example.
ds = load_data(ds_path, uparse("eV"), uparse(""))[1:1000]; # Load first 1K samples.

# Split atomistic dataset into training and test
# Split atomistic dataset into training and test.
n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
conf_train, conf_test = split(ds, n_train, n_test)

# ## c. Create ACE basis, compute descriptors and add them to the dataset.
# ## Compute descriptors

# Create ACE basis
# Create and save ACE basis.
basis = ACE(species = [:Hf, :O],
body_order = 3,
polynomial_degree = 4,
Expand All @@ -49,9 +48,11 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
pbar=false)

# Update training dataset by adding energy and force descriptors.
ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)
ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train);

# ## Learn coefficients

# ## d. Learn ACE coefficients based on ACE descriptors and DFT data.
# Learn and save ACE coefficients based on ACE descriptors and DFT data.
println("Learning energies and forces...")
lb = LBasisPotential(basis)
ws, int = [1.0, 1.0], false
Expand All @@ -60,7 +61,7 @@ learn!(lb, ds_train, ws, int)
@save_var res_path lb.β0
lb.β, lb.β0

# ## e. Post-process output: calculate metrics, create plots, and save results.
# ## Post-process results

# Compute ACE descriptors for energy and forces based on the atomistic test configurations.
println("Computing energy descriptors of test dataset...")
Expand All @@ -71,9 +72,9 @@ f_descr_test = compute_force_descriptors(conf_test, basis;
pbar = false);

# Update test dataset by adding energy and force descriptors.
ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test)
ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test);

# Get true and predicted values for energies and forces.
# Get and save true and predicted values for energies and forces.
n_atoms_train = length.(get_system.(ds_train))
n_atoms_test = length.(get_system.(ds_test))

Expand All @@ -95,7 +96,7 @@ f_test, f_test_pred = get_all_forces(ds_test),
@save_var res_path f_test
@save_var res_path f_test_pred;

# Compute training metrics.
# Compute and save training metrics.
e_train_metrics = get_metrics(e_train, e_train_pred,
metrics = [mae, rmse, rsq],
label = "e_train")
Expand All @@ -106,7 +107,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics)
@save_dict res_path train_metrics
train_metrics

# Compute test metrics.
# Compute and save test metrics.
e_test_metrics = get_metrics(e_test, e_test_pred,
metrics = [mae, rmse, rsq],
label = "e_test")
Expand Down
22 changes: 12 additions & 10 deletions examples/DPP-ACE-Na/fit-dpp-ace-na.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# # Subsample Na dataset with DPP and fit energies with ACE

# ## a. Load packages and define paths.
# ## Setup experiment

# Load packages.
using Unitful, UnitfulAtomic
Expand All @@ -11,7 +11,7 @@ using LinearAlgebra, Plots
base_path = haskey(ENV, "BASE_PATH") ? ENV["BASE_PATH"] : "../../"
ds_path = "$base_path/examples/data/Na/liquify_sodium.yaml"

# ## b. Load atomistic dataset and split it into training and test.
# ## Load datasets

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.).
confs, thermo = load_data(ds_path, YAML(:Na, u"eV", u""))
Expand All @@ -20,7 +20,7 @@ confs, thermo = confs[220:end], thermo[220:end]
# Split atomistic dataset into training and test.
conf_train, conf_test = confs[1:1000], confs[1001:end]

# ## c. Create ACE basis, compute energy descriptors and add them to the dataset.
# ## Compute descriptors

# Create ACE basis.
ace = ACE(species = [:Na], # species
Expand All @@ -29,39 +29,41 @@ ace = ACE(species = [:Na], # species
wL = 1.0, # Defaults, See ACE.jl documentation
csp = 1.0, # Defaults, See ACE.jl documentation
r0 = 1.0, # minimum distance between atoms
rcutoff = 5.0) # cutoff radius
rcutoff = 5.0); # cutoff radius

# Compute energy (local) descriptors of the training dataset.
println("Computing local descriptors of training dataset")
e_descr_train = compute_local_descriptors(conf_train, ace) # JLD.load("data/sodium_empirical_full.jld", "descriptors")

# Update training dataset by adding energy descriptors.
ds_train = DataSet(conf_train .+ e_descr_train)
ds_train = DataSet(conf_train .+ e_descr_train);

# ## d. Subsampling via DPP.
# ## Subsample dataset

# Create DPP subselector.
dpp = kDPP(ds_train, GlobalMean(), DotProduct(); batch_size = 200)

# Subsample training dataset.
dpp_inds = get_random_subset(dpp)

# ## e. Learn ACE coefficients based on ACE descriptors and DFT data.
# ## Learn coefficients

# Learn ACE coefficients based on ACE descriptors and DFT data.
lb = LBasisPotential(ace)
α = 1e-8
Σ = learn!(lb, ds_train[dpp_inds], α)

# ## f. Post-process output: calculate metrics, create plots, and save results.
# ## Post-process results

# Update test dataset by adding energy descriptors.
println("Computing local descriptors of test dataset")
e_descr_test = compute_local_descriptors(conf_test, ace)
ds_test = DataSet(conf_test .+ e_descr_test)
ds_test = DataSet(conf_test .+ e_descr_test);

# Get true and predicted energy values (assuming that all configurations have the same no. of atoms).
n = size(get_system(ds_train[1]))[1]
e_train, e_train_pred = get_all_energies(ds_train)/n, get_all_energies(ds_train, lb)/n
e_test, e_test_pred = get_all_energies(ds_test)/n, get_all_energies(ds_test, lb)/n
e_test, e_test_pred = get_all_energies(ds_test)/n, get_all_energies(ds_test, lb)/n;

# Compute and print metrics.
e_mae, e_rmse, e_rsq = calc_metrics(e_train, e_train_pred)
Expand Down
26 changes: 14 additions & 12 deletions examples/DPP-ACE-Si/fit-dpp-ace-si.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# # Subsample Si dataset and fit with ACE

# ## a. Load packages, define paths, and create experiment folder.
# ## Setup experiment

# Load packages.
using LinearAlgebra, Random, InvertedIndices
Expand All @@ -10,17 +10,17 @@ using AtomsBase, InteratomicPotentials, PotentialLearning
using CSV, JLD, DataFrames

# Define atomic type information.
elname, elspec = "Si", [:Si]
elname, elspec = "Si", [:Si];

# Define paths.
base_path = haskey(ENV, "BASE_PATH") ? ENV["BASE_PATH"] : "../../"
inpath = "$base_path/examples/data/Si-3Body-LAMMPS/"
outpath = "$base_path/examples/DPP-ACE-Si/output/$elname/";
outpath = "$base_path/examples/DPP-ACE-Si/output/$elname/"

# Load utility functions.
include("$base_path/examples/DPP-ACE-Si/subsampling_utils.jl")
include("$base_path/examples/DPP-ACE-Si/subsampling_utils.jl");

# ## b. Load atomistic datasets.
# ## Load datasets

# Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
file_arr = readext(inpath, "xyz")
Expand All @@ -37,7 +37,7 @@ for k = 1:nfile
n += length(confs_arr[k])
end

# ## c. Subsampling by DPP.
# ## Subsample dataset

# Create ACE basis.
nbody = 4
Expand All @@ -48,9 +48,9 @@ ace = ACE(species = elspec, # species
wL = 1.0, # Defaults, See ACE.jl documentation
csp = 1.0, # Defaults, See ACE.jl documentation
r0 = 1.0, # minimum distance between atoms
rcutoff = 10.0)
rcutoff = 10.0);

# Compute ACE descriptors for energies and forces.
# Compute and save ACE descriptors for energies and forces.
println("Computing local descriptors")
e_descr = compute_local_descriptors(confs, ace; pbar=false)
f_descr = compute_force_descriptors(confs, ace; pbar=false)
Expand All @@ -59,9 +59,11 @@ JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)

# Update training dataset by adding energy and force descriptors.
ds = DataSet(confs .+ e_descr .+ f_descr)
ndata = length(ds)
ndata = length(ds);

# ## d. Compute cross validation error from training dataset.
# ## Post-process results

# Compute cross validation error from training dataset.
batch_size = [80, 40]
sel_ind = Dict{Int64, Vector}()
cond_num = Dict{Int64, Vector}()
Expand All @@ -73,6 +75,6 @@ for bs in batch_size
end

JLD.save(outpath*"$(elname)_ACE-$(nbody)-$(deg)_DPP_indices_and_condnum.jld",
"ind", sel_ind,
"condnum", cond_num)
"ind", sel_ind,
"condnum", cond_num)

47 changes: 24 additions & 23 deletions examples/DPP-ACE-aHfO2-1/fit-dpp-ace-ahfo2.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# # Subsample a-HfO2 dataset and fit with ACE

# ## a. Load packages, define paths, and create experiment folder.
# ## Setup experiment

# Load packages.
using AtomsBase, InteratomicPotentials, PotentialLearning
Expand All @@ -13,24 +13,23 @@ ds_path = "$base_path/examples/data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path = "$base_path/examples/DPP-ACE-aHfO2-1/results/";

# Load utility functions.
include("$base_path/examples/utils/utils.jl")
include("$base_path/examples/utils/utils.jl");

# Create experiment folder.
run(`mkdir -p $res_path`);

# ## b. Load atomistic dataset and split it into training and test.
# ## Load datasets

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
ds = load_data(ds_path, uparse("eV"), uparse(""))
ds = load_data(ds_path, uparse("eV"), uparse(""))[1:1000]; # Load first 1K samples.

# Split atomistic dataset into training and test
n_train, n_test = 100, 50 # Few samples per dataset are used in this example.
conf_train, conf_test = split(ds[1:1000], n_train, n_test)
conf_train, conf_test = split(ds, n_train, n_test)

# ## Subsample dataset

# ## c. Subsampling

# Compute ACE descriptors for energies as subsampling input.
# Compute ACE descriptors for energies to be used as subsampling input.
basis = ACE(species = [:Hf, :O],
body_order = 2,
polynomial_degree = 3,
Expand All @@ -40,25 +39,25 @@ basis = ACE(species = [:Hf, :O],
r0 = 1.0)
e_descr = compute_local_descriptors(conf_train,
basis,
pbar = false)
pbar = false);

# Update subsampling dataset
conf_train_kDPP = DataSet(conf_train .+ e_descr)
# Update subsampling dataset.
conf_train_kDPP = DataSet(conf_train .+ e_descr);

# Create DPP subselector
# Create DPP subselector.
dataset_selector = kDPP( conf_train_kDPP,
GlobalMean(),
DotProduct();
batch_size = 50)

# Subsample training dataset
# Subsample training dataset.
inds = get_random_subset(dataset_selector)
conf_train = @views conf_train[inds]
conf_train = @views conf_train[inds];


# ## d. Create ACE basis, compute descriptors and add them to the dataset.
# ## Compute descriptors

# Create ACE basis
# Create ACE basis.
basis = ACE(species = [:Hf, :O],
body_order = 3,
polynomial_degree = 4,
Expand All @@ -77,9 +76,11 @@ f_descr_train = compute_force_descriptors(conf_train, basis;
pbar=false)

# Update training dataset by adding energy and force descriptors.
ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train)
ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train);

# ## Learn coefficients

# ## e. Learn ACE coefficients based on ACE descriptors and DFT data.
# Learn ACE coefficients based on ACE descriptors and DFT data.
println("Learning energies and forces...")
lb = LBasisPotential(basis)
ws, int = [1.0, 1.0], false
Expand All @@ -88,7 +89,7 @@ learn!(lb, ds_train, ws, int)
@save_var res_path lb.β0
lb.β, lb.β0

# ## f. Post-process output: calculate metrics, create plots, and save results.
# ## Post-process results

# Compute ACE descriptors for energy and forces based on the atomistic test configurations.
println("Computing energy descriptors of test dataset...")
Expand All @@ -99,9 +100,9 @@ f_descr_test = compute_force_descriptors(conf_test, basis;
pbar = false);

# Update test dataset by adding energy and force descriptors.
ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test)
ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test);

# Get true and predicted values for energies and forces.
# Get and save true and predicted values for energies and forces.
n_atoms_train = length.(get_system.(ds_train))
n_atoms_test = length.(get_system.(ds_test))

Expand All @@ -123,7 +124,7 @@ f_test, f_test_pred = get_all_forces(ds_test),
@save_var res_path f_test
@save_var res_path f_test_pred;

# Compute training metrics.
# Compute and save training metrics.
e_train_metrics = get_metrics(e_train, e_train_pred,
metrics = [mae, rmse, rsq],
label = "e_train")
Expand All @@ -134,7 +135,7 @@ train_metrics = merge(e_train_metrics, f_train_metrics)
@save_dict res_path train_metrics
train_metrics

# Compute test metrics.
# Compute and save test metrics.
e_test_metrics = get_metrics(e_test, e_test_pred,
metrics = [mae, rmse, rsq],
label = "e_test")
Expand Down
Loading

0 comments on commit bc1ec36

Please sign in to comment.