Skip to content

Commit

Permalink
Merge pull request #76 from cesmix-mit/opt-ace-example-doc
Browse files Browse the repository at this point in the history
IAP hyperparameter optimization example + documentation.
  • Loading branch information
emmanuellujan authored Jun 28, 2024
2 parents d16e181 + 1086cd1 commit abfca33
Show file tree
Hide file tree
Showing 13 changed files with 328 additions and 25 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ version = "0.2.3"
[deps]
AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Determinantal = "2673d5e8-682c-11e9-2dfd-471b09c6c819"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -26,5 +28,5 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
julia = ">= 1.10"
InteratomicPotentials = ">= 0.2.9"
julia = ">= 1.10"
1 change: 1 addition & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Expand Down
7 changes: 7 additions & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ examples = [
]
ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Optimization examples
examples = [
"1 - Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl",
]
opt_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Dimension reduction examples
examples = [
"1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
Expand All @@ -72,6 +78,7 @@ makedocs(
"How to run the examples" => "how-to-run-the-examples.md",
"Basic examples" => basic_examples,
"Optimize atomistic data via intelligent subsampling" => ss_examples,
"Optimize interatomic potential models via hyper-paramter optimization" => opt_examples,
"Optimize interatomic potential models via dimension reduction" => dr_examples,
"API" => "api.md"],
format = Documenter.HTML(;
Expand Down
16 changes: 16 additions & 0 deletions examples/Opt-ACE-aHfO2/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[deps]
AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PotentialLearning = "82b0a93c-c2e3-44bc-a418-f0f89b0ae5c2"
ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a"
94 changes: 94 additions & 0 deletions examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# # Optimize ACE hyper-parameters: minimize force time and fitting error.

# ## a. Load packages, define paths, and create experiment folder.

# Load packages.
using AtomsBase, InteratomicPotentials, PotentialLearning
using Unitful, UnitfulAtomic
using LinearAlgebra, Random, DisplayAs
using DataFrames, Hyperopt

# Define paths.
path = joinpath(dirname(pathof(PotentialLearning)), "../examples/Opt-ACE-aHfO2")
ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path = "$path/results/";

# Load utility functions.
include("$path/../utils/utils.jl")

# Create experiment folder.
run(`mkdir -p $res_path`);

# ## b. Load atomistic dataset and split it into training and test.

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
ds = load_data(ds_path, uparse("eV"), uparse(""))[1:1000]

# Split atomistic dataset into training and test
n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
conf_train, conf_test = split(ds, n_train, n_test)


# ## c. Hyper-parameter optimization.

# Define a custom loss function. Here, we minimize fitting error and force calculation time.
# Possible metrics are `e_mae`, `e_rmse`, `e_rsq`, `f_mae`, `f_rmse`, `f_rsq`, and `time_us`.
function custom_loss(
metrics::OrderedDict
)
e_mae = metrics[:e_mae]
f_mae = metrics[:f_mae]
time_us = metrics[:time_us]
e_mae_max = 0.05 # eV/atom
f_mae_max = 0.05 # eV/Å
w_e = e_mae/e_mae_max
w_f = f_mae/f_mae_max
w_t = 1.0E-3
loss = w_e * e_mae + w_f * e_mae + w_t * time_us
return loss
end;

# Define model and hyper-parameter value ranges to be optimized.
model = ACE
pars = OrderedDict( :body_order => [2, 3, 4],
:polynomial_degree => [3, 4, 5],
:rcutoff => [4.5, 5.0, 5.5],
:wL => [0.5, 1.0, 1.5],
:csp => [0.5, 1.0, 1.5],
:r0 => [0.5, 1.0, 1.5]);

# Use random sampling to find the optimal hyper-parameters.
iap, res = hyperlearn!(model, pars, conf_train;
n_samples = 10, sampler = RandomSampler(),
loss = custom_loss, ws = [1.0, 1.0], int = true);

# Save and show results.
@save_var res_path iap.β
@save_var res_path iap.β0
@save_var res_path iap.basis
@save_dataframe res_path res
res

# Plot error vs time.
err_time = plot_err_time(res)
@save_fig res_path err_time
DisplayAs.PNG(err_time)


# Alternatively, use latin hypercube sampling to find the optimal hyper-parameters.
iap, res = hyperlearn!(model, pars, conf_train;
n_samples = 3, sampler = LHSampler(),
loss = custom_loss, ws = [1.0, 1.0], int = true);

# Save and show results.
@save_var res_path iap.β
@save_var res_path iap.β0
@save_var res_path iap.basis
@save_dataframe res_path res
res

# Plot error vs time.
err_time = plot_err_time(res)
@save_fig res_path err_time
DisplayAs.PNG(err_time)

43 changes: 43 additions & 0 deletions examples/utils/plots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,46 @@ function plot_cos(
return p
end


"""
plot_err_time(
res
)
`res`: Dataframe with fitting error and force time information.
Returns a plot with fitting errors vs force times. Required in hyper-parameter optimization.
"""
function plot_err_time(res)
e_mae = res[!, :e_mae]
f_mae = res[!, :f_mae]
times = res[!, :time_us]
plot(times,
e_mae,
seriestype = :scatter,
alpha = 0.55,
thickness_scaling = 1.35,
markersize = 3,
markerstrokewidth = 1,
markerstrokecolor = :black,
markershape = :circle,
markercolor = :gray,
label = "MAE(E_Pred, E_DFT) | eV/atom")
plot!(times,
f_mae,
seriestype = :scatter,
alpha = 0.55,
thickness_scaling = 1.35,
markersize = 3,
markerstrokewidth = 1,
markerstrokecolor = :red,
markershape = :utriangle,
markercolor = :red2,
label = "MAE(F_Pred, F_DFT) | eV/Å")
plot!(dpi = 300,
label = "",
xlabel = "Time per force per atom | µs",
ylabel = "MAE")
end

1 change: 1 addition & 0 deletions src/Data/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export ConfigurationDataSet,
get_system,
get_positions,
get_energy,
get_species,
get_descriptors,
get_forces,
get_force_descriptors,
Expand Down
14 changes: 14 additions & 0 deletions src/Data/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,17 @@ function compute_force_descriptors(
return f_des
end

# Get species from a dataset ###################################################
"""
function get_species(
ds::DataSet
)
Get species from a dataset.
"""
function get_species(
ds::DataSet
)
return unique(vcat(unique.(atomic_symbol.(get_system.(ds)))...))
end

4 changes: 4 additions & 0 deletions src/HyperLearning/hyperlearning.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
export hyperlearn!

include("linear-hyperlearn.jl")
include("utils.jl")
112 changes: 112 additions & 0 deletions src/HyperLearning/linear-hyperlearn.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
create_ho(x) = Hyperoptimizer(1)

"""
function hyperloss(
metrics::OrderedDict:
w_e = 1.0,
w_f = 1.0,
w_t = 1.0E-3,
e_mae_max = 0.05,
f_mae_max = 0.05
)
`metrics`: OrderedDict object with metrics of the fitting process.
- Mean absolute error of energies: e_mae.
- Mean absolute error of forces: f_mae.
- Time per force per atom: time_us.
`w_e`: energy weight.
`w_f`: force weight.
`w_t`: time weight.
`e_mae_max`: maximum mean absolute error for energies.
`f_mae_max`: maximum mean absolute error for forces.
Loss function for hyper-parameter optimization: minimizes fitting error and time.
"""
function hyperloss(
metrics::OrderedDict;
w_e = 1.0,
w_f = 1.0,
w_t = 1.0E-3,
e_mae_max = 0.05,
f_mae_max = 0.05
)
e_mae = metrics[:e_mae]
f_mae = metrics[:f_mae]
time_us = metrics[:time_us]
w_e = w_e * e_mae/e_mae_max
w_f = w_f * f_mae/f_mae_max
loss = w_e * e_mae + w_f * e_mae + w_t * time_us
return loss
end;

"""
function hyperlearn!(
model::DataType,
pars::OrderedDict,
conf_train::DataSet;
n_samples = 5,
sampler = RandomSampler(),
loss = loss,
ws = [1.0, 1.0],
int = true
)
Hyper-parameter optimization of linear interatomic potentials.
"""
function hyperlearn!(
model::DataType,
pars::OrderedDict,
conf_train::DataSet;
n_samples = 5,
sampler = RandomSampler(),
loss = loss,
ws = [1.0, 1.0],
int = true
)
s = "create_ho(sampler) = Hyperoptimizer($n_samples, sampler, " *
join("$k = $v, " for (k, v) in pars) * ")"
eval(Meta.parse(s))
ho = Base.invokelatest(create_ho, sampler)
if (ho.sampler isa LHSampler) || (ho.sampler isa CLHSampler)
Hyperopt.init!(ho.sampler, ho)
end
species = get_species(conf_train)
for (i, state...) in ho
basis = model(; species = species, state...)
iap = LBasisPotential(basis)
## Compute energy and force descriptors
e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false)
f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false)
ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new)
## Learn
learn!(iap, ds_cur, ws, int)
## Get true and predicted values
e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap)
f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap)
## Compute metrics
e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e)
f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f)
time_us = estimate_time(conf_train, iap) * 10^6
metrics = OrderedDict( :e_mae => e_mae,
:e_rmse => e_rmse,
:e_rsq => e_rsq,
:f_mae => f_mae,
:f_rmse => f_rmse,
:f_rsq => f_rsq,
:time_us => time_us)
## Compute multi-objetive loss based on error and time
l = hyperloss(metrics)
## Print results
print("E_MAE:$(round(e_mae; digits=3)) eV/atom, ")
print("F_MAE:$(round(f_mae; digits=3)) eV/Å, ")
println("Time per force per atom:$(round(time_us; digits=3)) µs")
flush(stdout)
## Return loss
push!(ho.history, [v for v in state])
push!(ho.results, (l, metrics, iap))
end
iap = ho.minimum[3]
res = get_results(ho)
return iap, res
end

28 changes: 28 additions & 0 deletions src/HyperLearning/utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

# Estimate force calculation time
function estimate_time(confs, iap; batch_size = 50)
if length(confs) < batch_size
batch_size = length(confs)
end
random_selector = RandomSelector(length(confs), batch_size)
inds = PotentialLearning.get_random_subset(random_selector)
time = @elapsed begin
f_descr = compute_force_descriptors(confs[inds],
iap.basis,
pbar = false)
ds = DataSet(confs[inds] .+ f_descr)
f_pred = get_all_forces(ds, iap)
end
n_atoms = sum(length(get_system(c)) for c in confs[inds])
return time / n_atoms
end

# Get results from the hyperoptimizer
function get_results(ho)
column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...))
rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)]
results = DataFrame([Any[] for _ in 1:length(column_names)], column_names)
[push!(results, r) for r in rows]
return sort!(results)
end

5 changes: 5 additions & 0 deletions src/PotentialLearning.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ using Unitful, UnitfulAtomic
using LinearAlgebra, Statistics, Random, Distributions
using StaticArrays
using OrderedCollections
using DataFrames
using Hyperopt
using Flux
using Zygote
using Optimization
Expand Down Expand Up @@ -33,6 +35,9 @@ include("DimensionReduction/dimension_reduction.jl")
# Learning problems
include("Learning/learning.jl")

# Learning problems
include("HyperLearning/hyperlearning.jl")

# Metrics
include("Metrics/metrics.jl")

Expand Down
Loading

0 comments on commit abfca33

Please sign in to comment.