Integrate linear IAP hyperparameter optimization + example + documentation. #76

Merged
merged 9 commits into from
Jun 28, 2024
4 changes: 3 additions & 1 deletion Project.toml
@@ -6,9 +6,11 @@ version = "0.2.3"
[deps]
AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Determinantal = "2673d5e8-682c-11e9-2dfd-471b09c6c819"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -26,5 +28,5 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
julia = ">= 1.10"
InteratomicPotentials = ">= 0.2.9"
julia = ">= 1.10"
1 change: 1 addition & 0 deletions docs/Project.toml
@@ -7,6 +7,7 @@ DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
7 changes: 7 additions & 0 deletions docs/make.jl
@@ -48,6 +48,12 @@ examples = [
]
ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Optimization examples
examples = [
"1 - Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl",
]
opt_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Dimension reduction examples
examples = [
"1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
@@ -72,6 +78,7 @@ makedocs(
"How to run the examples" => "how-to-run-the-examples.md",
"Basic examples" => basic_examples,
"Optimize atomistic data via intelligent subsampling" => ss_examples,
"Optimize interatomic potential models via hyper-paramter optimization" => opt_examples,
"Optimize interatomic potential models via dimension reduction" => dr_examples,
"API" => "api.md"],
format = Documenter.HTML(;
16 changes: 16 additions & 0 deletions examples/Opt-ACE-aHfO2/Project.toml
@@ -0,0 +1,16 @@
[deps]
AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PotentialLearning = "82b0a93c-c2e3-44bc-a418-f0f89b0ae5c2"
ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a"
94 changes: 94 additions & 0 deletions examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl
@@ -0,0 +1,94 @@
# # Optimize ACE hyper-parameters: minimize force time and fitting error.

# ## a. Load packages, define paths, and create experiment folder.

# Load packages.
using AtomsBase, InteratomicPotentials, PotentialLearning
using Unitful, UnitfulAtomic
using LinearAlgebra, Random, DisplayAs
using DataFrames, Hyperopt

# Define paths.
path = joinpath(dirname(pathof(PotentialLearning)), "../examples/Opt-ACE-aHfO2")
ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path = "$path/results/";

# Load utility functions.
include("$path/../utils/utils.jl")

# Create experiment folder.
run(`mkdir -p $res_path`);

# ## b. Load atomistic dataset and split it into training and test.

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000]

# Split atomistic dataset into training and test
n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
conf_train, conf_test = split(ds, n_train, n_test)


# ## c. Hyper-parameter optimization.

# Define a custom loss function. Here, we minimize fitting error and force calculation time.
# Possible metrics are `e_mae`, `e_rmse`, `e_rsq`, `f_mae`, `f_rmse`, `f_rsq`, and `time_us`.
function custom_loss(
metrics::OrderedDict
)
e_mae = metrics[:e_mae]
f_mae = metrics[:f_mae]
time_us = metrics[:time_us]
e_mae_max = 0.05 # eV/atom
f_mae_max = 0.05 # eV/Å
w_e = e_mae/e_mae_max
w_f = f_mae/f_mae_max
w_t = 1.0E-3
loss = w_e * e_mae + w_f * f_mae + w_t * time_us
return loss
end;
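
# As a quick sanity check, a hypothetical set of metric values (not computed from the
# dataset) gives w_e = 0.03/0.05 = 0.6 and w_f = 0.04/0.05 = 0.8, so the loss is
# 0.6*0.03 + 0.8*0.04 + 1.0e-3*50 ≈ 0.1:
custom_loss(OrderedDict(:e_mae => 0.03, :f_mae => 0.04, :time_us => 50.0))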

# Define model and hyper-parameter value ranges to be optimized.
model = ACE
pars = OrderedDict( :body_order => [2, 3, 4],
:polynomial_degree => [3, 4, 5],
:rcutoff => [4.5, 5.0, 5.5],
:wL => [0.5, 1.0, 1.5],
:csp => [0.5, 1.0, 1.5],
:r0 => [0.5, 1.0, 1.5]);

# Use random sampling to find the optimal hyper-parameters.
iap, res = hyperlearn!(model, pars, conf_train;
n_samples = 10, sampler = RandomSampler(),
loss = custom_loss, ws = [1.0, 1.0], int = true);

# Save and show results.
@save_var res_path iap.β
@save_var res_path iap.β0
@save_var res_path iap.basis
@save_dataframe res_path res
res

# Plot error vs time.
err_time = plot_err_time(res)
@save_fig res_path err_time
DisplayAs.PNG(err_time)


# Alternatively, use latin hypercube sampling to find the optimal hyper-parameters.
iap, res = hyperlearn!(model, pars, conf_train;
n_samples = 3, sampler = LHSampler(),
loss = custom_loss, ws = [1.0, 1.0], int = true);

# Save and show results.
@save_var res_path iap.β
@save_var res_path iap.β0
@save_var res_path iap.basis
@save_dataframe res_path res
res

# Plot error vs time.
err_time = plot_err_time(res)
@save_fig res_path err_time
DisplayAs.PNG(err_time)

43 changes: 43 additions & 0 deletions examples/utils/plots.jl
@@ -238,3 +238,46 @@ function plot_cos(
return p
end


"""
plot_err_time(
res
)

`res`: DataFrame with fitting error and force time information.

Returns a plot of fitting error vs. force time. Used in hyper-parameter optimization.

"""
function plot_err_time(res)
e_mae = res[!, :e_mae]
f_mae = res[!, :f_mae]
times = res[!, :time_us]
plot(times,
e_mae,
seriestype = :scatter,
alpha = 0.55,
thickness_scaling = 1.35,
markersize = 3,
markerstrokewidth = 1,
markerstrokecolor = :black,
markershape = :circle,
markercolor = :gray,
label = "MAE(E_Pred, E_DFT) | eV/atom")
plot!(times,
f_mae,
seriestype = :scatter,
alpha = 0.55,
thickness_scaling = 1.35,
markersize = 3,
markerstrokewidth = 1,
markerstrokecolor = :red,
markershape = :utriangle,
markercolor = :red2,
label = "MAE(F_Pred, F_DFT) | eV/Å")
plot!(dpi = 300,
label = "",
xlabel = "Time per force per atom | µs",
ylabel = "MAE")
end
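
# Usage sketch (assuming `res` is the results DataFrame returned by `hyperlearn!`,
# with columns :e_mae, :f_mae, and :time_us):
#   p = plot_err_time(res)
#   savefig(p, "err_time.png")   # savefig is provided by Plots.jl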

1 change: 1 addition & 0 deletions src/Data/data.jl
@@ -21,6 +21,7 @@ export ConfigurationDataSet,
get_system,
get_positions,
get_energy,
get_species,
get_descriptors,
get_forces,
get_force_descriptors,
14 changes: 14 additions & 0 deletions src/Data/utils.jl
@@ -200,3 +200,17 @@ function compute_force_descriptors(
return f_des
end

# Get species from a dataset ###################################################
"""
function get_species(
ds::DataSet
)

Get species from a dataset.
"""
function get_species(
ds::DataSet
)
return unique(vcat(unique.(atomic_symbol.(get_system.(ds)))...))
end
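
# Usage sketch (hypothetical `ds` loaded with `load_data`, as in the examples):
#   get_species(ds)   # e.g. [:Hf, :O] for the a-HfO2 dataset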

4 changes: 4 additions & 0 deletions src/HyperLearning/hyperlearning.jl
@@ -0,0 +1,4 @@
export hyperlearn!

include("linear-hyperlearn.jl")
include("utils.jl")
112 changes: 112 additions & 0 deletions src/HyperLearning/linear-hyperlearn.jl
@@ -0,0 +1,112 @@
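# Placeholder definition; `hyperlearn!` redefines `create_ho` at runtime (via `eval`)
# so that it builds a `Hyperoptimizer` with the requested number of samples, sampler,
# and hyper-parameter ranges.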
create_ho(x) = Hyperoptimizer(1)

"""
function hyperloss(
metrics::OrderedDict;
w_e = 1.0,
w_f = 1.0,
w_t = 1.0E-3,
e_mae_max = 0.05,
f_mae_max = 0.05
)

`metrics`: OrderedDict object with metrics of the fitting process.
- Mean absolute error of energies: e_mae.
- Mean absolute error of forces: f_mae.
- Time per force per atom: time_us.
`w_e`: energy weight.
`w_f`: force weight.
`w_t`: time weight.
`e_mae_max`: maximum mean absolute error for energies.
`f_mae_max`: maximum mean absolute error for forces.

Loss function for hyper-parameter optimization: minimizes fitting error and time.
"""
function hyperloss(
metrics::OrderedDict;
w_e = 1.0,
w_f = 1.0,
w_t = 1.0E-3,
e_mae_max = 0.05,
f_mae_max = 0.05
)
e_mae = metrics[:e_mae]
f_mae = metrics[:f_mae]
time_us = metrics[:time_us]
w_e = w_e * e_mae/e_mae_max
w_f = w_f * f_mae/f_mae_max
loss = w_e * e_mae + w_f * f_mae + w_t * time_us
return loss
end;
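
# Usage sketch (hypothetical `metrics` dict built as in `hyperlearn!` below):
#   hyperloss(metrics)               # defaults: w_e = w_f = 1.0, w_t = 1.0e-3
#   hyperloss(metrics; w_t = 0.0)    # ignore the force-time term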

"""
function hyperlearn!(
model::DataType,
pars::OrderedDict,
conf_train::DataSet;
n_samples = 5,
sampler = RandomSampler(),
loss = loss,
ws = [1.0, 1.0],
int = true
)

Hyper-parameter optimization of linear interatomic potentials.
"""
function hyperlearn!(
model::DataType,
pars::OrderedDict,
conf_train::DataSet;
n_samples = 5,
sampler = RandomSampler(),
loss = loss,
ws = [1.0, 1.0],
int = true
)
s = "create_ho(sampler) = Hyperoptimizer($n_samples, sampler, " *
join("$k = $v, " for (k, v) in pars) * ")"
eval(Meta.parse(s))
ho = Base.invokelatest(create_ho, sampler)
if (ho.sampler isa LHSampler) || (ho.sampler isa CLHSampler)
Hyperopt.init!(ho.sampler, ho)
end
species = get_species(conf_train)
for (i, state...) in ho
basis = model(; species = species, state...)
iap = LBasisPotential(basis)
## Compute energy and force descriptors
e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false)
f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false)
ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new)
## Learn
learn!(iap, ds_cur, ws, int)
## Get true and predicted values
e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap)
f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap)
## Compute metrics
e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e)
f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f)
time_us = estimate_time(conf_train, iap) * 10^6
metrics = OrderedDict( :e_mae => e_mae,
:e_rmse => e_rmse,
:e_rsq => e_rsq,
:f_mae => f_mae,
:f_rmse => f_rmse,
:f_rsq => f_rsq,
:time_us => time_us)
## Compute multi-objective loss based on error and time
l = hyperloss(metrics)
## Print results
print("E_MAE:$(round(e_mae; digits=3)) eV/atom, ")
print("F_MAE:$(round(f_mae; digits=3)) eV/Å, ")
println("Time per force per atom:$(round(time_us; digits=3)) µs")
flush(stdout)
## Return loss
push!(ho.history, [v for v in state])
push!(ho.results, (l, metrics, iap))
end
iap = ho.minimum[3]
res = get_results(ho)
return iap, res
end
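
# Usage sketch (mirrors examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl; `pars` and
# `conf_train` are defined there):
#   iap, res = hyperlearn!(ACE, pars, conf_train;
#                          n_samples = 10, sampler = RandomSampler(),
#                          loss = hyperloss, ws = [1.0, 1.0], int = true)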

28 changes: 28 additions & 0 deletions src/HyperLearning/utils.jl
@@ -0,0 +1,28 @@

# Estimate force calculation time
function estimate_time(confs, iap; batch_size = 50)
if length(confs) < batch_size
batch_size = length(confs)
end
random_selector = RandomSelector(length(confs), batch_size)
inds = PotentialLearning.get_random_subset(random_selector)
time = @elapsed begin
f_descr = compute_force_descriptors(confs[inds],
iap.basis,
pbar = false)
ds = DataSet(confs[inds] .+ f_descr)
f_pred = get_all_forces(ds, iap)
end
n_atoms = sum(length(get_system(c)) for c in confs[inds])
return time / n_atoms
end
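
# Usage sketch (hypothetical `confs::DataSet` and fitted `iap::LBasisPotential`):
#   t = estimate_time(confs, iap)   # seconds per force evaluation per atom
#   t_us = t * 10^6                 # converted to µs, as in `hyperlearn!`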

# Get results from the hyperoptimizer
function get_results(ho)
column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...))
rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)]
results = DataFrame([Any[] for _ in 1:length(column_names)], column_names)
foreach(r -> push!(results, r), rows)
return sort!(results)
end

5 changes: 5 additions & 0 deletions src/PotentialLearning.jl
@@ -6,6 +6,8 @@ using Unitful, UnitfulAtomic
using LinearAlgebra, Statistics, Random, Distributions
using StaticArrays
using OrderedCollections
using DataFrames
using Hyperopt
using Flux
using Zygote
using Optimization
@@ -33,6 +35,9 @@ include("DimensionReduction/dimension_reduction.jl")
# Learning problems
include("Learning/learning.jl")

# Hyper-parameter learning problems
include("HyperLearning/hyperlearning.jl")

# Metrics
include("Metrics/metrics.jl")
