Skip to content

Commit

Permalink
Add parallel-subsampling changes to merge.
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuellujan committed Sep 5, 2024
2 parents 8da3e93 + 8ef7cdb commit 0a58996
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 82 deletions.
152 changes: 70 additions & 82 deletions examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ rank = MPI.Comm_rank(comm)

# Define paths.
base_path = haskey(ENV, "BASE_PATH") ? ENV["BASE_PATH"] : "../../"
ds_path = "$base_path/examples/data/Hfo_data/"
res_path = "$base_path/examples/Parallel-DPP-ACE-HfO2/results/";
ds_path = "$base_path/examples/data/Hf/"
res_path = "$base_path/examples/Parallel-DPP-ACE-HfO2/results-Hf/";

# Load utility functions.
include("$base_path/examples/utils/utils.jl")
Expand Down Expand Up @@ -107,24 +107,36 @@ end

# Load training and test configuration datasets ################################

paths = [
#"$ds_path/HfO2_figshare_form_sorted.extxyz", # ERROR: LoadError: SingularException(18)
"$ds_path/HfO2_mp550893_EOS_1D_form_sorted.extxyz", # 200, :)
"$ds_path/HfO_gas_form_sorted.extxyz", # 9377, :(
#"$ds_path/HfO2_figshare_form_sorted.extxyz", # 17.2k, :-D or out of memory
#"$ds_path/HfO2_mp352_EOS_1D_form_sorted.extxyz", # 306, :(
#"$ds_path/HfO2_mp550893_EOS_6D_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf2_gas_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 50, ...
#"$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz", # 50, ...

]
# Dataset 1 (28k)
# Input files for the experiments below: every entry is an .extxyz file
# resolved relative to `ds_path`.
# NOTE(review): the trailing per-file counts (200, 9377, 17.2k, 306, ...) match,
# in the same order, the counts that were attached to different HfO2/HfO files
# in the earlier version of this list — they look carried over rather than
# re-measured. Confirm them against the actual Hf files before relying on them.
paths = ["$ds_path/Hf2_gas_form_sorted.extxyz",
"$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
"$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
"$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k
"$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
"$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
"$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
"$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
"$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
]

# Dataset 2
#paths = [
# "$ds_path/HfO2_figshare_form_sorted.extxyz",
# "$ds_path/HfO2_mp550893_EOS_1D_form_sorted.extxyz",
# "$ds_path/HfO_gas_form_sorted.extxyz",
# "$ds_path/HfO2_figshare_form_sorted.extxyz",
# "$ds_path/HfO2_mp352_EOS_1D_form_sorted.extxyz",
# "$ds_path/HfO2_mp550893_EOS_6D_form_sorted.extxyz",
# "$ds_path/Hf2_gas_form_sorted.extxyz",
# "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz",
# "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz",
# "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz",
# "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz",
# "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz",
# "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz",
# "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz",
# "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz",
#]

confs = []
for ds_path in paths
Expand All @@ -147,8 +159,8 @@ species = unique(vcat([atomic_symbol.(get_system(c).particles)

# Compute ACE descriptors
basis = ACE(species = species,
body_order = 4,
polynomial_degree = 5,
body_order = 8,
polynomial_degree = 8,
rcutoff = 10.0,
wL = 1.0,
csp = 1.0,
Expand All @@ -172,14 +184,13 @@ metric_names = [:exp_number, :method, :batch_size_prop, :batch_size, :time,
metrics = DataFrame([Any[] for _ in 1:length(metric_names)], metric_names)

# Subsampling experiments: subsample full dataset vs subsample dataset by chunks
n_experiments = 5 # 100
n_experiments = 100
local_exp = ceil(Int, n_experiments / size)
for nc in 1:ceil(Int, n_experiments / size)
for nc in 1:local_exp

# Assign experiments to MPI ranks cyclically: on pass `nc`, this rank runs experiment j = rank + size * (nc - 1) + 1.
#check it there is left over
j = rank + size * (nc-1) + 1

#check if there is left over

if j > n_experiments
break
end
Expand All @@ -196,16 +207,13 @@ for nc in 1:ceil(Int, n_experiments / size)
ds_test_rnd = @views ds[rnd_inds_test]

# Subsampling experiments: different sample sizes
for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #[0.05, 0.10, 0.25]
#[0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #[0.05, 0.25, 0.5, 0.75, 0.95] #[0.05, 0.10, 0.20, 0.30] #[0.05, 0.25, 0.5, 0.75, 0.95]
for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 0.99]

# Experiment j - SRS ###############################################
println("Experiment:$j, method:SRS, batch_size_prop:$batch_size_prop")
exp_path = "$res_path/$j-SRS-bsp$batch_size_prop/"
run(`mkdir -p $exp_path`)
batch_size = floor(Int, n_train * batch_size_prop)
@show batch_size
@show batch_size_prop
sampling_time = @elapsed begin
inds = randperm(n_train)[1:batch_size]
end
Expand All @@ -220,26 +228,30 @@ for nc in 1:ceil(Int, n_experiments / size)
@save_dataframe(res_path, metrics)

# Experiment j - DPP ###############################################
println("Experiment:$j, method:DPP, batch_size_prop:$batch_size_prop")
exp_path = "$res_path/$j-DPP-bsp$batch_size_prop/"
run(`mkdir -p $exp_path`)
batch_size = floor(Int, n_train * batch_size_prop)
sampling_time = @elapsed begin
dataset_selector = kDPP( ds_train_rnd,
GlobalMean(),
DotProduct();
batch_size = batch_size)
inds = get_random_subset(dataset_selector)
# Experiment j - DPP on the full training set.
# The whole experiment is best-effort: building the k-DPP selector can fail
# (e.g. when the required matrix is too large to allocate), in which case this
# (experiment, batch size) combination is skipped and no row is added to
# `metrics`.
try
    println("Experiment:$j, method:DPP, batch_size_prop:$batch_size_prop")
    exp_path = "$res_path/$j-DPP-bsp$batch_size_prop/"
    # `mkpath` creates missing parent directories and is a no-op when the
    # directory already exists, without spawning an external `mkdir` process.
    mkpath(exp_path)
    batch_size = floor(Int, n_train * batch_size_prop)
    # Only the subset selection itself is timed, not the subsequent fit.
    sampling_time = @elapsed begin
        dataset_selector = kDPP(ds_train_rnd,
                                GlobalMean(),
                                DotProduct();
                                batch_size = batch_size)
        inds = get_random_subset(dataset_selector)
    end
    metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
    # Prepend the experiment identifiers to the metrics returned by `fit`.
    metrics_j = merge(OrderedDict("exp_number" => j,
                                  "method" => "DPP",
                                  "batch_size_prop" => batch_size_prop,
                                  "batch_size" => batch_size,
                                  "time" => sampling_time),
                      merge(metrics_j...))
    push!(metrics, metrics_j)
    @save_dataframe(res_path, metrics)
catch e
    # A user interrupt (Ctrl-C) must still stop the run instead of being
    # treated as a failed experiment.
    e isa InterruptException && rethrow()
    # Keep going with the remaining experiments, but record which one failed
    # and why (including the backtrace) instead of printing a bare exception.
    @error "DPP experiment failed; skipping" exp_number = j batch_size_prop exception = (e, catch_backtrace())
end
metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
metrics_j = merge(OrderedDict("exp_number" => j,
"method" => "DPP",
"batch_size_prop" => batch_size_prop,
"batch_size" => batch_size,
"time" => sampling_time),
merge(metrics_j...))
push!(metrics, metrics_j)
@save_dataframe(res_path, metrics)

# Experiment j - DPP′ using n_chunks ##############################
for n_chunks in [2, 4, 8]
Expand All @@ -255,11 +267,12 @@ for nc in 1:ceil(Int, n_experiments / size)

#sampling_time = @elapsed @threads for i in 1:n_threads
sampling_time = @elapsed for i in 1:n_chunks
a, b = 1 + (i-1) * n_chunk, i * n_chunk
dataset_selector = kDPP( ds_train_rnd[a:b],
GlobalMean(),
DotProduct();
batch_size = batch_size_chunk)
# Index range [a, b] of the i-th chunk inside `ds_train_rnd`.
# NOTE(review): the end index is i * n_chunk + 1, which is exactly the start
# index of the next chunk (1 + i * n_chunk), so adjacent chunks share one
# configuration and the same global index can be selected twice — confirm the
# one-element overlap is intended.
a, b = 1 + (i-1) * n_chunk, i * n_chunk + 1
# Clamp the end index to n_train when it lies within one chunk length of it.
# NOTE(review): `norm` of a scalar acts as an absolute value here (assuming
# this is LinearAlgebra.norm). When n_train is an exact multiple of n_chunk the
# condition is already true for the second-to-last chunk
# (n_train - b == n_chunk - 1), which would stretch that chunk to the end of
# the dataset — verify against how n_chunk is computed.
b = norm(b-n_train)<n_chunk ? n_train : b
# `@views` makes the chunk a view of `ds_train_rnd[a:b]` instead of a copy.
dataset_selector = kDPP(@views(ds_train_rnd[a:b]),
GlobalMean(),
DotProduct();
batch_size = batch_size_chunk)
inds_i = get_random_subset(dataset_selector)
append!(inds, inds_i .+ (a .- 1))
end
Expand All @@ -271,38 +284,13 @@ for nc in 1:ceil(Int, n_experiments / size)
"time" => sampling_time),
merge(metrics_j...))
push!(metrics, metrics_j)
#@save_dataframe(res_path, metrics)
CSV.write("metrics_$j.csv", metrics)
@save_dataframe(res_path, metrics)
end
GC.gc()
end
end

# Postprocess ##################################################################

for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
scatter()
for method in reverse(unique(metrics[:, :method])[1:end])
batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
metric_vals = metrics[metrics.method .== method, :][:, metric]
scatter!(batch_size_vals, metric_vals, label = method,
alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
xlabel = "Sample size",
ylabel = "$metric")
end
savefig("$res_path/$metric-srs.png")
end

scatter()
for method in reverse(unique(metrics[:, :method])[2:end])
batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
metrics[metrics.method .== method, :][:, :time]
scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
xlabel = "Sample size",
ylabel = "Speedup")
end
savefig("$res_path/speedup-srs.png")

include("$base_path/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl")

124 changes: 124 additions & 0 deletions examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
using DataFrames, CSV, Statistics, Plots

# Plot, for every metric, the mean and standard error over experiments as a
# function of the training sample size, with one curve per subsampling method.
# Reads "$res_path/metrics.csv" and writes one "$res_path/<metric>.png" per
# metric; `res_path` must already be defined by the script that includes this
# file.
metrics = CSV.read("$res_path/metrics.csv", DataFrame)

# NOTE: this global shadows `Base.methods` inside the current module.
methods = reverse(unique(metrics.method))
batch_sizes = unique(metrics.batch_size)
batch_size_prop = unique(metrics.batch_size_prop)
# Tick labels pair each batch size with its proportion by position; `zip`
# silently stops at the shorter input, so warn if the two do not line up.
if length(batch_sizes) != length(batch_size_prop)
    @warn "batch_size and batch_size_prop have different numbers of unique values; x tick labels may be misaligned" n_batch_sizes = length(batch_sizes) n_batch_size_props = length(batch_size_prop)
end
# Collected into a Vector (rather than a lazy generator) so the labels can be
# indexed alongside `batch_sizes`.
xticks_label = ["$b\n$(p*100)%" for (b, p) in zip(batch_sizes, batch_size_prop)]
colors = palette(:tab10)
metrics_cols = [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
# Axis labels, in the same order as `metrics_cols`.
metric_labels = ["E MAE | eV/atom",
                 "F MAE | eV/Å",
                 "E MAE | eV/atom",
                 "F MAE | eV/Å",
                 "Time | s"]
for (i, metric) in enumerate(metrics_cols)
    plot()
    # Smallest mean over *all* methods, so the lower y-limit chosen below does
    # not clip any curve (it used to depend only on the last method plotted).
    lowest_mean = Inf
    for (j, method) in enumerate(methods)
        # Mean and standard error of `metric` for each batch size.
        stats = map(batch_sizes) do batch_size
            ms = metrics[metrics.method .== method .&&
                         metrics.batch_size .== batch_size, metric]
            m = mean(ms)
            se = stdm(ms, m) / sqrt(length(ms)) # standard error
            (m, se)
        end
        metric_means = first.(stats)
        metric_se = last.(stats)
        lowest_mean = min(lowest_mean, minimum(metric_means))
        # Mean curve with a ±standard-error ribbon.
        plot!(batch_sizes,
              metric_means,
              ribbon = metric_se,
              color = colors[j],
              fillalpha = .1,
              label = method)
        # Markers on top of the curve; empty label keeps them out of the legend.
        plot!(batch_sizes,
              metric_means,
              seriestype = :scatter,
              thickness_scaling = 1.35,
              markersize = 3,
              markerstrokewidth = 0,
              markerstrokecolor = :black,
              markercolor = colors[j],
              label = "")
    end
    # Fixed y-range for the time plot; for error metrics the range ends at 1
    # and starts just below the smallest mean. Fall back to 0 when no finite
    # minimum is available (no methods, or a NaN mean).
    y_max = metric == :time ? 4000 : 1
    y_min = metric == :time ? -100 : (isfinite(lowest_mean) ? lowest_mean * 0.99 : 0.0)
    # Axis formatting is applied once per figure, after all series are drawn.
    plot!(dpi = 300,
          label = "",
          xscale = :log2,
          xticks = (batch_sizes, xticks_label),
          ylim = (y_min, y_max),
          xlabel = "Training Dataset Sample Size",
          ylabel = metric_labels[i])
    savefig("$res_path/$metric.png")
end


# xformatter = :scientific,
# markershape = :circle,
# markercolor = :gray
# yerror=metric_std,
#ribbon=metric_std,
#yerror=metric_std,
# markerstrokewidth=0, markersize=5,
#yaxis=:log,
#xaxis=:log2, yaxis=:log,

#for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
# scatter()
# for method in reverse(unique(metrics[:, :method])[1:end])
# batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
# metric_vals = metrics[metrics.method .== method, :][:, metric]
# scatter!(batch_size_vals, metric_vals, label = method,
# alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2, yaxis=:log,
# xlabel = "Sample size",
# ylabel = "$metric")
# end
# savefig("$res_path/$metric-srs.png")
#end

#scatter()
#for method in reverse(unique(metrics[:, :method])[2:end])
# batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
# speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
# metrics[metrics.method .== method, :][:, :time]
# scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
# alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
# xlabel = "Sample size",
# ylabel = "Speedup")
#end
#savefig("$res_path/speedup-srs.png")



#using DataFrames, CSV, Plots

#metrics = CSV.read("metrics.csv", DataFrame)
#res_path = "dyomet/"

#for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
# scatter()
# for method in reverse(unique(metrics[:, :method])[1:end])
# batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
# metric_vals = metrics[metrics.method .== method, :][:, metric]
# scatter!(batch_size_vals, metric_vals, label = method,
# alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2, yaxis=:log,
# xlabel = "Sample size",
# ylabel = "$metric")
# end
# savefig("$res_path/$metric-srs.png")
#end

#scatter()
#for method in reverse(unique(metrics[:, :method])[2:end])
# batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
# speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
# metrics[metrics.method .== method, :][:, :time]
# scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
# alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
# xlabel = "Sample size",
# ylabel = "Speedup")
#end
#savefig("$res_path/speedup-srs.png")


0 comments on commit 0a58996

Please sign in to comment.