From 97dd57a17b568a6ee8bef87ba8a56927d886015a Mon Sep 17 00:00:00 2001 From: Zhaoyi Shen <11598433+szy21@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:32:15 -0700 Subject: [PATCH] clean up ci and gpu scaling config files --- .buildkite/gpu_pipeline/pipeline.yml | 3 --- .buildkite/pipeline.yml | 20 +++++++++---------- NEWS.md | 2 ++ ...ud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml | 0 ...central_gpu_hs_rhoe_equil_55km_nz63_0M.yml | 0 .../gpu_aquaplanet_dyamond.yml | 0 .../gpu_baroclinic_wave_rhoe.yml | 0 .../gpu_diagnostic_edmfx_aquaplanet.yml | 0 .../gpu_prognostic_edmfx_aquaplanet.yml | 0 9 files changed, 12 insertions(+), 13 deletions(-) rename config/{gpu_configs => model_configs}/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml (100%) rename config/{gpu_configs => model_configs}/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml (100%) rename config/{gpu_configs => model_configs}/gpu_aquaplanet_dyamond.yml (100%) rename config/{perf_configs => model_configs}/gpu_baroclinic_wave_rhoe.yml (100%) rename config/{gpu_configs => model_configs}/gpu_diagnostic_edmfx_aquaplanet.yml (100%) rename config/{gpu_configs => model_configs}/gpu_prognostic_edmfx_aquaplanet.yml (100%) diff --git a/.buildkite/gpu_pipeline/pipeline.yml b/.buildkite/gpu_pipeline/pipeline.yml index ae530ea21b..0edda2d444 100644 --- a/.buildkite/gpu_pipeline/pipeline.yml +++ b/.buildkite/gpu_pipeline/pipeline.yml @@ -11,10 +11,7 @@ env: OMPI_MCA_opal_warn_on_missing_libcuda: 0 SLURM_KILL_BAD_EXIT: 1 SLURM_GRES_FLAGS: "allow-task-sharing" - CONFIG_PATH: "config/model_configs" GPU_CONFIG_PATH: "config/gpu_configs/" - PERF_CONFIG_PATH: "config/perf_configs" - MPI_CONFIG_PATH: "config/mpi_configs" CLIMAATMOS_GC_NSTEPS: 10 steps: diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index d376ffe44a..4e12224c5a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -11,7 +11,7 @@ env: JULIA_MAX_NUM_PRECOMPILE_FILES: 100 JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default" CONFIG_PATH: "config/model_configs" - GPU_CONFIG_PATH: "config/gpu_configs/" + GPU_CONFIG_PATH: "config/gpu_configs" PERF_CONFIG_PATH: "config/perf_configs" MPI_CONFIG_PATH: "config/mpi_configs" @@ -669,7 +669,7 @@ steps: key: "gpu_baroclinic_wave_rhoe" command: > julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file $PERF_CONFIG_PATH/gpu_baroclinic_wave_rhoe.yml + --config_file $CONFIG_PATH/gpu_baroclinic_wave_rhoe.yml artifact_paths: "gpu_implicit_barowave_ref/output_active/*" agents: slurm_mem: 16G @@ -679,7 +679,7 @@ steps: command: > tar xvf sphere_baroclinic_wave_rhoe/output_active/hdf5_files.tar -C sphere_baroclinic_wave_rhoe - tar xvf gpu_baroclinic_wave_rhoe//output_active/hdf5_files.tar -C gpu_baroclinic_wave_rhoe + tar xvf gpu_baroclinic_wave_rhoe/output_active/hdf5_files.tar -C gpu_baroclinic_wave_rhoe julia --color=yes --project=examples post_processing/compare_outputs.jl --output_folder_1 sphere_baroclinic_wave_rhoe @@ -696,7 +696,7 @@ steps: - > nsys profile --trace=nvtx,cuda --output=target_gpu_implicit_baroclinic_wave/output_active/report julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave.yml + --config_file ${GPU_CONFIG_PATH}/target_gpu_implicit_baroclinic_wave.yml artifact_paths: "target_gpu_implicit_baroclinic_wave/output_active/*" agents: slurm_gpus: 1 @@ -710,7 +710,7 @@ steps: - > srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave_4process.yml + --config_file ${GPU_CONFIG_PATH}/target_gpu_implicit_baroclinic_wave_4process.yml artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/output_active/*" env: CLIMACORE_DISTRIBUTED: "MPI" @@ -725,7 +725,7 @@ steps: - > nsys profile --trace=nvtx,cuda --output=central_gpu_hs_rhoe_equil_55km_nz63_0M/output_active/report julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file $GPU_CONFIG_PATH/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml + --config_file $CONFIG_PATH/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml artifact_paths: "central_gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*" agents: slurm_gpus: 1 @@ -736,7 +736,7 @@ steps: - > nsys profile --trace=nvtx,cuda --output=central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M/output_active/report julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file $GPU_CONFIG_PATH/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml + --config_file $CONFIG_PATH/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml artifact_paths: "central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*" agents: slurm_gpus: 1 @@ -748,7 +748,7 @@ steps: - > nsys profile --trace=nvtx,cuda --output=gpu_aquaplanet_dyamond/output_active/report julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond.yml + --config_file ${CONFIG_PATH}/gpu_aquaplanet_dyamond.yml artifact_paths: "gpu_aquaplanet_dyamond/output_active/*" agents: slurm_gpus: 1 @@ -756,7 +756,7 @@ steps: - label: "GPU: Diagnostic EDMFX aquaplanet" command: > julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file $GPU_CONFIG_PATH/gpu_diagnostic_edmfx_aquaplanet.yml + --config_file $CONFIG_PATH/gpu_diagnostic_edmfx_aquaplanet.yml artifact_paths: "gpu_diagnostic_edmfx_aquaplanet/output_active/*" agents: slurm_gpus: 1 @@ -765,7 +765,7 @@ steps: - label: "GPU: Prognostic EDMFX aquaplanet" command: > julia --color=yes --project=examples examples/hybrid/driver.jl - --config_file $GPU_CONFIG_PATH/gpu_prognostic_edmfx_aquaplanet.yml + --config_file $CONFIG_PATH/gpu_prognostic_edmfx_aquaplanet.yml artifact_paths: "gpu_prognostic_edmfx_aquaplanet/output_active/*" agents: slurm_gpus: 1 diff --git a/NEWS.md b/NEWS.md index 6875b2e749..d0022891d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ ClimaAtmos.jl Release Notes Main ------- +- Move config files for gpu jobs on ci to config/model_configs/. + PR [#2948](https://github.com/CliMA/ClimaAtmos.jl/pull/2948). v0.23.0 ------- diff --git a/config/gpu_configs/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml b/config/model_configs/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml similarity index 100% rename from config/gpu_configs/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml rename to config/model_configs/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml diff --git a/config/gpu_configs/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml b/config/model_configs/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml similarity index 100% rename from config/gpu_configs/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml rename to config/model_configs/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml diff --git a/config/gpu_configs/gpu_aquaplanet_dyamond.yml b/config/model_configs/gpu_aquaplanet_dyamond.yml similarity index 100% rename from config/gpu_configs/gpu_aquaplanet_dyamond.yml rename to config/model_configs/gpu_aquaplanet_dyamond.yml diff --git a/config/perf_configs/gpu_baroclinic_wave_rhoe.yml b/config/model_configs/gpu_baroclinic_wave_rhoe.yml similarity index 100% rename from config/perf_configs/gpu_baroclinic_wave_rhoe.yml rename to config/model_configs/gpu_baroclinic_wave_rhoe.yml diff --git a/config/gpu_configs/gpu_diagnostic_edmfx_aquaplanet.yml b/config/model_configs/gpu_diagnostic_edmfx_aquaplanet.yml similarity index 100% rename from config/gpu_configs/gpu_diagnostic_edmfx_aquaplanet.yml rename to config/model_configs/gpu_diagnostic_edmfx_aquaplanet.yml diff --git a/config/gpu_configs/gpu_prognostic_edmfx_aquaplanet.yml b/config/model_configs/gpu_prognostic_edmfx_aquaplanet.yml similarity index 100% rename from config/gpu_configs/gpu_prognostic_edmfx_aquaplanet.yml rename to config/model_configs/gpu_prognostic_edmfx_aquaplanet.yml