diff --git a/hpc/archer2/grid_search.slurm b/hpc/archer2/grid_search.slurm
index 9a71c5a9..91f7a93a 100644
--- a/hpc/archer2/grid_search.slurm
+++ b/hpc/archer2/grid_search.slurm
@@ -47,10 +47,10 @@ export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK
 n_points=1000000 # points per MPI process
 n_tasks=8
 global_depth=1 # Number of local roots matches the number of MPI processes, therefore the number of NUMA regions
-local_depth=(3 4 5 6)
-n_samples=(100 1000 5000)
-block_size=(16 32 64 128 256)
-n_threads=(2 4 8 16) # See if bandwidth saturates with different threading parameters for Rayon thread pool
+local_depth=(5 6)
+n_samples=(100 1000)
+block_size=(128 256)
+n_threads=(2 16) # See if bandwidth saturates with different threading parameters for Rayon thread pool
 export OMP_NUM_THREADS=1 # Need to set to 1 to avoid oversubsciption between Rayon and OpenMP
 
 # Create a CSV output file for analysis
@@ -60,7 +60,8 @@ echo "
 runtime, p2m, m2m, l2l, m2l, p2p, \
 source_tree, target_tree, source_domain, target_domain, layout, \
 ghost_exchange_v, ghost_exchange_u, gather_global_fmm, scatter_global_fmm, \
-source_to_target_data, source_data, target_data, global_fmm, ghost_fmm_v, ghost_fmm_u
+source_to_target_data, source_data, target_data, global_fmm, ghost_fmm_v, ghost_fmm_u, \
+n_points, n_tasks, global_depth, local_depth, n_samples, block_size, n_threads
 " >> ${OUTPUT}
 
 # Perform grid search
@@ -68,7 +69,8 @@ for i in ${!local_depth[@]}; do
     for j in ${!n_samples[@]}; do
         for k in ${!block_size[@]}; do
             for l in ${!n_threads[@]}; do
-                srun --ntasks=$n_tasks --cpus-per-task=16 --distribution=block:block --hint=nomultithread \
+
+                runtime_output=$(srun --ntasks=$n_tasks --cpus-per-task=16 --distribution=block:block --hint=nomultithread \
                     ${WORK}/grid_search_mpi --n-points $n_points \
                     --expansion-order 3 \
                     --prune-empty \
@@ -76,7 +78,9 @@ for i in ${!local_depth[@]}; do
                     --local-depth ${local_depth[$i]} \
                     --n-samples ${n_samples[$j]} \
                     --block-size ${block_size[$k]} \
-                    --n-threads ${n_threads[$l]} >> ${OUTPUT}
+                    --n-threads ${n_threads[$l]})
+
+                echo "$runtime_output, $n_points, $n_tasks, $global_depth, ${local_depth[$i]}, ${n_samples[$j]}, ${block_size[$k]}, ${n_threads[$l]}" >> ${OUTPUT}
             done
         done
     done