Commit de573cf: automatically decide the num_gpus

yuchenlin committed Jul 15, 2024
1 parent 7fbdf14 commit de573cf
Showing 2 changed files with 9 additions and 3 deletions.
9 changes: 7 additions & 2 deletions scripts/_common_vllm.sh
@@ -14,8 +14,13 @@ output_dir="result_dirs/wild_bench_v2/"
 # If n_shards is 1, then we can directly run the model;
 # otherwise, use data parallelism.
 if [ $n_shards -eq 1 ]; then
-    gpu="0,1,2,3"; num_gpus=4; # change the number of GPUs to your preference
-    echo "tsp = 1"
+    # gpu="0,1,2,3"; num_gpus=4; # change the number of GPUs to your preference
+    # decide the number of GPUs automatically from CUDA
+    num_gpus=$(nvidia-smi --query-gpu=count --format=csv,noheader | head -n 1)
+    # gpu= # from 0 to the last GPU id
+    gpu=$(seq -s, 0 $((num_gpus - 1)))
+
+    echo "n_shards = 1; num_gpus = $num_gpus; gpu = $gpu"
     CUDA_VISIBLE_DEVICES=$gpu \
     python src/unified_infer.py \
         --data_name wild_bench \
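
How the detection works: nvidia-smi's count query prints one row per installed GPU (each row repeating the total), which is why the script keeps only the first row with head -n 1; seq -s, then joins the ids 0 through num_gpus-1 with commas for CUDA_VISIBLE_DEVICES. A minimal standalone sketch of the same logic, assuming only that nvidia-smi is on PATH, which you can run to sanity-check the values on your machine:

    #!/usr/bin/env bash
    # Count GPUs: --query-gpu=count emits one row per GPU, so take just the first row.
    num_gpus=$(nvidia-smi --query-gpu=count --format=csv,noheader | head -n 1)
    # Build the comma-separated id list "0,1,...,num_gpus-1".
    gpu=$(seq -s, 0 $((num_gpus - 1)))
    # On a 4-GPU node this prints: num_gpus=4 gpu=0,1,2,3
    echo "num_gpus=$num_gpus gpu=$gpu"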
3 changes: 2 additions & 1 deletion src/unified_infer.py
@@ -38,6 +38,7 @@ def parse_args():
     parser.add_argument('--no_repeat_ngram_size', default=0, type=int)
     parser.add_argument('--hf_bf16', action='store_true')
     parser.add_argument('--hf_gptq', action='store_true')
+    parser.add_argument('--gpu_memory_utilization', default=0.9, type=float)

     parser.add_argument('--use_hf_conv_template', action='store_true')
     parser.add_argument('--use_imend_stop', action='store_true')
@@ -65,7 +66,7 @@ def sanitize_args(args):
         from vllm import LLM, SamplingParams
         llm = LLM(model=args.model_name, tokenizer=args.tokenizer_name, tensor_parallel_size=args.tensor_parallel_size,
                   download_dir=args.download_dir, dtype=args.dtype, tokenizer_mode=args.tokenizer_mode,
-                  max_model_len=args.max_model_len, trust_remote_code=True,
+                  max_model_len=args.max_model_len, trust_remote_code=True, gpu_memory_utilization=args.gpu_memory_utilization,
                   )
     elif args.engine == "hf":
         llm = DecoderOnlyModelManager(args.model_name, args.model_name, cache_dir=args.download_dir,
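
The new --gpu_memory_utilization flag is passed straight through to vLLM's LLM constructor; in vLLM this parameter sets the fraction of each GPU's memory the engine may pre-allocate for weights and KV cache, and its library default of 0.9 matches the argparse default here. A hypothetical invocation sketch (the model name is a placeholder, the vllm engine value is inferred from the elif branch above, and other required flags may be omitted) that lowers the cap to leave headroom for other processes on the same GPU:

    # Sketch: run the vllm engine while letting vLLM claim at most 80% of GPU memory.
    python src/unified_infer.py \
        --engine vllm \
        --model_name meta-llama/Meta-Llama-3-8B-Instruct \
        --data_name wild_bench \
        --gpu_memory_utilization 0.8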
