Commit de573cf: automatically decide the num_gpus

yuchenlin committed Jul 15, 2024
1 parent 7fbdf14 commit de573cf
Showing 2 changed files with 9 additions and 3 deletions.
9 changes: 7 additions & 2 deletions scripts/_common_vllm.sh
@@ -14,8 +14,13 @@ output_dir="result_dirs/wild_bench_v2/"
 # If n_shards is 1, then we can directly run the model;
 # otherwise, use data parallelism.
 if [ $n_shards -eq 1 ]; then
-    gpu="0,1,2,3"; num_gpus=4; # change the number of GPUs to your preference
-    echo "tsp = 1"
+    # gpu="0,1,2,3"; num_gpus=4; # change the number of GPUs to your preference
+    # decide the number of GPUs automatically from CUDA
+    num_gpus=$(nvidia-smi --query-gpu=count --format=csv,noheader | head -n 1)
+    # gpu= # from 0 to the last GPU id
+    gpu=$(seq -s, 0 $((num_gpus - 1)))
+
+    echo "n_shards = 1; num_gpus = $num_gpus; gpu = $gpu"
     CUDA_VISIBLE_DEVICES=$gpu \
     python src/unified_infer.py \
         --data_name wild_bench \
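
How the detection works: nvidia-smi's count query prints one row per installed GPU (each row repeating the total), which is why the script keeps only the first row with head -n 1; seq -s, then joins the ids 0 through num_gpus-1 with commas for CUDA_VISIBLE_DEVICES. A minimal standalone sketch of the same logic, assuming only that nvidia-smi is on PATH, which you can run to sanity-check the values on your machine:

    #!/usr/bin/env bash
    # Count GPUs: --query-gpu=count emits one row per GPU, so take just the first row.
    num_gpus=$(nvidia-smi --query-gpu=count --format=csv,noheader | head -n 1)
    # Build the comma-separated id list "0,1,...,num_gpus-1".
    gpu=$(seq -s, 0 $((num_gpus - 1)))
    # On a 4-GPU node this prints: num_gpus=4 gpu=0,1,2,3
    echo "num_gpus=$num_gpus gpu=$gpu"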
3 changes: 2 additions & 1 deletion src/unified_infer.py
@@ -38,6 +38,7 @@ def parse_args():
     parser.add_argument('--no_repeat_ngram_size', default=0, type=int)
     parser.add_argument('--hf_bf16', action='store_true')
     parser.add_argument('--hf_gptq', action='store_true')
+    parser.add_argument('--gpu_memory_utilization', default=0.9, type=float)

     parser.add_argument('--use_hf_conv_template', action='store_true')
     parser.add_argument('--use_imend_stop', action='store_true')
@@ -65,7 +66,7 @@ def sanitize_args(args):
         from vllm import LLM, SamplingParams
         llm = LLM(model=args.model_name, tokenizer=args.tokenizer_name, tensor_parallel_size=args.tensor_parallel_size,
                   download_dir=args.download_dir, dtype=args.dtype, tokenizer_mode=args.tokenizer_mode,
-                  max_model_len=args.max_model_len, trust_remote_code=True,
+                  max_model_len=args.max_model_len, trust_remote_code=True, gpu_memory_utilization=args.gpu_memory_utilization,
                   )
     elif args.engine == "hf":
         llm = DecoderOnlyModelManager(args.model_name, args.model_name, cache_dir=args.download_dir,
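
The new --gpu_memory_utilization flag is passed straight through to vLLM's LLM constructor; in vLLM this parameter sets the fraction of each GPU's memory the engine may pre-allocate for weights and KV cache, and its library default of 0.9 matches the argparse default here. A hypothetical invocation sketch (the model name is a placeholder, the vllm engine value is inferred from the elif branch above, and other required flags may be omitted) that lowers the cap to leave headroom for other processes on the same GPU:

    # Sketch: run the vllm engine while letting vLLM claim at most 80% of GPU memory.
    python src/unified_infer.py \
        --engine vllm \
        --model_name meta-llama/Meta-Llama-3-8B-Instruct \
        --data_name wild_bench \
        --gpu_memory_utilization 0.8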
