-
Notifications
You must be signed in to change notification settings - Fork 1
/
base_deepspeed.yaml
26 lines (23 loc) · 1.29 KB
/
base_deepspeed.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Sample config to run any model in zero-shot mode
# This example loads the model in bfloat16; most models won't fit on a single GPU with this configuration.
# Therefore this config is meant to be used with deepspeed zero3 to split the model across multiple GPUs.
# If you don't have multiple GPUs, see the base.yaml config for an example of using 4-bit quantization
# to fit the model on a single GPU.
# Usage example:
# accelerate launch --deepspeed_config_file configs/deepspeed_configs/deepspeed_zero3.json run.py --config configs/zero-shot/base_deepspeed.yaml --model_name_or_path HuggingFaceH4/zephyr-7b-beta --output_dir results/zero-shot/zephyr-7b-beta
# Default value; overridden by --model_name_or_path on the command line.
model_name_or_path: ./
# Default value; overridden by --output_dir on the command line.
output_dir: ./

torch_dtype: 'bfloat16'
# DeepSpeed ZeRO-3 doesn't support quantization, so none is configured.
quantization: null
use_flash_attention: true
force_auto_device_map: false
predict_with_generate: false
per_device_eval_batch_size: 8
fewshot: false

# Dataset arguments
do_train: false
do_eval: false
do_predict: true
do_predict_full_dataset: false
# null: use the default value for the model.
max_seq_length: null
# If true, inference is run again when results already exist;
# if false, inference is skipped in that case.
overwrite_output_dir: false