Modify config/main.py
# Module configuration; LoRA is enabled by default
enable_deepspeed = False
enable_ptv2 = False
enable_lora = True
load_in_bit = 0  # 4 = load_in_4bit, 8 = load_in_8bit, any other value = no quantization
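As a point of reference only, here is a minimal sketch of how the load_in_bit switch could be mapped onto model-loading flags; the helper name quantization_kwargs_from is hypothetical and not part of the repo.

def quantization_kwargs_from(load_in_bit: int) -> dict:
    # Hypothetical helper for illustration; maps the switch to quantized-loading kwargs.
    if load_in_bit == 4:
        return {"load_in_4bit": True}   # 4-bit loading, requires bitsandbytes
    if load_in_bit == 8:
        return {"load_in_8bit": True}   # 8-bit loading, requires bitsandbytes
    return {}                           # anything else: no quantization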
Use the first two visible GPUs:
train_info_args = {
    'devices': 2,
}
# the first and the third GPU
train_info_args = {
    'devices': [0, 2],
}
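For orientation, assuming train_info_args is ultimately fed into a standard PyTorch Lightning Trainer, the two device settings above correspond to:

from lightning.pytorch import Trainer  # or pytorch_lightning, depending on the installed version

# devices=2      -> the first two visible GPUs
# devices=[0, 2] -> GPU 0 and GPU 2 explicitly
trainer = Trainer(accelerator="gpu", devices=[0, 2])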
Example: 3 machines with 4 GPUs each
Modify train.py: set the Trainer's num_nodes = 3
MASTER_ADDR=10.0.0.1 MASTER_PORT=6667 WORLD_SIZE=12 NODE_RANK=0 python train.py
MASTER_ADDR=10.0.0.1 MASTER_PORT=6667 WORLD_SIZE=12 NODE_RANK=1 python train.py
MASTER_ADDR=10.0.0.1 MASTER_PORT=6667 WORLD_SIZE=12 NODE_RANK=2 python train.py
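A hedged sketch of what the corresponding Trainer construction in train.py could look like for this 3-node, 4-GPU-per-node example; the actual arguments in the repo may differ.

from lightning.pytorch import Trainer

trainer = Trainer(
    accelerator="gpu",
    devices=4,      # GPUs per machine
    num_nodes=3,    # number of machines; MASTER_ADDR / MASTER_PORT / NODE_RANK come from the environment
    strategy="ddp",
)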
Modify data_utils.py: set "data_backend": "lmdb"
Trainer.precision = '16'  # half-precision training; "32": "32-true", "16": "16-mixed", "bf16": "bf16-mixed"
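Passing one of these precision aliases to a Lightning 2.x Trainer would look like the following minimal sketch (not the repo's exact wiring):

from lightning.pytorch import Trainer

trainer = Trainer(precision="16-mixed")  # half precision; "32-true" and "bf16-mixed" are the other aliases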
global_args = {
    "load_in_8bit": False,  # for LoRA: enable if the GPU supports int8; requires pip install bitsandbytes
    "num_layers": -1,       # number of backbone layers to use; -1 means all layers, otherwise only the first N layers
    "num_layers_key": "num_layers",
}
lora_info_args = {
    'with_lora': True,  # whether to enable the LoRA module
    'r': 8,
    'target_modules': ['query_key_value'],
    'target_dtype': None,
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'bias': 'none',  # bias type for LoRA; can be 'none', 'all' or 'lora_only'
    'modules_to_save': None,  # list of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint
}
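For comparison, these fields map almost one-to-one onto a LoraConfig from the standalone peft library. The sketch below is illustrative only, not how the repo applies LoRA internally, and the base model id is a placeholder.

from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModel

model = AutoModel.from_pretrained("your-base-model", trust_remote_code=True)  # placeholder model id
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["query_key_value"],
    modules_to_save=None,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA parameters should be trainable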
LoRA with int8
global_args = {
    "load_in_8bit": True,   # for LoRA: enable if the GPU supports int8; requires pip install bitsandbytes
    "num_layers": -1,       # number of backbone layers to use; -1 means all layers, otherwise only the first N layers
    "num_layers_key": "num_layers",
}
lora_info_args = {
    'with_lora': True,  # whether to enable the LoRA module
    'r': 8,
    'target_modules': ['query_key_value'],
    'target_dtype': None,
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'bias': 'none',  # bias type for LoRA; can be 'none', 'all' or 'lora_only'
    'modules_to_save': None,  # list of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint
}
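To make the int8 path concrete, here is a hedged end-to-end sketch using transformers + peft directly; it is illustrative only, the repo drives this through global_args, and the model id is a placeholder.

from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "your-base-model",       # placeholder model id
    load_in_8bit=True,       # what global_args["load_in_8bit"] = True switches on; requires bitsandbytes
    device_map="auto",
    trust_remote_code=True,
)
model = prepare_model_for_kbit_training(model)  # older peft versions call this prepare_model_for_int8_training
model = get_peft_model(model, LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8, lora_alpha=32, lora_dropout=0.1, bias="none",
    target_modules=["query_key_value"],
))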