Switching the training mode configuration

Edit config/main.py
# Module configuration; LoRA is enabled by default
enable_deepspeed = False
enable_ptv2 = False
enable_lora = True
load_in_bit = 0  # 4 = load_in_4bit, 8 = load_in_8bit, any other value = 0 (no quantization)
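
For example, to switch to P-Tuning v2 instead of LoRA, flip the switches accordingly. This is a minimal sketch; it assumes the three enable_* switches are meant to be mutually exclusive, as the defaults above suggest.

# minimal sketch: switch from LoRA to P-Tuning v2
enable_deepspeed = False
enable_ptv2 = True   # turn on P-Tuning v2
enable_lora = False  # turn off LoRA
load_in_bit = 0      # keep unquantized weights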

Single machine, multiple GPUs

# use the first two visible GPUs
train_info_args = {
    'devices': 2,
}

# use the first and the third GPU (indices 0 and 2)
train_info_args = {
    'devices': [0,2],
}
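
This value is handed to the trainer. A minimal sketch of what that looks like with a PyTorch Lightning-style Trainer is shown below; the exact wiring in train.py may differ, and the import path and strategy choice are assumptions.

# minimal sketch, assuming a PyTorch Lightning-style Trainer in train.py
from lightning import Trainer  # hypothetical import path; older versions use pytorch_lightning

trainer = Trainer(
    accelerator='gpu',
    devices=train_info_args['devices'],  # int (number of GPUs) or list of GPU indices
    strategy='ddp',                      # assumption: data-parallel training across the cards
)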

Multi-machine, multi-GPU training

Example: 3 machines with 4 GPUs each.
Edit train.py and set num_nodes = 3 on the Trainer, then run one launch command per node:
MASTER_ADDR=10.0.0.1 MASTER_PORT=6667 WORLD_SIZE=12 NODE_RANK=0 python train.py 
MASTER_ADDR=10.0.0.1 MASTER_PORT=6667 WORLD_SIZE=12 NODE_RANK=1 python train.py 
MASTER_ADDR=10.0.0.1 MASTER_PORT=6667 WORLD_SIZE=12 NODE_RANK=2 python train.py 
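
The corresponding trainer arguments might look like the sketch below; WORLD_SIZE is num_nodes × devices (3 × 4 = 12), matching the environment variables above. The import path and strategy are assumptions.

# minimal sketch, assuming a PyTorch Lightning-style Trainer in train.py
from lightning import Trainer  # hypothetical import path

trainer = Trainer(
    accelerator='gpu',
    devices=4,        # 4 GPUs per machine
    num_nodes=3,      # 3 machines -> world size 3 * 4 = 12
    strategy='ddp',   # assumption: DDP across all 12 processes
)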

Very large datasets

Edit data_utils.py and set "data_backend": "lmdb"
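
A minimal sketch of where this key sits, assuming data_utils.py carries a config dict similar to the other snippets in this document; everything except "data_backend" is illustrative.

# hypothetical surrounding structure; only "data_backend" comes from this document
data_args = {
    "data_backend": "lmdb",  # stream samples from an on-disk LMDB store
                             # instead of holding the whole dataset in memory
}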

Training precision

Trainer.precision = '16'  # half-precision training; the mapping is "32": "32-true", "16": "16-mixed", "bf16": "bf16-mixed"
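
For example, bf16 mixed precision would look like the sketch below, again assuming a PyTorch Lightning-style Trainer (check that your GPU supports bf16).

# minimal sketch, assuming a PyTorch Lightning-style Trainer
from lightning import Trainer  # hypothetical import path

trainer = Trainer(
    accelerator='gpu',
    devices=1,
    precision='bf16-mixed',  # or '16-mixed' for fp16, '32-true' for full precision
)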

LoRA finetuning

global_args = {
    "load_in_8bit": False,  # for LoRA this can be enabled if the GPU supports int8; requires pip install bitsandbytes
    "num_layers": -1,       # how many layers of the backbone to use; -1 means all layers, otherwise only the first N layers
    "num_layers_key": "num_layers",
}

lora_info_args = {
    'with_lora': True,  # whether to enable the LoRA module
    'r': 8,
    'target_modules': ['query_key_value'],
    'target_dtype': None,
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'bias': 'none',  # bias type for LoRA; can be 'none', 'all' or 'lora_only'
    'modules_to_save': None,  # list of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint
}
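
For reference, these fields line up with the Hugging Face PEFT LoraConfig; a rough equivalent is sketched below. This is not necessarily what this project builds internally, only an illustration of what the options mean.

# rough PEFT equivalent of the lora_info_args above; this project may build the adapter differently
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=['query_key_value'],
    lora_dropout=0.1,
    bias='none',
    modules_to_save=None,
)
# model = get_peft_model(model, lora_config)  # `model` is the backbone being finetuned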

Efficient int (quantized) training

LoRA int8

global_args = {
    "load_in_8bit": True,   # enable int8 weights for LoRA; requires a GPU with int8 support and pip install bitsandbytes
    "num_layers": -1,       # how many layers of the backbone to use; -1 means all layers, otherwise only the first N layers
    "num_layers_key": "num_layers",
}

lora_info_args = {
    'with_lora': True,  # whether to enable the LoRA module
    'r': 8,
    'target_modules': ['query_key_value'],
    'target_dtype': None,
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'bias': 'none',  # bias type for LoRA; can be 'none', 'all' or 'lora_only'
    'modules_to_save': None,  # list of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint
}
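
Outside this project, the same idea is usually expressed by loading the backbone in 8-bit and then attaching the LoRA adapter. The sketch below uses transformers + PEFT as an illustration only; it is not this repo's internal code path, and the model id is hypothetical.

# illustrative sketch only; this project wires int8 + LoRA through its own config, not this code
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model = AutoModelForCausalLM.from_pretrained(
    "your-model-name",   # hypothetical model id
    load_in_8bit=True,   # int8 weights via bitsandbytes
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)  # cast norms, enable input grads (older peft: prepare_model_for_int8_training)
model = get_peft_model(model, LoraConfig(
    r=8, lora_alpha=32, target_modules=['query_key_value'],
    lora_dropout=0.1, bias='none',
))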