From 37a68609f091b99a5679ca1b50d9089e3fffe73c Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Thu, 15 Sep 2022 15:05:40 +0800 Subject: [PATCH] [DOC] Add ernie-1.0-base-zh-cw benchmark results. (#3248) --- examples/benchmark/clue/README.md | 43 ++++++++++++++++++- model_zoo/ernie-1.0/README.md | 8 ++-- .../ernie-1.0/pretraining_introduction.md | 12 +++--- model_zoo/ernie-1.0/run_pretrain.py | 7 ++- model_zoo/ernie-3.0/README.md | 42 +++++++++++++++++- 5 files changed, 96 insertions(+), 16 deletions(-) diff --git a/examples/benchmark/clue/README.md b/examples/benchmark/clue/README.md index 58b4a294558a..6703c5912ef0 100644 --- a/examples/benchmark/clue/README.md +++ b/examples/benchmark/clue/README.md @@ -70,7 +70,7 @@ 24L1024H - ERNIE 1.0-Large-zh-CW + ERNIE 1.0-Large-zh-cw 79.03 @@ -222,7 +222,7 @@ - 12L768H + 12L768H @@ -264,6 +264,44 @@ 77.88 + + + ERNIE 1.0-Base-zh-cw + + + 76.47 + + + 76.07 + + + 57.86 + + + 59.91 + + + 83.41 + + + 79.58 + + + 89.91 + + + 83.42 + + + 72.88/90.78 + + + 84.68 + + + 76.98 + + ERNIE-Gram-zh @@ -1196,6 +1234,7 @@ AFQMC(语义相似度)、TNEWS(文本分类)、IFLYTEK(长文本分类 | ERNIE 2.0-Large-zh | 1e-5,32 | 3e-5,64 | 3e-5,32 | 2e-5,32 | 1e-5,16 | 3e-5,32 | 1e-5,64 | 2e-5,24 | 2e-5,24 | 3e-5,32 | | HFL/RoBERTa-wwm-ext-large | 1e-5,32 | 3e-5,32 | 2e-5,32 | 1e-5,16 | 1e-5,16 | 2e-5,16 | 2e-5,16 | 3e-5,32 | 1e-5,24 | 2e-5,24 | | ERNIE 3.0-Base-zh | 3e-5,16 | 3e-5,32 | 5e-5,32 | 3e-5,32 | 2e-5,64 | 2e-5,16 | 2e-5,32 | 2e-5,24 | 3e-5,24 | 3e-5,32 | +| ERNIE 1.0-Base-zh-cw | 2e-5,16 | 3e-5,32 | 5e-5,16 | 2e-5,16 | 3e-5,32 | 2e-5,16 | 2e-5,32 | 3e-5,24 | 2e-5,32 | 3e-5,24 | | ERNIE-Gram-zh | 1e-5,16 | 5e-5,16 | 5e-5,16 | 2e-5,32 | 2e-5,64 | 3e-5,16 | 3e-5,64 | 3e-5,32 | 2e-5,24 | 2e-5,24 | | ERNIE 2.0-Base-zh | 3e-5,64 | 3e-5,64 | 5e-5,16 | 5e-5,64 | 5e-5,32 | 5e-5,16 | 2e-5,16 | 2e-5,32 | 3e-5,24 | 3e-5,32 | | Langboat/Mengzi-Bert-Base | 3e-5,32 | 5e-5,32 | 5e-5,16 | 2e-5,16 | 2e-5,16 | 3e-5,8 | 1e-5,16 | 3e-5,24 | 3e-5,24 | 2e-5,32 | diff --git 
a/model_zoo/ernie-1.0/README.md b/model_zoo/ernie-1.0/README.md index 8d6c4b9fddbd..aba9c7eb9a29 100644 --- a/model_zoo/ernie-1.0/README.md +++ b/model_zoo/ernie-1.0/README.md @@ -484,24 +484,24 @@ python3 -u -m paddle.distributed.launch \ 我们release了base、large两个模型。均取得了较好的预训练效果。 - - **ERNIE 1.0-Base-zh-CW** 模型: + - **ERNIE 1.0-Base-zh-cw** 模型: - 使用CLUE,WuDao共计400GB的语料,batch_size 1024, 训练 400w step,即可训练得到`ernie-3.0-base-zh`类似的模型效果。相关模型参数,开源为`ernie-1.0-base-zh-cw`,用户加载即可使用。使用CLUE benchmark 对最优超参数进行GradSearch搜索: Model                                  | Arch | CLUE AVG | AFQMC | TNEWS | IFLYTEK | CMNLI | OCNLI | CLUE WSC2020 | CSL | CMRC | CHID | C3 -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | Metrics |   |   | Acc | Acc | Acc | Acc | Acc | Acc | Acc | Exact/F1| Acc| Acc -ERNIE 1.0-Base-zh-CW | 12L768H | 76.44 | 76.04 | 58.02 | 60.87 | 83.56 | 78.61 | 89.14 | 84.00 | 72.26/90.40 | 84.73 | 77.15 | +ERNIE 1.0-Base-zh-cw | 12L768H | 76.47 | 76.07 | 57.86 | 59.91 | 83.41 | 79.58 | 89.91 | 83.42 | 72.88/90.78 | 84.68 | 76.98 | ERNIE 2.0-Base-zh | 12L768H | 74.95 | 76.25 | 58.53 | 61.72 | 83.07 | 78.81 | 84.21 | 82.77 | 68.22/88.71 | 82.78 | 73.19 ERNIE 1.0-Base-zh | 12L768H | 74.17 | 74.84 | 58.91 | 62.25 | 81.68 | 76.58 | 85.20 | 82.77 | 67.32/87.83 | 82.47 | 69.68 - - - **ERNIE 1.0-Large-zh-CW** 模型: + - **ERNIE 1.0-Large-zh-cw** 模型: - 除了base模型外,我们还训练了放出了large模型。此模型参数采用的是词表与ernie-1.0相同,因此命名为`ernie-1.0-large-zh-cw`。使用开源语料,batch_size 512, 训练 400w step,训练去除SOP任务,只保留MLM损失: Model                                    | Arch | CLUE AVG | AFQMC | TNEWS | IFLYTEK | CMNLI | OCNLI | CLUE WSC2020 | CSL | CMRC | CHID | C3 -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | Metrics |   |   | Acc | Acc | Acc | Acc | Acc | Acc | Acc | Exact/F1 | Acc| Acc -ERNIE 1.0-Large-zh-CW| 24L1024H | 79.03 | 75.97 | 59.65 | 62.91 | 85.09 | 81.73| 93.09 | 84.53 | 74.22/91.88 | 88.57 | 84.54 +ERNIE 1.0-Large-zh-cw | 24L1024H | 79.03 | 75.97 | 59.65 | 62.91 | 85.09 | 
81.73| 93.09 | 84.53 | 74.22/91.88 | 88.57 | 84.54 ERNIE 3.0-Xbase-zh| 20L1024H | 78.71 | 76.85 | 59.89 | 62.41 | 84.76 | 82.51 | 89.80 | 84.47 | 75.49/92.67 | 86.36 | 84.59 RoBERTa-wwm-ext-large | 24L1024H | 76.61 | 76.00 | 59.33 | 62.02 | 83.88 | 78.81 | 90.79 | 83.67 | 70.58/89.82 | 85.72 | 75.26 diff --git a/model_zoo/ernie-1.0/pretraining_introduction.md b/model_zoo/ernie-1.0/pretraining_introduction.md index 4489e9b87285..7b2aa1f65562 100644 --- a/model_zoo/ernie-1.0/pretraining_introduction.md +++ b/model_zoo/ernie-1.0/pretraining_introduction.md @@ -24,8 +24,8 @@ PaddleNLP致力于预训练开源工作,使用开源中文语料CLUE、WuDao - [3.4 训练数据流配置](#data_pipe) - [3.5 观察评估](#观察评估) - [4. 训练效果](#release_models) - - [4.1 ERNIE 1.0-Base-zh-CW 模型](#ernie-1.0-base-zh-cw) - - [4.2 ERNIE 1.0-Large-zh-CW 模型](#ernie-1.0-large-zh-cw) + - [4.1 ERNIE 1.0-Base-zh-cw 模型](#ernie-1.0-base-zh-cw) + - [4.2 ERNIE 1.0-Large-zh-cw 模型](#ernie-1.0-large-zh-cw) * [5. 参考](#references) 全部流程介绍图如下: @@ -577,28 +577,28 @@ python3 -u -m paddle.distributed.launch \ -### 4.1 ERNIE 1.0-Base-zh-CW 模型 +### 4.1 ERNIE 1.0-Base-zh-cw 模型 使用CLUE,WuDao共计400GB的语料,batch_size 1024, 训练 400w step,即可训练得到`ernie-3.0-base-zh`类似的模型效果。相关模型参数,开源为`ernie-1.0-base-zh-cw`,用户加载即可使用。使用CLUE benchmark 对最优超参数进行GradSearch搜索: Model                                  | Arch | CLUE AVG | AFQMC | TNEWS | IFLYTEK | CMNLI | OCNLI | CLUE WSC2020 | CSL | CMRC | CHID | C3 -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | Metrics |   |   | Acc | Acc | Acc | Acc | Acc | Acc | Acc | Exact/F1| Acc| Acc -ERNIE 1.0-Base-zh-CW | 12L768H | 76.44 | 76.04 | 58.02 | 60.87 | 83.56 | 78.61 | 89.14 | 84.00 | 72.26/90.40 | 84.73 | 77.15 | +ERNIE 1.0-Base-zh-cw | 12L768H | 76.47 | 76.07 | 57.86 | 59.91 | 83.41 | 79.58 | 89.91 | 83.42 | 72.88/90.78 | 84.68 | 76.98 | ERNIE 2.0-Base-zh | 12L768H | 74.32 | 75.65 | 58.25 | 61.64 | 82.62 | 78.71 | 81.91 | 82.33 | 66.08/87.46 | 82.78 | 73.19 ERNIE 1.0-Base-zh | 12L768H | 74.17 | 74.84 | 58.91 | 62.25 | 81.68 | 76.58 | 
85.20 | 82.77 | 67.32/87.83 | 82.47 | 69.68 -### 4.2 ERNIE 1.0-Large-zh-CW 模型 +### 4.2 ERNIE 1.0-Large-zh-cw 模型 除了base模型外,我们还训练了large模型。命名为`ernie-1.0-large-zh-cw`。使用开源语料,batch_size 512, 训练 400w step,训练去除SOP任务,只保留MLM损失,使用CLUE benchmark 对最优超参数进行GradSearch搜索: Model                                    | Arch | CLUE AVG | AFQMC | TNEWS | IFLYTEK | CMNLI | OCNLI | CLUE WSC2020 | CSL | CMRC | CHID | C3 -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | Metrics |   |   | Acc | Acc | Acc | Acc | Acc | Acc | Acc | Exact/F1 | Acc| Acc -ERNIE 1.0-Large-zh-CW| 24L1024H | 79.03 | 75.97 | 59.65 | 62.91 | 85.09 | 81.73| 93.09 | 84.53 | 74.22/91.88 | 88.57 | 84.54 +ERNIE 1.0-Large-zh-cw| 24L1024H | 79.03 | 75.97 | 59.65 | 62.91 | 85.09 | 81.73| 93.09 | 84.53 | 74.22/91.88 | 88.57 | 84.54 ERNIE 3.0-Xbase-zh| 20L1024H | 78.39 | 76.16 | 59.55 | 61.87 | 84.40 | 81.73 | 88.82 | 83.60 | 75.99/93.00 | 86.78 | 84.98 RoBERTa-wwm-ext-large | 24L1024H | 76.61 | 76.00 | 59.33 | 62.02 | 83.88 | 78.81 | 90.79 | 83.67 | 70.58/89.82 | 85.72 | 75.26 diff --git a/model_zoo/ernie-1.0/run_pretrain.py b/model_zoo/ernie-1.0/run_pretrain.py index d6bb1cfccc38..e6df62998e3a 100644 --- a/model_zoo/ernie-1.0/run_pretrain.py +++ b/model_zoo/ernie-1.0/run_pretrain.py @@ -541,8 +541,11 @@ def do_train(args): ctx_manager = contextlib.nullcontext() if sys.version_info >= ( 3, 7) else contextlib.suppress() - if worker_num > 1 and (args.use_recompute - or args.accumulate_steps > 1): + if worker_num > 1 and (args.use_recompute or + ((step + 1) % args.accumulate_steps != 0)): + # grad acc, no_sync when (step + 1) % args.accumulate_steps != 0: + # recompute, no_sync every where + # recompute + grad_acc, no_sync every where ctx_manager = model.no_sync() else: ctx_manager = contextlib.nullcontext() if sys.version_info >= ( diff --git a/model_zoo/ernie-3.0/README.md b/model_zoo/ernie-3.0/README.md index a8fef6755dcf..eb52e045606e 100644 --- a/model_zoo/ernie-3.0/README.md +++ b/model_zoo/ernie-3.0/README.md 
@@ -139,7 +139,7 @@ batch_size=32 和 1,预测精度为 FP16 时,GPU 下的效果-时延图: 24L1024H - ERNIE 1.0-Large-CW + ERNIE 1.0-Large-zh-cw 79.03 @@ -291,7 +291,7 @@ batch_size=32 和 1,预测精度为 FP16 时,GPU 下的效果-时延图: - 12L768H + 12L768H @@ -333,6 +333,44 @@ batch_size=32 和 1,预测精度为 FP16 时,GPU 下的效果-时延图: 77.88 + + + ERNIE 1.0-Base-zh-cw + + + 76.47 + + + 76.07 + + + 57.86 + + + 59.91 + + + 83.41 + + + 79.58 + + + 89.91 + + + 83.42 + + + 72.88/90.78 + + + 84.68 + + + 76.98 + + ERNIE-Gram-zh