From ea5e8a6a875dcdd1cb89b6f269476cc3585576ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 12 Mar 2022 11:11:01 +0000 Subject: [PATCH 1/3] Bump pillow from 9.0.0 to 9.0.1 Bumps [pillow](https://github.com/python-pillow/Pillow) from 9.0.0 to 9.0.1. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/9.0.0...9.0.1) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index be95d1c..7db037f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,7 +61,7 @@ parso==0.8.3 pathtools==0.1.2 pexpect==4.8.0 pickleshare==0.7.5 -Pillow==9.0.0 +Pillow==9.0.1 portalocker==2.3.2 promise==2.3 prompt-toolkit==3.0.24 From 753c630bf8edf23fe11e393643254bbbf388ba36 Mon Sep 17 00:00:00 2001 From: HyeonhoonLee Date: Mon, 14 Mar 2022 11:43:20 +0900 Subject: [PATCH 2/3] feature: add practical cosineannealingwarmuprestarts --- baseline1/medal_contender/configs.py | 1 + baseline1/medal_contender/model.py | 73 +++++++++++++++++++++++++++- baseline1/medal_contender/train.py | 2 + configs/deberta.yaml | 23 +++++---- 4 files changed, 87 insertions(+), 12 deletions(-) diff --git a/baseline1/medal_contender/configs.py b/baseline1/medal_contender/configs.py index ee30dd3..5d0d195 100644 --- a/baseline1/medal_contender/configs.py +++ b/baseline1/medal_contender/configs.py @@ -14,6 +14,7 @@ SCHEDULER_LIST = { "cos_ann": 'CosineAnnealingLR', "cos_ann_warm": 'CosineAnnealingWarmRestarts', + "cos_ann_warm_cus": 'CosineAnnealingWarmUpRestartsCustom', "lambda":"LambdaLR", "multiple":"MultiplicativeLR", "step":"StepLR", diff --git a/baseline1/medal_contender/model.py b/baseline1/medal_contender/model.py index de84c89..9a1a24d 100644 --- a/baseline1/medal_contender/model.py +++ b/baseline1/medal_contender/model.py @@ -1,9 +1,11 @@ import torch import torch.nn as nn from torch.optim import lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler from transformers import AutoModel, AutoConfig from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup from medal_contender.configs import SCHEDULER_LIST, BERT_MODEL_LIST +import math def init_params(module_lst): for module in module_lst: @@ -59,6 +61,62 @@ def forward(self, inputs): output = self.classifier(feature) return output + +class CosineAnnealingWarmUpRestartsCustom(_LRScheduler): + def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1): + if T_0 <= 0 or not isinstance(T_0, int): + raise ValueError("Expected positive integer T_0, but got {}".format(T_0)) + if T_mult < 1 or not isinstance(T_mult, int): + raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult)) + if T_up < 0 or not isinstance(T_up, int): + raise ValueError("Expected positive integer T_up, but got {}".format(T_up)) + self.T_0 = T_0 + self.T_mult = T_mult + self.base_eta_max = eta_max + self.eta_max = eta_max + self.T_up = T_up + self.T_i = T_0 + self.gamma = gamma + self.cycle = 0 + self.T_cur = last_epoch + super(CosineAnnealingWarmUpRestartsCustom, self).__init__(optimizer, last_epoch) + + def get_lr(self): + if self.T_cur == -1: + return self.base_lrs + elif self.T_cur < self.T_up: + return [(self.eta_max - 
base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs] + else: + return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2 + for base_lr in self.base_lrs] + + def step(self, epoch=None): + if epoch is None: + epoch = self.last_epoch + 1 + self.T_cur = self.T_cur + 1 + if self.T_cur >= self.T_i: + self.cycle += 1 + self.T_cur = self.T_cur - self.T_i + self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up + else: + if epoch >= self.T_0: + if self.T_mult == 1: + self.T_cur = epoch % self.T_0 + self.cycle = epoch // self.T_0 + else: + n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult)) + self.cycle = n + self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1) + self.T_i = self.T_0 * self.T_mult ** (n) + else: + self.T_i = self.T_0 + self.T_cur = epoch + + self.eta_max = self.base_eta_max * (self.gamma**self.cycle) + self.last_epoch = math.floor(epoch) + for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): + param_group['lr'] = lr + def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0): param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] @@ -76,12 +134,13 @@ class DeepShareModel(nn.Module): def __init__(self, cfg, config_path=None, pretrained=False): super().__init__() self.cfg = cfg + model_name = BERT_MODEL_LIST[self.cfg.model_param.model_name] if config_path is None: - self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True) + self.config = AutoConfig.from_pretrained(model_name, output_hidden_states=True) else: self.config = torch.load(config_path) if pretrained: - self.model = AutoModel.from_pretrained(cfg.model, config=self.config) + self.model = AutoModel.from_pretrained(model_name, config=self.config) else: self.model = AutoModel.from_config(self.config) self.fc_dropout_0 = nn.Dropout(0.1) @@ -148,6 +207,16 @@ def fetch_scheduler(optimizer, cfg, num_train_steps=None): T_0=cfg.train_param.T_0, eta_min=float(cfg.train_param.min_lr) ) + elif SCHEDULER_LIST[cfg.model_param.scheduler] == 'CosineAnnealingWarmUpRestartsCustom': + scheduler = CosineAnnealingWarmUpRestartsCustom( + optimizer, + T_0=cfg.train_param.T_0, + T_mult=cfg.train_param.T_multi, + eta_max=float(cfg.train_param.max_lr), + T_up=cfg.train_param.T_up, + gamma=float(cfg.train_param.gamma) + ) + elif SCHEDULER_LIST[cfg.model_param.scheduler] == 'LambdaLR': scheduler = lr_scheduler.LambdaLR( optimizer, diff --git a/baseline1/medal_contender/train.py b/baseline1/medal_contender/train.py index bf92455..8edbd30 100644 --- a/baseline1/medal_contender/train.py +++ b/baseline1/medal_contender/train.py @@ -73,6 +73,8 @@ def train_fn(CFG, fold, train_loader, model, criterion, optimizer, epoch, schedu if CFG.train_param.batch_scheduler: if not CFG.model_param.scheduler == 'rlrp': scheduler.step() + if CFG.model_param.scheduler == 'cos_ann_warm_cus': + scheduler.step(epoch=epoch) running_loss += (loss.item() * batch_size) dataset_size += batch_size diff --git a/configs/deberta.yaml b/configs/deberta.yaml index fb013fa..0db4791 100644 --- a/configs/deberta.yaml +++ b/configs/deberta.yaml @@ -1,5 +1,5 @@ program_param: - wandb_key: b41fa9bf3a38422dafc16e253d84694489607e9f + wandb_key: 6b115fa825114338c0e612d95f732074ccf832c8 project_name: nbme seed: 42 @@ -8,7 +8,7 @@ data_param: model_param: model_name: deberta - scheduler: cosine + scheduler: cos_ann_warm_cus #cosine mix_precision: True 
num_classes: 1 num_cycles: 0.5 @@ -19,30 +19,33 @@ model_param: is_deeper_attn: True train_param: - model_type : Attention # ['Attention', 'DeepShareModel'] + model_type : DeepShareModel # ['Attention', 'DeepShareModel'] debug: False - epochs: 5 - lr: 2e-5 - max_lr: 0.1 + epochs: 20 + lr: 1e-6 + max_lr: 1e-4 min_lr: 1e-6 encoder_lr: 2e-5 decoder_lr: 2e-5 eps: 1e-6 betas: (0.9, 0.999) - batch_size: 4 + batch_size: 24 fc_dropout: 0.2 max_len: weight_decay: 0.01 - gradient_accumulation_steps: 8 + gradient_accumulation_steps: 1 #8 max_grad_norm: 1000 - num_workers: 0 + num_workers: 4 kfold_type : skf # skf : stratifiedkfold / group : groupkfold n_fold: 5 trn_fold: [0, 1, 2, 3, 4] save_dir: checkpoint/deberta loss: BCEWithLogitsLoss T_max: 500 - T_0: 50 + T_0: 5 + T_multi: 1 + gamma : 0.5 + T_up: 5 reduce_ratio: 0.95 apex: True batch_scheduler: True From d22fac7e627e79cf1923ddf7ff778a19278aed95 Mon Sep 17 00:00:00 2001 From: Young Paik Date: Mon, 14 Mar 2022 15:07:36 +0900 Subject: [PATCH 3/3] remove wandb_key --- configs/deberta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/deberta.yaml b/configs/deberta.yaml index 0db4791..2ef13a3 100644 --- a/configs/deberta.yaml +++ b/configs/deberta.yaml @@ -1,5 +1,5 @@ program_param: - wandb_key: 6b115fa825114338c0e612d95f732074ccf832c8 + wandb_key: project_name: nbme seed: 42
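
Usage sketch (not part of the patches above): the snippet below shows one way to drive the CosineAnnealingWarmUpRestartsCustom scheduler introduced in PATCH 2/3, with the hyperparameter values taken from configs/deberta.yaml. The toy nn.Linear model, the AdamW optimizer, and the bare epoch loop are illustrative stand-ins only; in the repository the scheduler is constructed by fetch_scheduler() in baseline1/medal_contender/model.py and stepped from train_fn() in baseline1/medal_contender/train.py. It assumes the medal_contender package is importable from the working directory.

    # Minimal sketch, assuming the baseline1 package is on PYTHONPATH.
    import torch
    import torch.nn as nn
    from medal_contender.model import CosineAnnealingWarmUpRestartsCustom

    model = nn.Linear(10, 1)  # illustrative stand-in for the DeepShareModel
    # The scheduler warms up from the optimizer's base lr toward eta_max,
    # which is why deberta.yaml sets a low base lr (lr: 1e-6).
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-6)

    scheduler = CosineAnnealingWarmUpRestartsCustom(
        optimizer,
        T_0=5,         # train_param.T_0: length of the first restart cycle (epochs)
        T_mult=1,      # train_param.T_multi: keep every cycle the same length
        eta_max=1e-4,  # train_param.max_lr: peak lr reached at the end of warm-up
        T_up=5,        # train_param.T_up: warm-up epochs inside each cycle
        gamma=0.5,     # train_param.gamma: peak lr decays by this factor per cycle
    )

    for epoch in range(20):  # train_param.epochs: 20
        # ... one training epoch would run here ...
        scheduler.step(epoch=epoch)  # epoch-indexed step, as in train_fn for 'cos_ann_warm_cus'
        print(epoch, optimizer.param_groups[0]["lr"])

Note that train_fn additionally calls scheduler.step() per batch whenever batch_scheduler is enabled and the scheduler is not 'rlrp'; the loop above only illustrates the epoch-indexed call added for 'cos_ann_warm_cus'.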