From ea5e8a6a875dcdd1cb89b6f269476cc3585576ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 12 Mar 2022 11:11:01 +0000 Subject: [PATCH 1/3] Bump pillow from 9.0.0 to 9.0.1 Bumps [pillow](https://github.com/python-pillow/Pillow) from 9.0.0 to 9.0.1. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/9.0.0...9.0.1) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index be95d1c..7db037f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,7 +61,7 @@ parso==0.8.3 pathtools==0.1.2 pexpect==4.8.0 pickleshare==0.7.5 -Pillow==9.0.0 +Pillow==9.0.1 portalocker==2.3.2 promise==2.3 prompt-toolkit==3.0.24 From 753c630bf8edf23fe11e393643254bbbf388ba36 Mon Sep 17 00:00:00 2001 From: HyeonhoonLee Date: Mon, 14 Mar 2022 11:43:20 +0900 Subject: [PATCH 2/3] feature: add practical cosineannealingwarmuprestarts --- baseline1/medal_contender/configs.py | 1 + baseline1/medal_contender/model.py | 73 +++++++++++++++++++++++++++- baseline1/medal_contender/train.py | 2 + configs/deberta.yaml | 23 +++++---- 4 files changed, 87 insertions(+), 12 deletions(-) diff --git a/baseline1/medal_contender/configs.py b/baseline1/medal_contender/configs.py index ee30dd3..5d0d195 100644 --- a/baseline1/medal_contender/configs.py +++ b/baseline1/medal_contender/configs.py @@ -14,6 +14,7 @@ SCHEDULER_LIST = { "cos_ann": 'CosineAnnealingLR', "cos_ann_warm": 'CosineAnnealingWarmRestarts', + "cos_ann_warm_cus": 'CosineAnnealingWarmUpRestartsCustom', "lambda":"LambdaLR", "multiple":"MultiplicativeLR", "step":"StepLR", diff --git a/baseline1/medal_contender/model.py b/baseline1/medal_contender/model.py index de84c89..9a1a24d 100644 --- a/baseline1/medal_contender/model.py +++ b/baseline1/medal_contender/model.py @@ -1,9 +1,11 @@ import torch import torch.nn as nn from torch.optim import lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler from transformers import AutoModel, AutoConfig from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup from medal_contender.configs import SCHEDULER_LIST, BERT_MODEL_LIST +import math def init_params(module_lst): for module in module_lst: @@ -59,6 +61,62 @@ def forward(self, inputs): output = self.classifier(feature) return output + +class CosineAnnealingWarmUpRestartsCustom(_LRScheduler): + def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1): + if T_0 <= 0 or not isinstance(T_0, int): + raise ValueError("Expected positive integer T_0, but got {}".format(T_0)) + if T_mult < 1 or not isinstance(T_mult, int): + raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult)) + if T_up < 0 or not isinstance(T_up, int): + raise ValueError("Expected positive integer T_up, but got {}".format(T_up)) + self.T_0 = T_0 + self.T_mult = T_mult + self.base_eta_max = eta_max + self.eta_max = eta_max + self.T_up = T_up + self.T_i = T_0 + self.gamma = gamma + self.cycle = 0 + self.T_cur = last_epoch + super(CosineAnnealingWarmUpRestartsCustom, self).__init__(optimizer, last_epoch) + + def get_lr(self): + if self.T_cur == -1: + return self.base_lrs + elif self.T_cur < self.T_up: + return [(self.eta_max - 
base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs] + else: + return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2 + for base_lr in self.base_lrs] + + def step(self, epoch=None): + if epoch is None: + epoch = self.last_epoch + 1 + self.T_cur = self.T_cur + 1 + if self.T_cur >= self.T_i: + self.cycle += 1 + self.T_cur = self.T_cur - self.T_i + self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up + else: + if epoch >= self.T_0: + if self.T_mult == 1: + self.T_cur = epoch % self.T_0 + self.cycle = epoch // self.T_0 + else: + n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult)) + self.cycle = n + self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1) + self.T_i = self.T_0 * self.T_mult ** (n) + else: + self.T_i = self.T_0 + self.T_cur = epoch + + self.eta_max = self.base_eta_max * (self.gamma**self.cycle) + self.last_epoch = math.floor(epoch) + for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): + param_group['lr'] = lr + def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0): param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] @@ -76,12 +134,13 @@ class DeepShareModel(nn.Module): def __init__(self, cfg, config_path=None, pretrained=False): super().__init__() self.cfg = cfg + model_name = BERT_MODEL_LIST[self.cfg.model_param.model_name] if config_path is None: - self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True) + self.config = AutoConfig.from_pretrained(model_name, output_hidden_states=True) else: self.config = torch.load(config_path) if pretrained: - self.model = AutoModel.from_pretrained(cfg.model, config=self.config) + self.model = AutoModel.from_pretrained(model_name, config=self.config) else: self.model = AutoModel.from_config(self.config) self.fc_dropout_0 = nn.Dropout(0.1) @@ -148,6 +207,16 @@ def fetch_scheduler(optimizer, cfg, num_train_steps=None): T_0=cfg.train_param.T_0, eta_min=float(cfg.train_param.min_lr) ) + elif SCHEDULER_LIST[cfg.model_param.scheduler] == 'CosineAnnealingWarmUpRestartsCustom': + scheduler = CosineAnnealingWarmUpRestartsCustom( + optimizer, + T_0=cfg.train_param.T_0, + T_mult=cfg.train_param.T_multi, + eta_max=float(cfg.train_param.max_lr), + T_up=cfg.train_param.T_up, + gamma=float(cfg.train_param.gamma) + ) + elif SCHEDULER_LIST[cfg.model_param.scheduler] == 'LambdaLR': scheduler = lr_scheduler.LambdaLR( optimizer, diff --git a/baseline1/medal_contender/train.py b/baseline1/medal_contender/train.py index bf92455..8edbd30 100644 --- a/baseline1/medal_contender/train.py +++ b/baseline1/medal_contender/train.py @@ -73,6 +73,8 @@ def train_fn(CFG, fold, train_loader, model, criterion, optimizer, epoch, schedu if CFG.train_param.batch_scheduler: if not CFG.model_param.scheduler == 'rlrp': scheduler.step() + if CFG.model_param.scheduler == 'cos_ann_warm_cus': + scheduler.step(epoch=epoch) running_loss += (loss.item() * batch_size) dataset_size += batch_size diff --git a/configs/deberta.yaml b/configs/deberta.yaml index fb013fa..0db4791 100644 --- a/configs/deberta.yaml +++ b/configs/deberta.yaml @@ -1,5 +1,5 @@ program_param: - wandb_key: b41fa9bf3a38422dafc16e253d84694489607e9f + wandb_key: 6b115fa825114338c0e612d95f732074ccf832c8 project_name: nbme seed: 42 @@ -8,7 +8,7 @@ data_param: model_param: model_name: deberta - scheduler: cosine + scheduler: cos_ann_warm_cus #cosine mix_precision: True 
num_classes: 1 num_cycles: 0.5 @@ -19,30 +19,33 @@ model_param: is_deeper_attn: True train_param: - model_type : Attention # ['Attention', 'DeepShareModel'] + model_type : DeepShareModel # ['Attention', 'DeepShareModel'] debug: False - epochs: 5 - lr: 2e-5 - max_lr: 0.1 + epochs: 20 + lr: 1e-6 + max_lr: 1e-4 min_lr: 1e-6 encoder_lr: 2e-5 decoder_lr: 2e-5 eps: 1e-6 betas: (0.9, 0.999) - batch_size: 4 + batch_size: 24 fc_dropout: 0.2 max_len: weight_decay: 0.01 - gradient_accumulation_steps: 8 + gradient_accumulation_steps: 1 #8 max_grad_norm: 1000 - num_workers: 0 + num_workers: 4 kfold_type : skf # skf : stratifiedkfold / group : groupkfold n_fold: 5 trn_fold: [0, 1, 2, 3, 4] save_dir: checkpoint/deberta loss: BCEWithLogitsLoss T_max: 500 - T_0: 50 + T_0: 5 + T_multi: 1 + gamma : 0.5 + T_up: 5 reduce_ratio: 0.95 apex: True batch_scheduler: True From d22fac7e627e79cf1923ddf7ff778a19278aed95 Mon Sep 17 00:00:00 2001 From: Young Paik Date: Mon, 14 Mar 2022 15:07:36 +0900 Subject: [PATCH 3/3] remove wandb_key --- configs/deberta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/deberta.yaml b/configs/deberta.yaml index 0db4791..2ef13a3 100644 --- a/configs/deberta.yaml +++ b/configs/deberta.yaml @@ -1,5 +1,5 @@ program_param: - wandb_key: 6b115fa825114338c0e612d95f732074ccf832c8 + wandb_key: project_name: nbme seed: 42
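
Usage sketch (not part of the patches above): the snippet below shows one way to drive the CosineAnnealingWarmUpRestartsCustom scheduler introduced in PATCH 2/3, with the hyperparameter values taken from configs/deberta.yaml. The toy nn.Linear model, the AdamW optimizer, and the bare epoch loop are illustrative stand-ins only; in the repository the scheduler is constructed by fetch_scheduler() in baseline1/medal_contender/model.py and stepped from train_fn() in baseline1/medal_contender/train.py. It assumes the medal_contender package is importable from the working directory.

    # Minimal sketch, assuming the baseline1 package is on PYTHONPATH.
    import torch
    import torch.nn as nn
    from medal_contender.model import CosineAnnealingWarmUpRestartsCustom

    model = nn.Linear(10, 1)  # illustrative stand-in for the DeepShareModel
    # The scheduler warms up from the optimizer's base lr toward eta_max,
    # which is why deberta.yaml sets a low base lr (lr: 1e-6).
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-6)

    scheduler = CosineAnnealingWarmUpRestartsCustom(
        optimizer,
        T_0=5,         # train_param.T_0: length of the first restart cycle (epochs)
        T_mult=1,      # train_param.T_multi: keep every cycle the same length
        eta_max=1e-4,  # train_param.max_lr: peak lr reached at the end of warm-up
        T_up=5,        # train_param.T_up: warm-up epochs inside each cycle
        gamma=0.5,     # train_param.gamma: peak lr decays by this factor per cycle
    )

    for epoch in range(20):  # train_param.epochs: 20
        # ... one training epoch would run here ...
        scheduler.step(epoch=epoch)  # epoch-indexed step, as in train_fn for 'cos_ann_warm_cus'
        print(epoch, optimizer.param_groups[0]["lr"])

Note that train_fn additionally calls scheduler.step() per batch whenever batch_scheduler is enabled and the scheduler is not 'rlrp'; the loop above only illustrates the epoch-indexed call added for 'cos_ann_warm_cus'.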