From 0ad7de0f5a6ae449b8ac009c2f17adde2a70c69c Mon Sep 17 00:00:00 2001
From: ManoleAlexandru99 <manole.alexandru99@yahoo.ro>
Date: Wed, 12 Apr 2023 21:38:12 +0300
Subject: [PATCH] No warm up + Moved dropout back #0015

Also changed constants to hyperparameters
---
 data/hyps/hyp.Objects365.yaml      | 5 +++--
 data/hyps/hyp.VOC.yaml             | 5 +++--
 data/hyps/hyp.no-augmentation.yaml | 5 +++--
 data/hyps/hyp.scratch-high.yaml    | 5 +++--
 data/hyps/hyp.scratch-low.yaml     | 5 +++--
 data/hyps/hyp.scratch-med.yaml     | 5 +++--
 models/common.py                   | 4 +---
 train.py                           | 4 +++-
 utils/loss.py                      | 2 +-
 9 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/data/hyps/hyp.Objects365.yaml b/data/hyps/hyp.Objects365.yaml
index 4012cce7a546..fae42c29b809 100644
--- a/data/hyps/hyp.Objects365.yaml
+++ b/data/hyps/hyp.Objects365.yaml
@@ -7,11 +7,12 @@ lr0: 0.00258
 lrf: 0.17
 momentum: 0.779
 weight_decay: 0.00058
-warmup_epochs: 1.33
+warmup_epochs: 0
 warmup_momentum: 0.86
 warmup_bias_lr: 0.0711
 box: 0.0539
-seg: 0.1 # seg loss
+seg: 1 # Weight for segmentation loss
+det: 1 # Global gain applied to all detection losses (box, cls, obj) at once, instead of changing all 3 values
 cls: 0.299
 cls_pw: 0.825
 obj: 0.632
diff --git a/data/hyps/hyp.VOC.yaml b/data/hyps/hyp.VOC.yaml
index 7409e2f806e6..5b437a3ac126 100644
--- a/data/hyps/hyp.VOC.yaml
+++ b/data/hyps/hyp.VOC.yaml
@@ -13,11 +13,12 @@ lr0: 0.00334
 lrf: 0.15135
 momentum: 0.74832
 weight_decay: 0.00025
-warmup_epochs: 3.3835
+warmup_epochs: 0
 warmup_momentum: 0.59462
 warmup_bias_lr: 0.18657
 box: 0.02
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Global gain applied to all detection losses (box, cls, obj) at once, instead of changing all 3 values
 cls: 0.21638
 cls_pw: 0.5
 obj: 0.51728
diff --git a/data/hyps/hyp.no-augmentation.yaml b/data/hyps/hyp.no-augmentation.yaml
index ff948114a975..7ceb46f81c5c 100644
--- a/data/hyps/hyp.no-augmentation.yaml
+++ b/data/hyps/hyp.no-augmentation.yaml
@@ -7,11 +7,12 @@ lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937  # SGD momentum/Adam beta1
 weight_decay: 0.0005  # optimizer weight decay 5e-4
-warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_epochs: 0  # warmup epochs (fractions ok)
 warmup_momentum: 0.8  # warmup initial momentum
 warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 0.05  # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Global gain applied to all detection losses (box, cls, obj) at once, instead of changing all 3 values
 cls: 0.3  # cls loss gain
 cls_pw: 1.0  # cls BCELoss positive_weight
 obj: 0.7  # obj loss gain (scale with pixels)
diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml
index 6faf04dc32d0..4aed3c84e4eb 100644
--- a/data/hyps/hyp.scratch-high.yaml
+++ b/data/hyps/hyp.scratch-high.yaml
@@ -7,11 +7,12 @@ lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937  # SGD momentum/Adam beta1
 weight_decay: 0.0005  # optimizer weight decay 5e-4
-warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_epochs: 0  # warmup epochs (fractions ok)
 warmup_momentum: 0.8  # warmup initial momentum
 warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 0.05  # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Global gain applied to all detection losses (box, cls, obj) at once, instead of changing all 3 values
 cls: 0.3  # cls loss gain
 cls_pw: 1.0  # cls BCELoss positive_weight
 obj: 0.7  # obj loss gain (scale with pixels)
diff --git a/data/hyps/hyp.scratch-low.yaml b/data/hyps/hyp.scratch-low.yaml
index d8800248782e..6c5aa03f0dce 100644
--- a/data/hyps/hyp.scratch-low.yaml
+++ b/data/hyps/hyp.scratch-low.yaml
@@ -7,11 +7,12 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937  # SGD momentum/Adam beta1
 weight_decay: 0.0005  # optimizer weight decay 5e-4
-warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_epochs: 0  # warmup epochs (fractions ok)
 warmup_momentum: 0.8  # warmup initial momentum
 warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 0.05  # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Global gain applied to all detection losses (box, cls, obj) at once, instead of changing all 3 values
 cls: 0.5  # cls loss gain
 cls_pw: 1.0  # cls BCELoss positive_weight
 obj: 1.0  # obj loss gain (scale with pixels)
diff --git a/data/hyps/hyp.scratch-med.yaml b/data/hyps/hyp.scratch-med.yaml
index 0c8a577c6aa1..fd7ba128181e 100644
--- a/data/hyps/hyp.scratch-med.yaml
+++ b/data/hyps/hyp.scratch-med.yaml
@@ -7,11 +7,12 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937  # SGD momentum/Adam beta1
 weight_decay: 0.0005  # optimizer weight decay 5e-4
-warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_epochs: 0  # warmup epochs (fractions ok)
 warmup_momentum: 0.8  # warmup initial momentum
 warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 0.05  # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Global gain applied to all detection losses (box, cls, obj) at once, instead of changing all 3 values
 cls: 0.3  # cls loss gain
 cls_pw: 1.0  # cls BCELoss positive_weight
 obj: 0.7  # obj loss gain (scale with pixels)
diff --git a/models/common.py b/models/common.py
index 675fe104d186..95305d84cac7 100644
--- a/models/common.py
+++ b/models/common.py
@@ -863,18 +863,16 @@ def __init__(self, in_channels):
 
     def forward(self, x):
         # print('----entry shape', x.shape, '---\n')
-        # x = self.dropout_weak(x)
         x = self.cv1(x)
         x = self.upsample(x)
         # x = self.relu(x)
         # print('----upsample shape', x.shape, '---\n')
-        # x = self.dropout_normal(x)
         x = self.cv2(x)
         x = self.upsample(x)
 
         # x = self.relu(x)
-        x = self.cv3(x)
         x = self.dropout_normal(x)
+        x = self.cv3(x)
         # print('----out shape', x.shape, '---\n')
         # x = self.sigmoid(x)
         return x
diff --git a/train.py b/train.py
index e73401573a98..5c1b0c962d25 100644
--- a/train.py
+++ b/train.py
@@ -241,7 +241,9 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
     hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
 
-    hyp['seg'] = 1
+    hyp['box'] *= hyp['det']
+    hyp['cls'] *= hyp['det']
+    hyp['obj'] *= hyp['det']
 
     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
diff --git a/utils/loss.py b/utils/loss.py
index 0c6570d7bbfd..cdee24b4240d 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -204,7 +204,7 @@ def __call__(self, preds, targets, seg_masks):  # predictions, targets
 
         # return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
         # return total_loss, torch.cat((lbox, lobj, lcls, lseg)).detach()
-        return (lbox + lobj + lcls) * bs * 0, lseg * bs * 1, torch.cat((lbox, lobj, lcls, lseg)).detach()
+        return (lbox + lobj + lcls) * bs, lseg * bs, torch.cat((lbox, lobj, lcls, lseg)).detach()
 
     def build_targets(self, p, targets):
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)