From 0d0dd2eea0d97b088c8f3cea638a35149a9443e8 Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Thu, 30 May 2024 14:06:47 +0545
Subject: [PATCH 1/3] add chips length in models and training

---
 backend/core/models.py |  1 +
 backend/core/tasks.py  | 58 ++++++++++++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/backend/core/models.py b/backend/core/models.py
index 5ad0e284..4b054ea6 100644
--- a/backend/core/models.py
+++ b/backend/core/models.py
@@ -84,6 +84,7 @@ class Training(models.Model):
     finished_at = models.DateTimeField(null=True, blank=True)
     accuracy = models.FloatField(null=True, blank=True)
     epochs = models.PositiveIntegerField()
+    chips_length = models.PositiveIntegerField(default=0)
     batch_size = models.PositiveIntegerField()
     freeze_layers = models.BooleanField(default=False)
 
diff --git a/backend/core/tasks.py b/backend/core/tasks.py
index a3ec613b..8fbca094 100644
--- a/backend/core/tasks.py
+++ b/backend/core/tasks.py
@@ -3,23 +3,14 @@
 import os
 import shutil
 import sys
+import tarfile
 import traceback
 from shutil import rmtree
-import tarfile
 
 import hot_fair_utilities
 import ramp.utils
 import tensorflow as tf
 from celery import shared_task
-from django.conf import settings
-from django.contrib.gis.db.models.aggregates import Extent
-from django.contrib.gis.geos import GEOSGeometry
-from django.shortcuts import get_object_or_404
-from django.utils import timezone
-from hot_fair_utilities import preprocess, train
-from hot_fair_utilities.training import run_feedback
-from predictor import download_imagery, get_start_end_download_coords
-
 from core.models import AOI, Feedback, FeedbackAOI, FeedbackLabel, Label, Training
 from core.serializers import (
     AOISerializer,
@@ -29,6 +20,14 @@
     LabelFileSerializer,
 )
 from core.utils import bbox, is_dir_empty
+from django.conf import settings
+from django.contrib.gis.db.models.aggregates import Extent
+from django.contrib.gis.geos import GEOSGeometry
+from django.shortcuts import get_object_or_404
+from django.utils import timezone
+from hot_fair_utilities import preprocess, train
+from hot_fair_utilities.training import run_feedback
+from predictor import download_imagery, get_start_end_download_coords
 
 logger = logging.getLogger(__name__)
 
@@ -37,6 +36,7 @@
 
 DEFAULT_TILE_SIZE = 256
 
+
 def xz_folder(folder_path, output_filename, remove_original=False):
     """
     Compresses a folder and its contents into a .tar.xz file and optionally removes the original folder.
 
     Parameters:
     - folder_path: The path to the folder to compress.
     - output_filename: The name of the output file.
     - remove_original: If True, the original folder is removed after compression.
""" - if not output_filename.endswith('.tar.xz'): - output_filename += '.tar.xz' + if not output_filename.endswith(".tar.xz"): + output_filename += ".tar.xz" with tarfile.open(output_filename, "w:xz") as tar: tar.add(folder_path, arcname=os.path.basename(folder_path)) @@ -57,6 +57,20 @@ def xz_folder(folder_path, output_filename, remove_original=False): shutil.rmtree(folder_path) +def get_file_count(path): + try: + return len( + [ + entry + for entry in os.listdir(path) + if os.path.isfile(os.path.join(path, entry)) + ] + ) + except Exception as e: + print(f"An error occurred: {e}") + return 0 + + @shared_task def train_model( dataset_id, @@ -189,7 +203,9 @@ def train_model( rasterize_options=["binary"], georeference_images=True, ) - + training_instance.chips_length = get_file_count( + os.path.join(preprocess_output, "chips") + ) # train train_output = f"{base_path}/train" @@ -272,9 +288,19 @@ def train_model( f.write(json.dumps(aoi_serializer.data)) # copy aois and labels to preprocess output before compressing it to tar - shutil.copyfile(os.path.join(output_path, "aois.geojson"), os.path.join(preprocess_output,'aois.geojson')) - shutil.copyfile(os.path.join(output_path, "labels.geojson"), os.path.join(preprocess_output,'labels.geojson')) - xz_folder(preprocess_output, os.path.join(output_path, "preprocessed.tar.xz"), remove_original=True) + shutil.copyfile( + os.path.join(output_path, "aois.geojson"), + os.path.join(preprocess_output, "aois.geojson"), + ) + shutil.copyfile( + os.path.join(output_path, "labels.geojson"), + os.path.join(preprocess_output, "labels.geojson"), + ) + xz_folder( + preprocess_output, + os.path.join(output_path, "preprocessed.tar.xz"), + remove_original=True, + ) # now remove the ramp-data all our outputs are copied to our training workspace shutil.rmtree(base_path) From 3e5116bb0bdb6ceea5609ef056833d64ac46812e Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 09:36:51 +0000 Subject: [PATCH 2/3] Restrict using native celery function instead import the lower level function --- backend/core/views.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/backend/core/views.py b/backend/core/views.py index a2163ac9..d920cd40 100644 --- a/backend/core/views.py +++ b/backend/core/views.py @@ -60,8 +60,7 @@ ModelSerializer, PredictionParamSerializer, ) -# from .tasks import train_model -from celery import Celery +from .tasks import train_model from .utils import get_dir_size, gpx_generator, process_rawdata, request_rawdata @@ -129,10 +128,8 @@ def create(self, validated_data): # create the model instance instance = Training.objects.create(**validated_data) - celery = Celery() - # run your function here - task = celery.train_model.delay( + task = train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, @@ -474,9 +471,7 @@ def post(self, request, *args, **kwargs): batch_size=batch_size, source_imagery=training_instance.source_imagery, ) - celery = Celery() - - task = celery.train_model.delay( + task = train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, From 2a7930e24d14b885a7f032b6c906a8895d9b3f2f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 09:41:55 +0000 Subject: [PATCH 3/3] tasks - save chip length when preprocessing steps is finished --- backend/core/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/core/tasks.py b/backend/core/tasks.py index 8fbca094..bfbf7c3b 100644 --- 
+++ b/backend/core/tasks.py
@@ -206,6 +206,8 @@ def train_model(
         training_instance.chips_length = get_file_count(
             os.path.join(preprocess_output, "chips")
         )
+        training_instance.save()
+
 
         # train
         train_output = f"{base_path}/train"
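Note: patch 1 adds the chips_length field to core.models.Training, but no schema migration is included in this series. The sketch below is a hypothetical example of the migration that `python manage.py makemigrations core` would generate for that field; the migration's dependency entry is an assumption and would need to point at the app's actual latest migration. Only the AddField operation follows directly from the patch.

# Hypothetical companion migration (not part of these patches); normally
# produced by `python manage.py makemigrations core`.
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        # Assumption: replace with the latest existing migration of the core app.
        ("core", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="training",
            name="chips_length",
            field=models.PositiveIntegerField(default=0),
        ),
    ]

Because the field declares default=0, existing Training rows are backfilled with 0 when such a migration is applied; the actual chip count is only recorded for trainings run after these patches, once the preprocessing step finishes and training_instance.save() persists it.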