From 0d0dd2eea0d97b088c8f3cea638a35149a9443e8 Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Thu, 30 May 2024 14:06:47 +0545
Subject: [PATCH 1/3] add chips length in models and training

---
 backend/core/models.py |  1 +
 backend/core/tasks.py  | 58 ++++++++++++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/backend/core/models.py b/backend/core/models.py
index 5ad0e284..4b054ea6 100644
--- a/backend/core/models.py
+++ b/backend/core/models.py
@@ -84,6 +84,7 @@ class Training(models.Model):
     finished_at = models.DateTimeField(null=True, blank=True)
     accuracy = models.FloatField(null=True, blank=True)
     epochs = models.PositiveIntegerField()
+    chips_length = models.PositiveIntegerField(default=0)
     batch_size = models.PositiveIntegerField()
     freeze_layers = models.BooleanField(default=False)
 
diff --git a/backend/core/tasks.py b/backend/core/tasks.py
index a3ec613b..8fbca094 100644
--- a/backend/core/tasks.py
+++ b/backend/core/tasks.py
@@ -3,23 +3,14 @@
 import os
 import shutil
 import sys
+import tarfile
 import traceback
 from shutil import rmtree
-import tarfile
 
 import hot_fair_utilities
 import ramp.utils
 import tensorflow as tf
 from celery import shared_task
-from django.conf import settings
-from django.contrib.gis.db.models.aggregates import Extent
-from django.contrib.gis.geos import GEOSGeometry
-from django.shortcuts import get_object_or_404
-from django.utils import timezone
-from hot_fair_utilities import preprocess, train
-from hot_fair_utilities.training import run_feedback
-from predictor import download_imagery, get_start_end_download_coords
-
 from core.models import AOI, Feedback, FeedbackAOI, FeedbackLabel, Label, Training
 from core.serializers import (
     AOISerializer,
@@ -29,6 +20,14 @@
     LabelFileSerializer,
 )
 from core.utils import bbox, is_dir_empty
+from django.conf import settings
+from django.contrib.gis.db.models.aggregates import Extent
+from django.contrib.gis.geos import GEOSGeometry
+from django.shortcuts import get_object_or_404
+from django.utils import timezone
+from hot_fair_utilities import preprocess, train
+from hot_fair_utilities.training import run_feedback
+from predictor import download_imagery, get_start_end_download_coords
 
 logger = logging.getLogger(__name__)
 
@@ -37,6 +36,7 @@
 
 DEFAULT_TILE_SIZE = 256
 
+
 def xz_folder(folder_path, output_filename, remove_original=False):
     """
     Compresses a folder and its contents into a .tar.xz file and optionally removes the original folder.
 
     Parameters:
     - folder_path: The path to the folder to compress.
     - output_filename: The name of the output file.
     - remove_original: If True, the original folder is removed after compression.
""" - if not output_filename.endswith('.tar.xz'): - output_filename += '.tar.xz' + if not output_filename.endswith(".tar.xz"): + output_filename += ".tar.xz" with tarfile.open(output_filename, "w:xz") as tar: tar.add(folder_path, arcname=os.path.basename(folder_path)) @@ -57,6 +57,20 @@ def xz_folder(folder_path, output_filename, remove_original=False): shutil.rmtree(folder_path) +def get_file_count(path): + try: + return len( + [ + entry + for entry in os.listdir(path) + if os.path.isfile(os.path.join(path, entry)) + ] + ) + except Exception as e: + print(f"An error occurred: {e}") + return 0 + + @shared_task def train_model( dataset_id, @@ -189,7 +203,9 @@ def train_model( rasterize_options=["binary"], georeference_images=True, ) - + training_instance.chips_length = get_file_count( + os.path.join(preprocess_output, "chips") + ) # train train_output = f"{base_path}/train" @@ -272,9 +288,19 @@ def train_model( f.write(json.dumps(aoi_serializer.data)) # copy aois and labels to preprocess output before compressing it to tar - shutil.copyfile(os.path.join(output_path, "aois.geojson"), os.path.join(preprocess_output,'aois.geojson')) - shutil.copyfile(os.path.join(output_path, "labels.geojson"), os.path.join(preprocess_output,'labels.geojson')) - xz_folder(preprocess_output, os.path.join(output_path, "preprocessed.tar.xz"), remove_original=True) + shutil.copyfile( + os.path.join(output_path, "aois.geojson"), + os.path.join(preprocess_output, "aois.geojson"), + ) + shutil.copyfile( + os.path.join(output_path, "labels.geojson"), + os.path.join(preprocess_output, "labels.geojson"), + ) + xz_folder( + preprocess_output, + os.path.join(output_path, "preprocessed.tar.xz"), + remove_original=True, + ) # now remove the ramp-data all our outputs are copied to our training workspace shutil.rmtree(base_path) From 3e5116bb0bdb6ceea5609ef056833d64ac46812e Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 09:36:51 +0000 Subject: [PATCH 2/3] Restrict using native celery function instead import the lower level function --- backend/core/views.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/backend/core/views.py b/backend/core/views.py index a2163ac9..d920cd40 100644 --- a/backend/core/views.py +++ b/backend/core/views.py @@ -60,8 +60,7 @@ ModelSerializer, PredictionParamSerializer, ) -# from .tasks import train_model -from celery import Celery +from .tasks import train_model from .utils import get_dir_size, gpx_generator, process_rawdata, request_rawdata @@ -129,10 +128,8 @@ def create(self, validated_data): # create the model instance instance = Training.objects.create(**validated_data) - celery = Celery() - # run your function here - task = celery.train_model.delay( + task = train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, @@ -474,9 +471,7 @@ def post(self, request, *args, **kwargs): batch_size=batch_size, source_imagery=training_instance.source_imagery, ) - celery = Celery() - - task = celery.train_model.delay( + task = train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, From 2a7930e24d14b885a7f032b6c906a8895d9b3f2f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 09:41:55 +0000 Subject: [PATCH 3/3] tasks - save chip length when preprocessing steps is finished --- backend/core/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/core/tasks.py b/backend/core/tasks.py index 8fbca094..bfbf7c3b 100644 --- 
+++ b/backend/core/tasks.py
@@ -206,6 +206,8 @@ def train_model(
         training_instance.chips_length = get_file_count(
             os.path.join(preprocess_output, "chips")
         )
+        training_instance.save()
+
 
         # train
         train_output = f"{base_path}/train"
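Note: patch 1 adds the chips_length field to core.models.Training, but no schema migration is included in this series. The sketch below is a hypothetical example of the migration that `python manage.py makemigrations core` would generate for that field; the migration's dependency entry is an assumption and would need to point at the app's actual latest migration. Only the AddField operation follows directly from the patch.

# Hypothetical companion migration (not part of these patches); normally
# produced by `python manage.py makemigrations core`.
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        # Assumption: replace with the latest existing migration of the core app.
        ("core", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="training",
            name="chips_length",
            field=models.PositiveIntegerField(default=0),
        ),
    ]

Because the field declares default=0, existing Training rows are backfilled with 0 when such a migration is applied; the actual chip count is only recorded for trainings run after these patches, once the preprocessing step finishes and training_instance.save() persists it.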