From a113ae46f144d9efcc9ab7d16bf0e01c78dc039f Mon Sep 17 00:00:00 2001 From: kshtiijrajsharma Date: Thu, 18 Apr 2024 15:25:03 +0545 Subject: [PATCH 1/6] Sep api and worker --- backend/core/urls.py | 4 ++-- backend/core/views.py | 32 +++++++++++++++++--------------- backend/requirements.txt | 13 ++++++++----- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/backend/core/urls.py b/backend/core/urls.py index c67abd63..d740e4dd 100644 --- a/backend/core/urls.py +++ b/backend/core/urls.py @@ -5,7 +5,7 @@ # now import the views.py file into this code from .views import ( AOIViewSet, - APIStatus, + # APIStatus, ConflateGeojson, DatasetViewSet, FeedbackAOIViewset, @@ -52,7 +52,7 @@ path("training/publish//", publish_training), path("prediction/", PredictionView.as_view()), path("feedback/training/submit/", FeedbackView.as_view()), - path("status/", APIStatus.as_view()), + # path("status/", APIStatus.as_view()), path("geojson2osm/", geojson2osmconverter, name="geojson2osmconverter"), path("conflate/", ConflateGeojson, name="Conflate Geojson"), path("aoi/gpx//", GenerateGpxView.as_view()), diff --git a/backend/core/views.py b/backend/core/views.py index 03f4ffe4..9526cfbe 100644 --- a/backend/core/views.py +++ b/backend/core/views.py @@ -12,7 +12,7 @@ from datetime import datetime from tempfile import NamedTemporaryFile -import tensorflow as tf +# import tensorflow as tf from celery import current_app from celery.result import AsyncResult from django.conf import settings @@ -60,7 +60,8 @@ ModelSerializer, PredictionParamSerializer, ) -from .tasks import train_model +# from .tasks import train_model +from celery import Celery from .utils import get_dir_size, gpx_generator, process_rawdata, request_rawdata @@ -127,9 +128,9 @@ def create(self, validated_data): validated_data["created_by"] = user # create the model instance instance = Training.objects.create(**validated_data) - + celery = Celery() # run your function here - task = train_model.delay( + task = celery.train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, @@ -469,8 +470,9 @@ def post(self, request, *args, **kwargs): batch_size=batch_size, source_imagery=training_instance.source_imagery, ) + celery = Celery() - task = train_model.delay( + task = celery.train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, @@ -612,16 +614,16 @@ def publish_training(request, training_id: int): return Response("Training Published", status=status.HTTP_201_CREATED) -class APIStatus(APIView): - def get(self, request): - res = { - "tensorflow_version": tf.__version__, - "No of GPU Available": len( - tf.config.experimental.list_physical_devices("GPU") - ), - "API Status": "Healthy", # static for now should be dynamic TODO - } - return Response(res, status=status.HTTP_200_OK) +# class APIStatus(APIView): +# def get(self, request): +# res = { +# "tensorflow_version": tf.__version__, +# "No of GPU Available": len( +# tf.config.experimental.list_physical_devices("GPU") +# ), +# "API Status": "Healthy", # static for now should be dynamic TODO +# } +# return Response(res, status=status.HTTP_200_OK) class GenerateGpxView(APIView): diff --git a/backend/requirements.txt b/backend/requirements.txt index 3ac6c0cd..0cb3549c 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,12 +1,15 @@ django==4.1.4 -# gdal + +gdal==3.6.2 psycopg2 + + djangorestframework==3.14.0 djangorestframework-gis==1.0 dj-database-url==1.2.0 django-leaflet==0.28.3 drf-yasg==1.21.4 -Pillow +# Pillow django-environ==0.9.0 # used for environment django-filter==22.1 django-cors-headers==3.13.0 # used for enabling cors when frontend is hosted on different server / origin @@ -19,7 +22,7 @@ validators==0.20.0 gpxpy==1.5.0 hot-fair-utilities==1.2.3 geojson2osm==0.0.1 -osmconflator -orthogonalizer +osmconflator==0.0.9 +orthogonalizer==0.0.4 fairpredictor==0.0.26 -tflite-runtime==2.14.0 \ No newline at end of file +tflite-runtime==2.14.0 From 79d14e5a9fb0f9f7181a89af0d1125abaf63c8fe Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 11:59:22 +0545 Subject: [PATCH 2/6] Add readme for API requirements --- backend/README.md | 24 +++++++----------------- backend/api-requirements.txt | 23 +++++++++++++++++++++++ backend/requirements.txt | 28 +--------------------------- backend/sample_env | 1 + 4 files changed, 32 insertions(+), 44 deletions(-) create mode 100644 backend/api-requirements.txt diff --git a/backend/README.md b/backend/README.md index 2e88a113..392dcc44 100644 --- a/backend/README.md +++ b/backend/README.md @@ -14,33 +14,27 @@ This project was bootstrapped with [Geodjango Template](https://github.com/itsk source ./env/bin/activate ##### Setup Basemodels (Ramp Supported Currently) +- Install git lfs +```bash +sudo apt-get install git-lfs +``` + - Clone Ramp Basemodel ``` git clone https://github.com/radiantearth/model_ramp_baseline.git ``` -OR Download from google drive -``` -pip install gdown -gdown --fuzzy https://drive.google.com/file/d/1wvJhkiOrSlHmmvJ0avkAdu9sslFf5_I0/view?usp=sharing -``` - Clone Ramp - Code Note: This clone location will be your RAMP_HOME ``` git clone https://github.com/kshitijrajsharma/ramp-code-fAIr.git ramp-code ``` + - Copy Basemodel checkpoint to ramp-code ``` cp -r model_ramp_baseline/data/input/checkpoint.tf ramp-code/ramp/checkpoint.tf ``` -Our Basemodel is available for public download [here](https://drive.google.com/file/d/1wvJhkiOrSlHmmvJ0avkAdu9sslFf5_I0/view?usp=sharing) - -You can unzip and move the downloaded basemodel -``` -unzip checkpoint.tf.zip -d ramp-code/ramp -``` - - Remove basemodel repo we don't need it anymore ``` @@ -136,11 +130,7 @@ pip install -r requirements.txt You will need more env variables (Such as Ramp home, Training Home) that can be found on ```.sample_env``` #### Now change your username, password and db name in settings.py accordingly to your database - python manage.py makemigrations login - python manage.py migrate login - python manage.py makemigrations core - python manage.py migrate core - python manage.py makemigrations + python manage.py makemigrations login core python manage.py migrate python manage.py runserver ### Now server will be available in your 8000 port on web, you can check out your localhost:8000/admin for admin panel diff --git a/backend/api-requirements.txt b/backend/api-requirements.txt new file mode 100644 index 00000000..47dda210 --- /dev/null +++ b/backend/api-requirements.txt @@ -0,0 +1,23 @@ +django==4.1.4 +# gdal==3.6.2 +# psycopg2==2.9.9 +djangorestframework==3.14.0 +djangorestframework-gis==1.0 +dj-database-url==1.2.0 +django-leaflet==0.28.3 +drf-yasg==1.21.4 +django-environ==0.9.0 # used for environment +django-filter==22.1 +django-cors-headers==3.13.0 # used for enabling cors when frontend is hosted on different server / origin +osm-login-python==0.0.2 +celery==5.2.7 +redis==4.4.0 +django_celery_results==2.4.0 +flower==1.2.0 +validators==0.20.0 +gpxpy==1.5.0 +geojson2osm==0.0.1 +osmconflator==0.0.9 +orthogonalizer==0.0.4 +fairpredictor==0.0.26 +tflite-runtime==2.14.0 \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 0cb3549c..97733fef 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,28 +1,2 @@ -django==4.1.4 - -gdal==3.6.2 -psycopg2 - - -djangorestframework==3.14.0 -djangorestframework-gis==1.0 -dj-database-url==1.2.0 -django-leaflet==0.28.3 -drf-yasg==1.21.4 -# Pillow -django-environ==0.9.0 # used for environment -django-filter==22.1 -django-cors-headers==3.13.0 # used for enabling cors when frontend is hosted on different server / origin -osm-login-python==0.0.2 -celery==5.2.7 -redis==4.4.0 -django_celery_results==2.4.0 -flower==1.2.0 -validators==0.20.0 -gpxpy==1.5.0 +-r api-requirements.txt hot-fair-utilities==1.2.3 -geojson2osm==0.0.1 -osmconflator==0.0.9 -orthogonalizer==0.0.4 -fairpredictor==0.0.26 -tflite-runtime==2.14.0 diff --git a/backend/sample_env b/backend/sample_env index 21e8bae9..a47d9ae5 100644 --- a/backend/sample_env +++ b/backend/sample_env @@ -1,3 +1,4 @@ +DEBUG=True SECRET_KEY=yl2w)c0boi_ma-1v5)935^2#&m*r!1s9z9^*9e5co^08_ixzo6 DATABASE_URL=postgis://admin:password@localhost:5432/ai EXPORT_TOOL_API_URL=MY_RAW_DATA_URL From afc23a766bc706349896ba8ca440c0238217fa7d Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 12:15:56 +0545 Subject: [PATCH 3/6] Enable psycopg2 --- backend/api-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/api-requirements.txt b/backend/api-requirements.txt index 47dda210..0b047bea 100644 --- a/backend/api-requirements.txt +++ b/backend/api-requirements.txt @@ -1,6 +1,6 @@ django==4.1.4 # gdal==3.6.2 -# psycopg2==2.9.9 +psycopg2 djangorestframework==3.14.0 djangorestframework-gis==1.0 dj-database-url==1.2.0 From 7e5bb7e468158829e26f05149b444008823b6c17 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 14:06:47 +0545 Subject: [PATCH 4/6] add chips length in models and training --- backend/core/models.py | 1 + backend/core/tasks.py | 58 ++++++++++++++++++++++++++++++------------ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/backend/core/models.py b/backend/core/models.py index 5ad0e284..4b054ea6 100644 --- a/backend/core/models.py +++ b/backend/core/models.py @@ -84,6 +84,7 @@ class Training(models.Model): finished_at = models.DateTimeField(null=True, blank=True) accuracy = models.FloatField(null=True, blank=True) epochs = models.PositiveIntegerField() + chips_length = models.PositiveIntegerField(default=0) batch_size = models.PositiveIntegerField() freeze_layers = models.BooleanField(default=False) diff --git a/backend/core/tasks.py b/backend/core/tasks.py index a3ec613b..8fbca094 100644 --- a/backend/core/tasks.py +++ b/backend/core/tasks.py @@ -3,23 +3,14 @@ import os import shutil import sys +import tarfile import traceback from shutil import rmtree -import tarfile import hot_fair_utilities import ramp.utils import tensorflow as tf from celery import shared_task -from django.conf import settings -from django.contrib.gis.db.models.aggregates import Extent -from django.contrib.gis.geos import GEOSGeometry -from django.shortcuts import get_object_or_404 -from django.utils import timezone -from hot_fair_utilities import preprocess, train -from hot_fair_utilities.training import run_feedback -from predictor import download_imagery, get_start_end_download_coords - from core.models import AOI, Feedback, FeedbackAOI, FeedbackLabel, Label, Training from core.serializers import ( AOISerializer, @@ -29,6 +20,14 @@ LabelFileSerializer, ) from core.utils import bbox, is_dir_empty +from django.conf import settings +from django.contrib.gis.db.models.aggregates import Extent +from django.contrib.gis.geos import GEOSGeometry +from django.shortcuts import get_object_or_404 +from django.utils import timezone +from hot_fair_utilities import preprocess, train +from hot_fair_utilities.training import run_feedback +from predictor import download_imagery, get_start_end_download_coords logger = logging.getLogger(__name__) @@ -37,6 +36,7 @@ DEFAULT_TILE_SIZE = 256 + def xz_folder(folder_path, output_filename, remove_original=False): """ Compresses a folder and its contents into a .tar.xz file and optionally removes the original folder. @@ -47,8 +47,8 @@ def xz_folder(folder_path, output_filename, remove_original=False): - remove_original: If True, the original folder is removed after compression. """ - if not output_filename.endswith('.tar.xz'): - output_filename += '.tar.xz' + if not output_filename.endswith(".tar.xz"): + output_filename += ".tar.xz" with tarfile.open(output_filename, "w:xz") as tar: tar.add(folder_path, arcname=os.path.basename(folder_path)) @@ -57,6 +57,20 @@ def xz_folder(folder_path, output_filename, remove_original=False): shutil.rmtree(folder_path) +def get_file_count(path): + try: + return len( + [ + entry + for entry in os.listdir(path) + if os.path.isfile(os.path.join(path, entry)) + ] + ) + except Exception as e: + print(f"An error occurred: {e}") + return 0 + + @shared_task def train_model( dataset_id, @@ -189,7 +203,9 @@ def train_model( rasterize_options=["binary"], georeference_images=True, ) - + training_instance.chips_length = get_file_count( + os.path.join(preprocess_output, "chips") + ) # train train_output = f"{base_path}/train" @@ -272,9 +288,19 @@ def train_model( f.write(json.dumps(aoi_serializer.data)) # copy aois and labels to preprocess output before compressing it to tar - shutil.copyfile(os.path.join(output_path, "aois.geojson"), os.path.join(preprocess_output,'aois.geojson')) - shutil.copyfile(os.path.join(output_path, "labels.geojson"), os.path.join(preprocess_output,'labels.geojson')) - xz_folder(preprocess_output, os.path.join(output_path, "preprocessed.tar.xz"), remove_original=True) + shutil.copyfile( + os.path.join(output_path, "aois.geojson"), + os.path.join(preprocess_output, "aois.geojson"), + ) + shutil.copyfile( + os.path.join(output_path, "labels.geojson"), + os.path.join(preprocess_output, "labels.geojson"), + ) + xz_folder( + preprocess_output, + os.path.join(output_path, "preprocessed.tar.xz"), + remove_original=True, + ) # now remove the ramp-data all our outputs are copied to our training workspace shutil.rmtree(base_path) From d10c6470087848b36ac68341569de8ff9cf0b6d9 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 09:36:51 +0000 Subject: [PATCH 5/6] Restrict using native celery function instead import the lower level function --- backend/core/views.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/backend/core/views.py b/backend/core/views.py index a2163ac9..d920cd40 100644 --- a/backend/core/views.py +++ b/backend/core/views.py @@ -60,8 +60,7 @@ ModelSerializer, PredictionParamSerializer, ) -# from .tasks import train_model -from celery import Celery +from .tasks import train_model from .utils import get_dir_size, gpx_generator, process_rawdata, request_rawdata @@ -129,10 +128,8 @@ def create(self, validated_data): # create the model instance instance = Training.objects.create(**validated_data) - celery = Celery() - # run your function here - task = celery.train_model.delay( + task = train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, @@ -474,9 +471,7 @@ def post(self, request, *args, **kwargs): batch_size=batch_size, source_imagery=training_instance.source_imagery, ) - celery = Celery() - - task = celery.train_model.delay( + task = train_model.delay( dataset_id=instance.model.dataset.id, training_id=instance.id, epochs=instance.epochs, From e784173073361550fab0d67e0f5dcff502a01076 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Thu, 30 May 2024 09:41:55 +0000 Subject: [PATCH 6/6] tasks - save chip length when preprocessing steps is finished --- backend/core/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/core/tasks.py b/backend/core/tasks.py index 8fbca094..bfbf7c3b 100644 --- a/backend/core/tasks.py +++ b/backend/core/tasks.py @@ -206,6 +206,8 @@ def train_model( training_instance.chips_length = get_file_count( os.path.join(preprocess_output, "chips") ) + training_instance.save() + # train train_output = f"{base_path}/train"