Skip to content

Commit

Permalink
Merge pull request #221 from hotosm/feature/precompressed-training
Browse files Browse the repository at this point in the history
Feature : Tar .xz Compression
  • Loading branch information
omranlm authored Feb 19, 2024
2 parents fbb4c1b + da87296 commit 4504e0d
Showing 1 changed file with 28 additions and 3 deletions.
31 changes: 28 additions & 3 deletions backend/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys
import traceback
from shutil import rmtree
import tarfile

import hot_fair_utilities
import ramp.utils
Expand Down Expand Up @@ -36,6 +37,25 @@

DEFAULT_TILE_SIZE = 256

def xz_folder(folder_path, output_filename, remove_original=False):
    """
    Compress a folder and its contents into a .tar.xz archive.

    The folder is stored in the archive under its own name, so extracting
    the archive recreates a single top-level directory.

    Parameters:
    - folder_path: The path to the folder to compress. A trailing path
      separator is tolerated.
    - output_filename: The path of the output archive. ".tar.xz" is
      appended when the name does not already end with it.
    - remove_original: If True, the original folder is removed after the
      archive has been written successfully.

    Returns:
    - The path of the archive that was written (including any appended
      ".tar.xz" suffix).
    """
    output_filename = os.fspath(output_filename)
    if not output_filename.endswith(".tar.xz"):
        output_filename += ".tar.xz"

    # os.path.basename("some/dir/") is "", which would drop the top-level
    # folder from the archive; normalising strips the trailing separator.
    archive_root = os.path.basename(os.path.normpath(folder_path))

    with tarfile.open(output_filename, "w:xz") as tar:
        tar.add(folder_path, arcname=archive_root)

    # Only reached when archiving succeeded, so the source is never deleted
    # without a complete archive in place.
    if remove_original:
        shutil.rmtree(folder_path)

    return output_filename


@shared_task
def train_model(
Expand Down Expand Up @@ -210,9 +230,9 @@ def train_model(
final_model_path, os.path.join(output_path, "checkpoint.tf")
)

shutil.copytree(
preprocess_output, os.path.join(output_path, "preprocessed")
)
# shutil.copytree(
# preprocess_output, os.path.join(output_path, "preprocessed")
# )

graph_output_path = f"{base_path}/train/graphs"
shutil.copytree(graph_output_path, os.path.join(output_path, "graphs"))
Expand Down Expand Up @@ -251,6 +271,11 @@ def train_model(
) as f:
f.write(json.dumps(aoi_serializer.data))

# copy aois and labels to preprocess output before compressing it to tar
shutil.copyfile(os.path.join(output_path, "aois.geojson"), os.path.join(preprocess_output,'aois.geojson'))
shutil.copyfile(os.path.join(output_path, "labels.geojson"), os.path.join(preprocess_output,'labels.geojson'))
xz_folder(preprocess_output, os.path.join(output_path, "preprocessed.tar.xz"), remove_original=True)

# now remove the ramp-data; all our outputs have already been copied to our training workspace
shutil.rmtree(base_path)
training_instance.accuracy = float(final_accuracy)
Expand Down

0 comments on commit 4504e0d

Please sign in to comment.