Skip to content

Commit

Permalink
feat: standardize output tree (#746)
Browse files: browse the repository at this point in the history
  • Loading branch information
anesson-cs authored Feb 22, 2024
1 parent dd9a2ef commit fbf81ca
Show file tree
Hide file tree
Showing 25 changed files with 1,041 additions and 712 deletions.
431 changes: 133 additions & 298 deletions docs/notebooks/api_user_guide/1_overview.ipynb

Large diffs are not rendered by default.

245 changes: 129 additions & 116 deletions docs/notebooks/api_user_guide/7_download.ipynb

Large diffs are not rendered by default.

213 changes: 134 additions & 79 deletions docs/notebooks/api_user_guide/8_post_process.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

80 changes: 47 additions & 33 deletions docs/notebooks/tutos/tuto_cds.ipynb

Large diffs are not rendered by default.

54 changes: 27 additions & 27 deletions docs/notebooks/tutos/tuto_cop_dem.ipynb

Large diffs are not rendered by default.

96 changes: 65 additions & 31 deletions docs/notebooks/tutos/tuto_ecmwf.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions eodag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ class OrderStatusOnSuccess(TypedDict):
base_uri: str
outputs_prefix: str
extract: bool
outputs_extension: str
order_enabled: bool # HTTPDownload
order_method: str # HTTPDownload
order_headers: Dict[str, str] # HTTPDownload
Expand Down
13 changes: 11 additions & 2 deletions eodag/plugins/apis/ecmwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import annotations

import logging
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

Expand All @@ -37,6 +38,7 @@
DEFAULT_PAGE,
get_geometry_from_various,
path_to_uri,
sanitize,
urlsplit,
)
from eodag.utils.exceptions import AuthenticationError, DownloadError
Expand Down Expand Up @@ -179,6 +181,13 @@ def download(
product.location = path_to_uri(fs_path)
return fs_path

new_fs_path = os.path.join(
os.path.dirname(fs_path), sanitize(product.properties["title"])
)
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
fs_path = os.path.join(new_fs_path, os.path.basename(fs_path))

# get download request dict from product.location/downloadLink url query string
# separate url & parameters
download_request = geojson.loads(urlsplit(product.location).query)
Expand Down Expand Up @@ -222,11 +231,11 @@ def download(
fh.write(product.properties["downloadLink"])
logger.debug("Download recorded in %s", record_filename)

# do not try to extract or delete grib/netcdf
# do not try to extract a directory
kwargs["extract"] = False

product_path = self._finalize(
fs_path,
new_fs_path,
progress_callback=progress_callback,
outputs_extension=f".{product_extension}",
**kwargs,
Expand Down
96 changes: 31 additions & 65 deletions eodag/plugins/download/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def download(
SAFE-build is configured for a given provider and product type.
If the product title is configured to be updated during download and
SAFE-formatted, its destination path will be:
`{outputs_prefix}/{title}/{updated_title}.SAFE`
`{outputs_prefix}/{title}`
:param product: The EO product to download
:type product: :class:`~eodag.api.product._product.EOProduct`
Expand Down Expand Up @@ -1154,8 +1154,7 @@ def get_chunk_dest_path(
# S2 L2A Tile files -----------------------------------------------
if matched := S2L2A_TILE_IMG_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/IMG_DATA/R%s/T%s%s%s_%s_%s_%s.jp2" % (
product.properties["title"],
product_path = "GRANULE/%s/IMG_DATA/R%s/T%s%s%s_%s_%s_%s.jp2" % (
found_dict["num"],
found_dict["res"],
found_dict["tile1"],
Expand All @@ -1167,41 +1166,36 @@ def get_chunk_dest_path(
)
elif matched := S2L2A_TILE_AUX_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/AUX_DATA/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/AUX_DATA/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 L2A QI Masks
elif matched := S2_TILE_QI_MSK_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/MSK_%sPRB_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/MSK_%sPRB_%s" % (
found_dict["num"],
found_dict["file_base"],
found_dict["file_suffix"],
)
# S2 L2A QI PVI
elif matched := S2_TILE_QI_PVI_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/%s_%s_PVI.jp2" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/%s_%s_PVI.jp2" % (
found_dict["num"],
title_part3,
title_date1,
)
# S2 Tile files ---------------------------------------------------
elif matched := S2_TILE_PREVIEW_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/preview/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/preview/%s" % (
found_dict["num"],
found_dict["file"],
)
elif matched := S2_TILE_IMG_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/IMG_DATA/T%s%s%s_%s_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/IMG_DATA/T%s%s%s_%s_%s" % (
found_dict["num"],
found_dict["tile1"],
found_dict["tile2"],
Expand All @@ -1211,97 +1205,74 @@ def get_chunk_dest_path(
)
elif matched := S2_TILE_THUMBNAIL_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/%s" % (
found_dict["num"],
found_dict["file"],
)
elif matched := S2_TILE_MTD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/MTD_TL.xml" % (
product.properties["title"],
found_dict["num"],
)
product_path = "GRANULE/%s/MTD_TL.xml" % found_dict["num"]
elif matched := S2_TILE_AUX_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/AUX_DATA/AUX_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/AUX_DATA/AUX_%s" % (
found_dict["num"],
found_dict["file"],
)
elif matched := S2_TILE_QI_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 Tiles generic
elif matched := S2_TILE_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 Product files
elif matched := S2_PROD_DS_MTD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/MTD_DS.xml" % (
product.properties["title"],
ds_dir,
)
product_path = "DATASTRIP/%s/MTD_DS.xml" % ds_dir
elif matched := S2_PROD_DS_QI_REPORT_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/QI_DATA/%s.xml" % (
product.properties["title"],
product_path = "DATASTRIP/%s/QI_DATA/%s.xml" % (
ds_dir,
found_dict["filename"],
)
elif matched := S2_PROD_DS_QI_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/QI_DATA/%s" % (
product.properties["title"],
product_path = "DATASTRIP/%s/QI_DATA/%s" % (
ds_dir,
found_dict["file"],
)
elif matched := S2_PROD_INSPIRE_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/INSPIRE.xml" % (product.properties["title"],)
product_path = "INSPIRE.xml"
elif matched := S2_PROD_MTD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/MTD_MSI%s.xml" % (
product.properties["title"],
s2_processing_level,
)
product_path = "MTD_MSI%s.xml" % s2_processing_level
# S2 Product generic
elif matched := S2_PROD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/%s" % (
product.properties["title"],
found_dict["file"],
)
product_path = "%s" % found_dict["file"]
# S1 --------------------------------------------------------------
elif matched := S1_CALIB_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = (
"%s.SAFE/annotation/calibration/%s-%s-%s-grd-%s-%s-%03d.xml"
% (
product.properties["title"],
found_dict["file_prefix"],
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
s1_title_suffix,
S1_IMG_NB_PER_POLAR.get(
product.properties["polarizationMode"], {}
).get(found_dict["file_pol"].upper(), 1),
)
product_path = "annotation/calibration/%s-%s-%s-grd-%s-%s-%03d.xml" % (
found_dict["file_prefix"],
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
s1_title_suffix,
S1_IMG_NB_PER_POLAR.get(product.properties["polarizationMode"], {}).get(
found_dict["file_pol"].upper(), 1
),
)
elif matched := S1_ANNOT_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/annotation/%s-%s-grd-%s-%s-%03d.xml" % (
product.properties["title"],
product_path = "annotation/%s-%s-grd-%s-%s-%03d.xml" % (
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
Expand All @@ -1312,8 +1283,7 @@ def get_chunk_dest_path(
)
elif matched := S1_MEAS_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/measurement/%s-%s-grd-%s-%s-%03d.%s" % (
product.properties["title"],
product_path = "measurement/%s-%s-grd-%s-%s-%03d.%s" % (
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
Expand All @@ -1325,18 +1295,14 @@ def get_chunk_dest_path(
)
elif matched := S1_REPORT_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/%s.SAFE-%s" % (
product.properties["title"],
product_path = "%s.SAFE-%s" % (
product.properties["title"],
found_dict["file"],
)
# S1 generic
elif matched := S1_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/%s" % (
product.properties["title"],
found_dict["file"],
)
product_path = "%s" % found_dict["file"]
# out of SAFE format
else:
raise NotAvailableError(f"Ignored {chunk.key} out of SAFE matching pattern")
Expand Down
33 changes: 22 additions & 11 deletions eodag/plugins/download/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ def _finalize(
extract = (
extract if extract is not None else getattr(self.config, "extract", True)
)
if not extract:
logger.info("Extraction not activated. The product is available as is.")
progress_callback(1, total=1)
return fs_path

delete_archive = kwargs.pop("delete_archive", None)
delete_archive = (
delete_archive
Expand All @@ -340,11 +345,6 @@ def _finalize(
)
outputs_extension = kwargs.pop("outputs_extension", ".zip")

if not extract:
logger.info("Extraction not activated. The product is available as is.")
progress_callback(1, total=1)
return fs_path

product_path = (
fs_path[: fs_path.index(outputs_extension)]
if outputs_extension in fs_path
Expand Down Expand Up @@ -373,7 +373,6 @@ def _finalize(
f"Extraction cancelled, destination directory already exists and is not empty: {product_path}"
)
progress_callback(1, total=1)
product_path = self._resolve_archive_depth(product_path)
return product_path
outputs_prefix = (
kwargs.pop("outputs_prefix", None) or self.config.outputs_prefix
Expand Down Expand Up @@ -402,14 +401,28 @@ def _finalize(
path=extraction_dir,
)
progress_callback(1)
shutil.move(extraction_dir, outputs_dir)
# In some cases the archive contains only a lone file that is not inside a directory.
# When that happens, create the output directory first and move the file into it.
product_extraction_path = self._resolve_archive_depth(extraction_dir)
if os.path.isfile(product_extraction_path) and not os.path.isdir(
outputs_dir
):
os.makedirs(outputs_dir)
shutil.move(product_extraction_path, outputs_dir)

elif fs_path.endswith(".tar.gz"):
elif fs_path.endswith(".tar") or fs_path.endswith(".tar.gz"):
with tarfile.open(fs_path, "r") as zfile:
progress_callback.reset(total=1)
zfile.extractall(path=extraction_dir)
progress_callback(1)
shutil.move(extraction_dir, outputs_dir)
# In some cases the archive contains only a lone file that is not inside a directory.
# When that happens, create the output directory first and move the file into it.
product_extraction_path = self._resolve_archive_depth(extraction_dir)
if os.path.isfile(product_extraction_path) and not os.path.isdir(
outputs_dir
):
os.makedirs(outputs_dir)
shutil.move(product_extraction_path, outputs_dir)
else:
progress_callback(1, total=1)

Expand All @@ -429,8 +442,6 @@ def _finalize(
if close_progress_callback:
progress_callback.close()

product_path = self._resolve_archive_depth(product_path)

return product_path

def download_all(
Expand Down
Loading

0 comments on commit fbf81ca

Please sign in to comment.