Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: standardize output tree #746

Merged
merged 21 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e7b099c
feat: standardize output tree
anesson-cs Jun 23, 2023
1931114
feat: handle non zip files with extension cases
anesson-cs Jul 10, 2023
fd81345
test: update and fix tests following rebase
sbrunato Aug 25, 2023
e701d26
refactor: lint fix following rebase
sbrunato Aug 25, 2023
896a74b
test: fixed cop_ads and ecmwf end-to-end tests
anesson-cs Oct 5, 2023
1c605b0
fix: remove useless 'outputs_in_folder' parameter
anesson-cs Oct 9, 2023
f35125f
test: add download HTTP plugin tests and update some EOProduct tests
anesson-cs Oct 16, 2023
84ff6c4
feat: update AWS download plugin docstring
anesson-cs Oct 16, 2023
335dcca
feat: update product name in tests resources notice
anesson-cs Oct 16, 2023
a4fa616
fix: replace outdated Creodias link in the tests resources notice
anesson-cs Oct 16, 2023
d92af18
docs: update download examples in doc notebooks
anesson-cs Oct 25, 2023
8f9f40b
feat: set 'archive_depth' parameter of Wekeo provider to 2
anesson-cs Oct 31, 2023
deee31e
fix: return the right directory when extraction is cancelled
anesson-cs Nov 15, 2023
21e1e14
fix: remove extension to 'title' parameter in Wekeo provider configuration
anesson-cs Nov 15, 2023
7b96169
fix: handle correctly lone file output tree after product extraction
anesson-cs Nov 15, 2023
6f76d55
feat: handle download and extraction for tarfiles with '.tar' extension
anesson-cs Nov 15, 2023
6cbe674
fix: use a variable instead of an attribute in a test
anesson-cs Feb 20, 2024
32505c0
fix: add a 'TO DO' test
anesson-cs Feb 20, 2024
0a87508
fix: change a comment
anesson-cs Feb 20, 2024
b6e5fd5
fix: handle syntax hints after the rebase
anesson-cs Feb 20, 2024
d0e6820
refactor: post http download file checks
sbrunato Feb 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
431 changes: 133 additions & 298 deletions docs/notebooks/api_user_guide/1_overview.ipynb

Large diffs are not rendered by default.

245 changes: 129 additions & 116 deletions docs/notebooks/api_user_guide/7_download.ipynb

Large diffs are not rendered by default.

213 changes: 134 additions & 79 deletions docs/notebooks/api_user_guide/8_post_process.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

80 changes: 47 additions & 33 deletions docs/notebooks/tutos/tuto_cds.ipynb

Large diffs are not rendered by default.

54 changes: 27 additions & 27 deletions docs/notebooks/tutos/tuto_cop_dem.ipynb

Large diffs are not rendered by default.

96 changes: 65 additions & 31 deletions docs/notebooks/tutos/tuto_ecmwf.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions eodag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ class OrderStatusOnSuccess(TypedDict):
base_uri: str
outputs_prefix: str
extract: bool
outputs_extension: str
order_enabled: bool # HTTPDownload
order_method: str # HTTPDownload
order_headers: Dict[str, str] # HTTPDownload
Expand Down
13 changes: 11 additions & 2 deletions eodag/plugins/apis/ecmwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import annotations

import logging
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

Expand All @@ -37,6 +38,7 @@
DEFAULT_PAGE,
get_geometry_from_various,
path_to_uri,
sanitize,
urlsplit,
)
from eodag.utils.exceptions import AuthenticationError, DownloadError
Expand Down Expand Up @@ -179,6 +181,13 @@ def download(
product.location = path_to_uri(fs_path)
return fs_path

new_fs_path = os.path.join(
os.path.dirname(fs_path), sanitize(product.properties["title"])
)
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
fs_path = os.path.join(new_fs_path, os.path.basename(fs_path))

# get download request dict from product.location/downloadLink url query string
# separate url & parameters
download_request = geojson.loads(urlsplit(product.location).query)
Expand Down Expand Up @@ -222,11 +231,11 @@ def download(
fh.write(product.properties["downloadLink"])
logger.debug("Download recorded in %s", record_filename)

# do not try to extract or delete grib/netcdf
# do not try to extract a directory
kwargs["extract"] = False

product_path = self._finalize(
fs_path,
new_fs_path,
progress_callback=progress_callback,
outputs_extension=f".{product_extension}",
**kwargs,
Expand Down
96 changes: 31 additions & 65 deletions eodag/plugins/download/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def download(
SAFE-build is configured for a given provider and product type.
If the product title is configured to be updated during download and
SAFE-formatted, its destination path will be:
`{outputs_prefix}/{title}/{updated_title}.SAFE`
`{outputs_prefix}/{title}`

:param product: The EO product to download
:type product: :class:`~eodag.api.product._product.EOProduct`
Expand Down Expand Up @@ -1154,8 +1154,7 @@ def get_chunk_dest_path(
# S2 L2A Tile files -----------------------------------------------
if matched := S2L2A_TILE_IMG_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/IMG_DATA/R%s/T%s%s%s_%s_%s_%s.jp2" % (
product.properties["title"],
product_path = "GRANULE/%s/IMG_DATA/R%s/T%s%s%s_%s_%s_%s.jp2" % (
found_dict["num"],
found_dict["res"],
found_dict["tile1"],
Expand All @@ -1167,41 +1166,36 @@ def get_chunk_dest_path(
)
elif matched := S2L2A_TILE_AUX_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/AUX_DATA/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/AUX_DATA/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 L2A QI Masks
elif matched := S2_TILE_QI_MSK_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/MSK_%sPRB_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/MSK_%sPRB_%s" % (
found_dict["num"],
found_dict["file_base"],
found_dict["file_suffix"],
)
# S2 L2A QI PVI
elif matched := S2_TILE_QI_PVI_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/%s_%s_PVI.jp2" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/%s_%s_PVI.jp2" % (
found_dict["num"],
title_part3,
title_date1,
)
# S2 Tile files ---------------------------------------------------
elif matched := S2_TILE_PREVIEW_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/preview/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/preview/%s" % (
found_dict["num"],
found_dict["file"],
)
elif matched := S2_TILE_IMG_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/IMG_DATA/T%s%s%s_%s_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/IMG_DATA/T%s%s%s_%s_%s" % (
found_dict["num"],
found_dict["tile1"],
found_dict["tile2"],
Expand All @@ -1211,97 +1205,74 @@ def get_chunk_dest_path(
)
elif matched := S2_TILE_THUMBNAIL_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/%s" % (
found_dict["num"],
found_dict["file"],
)
elif matched := S2_TILE_MTD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/MTD_TL.xml" % (
product.properties["title"],
found_dict["num"],
)
product_path = "GRANULE/%s/MTD_TL.xml" % found_dict["num"]
elif matched := S2_TILE_AUX_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/AUX_DATA/AUX_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/AUX_DATA/AUX_%s" % (
found_dict["num"],
found_dict["file"],
)
elif matched := S2_TILE_QI_DIR_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 Tiles generic
elif matched := S2_TILE_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/GRANULE/%s/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 Product files
elif matched := S2_PROD_DS_MTD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/MTD_DS.xml" % (
product.properties["title"],
ds_dir,
)
product_path = "DATASTRIP/%s/MTD_DS.xml" % ds_dir
elif matched := S2_PROD_DS_QI_REPORT_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/QI_DATA/%s.xml" % (
product.properties["title"],
product_path = "DATASTRIP/%s/QI_DATA/%s.xml" % (
ds_dir,
found_dict["filename"],
)
elif matched := S2_PROD_DS_QI_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/QI_DATA/%s" % (
product.properties["title"],
product_path = "DATASTRIP/%s/QI_DATA/%s" % (
ds_dir,
found_dict["file"],
)
elif matched := S2_PROD_INSPIRE_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/INSPIRE.xml" % (product.properties["title"],)
product_path = "INSPIRE.xml"
elif matched := S2_PROD_MTD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/MTD_MSI%s.xml" % (
product.properties["title"],
s2_processing_level,
)
product_path = "MTD_MSI%s.xml" % s2_processing_level
# S2 Product generic
elif matched := S2_PROD_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/%s" % (
product.properties["title"],
found_dict["file"],
)
product_path = "%s" % found_dict["file"]
# S1 --------------------------------------------------------------
elif matched := S1_CALIB_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = (
"%s.SAFE/annotation/calibration/%s-%s-%s-grd-%s-%s-%03d.xml"
% (
product.properties["title"],
found_dict["file_prefix"],
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
s1_title_suffix,
S1_IMG_NB_PER_POLAR.get(
product.properties["polarizationMode"], {}
).get(found_dict["file_pol"].upper(), 1),
)
product_path = "annotation/calibration/%s-%s-%s-grd-%s-%s-%03d.xml" % (
found_dict["file_prefix"],
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
s1_title_suffix,
S1_IMG_NB_PER_POLAR.get(product.properties["polarizationMode"], {}).get(
found_dict["file_pol"].upper(), 1
),
)
elif matched := S1_ANNOT_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/annotation/%s-%s-grd-%s-%s-%03d.xml" % (
product.properties["title"],
product_path = "annotation/%s-%s-grd-%s-%s-%03d.xml" % (
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
Expand All @@ -1312,8 +1283,7 @@ def get_chunk_dest_path(
)
elif matched := S1_MEAS_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/measurement/%s-%s-grd-%s-%s-%03d.%s" % (
product.properties["title"],
product_path = "measurement/%s-%s-grd-%s-%s-%03d.%s" % (
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
Expand All @@ -1325,18 +1295,14 @@ def get_chunk_dest_path(
)
elif matched := S1_REPORT_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/%s.SAFE-%s" % (
product.properties["title"],
product_path = "%s.SAFE-%s" % (
product.properties["title"],
found_dict["file"],
)
# S1 generic
elif matched := S1_REGEX.match(chunk.key):
found_dict = matched.groupdict()
product_path = "%s.SAFE/%s" % (
product.properties["title"],
found_dict["file"],
)
product_path = "%s" % found_dict["file"]
# out of SAFE format
else:
raise NotAvailableError(f"Ignored {chunk.key} out of SAFE matching pattern")
Expand Down
33 changes: 22 additions & 11 deletions eodag/plugins/download/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ def _finalize(
extract = (
extract if extract is not None else getattr(self.config, "extract", True)
)
if not extract:
logger.info("Extraction not activated. The product is available as is.")
progress_callback(1, total=1)
return fs_path

delete_archive = kwargs.pop("delete_archive", None)
delete_archive = (
delete_archive
Expand All @@ -340,11 +345,6 @@ def _finalize(
)
outputs_extension = kwargs.pop("outputs_extension", ".zip")

if not extract:
logger.info("Extraction not activated. The product is available as is.")
progress_callback(1, total=1)
return fs_path

product_path = (
fs_path[: fs_path.index(outputs_extension)]
if outputs_extension in fs_path
Expand Down Expand Up @@ -373,7 +373,6 @@ def _finalize(
f"Extraction cancelled, destination directory already exists and is not empty: {product_path}"
)
progress_callback(1, total=1)
product_path = self._resolve_archive_depth(product_path)
return product_path
outputs_prefix = (
kwargs.pop("outputs_prefix", None) or self.config.outputs_prefix
Expand Down Expand Up @@ -402,14 +401,28 @@ def _finalize(
path=extraction_dir,
)
progress_callback(1)
shutil.move(extraction_dir, outputs_dir)
# in some cases, only a lone file is extracted without being in a directory
# then, we create a directory in which we place this file
product_extraction_path = self._resolve_archive_depth(extraction_dir)
if os.path.isfile(product_extraction_path) and not os.path.isdir(
outputs_dir
):
os.makedirs(outputs_dir)
shutil.move(product_extraction_path, outputs_dir)

elif fs_path.endswith(".tar.gz"):
elif fs_path.endswith(".tar") or fs_path.endswith(".tar.gz"):
with tarfile.open(fs_path, "r") as zfile:
progress_callback.reset(total=1)
zfile.extractall(path=extraction_dir)
progress_callback(1)
shutil.move(extraction_dir, outputs_dir)
# in some cases, only a lone file is extracted without being in a directory
# then, we create a directory in which we place this file
product_extraction_path = self._resolve_archive_depth(extraction_dir)
if os.path.isfile(product_extraction_path) and not os.path.isdir(
outputs_dir
):
os.makedirs(outputs_dir)
shutil.move(product_extraction_path, outputs_dir)
else:
progress_callback(1, total=1)

Expand All @@ -429,8 +442,6 @@ def _finalize(
if close_progress_callback:
progress_callback.close()

product_path = self._resolve_archive_depth(product_path)

return product_path

def download_all(
Expand Down
Loading
Loading