Skip to content

Commit

Permalink
feat: standardize output tree
Browse files · Browse the repository at this point in the history
  • Loading branch information
anesson-cs committed Jun 23, 2023
1 parent 50d6e60 commit 83e7a97
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 113 deletions.
22 changes: 20 additions & 2 deletions eodag/plugins/apis/cds.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import shutil
from datetime import datetime

import cdsapi
Expand All @@ -31,7 +33,13 @@
from eodag.plugins.search.base import Search
from eodag.plugins.search.build_search_result import BuildPostSearchResult
from eodag.rest.stac import DEFAULT_MISSION_START_DATE
from eodag.utils import datetime_range, get_geometry_from_various, path_to_uri, urlsplit
from eodag.utils import (
datetime_range,
get_geometry_from_various,
path_to_uri,
sanitize,
urlsplit,
)
from eodag.utils.exceptions import AuthenticationError, DownloadError, RequestError
from eodag.utils.logging import get_logging_verbose

Expand Down Expand Up @@ -215,7 +223,17 @@ def download(self, product, auth=None, progress_callback=None, **kwargs):
fh.write(product.properties["downloadLink"])
logger.debug("Download recorded in %s", record_filename)

# do not try to extract or delete grib/netcdf
# Check the output configuration
if getattr(self.config, "outputs_in_folder", False):
new_fs_path = os.path.join(
os.path.dirname(fs_path), sanitize(product.properties["title"])
)
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
shutil.move(fs_path, new_fs_path)
fs_path = new_fs_path

# do not try to extract or delete grib/netcdf or a directory
kwargs["extract"] = False

product_path = self._finalize(
Expand Down
16 changes: 14 additions & 2 deletions eodag/plugins/apis/ecmwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import shutil
from datetime import datetime

import geojson
Expand All @@ -31,7 +33,7 @@
from eodag.plugins.search.base import Search
from eodag.plugins.search.build_search_result import BuildPostSearchResult
from eodag.rest.stac import DEFAULT_MISSION_START_DATE
from eodag.utils import get_geometry_from_various, path_to_uri, urlsplit
from eodag.utils import get_geometry_from_various, path_to_uri, sanitize, urlsplit
from eodag.utils.exceptions import AuthenticationError, DownloadError
from eodag.utils.logging import get_logging_verbose

Expand Down Expand Up @@ -203,7 +205,17 @@ def download(self, product, auth=None, progress_callback=None, **kwargs):
fh.write(product.properties["downloadLink"])
logger.debug("Download recorded in %s", record_filename)

# do not try to extract or delete grib/netcdf
# Check the output configuration
if getattr(self.config, "outputs_in_folder", False):
new_fs_path = os.path.join(
os.path.dirname(fs_path), sanitize(product.properties["title"])
)
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
shutil.move(fs_path, new_fs_path)
fs_path = new_fs_path

# do not try to extract or delete grib/netcdf or a directory
kwargs["extract"] = False

product_path = self._finalize(
Expand Down
113 changes: 38 additions & 75 deletions eodag/plugins/download/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,57 +733,48 @@ def get_chunk_dest_path(self, product, chunk, dir_prefix=None, build_safe=False)
# S2 L2A Tile files -----------------------------------------------
if S2L2A_TILE_IMG_REGEX.match(chunk.key):
found_dict = S2L2A_TILE_IMG_REGEX.match(chunk.key).groupdict()
product_path = (
"%s.SAFE/GRANULE/%s/IMG_DATA/R%s/T%s%s%s_%s_%s_%s.jp2"
% (
product.properties["title"],
found_dict["num"],
found_dict["res"],
found_dict["tile1"],
found_dict["tile2"],
found_dict["tile3"],
title_date1,
found_dict["file"],
found_dict["res"],
)
product_path = "GRANULE/%s/IMG_DATA/R%s/T%s%s%s_%s_%s_%s.jp2" % (
found_dict["num"],
found_dict["res"],
found_dict["tile1"],
found_dict["tile2"],
found_dict["tile3"],
title_date1,
found_dict["file"],
found_dict["res"],
)
elif S2L2A_TILE_AUX_DIR_REGEX.match(chunk.key):
found_dict = S2L2A_TILE_AUX_DIR_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/AUX_DATA/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/AUX_DATA/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 L2A QI Masks
elif S2_TILE_QI_MSK_REGEX.match(chunk.key):
found_dict = S2_TILE_QI_MSK_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/MSK_%sPRB_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/MSK_%sPRB_%s" % (
found_dict["num"],
found_dict["file_base"],
found_dict["file_suffix"],
)
# S2 L2A QI PVI
elif S2_TILE_QI_PVI_REGEX.match(chunk.key):
found_dict = S2_TILE_QI_PVI_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/%s_%s_PVI.jp2" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/%s_%s_PVI.jp2" % (
found_dict["num"],
title_part3,
title_date1,
)
# S2 Tile files ---------------------------------------------------
elif S2_TILE_PREVIEW_DIR_REGEX.match(chunk.key):
found_dict = S2_TILE_PREVIEW_DIR_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/preview/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/preview/%s" % (
found_dict["num"],
found_dict["file"],
)
elif S2_TILE_IMG_REGEX.match(chunk.key):
found_dict = S2_TILE_IMG_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/IMG_DATA/T%s%s%s_%s_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/IMG_DATA/T%s%s%s_%s_%s" % (
found_dict["num"],
found_dict["tile1"],
found_dict["tile2"],
Expand All @@ -793,97 +784,74 @@ def get_chunk_dest_path(self, product, chunk, dir_prefix=None, build_safe=False)
)
elif S2_TILE_THUMBNAIL_REGEX.match(chunk.key):
found_dict = S2_TILE_THUMBNAIL_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/%s" % (
found_dict["num"],
found_dict["file"],
)
elif S2_TILE_MTD_REGEX.match(chunk.key):
found_dict = S2_TILE_MTD_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/MTD_TL.xml" % (
product.properties["title"],
found_dict["num"],
)
product_path = "GRANULE/%s/MTD_TL.xml" % (found_dict["num"],)
elif S2_TILE_AUX_DIR_REGEX.match(chunk.key):
found_dict = S2_TILE_AUX_DIR_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/AUX_DATA/AUX_%s" % (
product.properties["title"],
product_path = "GRANULE/%s/AUX_DATA/AUX_%s" % (
found_dict["num"],
found_dict["file"],
)
elif S2_TILE_QI_DIR_REGEX.match(chunk.key):
found_dict = S2_TILE_QI_DIR_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/QI_DATA/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/QI_DATA/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 Tiles generic
elif S2_TILE_REGEX.match(chunk.key):
found_dict = S2_TILE_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/GRANULE/%s/%s" % (
product.properties["title"],
product_path = "GRANULE/%s/%s" % (
found_dict["num"],
found_dict["file"],
)
# S2 Product files
elif S2_PROD_DS_MTD_REGEX.match(chunk.key):
found_dict = S2_PROD_DS_MTD_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/MTD_DS.xml" % (
product.properties["title"],
ds_dir,
)
product_path = "DATASTRIP/%s/MTD_DS.xml" % (ds_dir,)
elif S2_PROD_DS_QI_REPORT_REGEX.match(chunk.key):
found_dict = S2_PROD_DS_QI_REPORT_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/QI_DATA/%s.xml" % (
product.properties["title"],
product_path = "DATASTRIP/%s/QI_DATA/%s.xml" % (
ds_dir,
found_dict["filename"],
)
elif S2_PROD_DS_QI_REGEX.match(chunk.key):
found_dict = S2_PROD_DS_QI_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/DATASTRIP/%s/QI_DATA/%s" % (
product.properties["title"],
product_path = "DATASTRIP/%s/QI_DATA/%s" % (
ds_dir,
found_dict["file"],
)
elif S2_PROD_INSPIRE_REGEX.match(chunk.key):
found_dict = S2_PROD_INSPIRE_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/INSPIRE.xml" % (product.properties["title"],)
product_path = "INSPIRE.xml"
elif S2_PROD_MTD_REGEX.match(chunk.key):
found_dict = S2_PROD_MTD_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/MTD_MSI%s.xml" % (
product.properties["title"],
s2_processing_level,
)
product_path = "MTD_MSI%s.xml" % (s2_processing_level,)
# S2 Product generic
elif S2_PROD_REGEX.match(chunk.key):
found_dict = S2_PROD_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/%s" % (
product.properties["title"],
found_dict["file"],
)
product_path = "%s" % (found_dict["file"],)
# S1 --------------------------------------------------------------
elif S1_CALIB_REGEX.match(chunk.key):
found_dict = S1_CALIB_REGEX.match(chunk.key).groupdict()
product_path = (
"%s.SAFE/annotation/calibration/%s-%s-%s-grd-%s-%s-%03d.xml"
% (
product.properties["title"],
found_dict["file_prefix"],
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
s1_title_suffix,
S1_IMG_NB_PER_POLAR.get(
product.properties["polarizationMode"], {}
).get(found_dict["file_pol"].upper(), 1),
)
product_path = "annotation/calibration/%s-%s-%s-grd-%s-%s-%03d.xml" % (
found_dict["file_prefix"],
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
s1_title_suffix,
S1_IMG_NB_PER_POLAR.get(
product.properties["polarizationMode"], {}
).get(found_dict["file_pol"].upper(), 1),
)
elif S1_ANNOT_REGEX.match(chunk.key):
found_dict = S1_ANNOT_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/annotation/%s-%s-grd-%s-%s-%03d.xml" % (
product.properties["title"],
product_path = "annotation/%s-%s-grd-%s-%s-%03d.xml" % (
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
Expand All @@ -894,8 +862,7 @@ def get_chunk_dest_path(self, product, chunk, dir_prefix=None, build_safe=False)
)
elif S1_MEAS_REGEX.match(chunk.key):
found_dict = S1_MEAS_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/measurement/%s-%s-grd-%s-%s-%03d.%s" % (
product.properties["title"],
product_path = "measurement/%s-%s-grd-%s-%s-%03d.%s" % (
product.properties["platformSerialIdentifier"].lower(),
found_dict["file_beam"],
found_dict["file_pol"],
Expand All @@ -907,18 +874,14 @@ def get_chunk_dest_path(self, product, chunk, dir_prefix=None, build_safe=False)
)
elif S1_REPORT_REGEX.match(chunk.key):
found_dict = S1_REPORT_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/%s.SAFE-%s" % (
product.properties["title"],
product_path = "%s.SAFE-%s" % (
product.properties["title"],
found_dict["file"],
)
# S1 generic
elif S1_REGEX.match(chunk.key):
found_dict = S1_REGEX.match(chunk.key).groupdict()
product_path = "%s.SAFE/%s" % (
product.properties["title"],
found_dict["file"],
)
product_path = "%s" % (found_dict["file"],)
# out of SAFE format
else:
raise NotAvailableError(
Expand Down
24 changes: 15 additions & 9 deletions eodag/plugins/download/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ def _finalize(self, fs_path, progress_callback=None, **kwargs):
extract = (
extract if extract is not None else getattr(self.config, "extract", True)
)
if not extract:
logger.info("Extraction not activated. The product is available as is.")
progress_callback(1, total=1)
return fs_path

delete_archive = kwargs.pop("delete_archive", None)
delete_archive = (
delete_archive
Expand All @@ -273,11 +278,6 @@ def _finalize(self, fs_path, progress_callback=None, **kwargs):
)
outputs_extension = kwargs.pop("outputs_extension", ".zip")

if not extract:
logger.info("Extraction not activated. The product is available as is.")
progress_callback(1, total=1)
return fs_path

product_path = (
fs_path[: fs_path.index(outputs_extension)]
if outputs_extension in fs_path
Expand Down Expand Up @@ -335,19 +335,27 @@ def _finalize(self, fs_path, progress_callback=None, **kwargs):
path=extraction_dir,
)
progress_callback(1)
shutil.move(extraction_dir, outputs_dir)
shutil.move(self._resolve_archive_depth(extraction_dir), outputs_dir)

elif fs_path.endswith(".tar.gz"):
with tarfile.open(fs_path, "r:gz") as zfile:
progress_callback.reset(total=1)
zfile.extractall(path=extraction_dir)
progress_callback(1)
shutil.move(extraction_dir, outputs_dir)
shutil.move(self._resolve_archive_depth(extraction_dir), outputs_dir)
else:
progress_callback(1, total=1)

tmp_dir.cleanup()

# in some cases, only a file is extracted without being in a directory
# we create a directory in which we place this file
if os.path.isfile(outputs_dir):
product_path = os.path.splitext(product_path)[0]
if not os.path.isdir(product_path):
os.makedirs(product_path)
shutil.move(outputs_dir, product_path)

if delete_archive:
logger.info(f"Deleting archive {os.path.basename(fs_path)}")
os.unlink(fs_path)
Expand All @@ -362,8 +370,6 @@ def _finalize(self, fs_path, progress_callback=None, **kwargs):
if close_progress_callback:
progress_callback.close()

product_path = self._resolve_archive_depth(product_path)

return product_path

def download_all(
Expand Down
16 changes: 16 additions & 0 deletions eodag/plugins/download/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,22 @@ def download_request(
shutil.move(fs_path, new_fs_path)
product.location = path_to_uri(new_fs_path)
return new_fs_path

# Check that the downloaded file is not a lone file and must not be placed in a directory
if os.path.isfile(fs_path) and getattr(
self.config, "outputs_in_folder", False
):
new_fs_path = os.path.join(
os.path.dirname(fs_path),
sanitize(product.properties["title"]),
)
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
shutil.move(fs_path, new_fs_path)
fs_path = new_fs_path

# do not try to extract or delete a directory
kwargs["extract"] = False
product_path = self._finalize(
fs_path, progress_callback=progress_callback, **kwargs
)
Expand Down
Loading

0 comments on commit 83e7a97

Please sign in to comment.