diff --git a/scripts/files/file_tiff.py b/scripts/files/file_tiff.py index 18a7b7321..4cad388bb 100644 --- a/scripts/files/file_tiff.py +++ b/scripts/files/file_tiff.py @@ -1,6 +1,7 @@ import json from enum import Enum from typing import Any, Dict, List, Optional +from urllib.parse import unquote from scripts.gdal.gdal_helper import GDALExecutionException, gdal_info, run_gdal from scripts.gdal.gdalinfo import GdalInfo @@ -24,10 +25,17 @@ class FileTiff: def __init__( self, - path: List[str], + paths: List[str], preset: Optional[str] = None, ) -> None: - self._path_original = path + paths_original = [] + for p in paths: + # paths can be URLs containing percent-encoded sequences (like `%20` for a space) + # which would make the process fail later (TDE-1054) + # FIXME: we should use URLs in the code base + paths_original.append(unquote(p)) + + self._paths_original = paths_original self._path_standardised = "" self._errors: List[Dict[str, Any]] = [] self._gdalinfo: Optional[GdalInfo] = None @@ -138,14 +146,14 @@ def get_errors(self) -> List[Dict[str, Any]]: """ return self._errors - def get_path_original(self) -> List[str]: + def get_paths_original(self) -> List[str]: """Get the path(es) of the original (non standardised) file. It can be a list of path if the standardised file is a retiled image. Returns: a list of file path """ - return self._path_original + return self._paths_original def get_path_standardised(self) -> str: """Get the path of the standardised file. diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py index 7be9daab5..3cce16954 100644 --- a/scripts/standardise_validate.py +++ b/scripts/standardise_validate.py @@ -77,7 +77,7 @@ def main() -> None: # If the file is not valid (Non Visual QA errors) # Logs the `vsis3` path to use `gdal` on the file directly from `s3` # This is to help data analysts to verify the file. 
- original_path: List[str] = file.get_path_original() + original_path: List[str] = file.get_paths_original() standardised_path = file.get_path_standardised() env_argo_template = os.environ.get("ARGO_TEMPLATE") if env_argo_template: diff --git a/scripts/standardising.py b/scripts/standardising.py index f67b7fa8a..ea0615f23 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -139,9 +139,9 @@ def standardising( footprint_tmp_path = os.path.join(tmp_path, footprint_file_name) sidecars: List[str] = [] for extension in [".prj", ".tfw"]: - for file_input in files.inputs: + for file_input in tiff.get_paths_original(): sidecars.append(f"{os.path.splitext(file_input)[0]}{extension}") - source_files = write_all(files.inputs, f"{tmp_path}/source/") + source_files = write_all(tiff.get_paths_original(), f"{tmp_path}/source/") write_sidecars(sidecars, f"{tmp_path}/source/") source_tiffs = [file for file in source_files if is_tiff(file)]