Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: generate record hash from product_type + id #1023

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions eodag/plugins/apis/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ class Api(PluginTopic):
(e.g. 'file:///tmp/product_folder' on Linux or
'file:///C:/Users/username/AppData/Local/Temp' on Windows)
- save a *record* file in the directory ``outputs_prefix/.downloaded`` whose name
is built on the MD5 hash of the product's ``remote_location`` attribute
(``hashlib.md5(remote_location.encode("utf-8")).hexdigest()``) and whose content is
the product's ``remote_location`` attribute itself.
is built on the MD5 hash of the product's ``product_type`` and ``properties['id']``
attributes (``hashlib.md5((product.product_type+"-"+product.properties['id']).encode("utf-8")).hexdigest()``)
and whose content is the product's ``remote_location`` attribute itself.
- not try to download a product whose ``location`` attribute already points to an
existing file/directory
- not try to download a product if its *record* file exists as long as the expected
Expand Down
26 changes: 21 additions & 5 deletions eodag/plugins/download/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ class Download(PluginTopic):
(e.g. 'file:///tmp/product_folder' on Linux or
'file:///C:/Users/username/AppData/Local/Temp' on Windows)
- save a *record* file in the directory ``outputs_prefix/.downloaded`` whose name
is built on the MD5 hash of the product's ``remote_location`` attribute
(``hashlib.md5(remote_location.encode("utf-8")).hexdigest()``) and whose content is
the product's ``remote_location`` attribute itself.
is built on the MD5 hash of the product's ``product_type`` and ``properties['id']``
attributes (``hashlib.md5((product.product_type+"-"+product.properties['id']).encode("utf-8")).hexdigest()``)
and whose content is the product's ``remote_location`` attribute itself.
- not try to download a product whose ``location`` attribute already points to an
existing file/directory
- not try to download a product if its *record* file exists as long as the expected
Expand Down Expand Up @@ -246,8 +246,9 @@ def _prepare_download(
logger.warning(
f"Unable to create records directory. Got:\n{tb.format_exc()}",
)
url_hash = hashlib.md5(url.encode("utf-8")).hexdigest()
record_filename = os.path.join(download_records_dir, url_hash)
record_filename = os.path.join(
download_records_dir, self.generate_record_hash(product)
)
if os.path.isfile(record_filename) and os.path.isfile(fs_path):
logger.info(
f"Product already downloaded: {fs_path}",
Expand Down Expand Up @@ -278,6 +279,21 @@ def _prepare_download(

return fs_path, record_filename

def generate_record_hash(self, product: EOProduct) -> str:
    """Compute the hash used to name the download record of a product.

    The product's ``product_type`` and ``properties['id']`` values are each
    converted to ``str``, joined with ``"-"``, encoded as UTF-8 and digested
    with MD5.

    :param product: The product whose record hash is wanted
    :type product: :class:`~eodag.api.product._product.EOProduct`
    :returns: The hexadecimal MD5 digest of ``"<product_type>-<id>"``
    :rtype: str
    """
    # Both parts go through ``str`` because they are not always strings:
    # some unit tests use a ``None`` product type and an ``int`` id.
    identity_parts = (str(product.product_type), str(product.properties["id"]))
    digest = hashlib.md5("-".join(identity_parts).encode("utf-8"))
    return digest.hexdigest()

def _resolve_archive_depth(self, product_path: str) -> str:
"""Update product_path using archive_depth from provider configuration.

Expand Down
6 changes: 3 additions & 3 deletions eodag/plugins/download/s3rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
# limitations under the License.
from __future__ import annotations

import hashlib
import logging
import os
import os.path
Expand Down Expand Up @@ -252,8 +251,9 @@ def download_request(
"Unable to create records directory. Got:\n%s", tb.format_exc()
)
# check if product has already been downloaded
url_hash = hashlib.md5(product.remote_location.encode("utf-8")).hexdigest()
record_filename = os.path.join(download_records_dir, url_hash)
record_filename = os.path.join(
download_records_dir, self.generate_record_hash(product)
)
if os.path.isfile(record_filename) and os.path.exists(product_local_path):
product.location = path_to_uri(product_local_path)
return product_local_path
Expand Down
5 changes: 3 additions & 2 deletions tests/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,8 +834,9 @@ def test_end_to_end_complete_peps(self):
record_dir = os.path.join(self.tmp_download_path, ".downloaded")
self.assertTrue(os.path.isdir(record_dir))
# It must contain a file per product downloaded, whose name is
# the MD5 hash of the product's remote location
expected_hash = hashlib.md5(product.remote_location.encode("utf-8")).hexdigest()
# the MD5 hash of the product's ``product_type`` and ``properties['id']``
expected_hash = product.product_type + "-" + product.properties["id"]
expected_hash = hashlib.md5(expected_hash.encode("utf-8")).hexdigest()
record_file = os.path.join(record_dir, expected_hash)
self.assertTrue(os.path.isfile(record_file))
# Its content must be the product's remote location
Expand Down
Loading