Skip to content

Commit

Permalink
add checksum timestamp changes (#371)
Browse files Browse the repository at this point in the history
* add checksum timestamp changes

* style: black reformat

* style: DRY

* add unit test

---------

Co-authored-by: Donny Winston <[email protected]>
  • Loading branch information
brynnz22 and dwinston authored Nov 13, 2023
1 parent 903f5c3 commit fd3aee9
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 4 deletions.
5 changes: 4 additions & 1 deletion nmdc_runtime/api/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ def hash_from_str(s: str, algo="sha256") -> str:
return getattr(hashlib, algo)(s.encode("utf-8")).hexdigest()


def sha256hash_from_file(file_path: str):
def sha256hash_from_file(file_path: str, timestamp: str):
# https://stackoverflow.com/a/55542529
h = hashlib.sha256()

timestamp_bytes = timestamp.encode("utf-8")
h.update(timestamp_bytes)

with open(file_path, "rb") as file:
while True:
# Reading is buffered, so we can read smaller chunks.
Expand Down
6 changes: 5 additions & 1 deletion nmdc_runtime/api/endpoints/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,17 +431,21 @@ def persist_content_and_get_drs_object(
filepath = str(Path(save_dir).joinpath(filename))
with open(filepath, "w") as f:
f.write(content)
now_to_the_minute = datetime.now(tz=ZoneInfo("America/Los_Angeles")).isoformat(
timespec="minutes"
)
object_in = DrsObjectIn(
**drs_metadata_for(
filepath,
base={
"description": (
description
+ f" (created by/for {username}"
+ f" at {datetime.now(tz=ZoneInfo('America/Los_Angeles')).isoformat(timespec='minutes')})"
+ f" at {now_to_the_minute})"
),
"access_methods": [{"access_id": drs_id}],
},
timestamp=now_to_the_minute,
)
)
self_uri = f"drs://{HOSTNAME_EXTERNAL}/{drs_id}"
Expand Down
4 changes: 2 additions & 2 deletions nmdc_runtime/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def put_object(filepath, url, mime_type=None):
return requests.put(url, data=f, headers={"Content-Type": mime_type})


def drs_metadata_for(filepath, base=None):
def drs_metadata_for(filepath, base=None, timestamp=None):
"""given file path, get drs metadata
required: size, created_time, and at least one checksum.
Expand All @@ -96,7 +96,7 @@ def drs_metadata_for(filepath, base=None):
)
if "checksums" not in base:
base["checksums"] = [
{"type": "sha256", "checksum": sha256hash_from_file(filepath)}
{"type": "sha256", "checksum": sha256hash_from_file(filepath, timestamp)}
]
if "mime_type" not in base:
base["mime_type"] = mimetypes.guess_type(filepath)[0]
Expand Down
21 changes: 21 additions & 0 deletions tests/unit/core_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from datetime import datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo

from nmdc_runtime.api.core.util import sha256hash_from_file

TEST_FILES_DIR = Path(__file__).parent.parent.joinpath("files")


def test_sha256hash_from_file_is_timestamp_dependent():
    """Check that the file digest changes when only the timestamp changes.

    The same fixture file is hashed twice, with minute-resolution ISO
    timestamps taken one minute apart; the two digests must differ.
    """
    fixture = str(TEST_FILES_DIR.joinpath("test_changesheet_update_one_ph.tsv"))
    start = datetime.now(tz=ZoneInfo("America/Los_Angeles"))
    # Two timestamps exactly one minute apart, so their minute-truncated
    # ISO strings are guaranteed to be different.
    stamps = [
        moment.isoformat(timespec="minutes")
        for moment in (start, start + timedelta(minutes=1))
    ]
    first_digest, second_digest = (
        sha256hash_from_file(file_path=fixture, timestamp=stamp) for stamp in stamps
    )
    assert first_digest != second_digest

0 comments on commit fd3aee9

Please sign in to comment.