Skip to content

Commit

Permalink
refactor: Reuse dictionary to JSON bytes code TDE-1138 (#958)
Browse files Browse the repository at this point in the history
* refactor: Reuse dictionary to JSON bytes code

* feat: Always dump JSON to UTF-8 string

Otherwise information will get lost when decoding.

* test: Verify that inverting JSON conversion works
  • Loading branch information
l0b0 authored May 3, 2024
1 parent afea8f0 commit 0b6158b
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
17 changes: 17 additions & 0 deletions scripts/json_codec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json
from typing import Any, Dict


def dict_to_json_bytes(input_dict: Dict[str, Any]) -> bytes:
    r"""
    Convert a `dict` into UTF-8 encoded `bytes` representing a JSON object.

    Non-ASCII characters are written verbatim (`ensure_ascii=False`) instead of
    being replaced by JSON Unicode escape sequences, so the encoded bytes carry
    the original characters as UTF-8.

    This docstring is a raw string on purpose: the expected doctest output below
    contains backslash escapes that must reach doctest unprocessed.

    Args:
        input_dict: dictionary to serialise

    Returns:
        the JSON document, encoded as UTF-8

    Raises:
        TypeError: if a key or value cannot be serialised by `json.dumps`
        ValueError: if the dictionary contains a circular reference

    Examples:
        >>> dict_to_json_bytes({})
        b'{}'
        >>> dict_to_json_bytes({"ā": "😀"})  # Unicode code points U+0101 and U+1F600
        b'{"\xc4\x81": "\xf0\x9f\x98\x80"}'
        >>> json.loads(dict_to_json_bytes({"ā": "😀"}))
        {'ā': '😀'}
    """
    return json.dumps(input_dict, ensure_ascii=False).encode("utf-8")
8 changes: 4 additions & 4 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
from typing import Any, Dict, List, Optional

Expand All @@ -8,6 +7,7 @@
from scripts.datetimes import format_rfc_3339_datetime_string, parse_rfc_3339_datetime
from scripts.files.files_helper import ContentType
from scripts.files.fs import write
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.capture_area import generate_capture_area, gsd_to_float
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
Expand Down Expand Up @@ -95,7 +95,7 @@ def add_capture_area(self, polygons: List[shapely.geometry.shape], target: str,

# The GSD is measured in meters (e.g., `0.3m`)
capture_area_document = generate_capture_area(polygons, gsd_to_float(self.metadata["gsd"]))
capture_area_content: bytes = json.dumps(capture_area_document).encode("utf-8")
capture_area_content: bytes = dict_to_json_bytes(capture_area_document)
file_checksum = checksum.multihash_as_hex(capture_area_content)
capture_area = {
"href": f"./{CAPTURE_AREA_FILE_NAME}",
Expand Down Expand Up @@ -129,7 +129,7 @@ def add_item(self, item: Dict[Any, Any]) -> None:
item: STAC Item to add
"""
item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
file_checksum = checksum.multihash_as_hex(json.dumps(item).encode("utf-8"))
file_checksum = checksum.multihash_as_hex(dict_to_json_bytes(item))
if item_self_link:
self.add_link(href=item_self_link["href"], file_checksum=file_checksum)
self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
Expand Down Expand Up @@ -235,7 +235,7 @@ def write_to(self, destination: str) -> None:
Args:
destination: path of the destination
"""
write(destination, json.dumps(self.stac, ensure_ascii=False).encode("utf-8"), content_type=ContentType.JSON.value)
write(destination, dict_to_json_bytes(self.stac), content_type=ContentType.JSON.value)

def _title(self) -> str:
"""Generates the title for imagery and elevation datasets.
Expand Down
4 changes: 2 additions & 2 deletions scripts/standardise_validate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
import json
import os
import sys
from typing import List
Expand All @@ -11,6 +10,7 @@
from scripts.files.files_helper import SUFFIX_JSON, ContentType
from scripts.files.fs import exists, write
from scripts.gdal.gdal_helper import get_srs, get_vfs_path
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.create_stac import create_item
from scripts.standardising import run_standardising

Expand Down Expand Up @@ -102,7 +102,7 @@ def main() -> None:
item = create_item(
file.get_path_standardised(), start_datetime, end_datetime, arguments.collection_id, file.get_gdalinfo()
)
write(stac_item_path, json.dumps(item.stac).encode("utf-8"), content_type=ContentType.GEOJSON.value)
write(stac_item_path, dict_to_json_bytes(item.stac), content_type=ContentType.GEOJSON.value)
get_log().info("stac_saved", path=stac_item_path)


Expand Down

0 comments on commit 0b6158b

Please sign in to comment.