From 5a1190a297d0d5adf922d4619d4b8cf00d93b1d3 Mon Sep 17 00:00:00 2001 From: "Edgar R. M" Date: Mon, 10 Jul 2023 11:21:49 -0600 Subject: [PATCH 1/2] fix: Add explicit dependency on `packaging` library (#1827) --- poetry.lock | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index f018406f7..acdbe7b8a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2708,4 +2708,4 @@ testing = ["pytest", "pytest-durations"] [metadata] lock-version = "2.0" python-versions = "<3.12,>=3.7.1" -content-hash = "53c3e577e500c322fffa5a7f3e5fcebe34a2657894d35a9d5768b951320448d0" +content-hash = "1cfb42db582744ae6f7afa862f0dc7554c8827499ff5806525e869879cc75db5" diff --git a/pyproject.toml b/pyproject.toml index 705c832a0..f153405a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ python-dotenv = ">=0.20,<0.22" typing-extensions = "^4.2.0" simplejson = "^3.17.6" jsonschema = "^4.16.0" +packaging = ">=23.1" pytz = ">=2022.2.1,<2024.0.0" PyYAML = "^6.0" # urllib3 2.0 is not compatible with botocore From 8fb30f37ac9fa107237d3c197c5c9b4e8b931132 Mon Sep 17 00:00:00 2001 From: "Edgar R. M" Date: Mon, 10 Jul 2023 11:45:25 -0600 Subject: [PATCH 2/2] fix: Serialization of `decimal.Decimal` (#1826) Co-authored-by: Pat Nadolny --- singer_sdk/batch.py | 3 ++- tests/core/test_batch.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/singer_sdk/batch.py b/singer_sdk/batch.py index f6bc966ab..0cbf11917 100644 --- a/singer_sdk/batch.py +++ b/singer_sdk/batch.py @@ -104,7 +104,8 @@ def get_batches( mode="wb", ) as gz: gz.writelines( - (json.dumps(record) + "\n").encode() for record in chunk + (json.dumps(record, default=str) + "\n").encode() + for record in chunk ) file_url = fs.geturl(filename) yield [file_url] diff --git a/tests/core/test_batch.py b/tests/core/test_batch.py index f818cb2e9..6efb3b34a 100644 --- a/tests/core/test_batch.py +++ b/tests/core/test_batch.py @@ -1,11 +1,15 @@ from __future__ import annotations +import decimal +import re from dataclasses import asdict import pytest +from singer_sdk.batch import JSONLinesBatcher from singer_sdk.helpers._batch import ( BaseBatchFileEncoding, + BatchConfig, JSONLinesEncoding, StorageTarget, ) @@ -95,3 +99,29 @@ def test_storage_from_url(file_url: str, root: str): def test_storage_split_url(file_url: str, expected: tuple): """Test storage target split URL.""" assert StorageTarget.split_url(file_url) == expected + + +def test_json_lines_batcher(): + batcher = JSONLinesBatcher( + "tap-test", + "stream-test", + batch_config=BatchConfig( + encoding=JSONLinesEncoding("gzip"), + storage=StorageTarget("file:///tmp/sdk-batches"), + batch_size=2, + ), + ) + records = [ + {"id": 1, "numeric": decimal.Decimal("1.0")}, + {"id": 2, "numeric": decimal.Decimal("2.0")}, + {"id": 3, "numeric": decimal.Decimal("3.0")}, + ] + + batches = list(batcher.get_batches(records)) + assert len(batches) == 2 + assert all(len(batch) == 1 for batch in batches) + assert all( + re.match(r".*tap-test--stream-test-.*\.json.gz", filepath) + for batch in batches + for filepath in batch + )