diff --git a/poetry.lock b/poetry.lock index f018406f7..acdbe7b8a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2708,4 +2708,4 @@ testing = ["pytest", "pytest-durations"] [metadata] lock-version = "2.0" python-versions = "<3.12,>=3.7.1" -content-hash = "53c3e577e500c322fffa5a7f3e5fcebe34a2657894d35a9d5768b951320448d0" +content-hash = "1cfb42db582744ae6f7afa862f0dc7554c8827499ff5806525e869879cc75db5" diff --git a/pyproject.toml b/pyproject.toml index 705c832a0..f153405a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ python-dotenv = ">=0.20,<0.22" typing-extensions = "^4.2.0" simplejson = "^3.17.6" jsonschema = "^4.16.0" +packaging = ">=23.1" pytz = ">=2022.2.1,<2024.0.0" PyYAML = "^6.0" # urllib3 2.0 is not compatible with botocore diff --git a/singer_sdk/batch.py b/singer_sdk/batch.py index f6bc966ab..0cbf11917 100644 --- a/singer_sdk/batch.py +++ b/singer_sdk/batch.py @@ -104,7 +104,8 @@ def get_batches( mode="wb", ) as gz: gz.writelines( - (json.dumps(record) + "\n").encode() for record in chunk + (json.dumps(record, default=str) + "\n").encode() + for record in chunk ) file_url = fs.geturl(filename) yield [file_url] diff --git a/tests/core/test_batch.py b/tests/core/test_batch.py index f818cb2e9..6efb3b34a 100644 --- a/tests/core/test_batch.py +++ b/tests/core/test_batch.py @@ -1,11 +1,15 @@ from __future__ import annotations +import decimal +import re from dataclasses import asdict import pytest +from singer_sdk.batch import JSONLinesBatcher from singer_sdk.helpers._batch import ( BaseBatchFileEncoding, + BatchConfig, JSONLinesEncoding, StorageTarget, ) @@ -95,3 +99,29 @@ def test_storage_from_url(file_url: str, root: str): def test_storage_split_url(file_url: str, expected: tuple): """Test storage target split URL.""" assert StorageTarget.split_url(file_url) == expected + + +def test_json_lines_batcher(): + batcher = JSONLinesBatcher( + "tap-test", + "stream-test", + batch_config=BatchConfig( + encoding=JSONLinesEncoding("gzip"), + storage=StorageTarget("file:///tmp/sdk-batches"), + batch_size=2, + ), + ) + records = [ + {"id": 1, "numeric": decimal.Decimal("1.0")}, + {"id": 2, "numeric": decimal.Decimal("2.0")}, + {"id": 3, "numeric": decimal.Decimal("3.0")}, + ] + + batches = list(batcher.get_batches(records)) + assert len(batches) == 2 + assert all(len(batch) == 1 for batch in batches) + assert all( + re.match(r".*tap-test--stream-test-.*\.json.gz", filepath) + for batch in batches + for filepath in batch + )