
configurable date format and handle decimal InvalidOperation #20

Closed · wants to merge 5 commits
1 change: 1 addition & 0 deletions README.md
@@ -89,6 +89,7 @@ Full list of options in `config.json`:
| encryption_type | String | | (Default: 'none') The type of encryption to use. Current supported options are: 'none' and 'KMS'. |
| encryption_key | String | | A reference to the encryption key to use for data encryption. For KMS encryption, this should be the name of the KMS encryption key ID (e.g. '1234abcd-1234-1234-1234-1234abcd1234'). This field is ignored if 'encryption_type' is none or blank. |
| compression | String | | The type of compression to apply before uploading. Supported options are `none` (default), `gzip`, and `lzma`. For gzipped files, the file extension will automatically be changed to `.json.gz` for all files. For `lzma` compression, the file extension will automatically be changed to `.json.xz` for all files. |
| date_format | String | | (Default: "%Y%m%d") Allows customization of the date format used in the `{date}` token in the naming_convention |
| naming_convention | String | | (Default: None) Custom naming convention of the s3 key. Replaces tokens `date`, `stream`, and `timestamp` with the appropriate values. <br><br>Supports "folders" in s3 keys e.g. `folder/folder2/{stream}/export_date={date}/{timestamp}.json`. <br><br>Honors the `s3_key_prefix`, if set, by prepending the "filename". E.g. naming_convention = `folder1/my_file.json` and s3_key_prefix = `prefix_` results in `folder1/prefix_my_file.json` |
| timezone_offset | Integer | | Use an offset of `0` hours if you want the `naming_convention` to use the UTC time zone. Defaults to `null`, in which case the local time zone is used. |
| temp_dir | String | | (Default: platform-dependent) Directory of temporary JSONL files with RECORD messages. |
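Taken together, `date_format` and `naming_convention` control the rendered S3 key. A minimal sketch of the token substitution (the helper name `render_key` is hypothetical; the real logic lives in `get_target_key`):

```python
import datetime

def render_key(naming_convention, stream, date_format='%Y%m%d', timezone=None):
    # Substitute the {stream}, {timestamp}, {date} and {time} tokens,
    # mirroring what get_target_key does with the configured date_format.
    now = datetime.datetime.now(timezone)
    return naming_convention.format(
        stream=stream,
        timestamp=now.strftime('%Y%m%dT%H%M%S'),
        date=now.strftime(date_format),
        time=now.strftime('%H%M%S'))

key = render_key('{stream}/export_date={date}/{timestamp}.json',
                 'users', date_format='%Y-%m-%d')
print(key)  # e.g. users/export_date=2024-01-31/20240131T120000.json
```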
3 changes: 2 additions & 1 deletion config.sample.json
@@ -4,5 +4,6 @@
"s3_bucket": "BUCKET",
"s3_key_prefix": "SOME-PREFIX/",
"compression": "gzip",
"naming_convention": "{stream}-{timestamp}.jsonl"
"naming_convention": "{stream}-{timestamp}.jsonl",
"date_format": "%Y-%m-%d"
}
3 changes: 3 additions & 0 deletions requirements.txt
@@ -1,3 +1,6 @@
jsonschema==3.2.0
boto3==1.18.22
backoff==1.11.1
pytest-cov
moto[s3]
Comment on lines +4 to +5

Owner

Those are already in the [options.extras_require] section L34-35. I keep them apart as they are not needed to run the package, only to run the tests.
My rationale is to keep the target lean if possible.

So I don't know how much of a best practice it is, but I used

pip install --upgrade .[test,lint,dist]

to install what's needed locally. It was my way of following the KISS principle here.

adjust-precision-for-schema==0.3.4
1 change: 1 addition & 0 deletions setup.cfg
@@ -24,6 +24,7 @@ install_requires =
jsonschema==3.2.0
boto3==1.18.22
backoff==1.11.1
adjust-precision-for-schema==0.3.4
include_package_data = True

[options.package_data]
13 changes: 9 additions & 4 deletions target_s3_jsonl/__init__.py
@@ -13,12 +13,13 @@

from jsonschema import Draft4Validator, FormatChecker
from decimal import Decimal
from adjust_precision_for_schema import adjust_decimal_precision_for_schema
Owner
That's a great one. Thanks for adding it.

Owner
Reading the Handle multipleOf overflow fix introduced in jsonschema v4.0.0, I wonder if that update doesn't make adjust_precision_for_schema redundant?
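For context on what adjust_decimal_precision_for_schema guards against: a multipleOf check reduces to a remainder test, and Decimal remainders between values of very different magnitude can exceed the default 28-digit context precision and raise decimal.InvalidOperation. A minimal stdlib sketch (`is_multiple` is a hypothetical stand-in for the validator's check, and the precision of 100 is an arbitrary illustration):

```python
from decimal import Decimal, InvalidOperation, getcontext

def is_multiple(value, multiple_of):
    # A remainder test of the kind a multipleOf validation performs.
    try:
        return (value % multiple_of) == 0
    except InvalidOperation:
        # The quotient's integer part needs more digits than the current
        # context precision allows; widening the precision (which is what
        # adjust-precision-for-schema derives from the schema) avoids it.
        getcontext().prec = 100
        return (value % multiple_of) == 0

# With the default 28-digit precision the remainder first raises,
# then succeeds once the context is widened:
print(is_multiple(Decimal('1e50'), Decimal('1e-15')))  # True
```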


from target_s3_jsonl import s3
from target_s3_jsonl.logger import get_logger

LOGGER = get_logger()

DATE_FORMAT_DEFAULT = "%Y%m%d"

def add_metadata_columns_to_schema(schema_message):
'''Metadata _sdc columns according to the stitch documentation at
@@ -93,7 +94,7 @@ def float_to_decimal(value):
return value


def get_target_key(message, naming_convention=None, timestamp=None, prefix=None, timezone=None):
def get_target_key(message, naming_convention=None, timestamp=None, prefix=None, timezone=None, date_format=DATE_FORMAT_DEFAULT):
'''Creates and returns an S3 key for the message'''
if not naming_convention:
naming_convention = '{stream}-{timestamp}.json'
@@ -102,7 +103,7 @@ def get_target_key(message, naming_convention=None, timestamp=None, prefix=None,
key = naming_convention.format(
stream=message['stream'],
timestamp=timestamp if timestamp is not None else datetime.datetime.now(timezone).strftime('%Y%m%dT%H%M%S'),
date=datetime.datetime.now(timezone).strftime('%Y%m%d'),
date=datetime.datetime.now(timezone).strftime(date_format),
time=datetime.datetime.now(timezone).strftime('%H%M%S'))

# NOTE: Replace dynamic tokens
@@ -143,6 +144,8 @@ def persist_lines(messages, config):
key_properties = {}
validators = {}

date_format = config.get('date_format') or DATE_FORMAT_DEFAULT

naming_convention_default = '{stream}-{timestamp}.json'
naming_convention = config.get('naming_convention') or naming_convention_default
open_func = open
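A side note on the `config.get('date_format') or DATE_FORMAT_DEFAULT` pattern in the hunk above: unlike a two-argument `dict.get`, the `or` also falls back to the default when the key is present but set to `null` (or an empty string) in `config.json`:

```python
DATE_FORMAT_DEFAULT = '%Y%m%d'

config_null = {'date_format': None}      # explicit null in config.json
config_set = {'date_format': '%Y-%m-%d'}

# Two-argument .get() keeps the explicit None...
print(config_null.get('date_format', DATE_FORMAT_DEFAULT))  # None
# ...while `or` falls back to the default in that case too.
print(config_null.get('date_format') or DATE_FORMAT_DEFAULT)  # %Y%m%d
print(config_set.get('date_format') or DATE_FORMAT_DEFAULT)   # %Y-%m-%d
```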
@@ -224,6 +227,7 @@
if 'stream' not in o:
raise Exception("Line is missing required key 'stream': {}".format(message))
stream = o['stream']
adjust_decimal_precision_for_schema(o['schema'])

if config.get('add_metadata_columns'):
schemas[stream] = add_metadata_columns_to_schema(o)
@@ -245,7 +249,8 @@
naming_convention=naming_convention,
timestamp=now_formatted,
prefix=config.get('s3_key_prefix', ''),
timezone=timezone),
timezone=timezone,
date_format=date_format),
'file_name': temp_dir / naming_convention_default.format(stream=stream, timestamp=now_formatted),
'file_data': []}

2 changes: 1 addition & 1 deletion tests/test_init.py
@@ -356,7 +356,7 @@ def test_persist_lines(caplog, config, input_data, input_multi_stream_data, inva
dummy_type = '{"type": "DUMMY", "value": {"currently_syncing": "tap_dummy_test-test_table_one"}}'
output_state, output_file_metadata = persist_lines([dummy_type] + input_multi_stream_data, config)

assert caplog.text == 'WARNING root:__init__.py:255 Unknown message type "{}" in message "{}"'.format(
assert caplog.text == 'WARNING root:__init__.py:260 Unknown message type "{}" in message "{}"'.format(
json.loads(dummy_type)['type'], dummy_type.replace('"', "'")) + '\n'

with raises(NotImplementedError):