-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
configurable date format and handle decimal InvalidOperation #20
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
jsonschema==3.2.0 | ||
boto3==1.18.22 | ||
backoff==1.11.1 | ||
pytest-cov | ||
moto[s3] | ||
adjust-precision-for-schema==0.3.4 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,12 +13,13 @@ | |
|
||
from jsonschema import Draft4Validator, FormatChecker | ||
from decimal import Decimal | ||
from adjust_precision_for_schema import adjust_decimal_precision_for_schema | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's a great one. Thanks for adding it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reading the "Handle multipleOf overflow" fix introduced in jsonschema v4.0.0, I wonder if that update doesn't make |
||
|
||
from target_s3_jsonl import s3 | ||
from target_s3_jsonl.logger import get_logger | ||
|
||
LOGGER = get_logger() | ||
|
||
DATE_FORMAT_DEFAULT = "%Y%m%d" | ||
|
||
def add_metadata_columns_to_schema(schema_message): | ||
'''Metadata _sdc columns according to the stitch documentation at | ||
|
@@ -93,7 +94,7 @@ def float_to_decimal(value): | |
return value | ||
|
||
|
||
def get_target_key(message, naming_convention=None, timestamp=None, prefix=None, timezone=None): | ||
def get_target_key(message, naming_convention=None, timestamp=None, prefix=None, timezone=None, date_format=DATE_FORMAT_DEFAULT): | ||
'''Creates and returns an S3 key for the message''' | ||
if not naming_convention: | ||
naming_convention = '{stream}-{timestamp}.json' | ||
|
@@ -102,7 +103,7 @@ def get_target_key(message, naming_convention=None, timestamp=None, prefix=None, | |
key = naming_convention.format( | ||
stream=message['stream'], | ||
timestamp=timestamp if timestamp is not None else datetime.datetime.now(timezone).strftime('%Y%m%dT%H%M%S'), | ||
date=datetime.datetime.now(timezone).strftime('%Y%m%d'), | ||
date=datetime.datetime.now(timezone).strftime(date_format), | ||
time=datetime.datetime.now(timezone).strftime('%H%M%S')) | ||
|
||
# NOTE: Replace dynamic tokens | ||
|
@@ -143,6 +144,8 @@ def persist_lines(messages, config): | |
key_properties = {} | ||
validators = {} | ||
|
||
date_format = config.get('date_format') or DATE_FORMAT_DEFAULT | ||
|
||
naming_convention_default = '{stream}-{timestamp}.json' | ||
naming_convention = config.get('naming_convention') or naming_convention_default | ||
open_func = open | ||
|
@@ -224,6 +227,7 @@ def persist_lines(messages, config): | |
if 'stream' not in o: | ||
raise Exception("Line is missing required key 'stream': {}".format(message)) | ||
stream = o['stream'] | ||
adjust_decimal_precision_for_schema(o['schema']) | ||
|
||
if config.get('add_metadata_columns'): | ||
schemas[stream] = add_metadata_columns_to_schema(o) | ||
|
@@ -245,7 +249,8 @@ def persist_lines(messages, config): | |
naming_convention=naming_convention, | ||
timestamp=now_formatted, | ||
prefix=config.get('s3_key_prefix', ''), | ||
timezone=timezone), | ||
timezone=timezone, | ||
date_format=date_format), | ||
'file_name': temp_dir / naming_convention_default.format(stream=stream, timestamp=now_formatted), | ||
'file_data': []} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Those are already in the
[options.extras_require]
section L34-35. I keep them apart as they are not needed to run the package, only to run the tests. My rationale is to keep the target lean if possible.
So I don't know how much of a best practice it is but I used
To install what's needed on
local
. It was my way of following the KISS principle here.