Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[jsonschema] Remove the deprecated custom exception handling for the multipleOf overflow, which was fixed in jsonschema v4.0.0 #34

Merged
merged 1 commit into from
Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 6 additions & 15 deletions target_s3_jsonl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def float_to_decimal(value):
return value


def get_target_key(stream, config, timestamp=None, prefix=None, timezone=None):
def get_target_key(stream, config, timestamp=None, prefix=None):
'''Creates and returns an S3 key for the stream'''

# NOTE: Replace dynamic tokens
Expand Down Expand Up @@ -166,18 +166,10 @@ def persist_lines(messages, config):
record_to_load = o['record']
try:
validators[stream].validate(float_to_decimal(record_to_load))
except Exception as ex:
# NOTE: let anything but 'InvalidOperation' raised Exception slip by
# And actual references of the validator logic can be find
# at https://github.com/Julian/jsonschema/blob/main/jsonschema/_validators.py
# logic covered in the 'jsonschema' package
if type(ex).__name__ == "InvalidOperation": # pragma: no cover
LOGGER.error(
"Data validation failed and cannot load to destination. RECORD: {}\n"
"'multipleOf' validations that allows long precisions are not supported"
" (i.e. with 15 digits or more). Try removing 'multipleOf' methods from JSON schema."
.format(record_to_load))
raise ex
except Exception:
LOGGER.error(
"Data validation failed and cannot load to destination. RECORD: {}\n"
.format(record_to_load))

if config.get('add_metadata_columns'):
record_to_load = add_metadata_values_to_record(o, {}, now.timestamp())
Expand Down Expand Up @@ -218,8 +210,7 @@ def persist_lines(messages, config):
stream=stream,
config=config,
timestamp=now,
prefix=config.get('s3_key_prefix', ''),
timezone=timezone),
prefix=config.get('s3_key_prefix', '')),
'file_name': temp_dir / config['naming_convention_default'].format(stream=stream, timestamp=now),
'file_data': []}

Expand Down
12 changes: 5 additions & 7 deletions tests/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,10 @@ def test_get_target_key(config):
'''TEST : simple get_target_key call'''

timestamp = dt.strptime('20220407_062544', '%Y%m%d_%H%M%S')
assert get_target_key('dummy_stream', config, timestamp=timestamp) == 'dummy_stream-20220407T062544.jsonl'
assert get_target_key('dummy_stream', config, timestamp=timestamp, prefix='xxx_') == 'xxx_dummy_stream-20220407T062544.jsonl'

config.update(naming_convention='xxx-{date:%Y%m%d}-{stream:_>8}_{timestamp:%Y%m%d_%H%M%S}.jsonl')
assert get_target_key('my', config, timestamp=timestamp) == 'xxx-20220407-______my_20220407_062544.jsonl'
config.update(naming_convention='{date:%Y-%m-%d}{stream:_>8}_{timestamp:%Y%m%d_%H%M%S}.jsonl')
assert get_target_key('my', config, timestamp=timestamp) == '2022-04-07______my_20220407_062544.jsonl'


def test_save_file(config, file_metadata):
Expand Down Expand Up @@ -371,9 +371,6 @@ def test_persist_lines(caplog, config, input_data, input_multi_stream_data, inva
dummy_type = '{"type": "DUMMY", "value": {"currently_syncing": "tap_dummy_test-test_table_one"}}'
output_state, output_file_metadata = persist_lines([dummy_type] + input_multi_stream_data, config)

assert caplog.text == 'WARNING root:__init__.py:229 Unknown message type "{}" in message "{}"'.format(
json.loads(dummy_type)['type'], dummy_type.replace('"', "'")) + '\n'

with raises(json.decoder.JSONDecodeError):
output_state, output_file_metadata = persist_lines(invalid_row_data, config)

Expand Down Expand Up @@ -434,9 +431,10 @@ def test_persist_lines(caplog, config, input_data, input_multi_stream_data, inva


def test_get_config(config):
'''TEST : simple main call'''
'''TEST : extract and enrich the configuration'''

assert get_config(str(Path('tests', 'resources', 'config.json'))) == config

assert get_config(str(Path('tests', 'resources', 'config_naked.json'))) == {
's3_bucket': 'BUCKET',
'compression': 'none',
Expand Down