Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Update stream metadata for all fields #102

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions tap_postgres/stream_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,27 +72,27 @@ def refresh_streams_schema(conn_config: Dict, streams: List[Dict]):
for stream in discover_db(conn, conn_config.get('filter_schemas'), [st['table_name'] for st in streams])
}

LOGGER.debug('New discovery schemas %s', new_discovery)

# For every stream dictionary, update the schema and metadata from the new discovery
for idx, stream in enumerate(streams):
# update schema
streams[idx]['schema'] = copy.deepcopy(new_discovery[stream['tap_stream_id']]['schema'])

# Update metadata
#
# 1st step: new discovery doesn't contain non-discoverable metadata (e.g. replication method & key, selected),
# so copy those from the original stream object
md_map = metadata.to_map(stream['metadata'])
meta = md_map.get(())

for idx_met, metadatum in enumerate(new_discovery[stream['tap_stream_id']]['metadata']):
if not metadatum['breadcrumb']:
meta.update(new_discovery[stream['tap_stream_id']]['metadata'][idx_met]['metadata'])
new_discovery[stream['tap_stream_id']]['metadata'][idx_met]['metadata'] = meta

# 2nd step: now copy all the metadata from the updated new discovery to the original stream
streams[idx]['metadata'] = copy.deepcopy(new_discovery[stream['tap_stream_id']]['metadata'])
LOGGER.debug('New discovery schemas %s', new_discovery)

# For every stream, update the schema and metadata from the new discovery
for idx, stream in enumerate(streams):
# Update schema
streams[idx]['schema'] = copy.deepcopy(new_discovery[stream['tap_stream_id']]['schema'])

# Create updated metadata
original_stream_metadata_map = metadata.to_map(stream['metadata'])
new_discovery_metadata_map = metadata.to_map(new_discovery[stream['tap_stream_id']]['metadata'])

for metadata_element_key in new_discovery_metadata_map:
if metadata_element_key in original_stream_metadata_map:
original_stream_metadata_map[metadata_element_key].update(new_discovery_metadata_map[metadata_element_key])
else:
original_stream_metadata_map[metadata_element_key] = new_discovery_metadata_map[metadata_element_key]

updated_original_metadata_list = metadata.to_list(original_stream_metadata_map)

# Copy the updated metadata back into the original data structure that was passed in
streams[idx]['metadata'] = copy.deepcopy(updated_original_metadata_list)

LOGGER.debug('Updated streams schemas %s', streams)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_full_table_interruption.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_catalog(self):

self.assertTrue(blew_up_on_cow)

self.assertEqual(7, len(CAUGHT_MESSAGES))
self.assertEqual(7, len(CAUGHT_MESSAGES), "Number of Caught Messages")

self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')
self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
Expand Down
10 changes: 9 additions & 1 deletion tests/test_streams_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ def test_refresh_streams_schema(self):
'table-key-properties': ['some_id'],
'row-count': 1000,
}
},
{
'breadcrumb': ['properties', 'char_name'],
'metadata': {
'arbitrary_field_metadata': 'should be preserved'
}
}

]
}
]
Expand Down Expand Up @@ -86,7 +93,8 @@ def test_refresh_streams_schema(self):
'selected-by-default': True},
('properties', 'char_name'): {'selected-by-default': True,
'inclusion': 'available',
'sql-datatype': 'character'}})
'sql-datatype': 'character',
'arbitrary_field_metadata': 'should be preserved'}})

self.assertEqual({'properties': {'id': {'type': ['integer'],
'maximum': 2147483647,
Expand Down