Skip to content

Commit

Permalink
Sync record from start date for streams - campaigns, ad groups, ads (#22)
Browse files Browse the repository at this point in the history

* Sync record from start date for streams - campaigns, ad groups, ads

* fix start date test

* remove the query filter and do pseudo increment for streams

* fix the unit test cases

* remove unused code

* setup and changelog update

---------

Co-authored-by: RushiT0122 <[email protected]>
  • Loading branch information
sgandhi1311 and RushiT0122 authored Jun 22, 2023
1 parent 5cc07f8 commit 003dc44
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 13 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.4.0
* Sync records from the start date specified in the config. [#22](https://github.com/singer-io/tap-tiktok-ads/pull/22)

## 0.3.0
* Add backoff logic for error codes - 40200, 40201, 40202, 40700, 50002 [#21](https://github.com/singer-io/tap-tiktok-ads/pull/21)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="tap-tiktok-ads",
version="0.3.0",
version="0.4.0",
description="Singer.io tap for extracting data",
author="Stitch",
url="http://singer.io",
Expand Down
8 changes: 4 additions & 4 deletions tap_tiktok_ads/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,19 +189,19 @@ def transform_advertisers_records(records, bookmark_value):
return transformed_records


def get_bookmark_value(stream_name, bookmark_data, advertiser_id):
def get_bookmark_value(stream_name, bookmark_data, advertiser_id, start_date):
'''
Returns bookmark value for any stream based on stream category (normal or stream with advertiser_id). Returns `start_date`
in case of `advertisers` stream if bookmark is not present. For other streams returns the bookmark for the given advertiser_id, or `start_date` if there is none.
'''
if stream_name in ENDPOINT_ADVERTISERS:
if bookmark_data:
return bookmark_data
return None
return start_date
elif (stream_name in ENDPOINT_INSIGHTS or stream_name in ENDPOINT_AD_MANAGEMENT) and advertiser_id in bookmark_data:
return bookmark_data[advertiser_id]
else:
return None
return start_date


class Stream():
Expand Down Expand Up @@ -268,7 +268,7 @@ def process_batch(self, stream, records, advertiser_id):
"""
bookmark_column = self.replication_keys[0] # pylint: disable=unsubscriptable-object
bookmark_data = self.get_bookmark(stream.tap_stream_id)
bookmark_value = get_bookmark_value(stream.tap_stream_id, bookmark_data, advertiser_id)
bookmark_value = get_bookmark_value(stream.tap_stream_id, bookmark_data, advertiser_id, self.config['start_date'])
transformed_records = pre_transform(stream.tap_stream_id, records, bookmark_value)
sorted_records = sorted(transformed_records, key=lambda x: x[bookmark_column])
for record in sorted_records:
Expand Down
7 changes: 3 additions & 4 deletions tests/test_start_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def test_run(self):
- Verify by primary key values, that the 2nd sync and 1st sync replicated the same records.
"""

self.first_start_date = "2020-12-01T00:00:00Z"
self.second_start_date = "2020-12-20T00:00:00Z"
self.first_start_date = "2022-04-18T00:00:00Z"
self.second_start_date = "2022-04-21T00:00:00Z"
start_date_1_epoch = self.dt_to_ts(self.first_start_date)
start_date_2_epoch = self.dt_to_ts(self.second_start_date)

Expand Down Expand Up @@ -109,10 +109,9 @@ def test_run(self):
start_date_key_value_parsed = parse(start_date_key_value).strftime("%Y-%m-%dT%H:%M:%SZ")
self.assertGreaterEqual(self.dt_to_ts(start_date_key_value_parsed), start_date_2_epoch)

# ticket - https://jira.talendforge.org/browse/TDL-23225
# Verify the number of records replicated in sync 1 is greater than the number
# of records replicated in sync 2 for stream
# self.assertGreater(record_count_sync_1, record_count_sync_2)
self.assertGreater(record_count_sync_1, record_count_sync_2)

# Verify the records replicated in sync 2 were also replicated in sync 1
self.assertTrue(primary_keys_sync_2.issubset(primary_keys_sync_1))
Expand Down
8 changes: 4 additions & 4 deletions tests/unittests/test_bookmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ def test_get_bookmark(self, test_transform_advertisers_records, test_get_bk_valu
stream = MockCatalog('advertisers', 'advertisers', ['create_time'])
advertisers.process_batch(stream, [{'create_time': 1642114853}], 'test_acc_id')
# Verify that the get_bookmark_value() is called with {} indicating empty state which is returned from the get_bookmark() function.
test_get_bk_value.assert_called_with('advertisers', {}, 'test_acc_id')
test_get_bk_value.assert_called_with('advertisers', {}, 'test_acc_id', 'test_start_date')

@mock.patch('tap_tiktok_ads.streams.pre_transform')
@mock.patch('tap_tiktok_ads.streams.transform_advertisers_records')
def test_get_bookmark_value(self, test_transform_advertisers_records, test_pre_transform):
'''
Verify that the get_bookmark_value() function returns None when state for the Advertisers stream is not passed.
Verify that the get_bookmark_value() function returns start_date when state for the Advertisers stream is not passed.
'''
config = {"start_date": "test_start_date", "user_agent": "test_user_agent", "access_token": "test_at", "accounts": ['test_acc_id']}
state = {"bookmarks": {"campaigns": {"7052829480590606338": "2022-02-10T12:12:52.000000Z"}}}
advertisers = Advertisers(MockClient(), config, state)
stream = MockCatalog('advertisers', 'advertisers', ['create_time'])
advertisers.process_batch(stream, [{'create_time': 1642114853}], 'test_acc_id')
# Verify that the pre_transform() is called with None thus verifying that the get_bookmark_value() returned None.
test_pre_transform.assert_called_with('advertisers', [{'create_time': 1642114853}], None)
# Verify that the pre_transform() is called with 'test_start_date' thus verifying that the get_bookmark_value() returned 'test_start_date'.
test_pre_transform.assert_called_with('advertisers', [{'create_time': 1642114853}], 'test_start_date')

0 comments on commit 003dc44

Please sign in to comment.