From 504113dc2c2694c078f5a746a72bc887dbf180d4 Mon Sep 17 00:00:00 2001
From: savan-chovatiya
Date: Thu, 16 Sep 2021 12:05:02 +0530
Subject: [PATCH 1/7] TDL-15317: Updated primary key for feature-events

---
 tap_pendo/streams.py    |   6 +-
 tests/base.py           | 391 ++++++++++++++++++++++++++++++++++++++++
 tests/test_discovery.py | 137 ++++++++++++++
 3 files changed, 533 insertions(+), 1 deletion(-)
 create mode 100644 tests/base.py
 create mode 100644 tests/test_discovery.py

diff --git a/tap_pendo/streams.py b/tap_pendo/streams.py
index fea0a73..2d6909e 100644
--- a/tap_pendo/streams.py
+++ b/tap_pendo/streams.py
@@ -591,7 +591,11 @@ def get_body(self):
 class FeatureEvents(EventsBase):
     name = "feature_events"
     replication_method = "INCREMENTAL"
-    key_properties = ['visitor_id', 'account_id', 'server', 'remote_ip']
+    key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent']
+
+    def __init__(self, config):
+        super().__init__(config=config)
+        self.key_properties.append("day" if self.period == 'dayRange' else "hour")
 
     def get_body(self, key_id, period, first):
         return {
diff --git a/tests/base.py b/tests/base.py
new file mode 100644
index 0000000..11b73eb
--- /dev/null
+++ b/tests/base.py
@@ -0,0 +1,391 @@
+import os
+import unittest
+from datetime import datetime as dt
+from datetime import timedelta
+
+import dateutil.parser
+import pytz
+
+import tap_tester.connections as connections
+import tap_tester.runner as runner
+from tap_tester import menagerie
+
+
+class TestPendoBase(unittest.TestCase):
+
+    REPLICATION_KEYS = "valid-replication-keys"
+    PRIMARY_KEYS = "table-key-properties"
+    FOREIGN_KEYS = "table-foreign-key-properties"
+    REPLICATION_METHOD = "forced-replication-method"
+    INCREMENTAL = "INCREMENTAL"
+    FULL_TABLE = "FULL_TABLE"
+    START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z"
+    BOOKMARK_COMPARISON_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
+    start_date = ""
+    is_day_range = True
+
+    @staticmethod
+    def name():
+        return "test_sync"
+
+    @staticmethod
+    def tap_name():
+        """The name of the tap"""
+        return "tap-pendo"
+
+    @staticmethod
+    def get_type():
+        """The expected url route ending"""
+        return "platform.pendo"
+
+    def expected_metadata(self):
+        """The expected streams and metadata about the streams"""
+        return {
+            "accounts": {
+                self.PRIMARY_KEYS: {'account_id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'lastupdated'}
+            },
+            "features": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            "guides": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            "pages": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            # Add back when the visitor_history stream issue causing this test
+            # to take 4+ hours is solved, tracked in this JIRA:
+            # https://stitchdata.atlassian.net/browse/SRCE-4755
+            # "visitor_history": {
+            #     self.PRIMARY_KEYS: {'visitor_id'},
+            #     self.REPLICATION_METHOD: self.INCREMENTAL,
+            #     self.REPLICATION_KEYS: {'modified_ts'}
+            # },
+
+            "visitors": {
+                self.PRIMARY_KEYS: {'visitor_id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'lastupdated'}
+            },
+            "track_types": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            "feature_events":{
+                self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"}
+                if self.is_day_range else
+                {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "page_events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "guide_events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'browser_time'}
+            },
+            "poll_events":{
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'browser_time'}
+            },
+            "track_events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "metadata_accounts": {
+                self.REPLICATION_METHOD: self.FULL_TABLE,
+            },
+            "metadata_visitors": {
+                self.REPLICATION_METHOD: self.FULL_TABLE,
+            },
+        }
+
+    def setUp(self):
+        missing_envs = [x for x in [
+            "TAP_PENDO_INTEGRATION_KEY",
+        ] if os.getenv(x) is None]
+
+        if missing_envs:
+            raise Exception("Missing environment variables: {}".format(missing_envs))
+
+    @staticmethod
+    def get_credentials():
+        """Authentication information for the test account"""
+        return {
+            "x_pendo_integration_key": os.getenv("TAP_PENDO_INTEGRATION_KEY")
+        }
+
+    def get_properties(self, original: bool = True):
+        """Configuration properties required for the tap."""
+        return_value = {
+            "start_date": "2020-09-10T00:00:00Z",
+            "lookback_window": "1",
+            "period": "dayRange" if self.is_day_range else "hourRange",
+        }
+        if original:
+            return return_value
+
+        return_value["start_date"] = self.start_date
+        return return_value
+
+    def expected_streams(self):
+        """A set of expected stream names"""
+        return set(self.expected_metadata().keys())
+
+    def expected_pks(self):
+        """Return a dictionary with key of table name and value as a set of primary key fields"""
+        return {table: properties.get(self.PRIMARY_KEYS, set())
+                for table, properties
+                in self.expected_metadata().items()}
+
+    def expected_replication_keys(self):
+        """Return a dictionary with key of table name and value as a set of replication key fields"""
+        return {table: properties.get(self.REPLICATION_KEYS, set())
+                for table, properties
+                in self.expected_metadata().items()}
+
+    def expected_replication_method(self):
+        """Return a dictionary with key of table name and value of replication method"""
+        return {table: properties.get(self.REPLICATION_METHOD, None)
+                for table, properties
+                in self.expected_metadata().items()}
+
+    def expected_automatic_fields(self):
+        """Return a dictionary with key of table name and value as a set of automatic key fields"""
+        auto_fields = {}
+        for k, v in self.expected_metadata().items():
+            auto_fields[k] = v.get(self.PRIMARY_KEYS, set()) | v.get(self.REPLICATION_KEYS, set()) \
+                | v.get(self.FOREIGN_KEYS, set())
+        return auto_fields
+
+    #########################
+    #    Helper Methods     #
+    #########################
+
+    def run_and_verify_check_mode(self, conn_id):
+        """
+        Run the tap in check mode and verify it succeeds.
+        This should be run prior to field selection and the initial sync.
+        Return the found catalogs from menagerie.
+        """
+        # run in check mode
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(
+            found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id))
+
+        found_catalog_names = set(
+            map(lambda c: c['stream_name'], found_catalogs))
+
+        subset = self.expected_streams().issubset(found_catalog_names)
+        self.assertTrue(
+            subset, msg="Expected check streams are not a subset of the discovered catalog")
+        print("discovered schemas are OK")
+
+        return found_catalogs
+
+    def run_and_verify_sync(self, conn_id):
+        """
+        Run a sync job and make sure it exited properly.
+        Return a dictionary with keys of streams synced
+        and values of records synced for each stream.
+        """
+        # Run a sync job using orchestrator
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # Verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # Verify actual rows were synced
+        sync_record_count = runner.examine_target_output_file(
+            self, conn_id, self.expected_streams(), self.expected_pks())
+        self.assertGreater(
+            sum(sync_record_count.values()), 0,
+            msg="failed to replicate any data: {}".format(sync_record_count)
+        )
+        print("total replicated row count: {}".format(
+            sum(sync_record_count.values())))
+
+        return sync_record_count
+
+    def perform_and_verify_table_and_field_selection(self, conn_id, test_catalogs, select_all_fields=True):
+        """
+        Perform table and field selection based off of the streams to select
+        set and field selection parameters.
+        Verify this results in the expected streams selected and all or no
+        fields selected for those streams.
+ """ + + # Select all available fields or select no fields from all testable streams + self.select_all_streams_and_fields( + conn_id, test_catalogs, select_all_fields) + + catalogs = menagerie.get_catalogs(conn_id) + + # Ensure our selection affects the catalog + expected_selected = [tc.get('stream_name') for tc in test_catalogs] + + for cat in catalogs: + catalog_entry = menagerie.get_annotated_schema( + conn_id, cat['stream_id']) + + # Verify all testable streams are selected + selected = catalog_entry.get('annotated-schema').get('selected') + print("Validating selection on {}: {}".format( + cat['stream_name'], selected)) + if cat['stream_name'] not in expected_selected: + self.assertFalse( + selected, msg="Stream selected, but not testable.") + continue # Skip remaining assertions if we aren't selecting this stream + self.assertTrue(selected, msg="Stream not selected.") + + if select_all_fields: + # Verify all fields within each selected stream are selected + for field, field_props in catalog_entry.get('annotated-schema').get('properties').items(): + field_selected = field_props.get('selected') + print("\tValidating selection on {}.{}: {}".format( + cat['stream_name'], field, field_selected)) + self.assertTrue(field_selected, msg="Field not selected.") + else: + # Verify only automatic fields are selected + expected_automatic_fields = self.expected_automatic_fields().get( + cat['stream_name']) + selected_fields = self.get_selected_fields_from_metadata( + catalog_entry['metadata']) + self.assertEqual(expected_automatic_fields, selected_fields) + + def get_selected_fields_from_metadata(self, metadata): + selected_fields = set() + for field in metadata: + is_field_metadata = len(field['breadcrumb']) > 1 + + inclusion_automatic_or_selected = ( + field['metadata'].get('selected') is True or + field['metadata'].get('inclusion') == 'automatic' + ) + if is_field_metadata and inclusion_automatic_or_selected: + selected_fields.add(field['breadcrumb'][1]) + return selected_fields + + def select_all_streams_and_fields(self, conn_id, catalogs, select_all_fields: bool = True): + """Select all streams and all fields within streams""" + for catalog in catalogs: + schema = menagerie.get_annotated_schema( + conn_id, catalog['stream_id']) + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, [], non_selected_properties) + + def calculated_states_by_stream(self, current_state): + timedelta_by_stream = {stream: [0,0,0,5] # {stream_name: [days, hours, minutes, seconds], ...} + for stream in self.expected_streams()} + + stream_to_calculated_state = {stream: "" for stream in current_state['bookmarks'].keys()} + for stream, state in current_state['bookmarks'].items(): + state_key, state_value = next(iter(state.keys())), next(iter(state.values())) + state_as_datetime = dateutil.parser.parse(state_value) + + days, hours, minutes, seconds = timedelta_by_stream[stream] + calculated_state_as_datetime = state_as_datetime - timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds) + + state_format = '%Y-%m-%dT%H:%M:%S-00:00' + calculated_state_formatted = dt.strftime(calculated_state_as_datetime, state_format) + + stream_to_calculated_state[stream] = {state_key: calculated_state_formatted} + + return stream_to_calculated_state + + def parse_date(self, date_value): + 
""" + Pass in string-formatted-datetime, parse the value, and return it as an unformatted datetime object. + """ + date_formats = { + "%Y-%m-%dT%H:%M:%S.%fZ", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S.%f+00:00", + "%Y-%m-%dT%H:%M:%S+00:00", + "%Y-%m-%d" + } + for date_format in date_formats: + try: + date_stripped = dt.strptime(date_value, date_format) + return date_stripped + except ValueError: + continue + + raise NotImplementedError( + "Tests do not account for dates of this format: {}".format(date_value)) + + ########################################################################## + # Tap Specific Methods + ########################################################################## + + def convert_state_to_utc(self, date_str): + """ + Convert a saved bookmark value of the form '2020-08-25T13:17:36-07:00' to + a string formatted utc datetime, + in order to compare aginast json formatted datetime values + """ + date_object = dateutil.parser.parse(date_str) + date_object_utc = date_object.astimezone(tz=pytz.UTC) + return dt.strftime(date_object_utc, "%Y-%m-%dT%H:%M:%SZ") + + def timedelta_formatted(self, dtime, days=0): + try: + date_stripped = dt.strptime(dtime, "%Y-%m-%dT%H:%M:%SZ") + return_date = date_stripped + timedelta(days=days) + + return dt.strftime(return_date, "%Y-%m-%dT%H:%M:%SZ") + + except ValueError: + try: + date_stripped = dt.strptime(dtime, self.BOOKMARK_COMPARISON_FORMAT) + return_date = date_stripped + timedelta(days=days) + + return dt.strftime(return_date, self.BOOKMARK_COMPARISON_FORMAT) + + except ValueError: + return Exception("Datetime object is not of the format: {}".format(self.START_DATE_FORMAT)) + + def is_incremental(self, stream): + return self.expected_metadata().get(stream).get(self.REPLICATION_METHOD) == self.INCREMENTAL + + def is_event(self, stream): + return stream.endswith('events') \ No newline at end of file diff --git a/tests/test_discovery.py b/tests/test_discovery.py new file mode 100644 index 0000000..299d0c0 --- /dev/null +++ b/tests/test_discovery.py @@ -0,0 +1,137 @@ +import re + +import tap_tester.connections as connections +from base import TestPendoBase +from tap_tester import menagerie + +class PendoDiscoverTest(TestPendoBase): + """ + Testing that discovery creates the appropriate catalog with valid metadata. + • Verify number of actual streams discovered match expected + • Verify the stream names discovered were what we expect + • Verify stream names follow naming convention + streams should only have lowercase alphas and underscores + • verify there is only 1 top level breadcrumb + • verify replication key(s) + • verify primary key(s) + • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL + • verify the actual replication matches our expected replication method + • verify that primary, replication keys are given the inclusion of automatic. + • verify that all other fields have inclusion of available metadata. 
+ """ + + def name(self): + return "pendo_discover_test" + + def discovery_test_run(self): + streams_to_test = self.expected_streams() + + conn_id = connections.ensure_connection(self, payload_hook=None) + + # Verify that there are catalogs found + found_catalogs = self.run_and_verify_check_mode( + conn_id) + + # Verify stream names follow naming convention + # streams should only have lowercase alphas and underscores + found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} + self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), + msg="One or more streams don't follow standard naming") + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Verify ensure the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertIsNotNone(catalog) + + # collecting expected values + expected_primary_keys = self.expected_pks()[stream] + expected_replication_keys = self.expected_replication_keys()[ + stream] + expected_automatic_fields = self.expected_automatic_fields().get(stream) + expected_replication_method = self.expected_replication_method()[ + stream] + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema( + conn_id, catalog['stream_id']) + metadata = schema_and_metadata["metadata"] + stream_properties = [ + item for item in metadata if item.get("breadcrumb") == []] + actual_primary_keys = set( + stream_properties[0].get( + "metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, []) + ) + actual_replication_keys = set( + stream_properties[0].get( + "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, []) + ) + actual_replication_method = stream_properties[0].get( + "metadata", {self.REPLICATION_METHOD: None}).get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + + ########################################################################## + # metadata assertions + ########################################################################## + + # verify there is only 1 top level breadcrumb in metadata + self.assertTrue(len(stream_properties) == 1, + msg="There is NOT only one top level breadcrumb for {}".format(stream) + + "\nstream_properties | {}".format(stream_properties)) + + # verify that if there is a replication key we are doing INCREMENTAL otherwise FULL + if actual_replication_keys: + self.assertTrue(actual_replication_method == self.INCREMENTAL, + msg="Expected INCREMENTAL replication " + "since there is a replication key") + else: + self.assertTrue(actual_replication_method == self.FULL_TABLE, + msg="Expected FULL replication " + "since there is no replication key") + + # verify the actual replication matches our expected replication method + self.assertEqual(expected_replication_method, actual_replication_method, + msg="The actual replication method {} doesn't match the expected {}".format( + actual_replication_method, expected_replication_method)) + + print(stream_properties[0].get( + "metadata", {self.REPLICATION_KEYS: []})) + # verify replication key(s) + self.assertEqual(expected_replication_keys, actual_replication_keys, + msg="expected replication key {} but actual is {}".format( + expected_replication_keys, actual_replication_keys)) + + # verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, 
actual_primary_keys, + ) + + # verify that primary keys and replication keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_automatic_fields, + actual_automatic_fields) + + # verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields}), + msg="Not all non key properties are set to available in metadata") + + def test_run(self): + + #Discovery test for hourRange period + self.is_day_range = False + self.discovery_test_run() + + #Discovery test for dayRange period + self.is_day_range = True + self.discovery_test_run() \ No newline at end of file From 98ef73ce7562109823dd3b43227dba53e45ccf71 Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Thu, 16 Sep 2021 14:42:52 +0530 Subject: [PATCH 2/7] Moved integration test --- tests/{ => tap_tester}/base.py | 0 tests/{ => tap_tester}/test_discovery.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => tap_tester}/base.py (100%) rename tests/{ => tap_tester}/test_discovery.py (100%) diff --git a/tests/base.py b/tests/tap_tester/base.py similarity index 100% rename from tests/base.py rename to tests/tap_tester/base.py diff --git a/tests/test_discovery.py b/tests/tap_tester/test_discovery.py similarity index 100% rename from tests/test_discovery.py rename to tests/tap_tester/test_discovery.py From 9d296b44f625fee3f9592e576476ac67a0f24f2e Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Wed, 29 Sep 2021 17:49:22 +0530 Subject: [PATCH 3/7] TDL-15317: Updated integration test --- tests/tap_tester/base.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 11b73eb..05840b2 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -81,21 +81,19 @@ def expected_metadata(self): self.REPLICATION_KEYS: {'last_updated_at'} }, "feature_events":{ - self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"} - if self.is_day_range else - {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"}, + self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day" if self.is_day_range else "hour"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "page_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "guide_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"}, @@ -110,7 +108,7 @@ def expected_metadata(self): "track_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if 
self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "metadata_accounts": { self.REPLICATION_METHOD: self.FULL_TABLE, From 8fe015018a354ba6470af90c5b26b0209dfe9b9b Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Mon, 4 Oct 2021 17:04:43 +0530 Subject: [PATCH 4/7] updated readme file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e50c118..85ff2e2 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ This tap: **[feature_events](https://developers.pendo.io/docs/?bash#get-an-account-by-id)** - Endpoint: [https://api/v1/aggregation](https://app.pendo.io/api/v1/aggregation) -- Primary key fields: `visitor_id`, `account_id`, `server`, `remote_ip` +- Primary key fields: `feature_id`, `visitor_id`, `account_id`, `server`, `remote_ip`, `user_agent`, `day` or `hour` - Replication strategy: INCREMENTAL (query filtered) - Bookmark: `day` or `hour` - Transformations: Camel to snake case. From cdd827811a7e76ca91f13e7ee47ebf9747aa67f3 Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Fri, 8 Oct 2021 14:13:52 +0530 Subject: [PATCH 5/7] Resolved internal PR review comments --- .circleci/config.yml | 7 ++++- setup.py | 7 +++-- tests/tap_tester/base.py | 11 +++---- .../test_feature_events_primary_keys.py | 30 +++++++++++++++++++ 4 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 tests/unittests/test_feature_events_primary_keys.py diff --git a/.circleci/config.yml b/.circleci/config.yml index b7e033f..f24bcca 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ jobs: python3 -mvenv /usr/local/share/virtualenvs/tap-pendo source /usr/local/share/virtualenvs/tap-pendo/bin/activate pip install -U pip setuptools - pip install .[dev] + pip install .[test] - run: name: 'JSON Validator' command: | @@ -23,6 +23,11 @@ jobs: source /usr/local/share/virtualenvs/tap-pendo/bin/activate # TODO: Adjust the pylint disables pylint tap_pendo --disable 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,bad-whitespace,missing-class-docstring' + - run: + name: 'Unit Tests' + command: | + source /usr/local/share/virtualenvs/tap-pendo/bin/activate + nosetests tests/unittests - add_ssh_keys - run: name: 'Integration Tests' diff --git a/setup.py b/setup.py index dcf47d4..9aa83c5 100755 --- a/setup.py +++ b/setup.py @@ -17,9 +17,12 @@ 'ijson==3.1.4', ], extras_require={ - 'dev': [ - 'ipdb==0.11', + 'test': [ 'pylint==2.5.3', + 'nose' + ], + 'dev': [ + 'ipdb==0.11' ] }, entry_points=""" diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 05840b2..bbd1797 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -40,6 +40,7 @@ def get_type(): def expected_metadata(self): """The expected streams and metadata about the streams""" + event_replication_key = 'day' if self.is_day_range else 'hour' return { "accounts": { self.PRIMARY_KEYS: {'account_id'}, @@ -81,19 +82,19 @@ def expected_metadata(self): self.REPLICATION_KEYS: {'last_updated_at'} }, "feature_events":{ - self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day" if self.is_day_range else "hour"}, + self.PRIMARY_KEYS: {"feature_id", "visitor_id", 
"account_id", "server", "remote_ip", "user_agent", event_replication_key}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "page_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "guide_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"}, @@ -108,7 +109,7 @@ def expected_metadata(self): "track_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "metadata_accounts": { self.REPLICATION_METHOD: self.FULL_TABLE, diff --git a/tests/unittests/test_feature_events_primary_keys.py b/tests/unittests/test_feature_events_primary_keys.py new file mode 100644 index 0000000..3c74a5b --- /dev/null +++ b/tests/unittests/test_feature_events_primary_keys.py @@ -0,0 +1,30 @@ +import unittest +from tap_pendo.streams import FeatureEvents + +class TestFeatureEventsPrimaryKeys(unittest.TestCase): + + def test_feature_event_primary_key_with_hourRange(self): + ''' + Verify that primary keys should have expected fields with 'hour' field when period is hourRange + ''' + config = {"period": "hourRange"} # set hourRange as a period + expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"] + feature_event_stream1 = FeatureEvents(config) # Initialize FeatuereEvents object which sets primary keys + + self.assertEqual(feature_event_stream1.key_properties, expected_primary_keys) + + # Reset key properties for other test as it's class variable + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] + + def test_feature_event_primary_key_with_dayRange(self): + ''' + Verify that primary keys should have expected fields with 'day' field when period is dayRange + ''' + config = {"period": "dayRange"} # set dayRange as a period + expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"] + feature_event_stream2 = FeatureEvents(config) # Initialize Events object which sets primary keys + + self.assertEqual(feature_event_stream2.key_properties, expected_primary_keys) + + # Reset key properties for other test as it's class variable + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] \ No newline at end of file From d1c083947513e479a6266436a7457ed4104beaa0 Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Thu, 21 Oct 2021 11:36:05 +0530 Subject: [PATCH 6/7] Resolved review comments --- .circleci/config.yml | 8 +++++++- .../unittests/test_feature_events_primary_keys.py | 14 +++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f24bcca..c0899c3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,13 @@ jobs: name: 'Unit Tests' command: | source 
/usr/local/share/virtualenvs/tap-pendo/bin/activate - nosetests tests/unittests + pip install nose coverage + nosetests --with-coverage --cover-erase --cover-package=tap_pendo --cover-html-dir=htmlcov tests/unittests + coverage html + - store_test_results: + path: test_output/report.xml + - store_artifacts: + path: htmlcov - add_ssh_keys - run: name: 'Integration Tests' diff --git a/tests/unittests/test_feature_events_primary_keys.py b/tests/unittests/test_feature_events_primary_keys.py index 3c74a5b..d99b008 100644 --- a/tests/unittests/test_feature_events_primary_keys.py +++ b/tests/unittests/test_feature_events_primary_keys.py @@ -7,24 +7,24 @@ def test_feature_event_primary_key_with_hourRange(self): ''' Verify that primary keys should have expected fields with 'hour' field when period is hourRange ''' + # Reset key properties to default value + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] config = {"period": "hourRange"} # set hourRange as a period expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"] + feature_event_stream1 = FeatureEvents(config) # Initialize FeatuereEvents object which sets primary keys self.assertEqual(feature_event_stream1.key_properties, expected_primary_keys) - # Reset key properties for other test as it's class variable - FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] - def test_feature_event_primary_key_with_dayRange(self): ''' Verify that primary keys should have expected fields with 'day' field when period is dayRange ''' + # Reset key properties to default value + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] config = {"period": "dayRange"} # set dayRange as a period - expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"] + expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"] + feature_event_stream2 = FeatureEvents(config) # Initialize Events object which sets primary keys self.assertEqual(feature_event_stream2.key_properties, expected_primary_keys) - - # Reset key properties for other test as it's class variable - FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] \ No newline at end of file From 925c7f73732eb3be130ce5e1a28fbee13405a197 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Dec 2021 14:13:36 +0530 Subject: [PATCH 7/7] run bookmark test with hour and day range --- tests/tap_tester/test_bookmark.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/tap_tester/test_bookmark.py b/tests/tap_tester/test_bookmark.py index d86a7e8..d27ac88 100644 --- a/tests/tap_tester/test_bookmark.py +++ b/tests/tap_tester/test_bookmark.py @@ -9,7 +9,7 @@ class PendoBookMarkTest(TestPendoBase): def name(self): return "pendo_bookmark_test" - def test_run(self): + def run_test(self): """ Verify that for each stream you can do a sync which records bookmarks. That the bookmark is the maximum value sent to the target for the replication key. 
@@ -191,4 +191,13 @@ def test_run(self): # Verify at least 1 record was replicated in the second sync self.assertGreater( - second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) \ No newline at end of file + second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) + + def test_run(self): + # test for hourRange period + self.is_day_range = False + self.run_test() + + # test for dayRange period + self.is_day_range = True + self.run_test()
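
A minimal usage sketch (not part of the patch series), mirroring the unit tests in PATCH 5/7 above: the `period` value in the tap config determines the final field of the FeatureEvents composite primary key. Constructing the stream from a one-key config is an assumption carried over from those tests; a real config would also include credentials such as x_pendo_integration_key.

    from tap_pendo.streams import FeatureEvents

    # With a dayRange period the composite key ends in "day" ...
    day_stream = FeatureEvents({"period": "dayRange"})  # one-key config, as in the unit tests
    assert day_stream.key_properties[-1] == "day"

    # key_properties is a class variable, so reset it before constructing
    # another stream, exactly as the unit tests do.
    FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent']

    # ... and with an hourRange period it ends in "hour", matching the replication key.
    hour_stream = FeatureEvents({"period": "hourRange"})
    assert hour_stream.key_properties[-1] == "hour"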