From 504113dc2c2694c078f5a746a72bc887dbf180d4 Mon Sep 17 00:00:00 2001
From: savan-chovatiya
Date: Thu, 16 Sep 2021 12:05:02 +0530
Subject: [PATCH 1/7] TDL-15317: Updated primary key for feature-events

---
 tap_pendo/streams.py    |   6 +-
 tests/base.py           | 391 ++++++++++++++++++++++++++++++++++++++++
 tests/test_discovery.py | 137 ++++++++++++++
 3 files changed, 533 insertions(+), 1 deletion(-)
 create mode 100644 tests/base.py
 create mode 100644 tests/test_discovery.py

diff --git a/tap_pendo/streams.py b/tap_pendo/streams.py
index fea0a73..2d6909e 100644
--- a/tap_pendo/streams.py
+++ b/tap_pendo/streams.py
@@ -591,7 +591,11 @@ def get_body(self):
 class FeatureEvents(EventsBase):
     name = "feature_events"
     replication_method = "INCREMENTAL"
-    key_properties = ['visitor_id', 'account_id', 'server', 'remote_ip']
+    key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent']
+
+    def __init__(self, config):
+        super().__init__(config=config)
+        self.key_properties.append("day" if self.period == 'dayRange' else "hour")
 
     def get_body(self, key_id, period, first):
         return {
diff --git a/tests/base.py b/tests/base.py
new file mode 100644
index 0000000..11b73eb
--- /dev/null
+++ b/tests/base.py
@@ -0,0 +1,391 @@
+import os
+import unittest
+from datetime import datetime as dt
+from datetime import timedelta
+
+import dateutil.parser
+import pytz
+
+import tap_tester.connections as connections
+import tap_tester.runner as runner
+from tap_tester import menagerie
+
+
+class TestPendoBase(unittest.TestCase):
+
+    REPLICATION_KEYS = "valid-replication-keys"
+    PRIMARY_KEYS = "table-key-properties"
+    FOREIGN_KEYS = "table-foreign-key-properties"
+    REPLICATION_METHOD = "forced-replication-method"
+    INCREMENTAL = "INCREMENTAL"
+    FULL_TABLE = "FULL_TABLE"
+    START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z"
+    BOOKMARK_COMPARISON_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
+    start_date = ""
+    is_day_range = True
+
+    @staticmethod
+    def name():
+        return "test_sync"
+
+    @staticmethod
+    def tap_name():
+        """The name of the tap"""
+        return "tap-pendo"
+
+    @staticmethod
+    def get_type():
+        """The expected url route ending"""
+        return "platform.pendo"
+
+    def expected_metadata(self):
+        """The expected streams and metadata about the streams"""
+        return {
+            "accounts": {
+                self.PRIMARY_KEYS: {'account_id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'lastupdated'}
+            },
+            "features": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            "guides": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            "pages": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            # Add back when the visitor_history stream issue causing this test
+            # to take 4+ hours is solved, tracked in this JIRA:
+            # https://stitchdata.atlassian.net/browse/SRCE-4755
+            # "visitor_history": {
+            #     self.PRIMARY_KEYS: {'visitor_id'},
+            #     self.REPLICATION_METHOD: self.INCREMENTAL,
+            #     self.REPLICATION_KEYS: {'modified_ts'}
+            # },
+
+            "visitors": {
+                self.PRIMARY_KEYS: {'visitor_id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'lastupdated'}
+            },
+            "track_types": {
+                self.PRIMARY_KEYS: {'id'},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'last_updated_at'}
+            },
+            "feature_events":{
+                self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"}
+                if self.is_day_range else
+                {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "page_events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "guide_events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'browser_time'}
+            },
+            "poll_events":{
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'browser_time'}
+            },
+            "track_events": {
+                self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"},
+                self.REPLICATION_METHOD: self.INCREMENTAL,
+                self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'}
+            },
+            "metadata_accounts": {
+                self.REPLICATION_METHOD: self.FULL_TABLE,
+            },
+            "metadata_visitors": {
+                self.REPLICATION_METHOD: self.FULL_TABLE,
+            },
+        }
+
+    def setUp(self):
+        missing_envs = [x for x in [
+            "TAP_PENDO_INTEGRATION_KEY",
+        ] if os.getenv(x) is None]
+
+        if missing_envs:
+            raise Exception("Missing environment variables: {}".format(missing_envs))
+
+    @staticmethod
+    def get_credentials():
+        """Authentication information for the test account"""
+        return {
+            "x_pendo_integration_key": os.getenv("TAP_PENDO_INTEGRATION_KEY")
+        }
+
+    def get_properties(self, original: bool = True):
+        """Configuration properties required for the tap."""
+        return_value = {
+            "start_date": "2020-09-10T00:00:00Z",
+            "lookback_window": "1",
+            "period": "dayRange" if self.is_day_range else "hourRange",
+        }
+        if original:
+            return return_value
+
+        return_value["start_date"] = self.start_date
+        return return_value
+
+    def expected_streams(self):
+        """A set of expected stream names"""
+        return set(self.expected_metadata().keys())
+
+    def expected_pks(self):
+        """Return a dictionary with key of table name and value as a set of primary key fields"""
+        return {table: properties.get(self.PRIMARY_KEYS, set())
+                for table, properties
+                in self.expected_metadata().items()}
+
+    def expected_replication_keys(self):
+        """Return a dictionary with key of table name and value as a set of replication key fields"""
+        return {table: properties.get(self.REPLICATION_KEYS, set())
+                for table, properties
+                in self.expected_metadata().items()}
+
+    def expected_replication_method(self):
+        """Return a dictionary with key of table name and value of replication method"""
+        return {table: properties.get(self.REPLICATION_METHOD, None)
+                for table, properties
+                in self.expected_metadata().items()}
+
+    def expected_automatic_fields(self):
+        """Return a dictionary with key of table name and value as a set of automatic key fields"""
+        auto_fields = {}
+        for k, v in self.expected_metadata().items():
+            auto_fields[k] = v.get(self.PRIMARY_KEYS, set()) | v.get(self.REPLICATION_KEYS, set()) \
+                | v.get(self.FOREIGN_KEYS, set())
+        return auto_fields
+
+    #########################
+    #    Helper Methods     #
+    #########################
+
+    def run_and_verify_check_mode(self, conn_id):
+        """
+        Run the tap in check mode and verify it succeeds.
+        This should be run prior to field selection and the initial sync.
+        Return the found catalogs from menagerie.
+        """
+        # run in check mode
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(
+            found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id))
+
+        found_catalog_names = set(
+            map(lambda c: c['stream_name'], found_catalogs))
+
+        subset = self.expected_streams().issubset(found_catalog_names)
+        self.assertTrue(
+            subset, msg="Expected check streams are not a subset of the discovered catalog")
+        print("discovered schemas are OK")
+
+        return found_catalogs
+
+    def run_and_verify_sync(self, conn_id):
+        """
+        Run a sync job and make sure it exited properly.
+        Return a dictionary with keys of streams synced
+        and values of records synced for each stream.
+        """
+        # Run a sync job using orchestrator
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # Verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # Verify actual rows were synced
+        sync_record_count = runner.examine_target_output_file(
+            self, conn_id, self.expected_streams(), self.expected_pks())
+        self.assertGreater(
+            sum(sync_record_count.values()), 0,
+            msg="failed to replicate any data: {}".format(sync_record_count)
+        )
+        print("total replicated row count: {}".format(
+            sum(sync_record_count.values())))
+
+        return sync_record_count
+
+    def perform_and_verify_table_and_field_selection(self, conn_id, test_catalogs, select_all_fields=True):
+        """
+        Perform table and field selection based off of the streams to select
+        set and field selection parameters.
+        Verify this results in the expected streams selected and all or no
+        fields selected for those streams.
+ """ + + # Select all available fields or select no fields from all testable streams + self.select_all_streams_and_fields( + conn_id, test_catalogs, select_all_fields) + + catalogs = menagerie.get_catalogs(conn_id) + + # Ensure our selection affects the catalog + expected_selected = [tc.get('stream_name') for tc in test_catalogs] + + for cat in catalogs: + catalog_entry = menagerie.get_annotated_schema( + conn_id, cat['stream_id']) + + # Verify all testable streams are selected + selected = catalog_entry.get('annotated-schema').get('selected') + print("Validating selection on {}: {}".format( + cat['stream_name'], selected)) + if cat['stream_name'] not in expected_selected: + self.assertFalse( + selected, msg="Stream selected, but not testable.") + continue # Skip remaining assertions if we aren't selecting this stream + self.assertTrue(selected, msg="Stream not selected.") + + if select_all_fields: + # Verify all fields within each selected stream are selected + for field, field_props in catalog_entry.get('annotated-schema').get('properties').items(): + field_selected = field_props.get('selected') + print("\tValidating selection on {}.{}: {}".format( + cat['stream_name'], field, field_selected)) + self.assertTrue(field_selected, msg="Field not selected.") + else: + # Verify only automatic fields are selected + expected_automatic_fields = self.expected_automatic_fields().get( + cat['stream_name']) + selected_fields = self.get_selected_fields_from_metadata( + catalog_entry['metadata']) + self.assertEqual(expected_automatic_fields, selected_fields) + + def get_selected_fields_from_metadata(self, metadata): + selected_fields = set() + for field in metadata: + is_field_metadata = len(field['breadcrumb']) > 1 + + inclusion_automatic_or_selected = ( + field['metadata'].get('selected') is True or + field['metadata'].get('inclusion') == 'automatic' + ) + if is_field_metadata and inclusion_automatic_or_selected: + selected_fields.add(field['breadcrumb'][1]) + return selected_fields + + def select_all_streams_and_fields(self, conn_id, catalogs, select_all_fields: bool = True): + """Select all streams and all fields within streams""" + for catalog in catalogs: + schema = menagerie.get_annotated_schema( + conn_id, catalog['stream_id']) + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, [], non_selected_properties) + + def calculated_states_by_stream(self, current_state): + timedelta_by_stream = {stream: [0,0,0,5] # {stream_name: [days, hours, minutes, seconds], ...} + for stream in self.expected_streams()} + + stream_to_calculated_state = {stream: "" for stream in current_state['bookmarks'].keys()} + for stream, state in current_state['bookmarks'].items(): + state_key, state_value = next(iter(state.keys())), next(iter(state.values())) + state_as_datetime = dateutil.parser.parse(state_value) + + days, hours, minutes, seconds = timedelta_by_stream[stream] + calculated_state_as_datetime = state_as_datetime - timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds) + + state_format = '%Y-%m-%dT%H:%M:%S-00:00' + calculated_state_formatted = dt.strftime(calculated_state_as_datetime, state_format) + + stream_to_calculated_state[stream] = {state_key: calculated_state_formatted} + + return stream_to_calculated_state + + def parse_date(self, date_value): + 
""" + Pass in string-formatted-datetime, parse the value, and return it as an unformatted datetime object. + """ + date_formats = { + "%Y-%m-%dT%H:%M:%S.%fZ", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S.%f+00:00", + "%Y-%m-%dT%H:%M:%S+00:00", + "%Y-%m-%d" + } + for date_format in date_formats: + try: + date_stripped = dt.strptime(date_value, date_format) + return date_stripped + except ValueError: + continue + + raise NotImplementedError( + "Tests do not account for dates of this format: {}".format(date_value)) + + ########################################################################## + # Tap Specific Methods + ########################################################################## + + def convert_state_to_utc(self, date_str): + """ + Convert a saved bookmark value of the form '2020-08-25T13:17:36-07:00' to + a string formatted utc datetime, + in order to compare aginast json formatted datetime values + """ + date_object = dateutil.parser.parse(date_str) + date_object_utc = date_object.astimezone(tz=pytz.UTC) + return dt.strftime(date_object_utc, "%Y-%m-%dT%H:%M:%SZ") + + def timedelta_formatted(self, dtime, days=0): + try: + date_stripped = dt.strptime(dtime, "%Y-%m-%dT%H:%M:%SZ") + return_date = date_stripped + timedelta(days=days) + + return dt.strftime(return_date, "%Y-%m-%dT%H:%M:%SZ") + + except ValueError: + try: + date_stripped = dt.strptime(dtime, self.BOOKMARK_COMPARISON_FORMAT) + return_date = date_stripped + timedelta(days=days) + + return dt.strftime(return_date, self.BOOKMARK_COMPARISON_FORMAT) + + except ValueError: + return Exception("Datetime object is not of the format: {}".format(self.START_DATE_FORMAT)) + + def is_incremental(self, stream): + return self.expected_metadata().get(stream).get(self.REPLICATION_METHOD) == self.INCREMENTAL + + def is_event(self, stream): + return stream.endswith('events') \ No newline at end of file diff --git a/tests/test_discovery.py b/tests/test_discovery.py new file mode 100644 index 0000000..299d0c0 --- /dev/null +++ b/tests/test_discovery.py @@ -0,0 +1,137 @@ +import re + +import tap_tester.connections as connections +from base import TestPendoBase +from tap_tester import menagerie + +class PendoDiscoverTest(TestPendoBase): + """ + Testing that discovery creates the appropriate catalog with valid metadata. + • Verify number of actual streams discovered match expected + • Verify the stream names discovered were what we expect + • Verify stream names follow naming convention + streams should only have lowercase alphas and underscores + • verify there is only 1 top level breadcrumb + • verify replication key(s) + • verify primary key(s) + • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL + • verify the actual replication matches our expected replication method + • verify that primary, replication keys are given the inclusion of automatic. + • verify that all other fields have inclusion of available metadata. 
+ """ + + def name(self): + return "pendo_discover_test" + + def discovery_test_run(self): + streams_to_test = self.expected_streams() + + conn_id = connections.ensure_connection(self, payload_hook=None) + + # Verify that there are catalogs found + found_catalogs = self.run_and_verify_check_mode( + conn_id) + + # Verify stream names follow naming convention + # streams should only have lowercase alphas and underscores + found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} + self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), + msg="One or more streams don't follow standard naming") + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Verify ensure the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertIsNotNone(catalog) + + # collecting expected values + expected_primary_keys = self.expected_pks()[stream] + expected_replication_keys = self.expected_replication_keys()[ + stream] + expected_automatic_fields = self.expected_automatic_fields().get(stream) + expected_replication_method = self.expected_replication_method()[ + stream] + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema( + conn_id, catalog['stream_id']) + metadata = schema_and_metadata["metadata"] + stream_properties = [ + item for item in metadata if item.get("breadcrumb") == []] + actual_primary_keys = set( + stream_properties[0].get( + "metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, []) + ) + actual_replication_keys = set( + stream_properties[0].get( + "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, []) + ) + actual_replication_method = stream_properties[0].get( + "metadata", {self.REPLICATION_METHOD: None}).get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + + ########################################################################## + # metadata assertions + ########################################################################## + + # verify there is only 1 top level breadcrumb in metadata + self.assertTrue(len(stream_properties) == 1, + msg="There is NOT only one top level breadcrumb for {}".format(stream) + + "\nstream_properties | {}".format(stream_properties)) + + # verify that if there is a replication key we are doing INCREMENTAL otherwise FULL + if actual_replication_keys: + self.assertTrue(actual_replication_method == self.INCREMENTAL, + msg="Expected INCREMENTAL replication " + "since there is a replication key") + else: + self.assertTrue(actual_replication_method == self.FULL_TABLE, + msg="Expected FULL replication " + "since there is no replication key") + + # verify the actual replication matches our expected replication method + self.assertEqual(expected_replication_method, actual_replication_method, + msg="The actual replication method {} doesn't match the expected {}".format( + actual_replication_method, expected_replication_method)) + + print(stream_properties[0].get( + "metadata", {self.REPLICATION_KEYS: []})) + # verify replication key(s) + self.assertEqual(expected_replication_keys, actual_replication_keys, + msg="expected replication key {} but actual is {}".format( + expected_replication_keys, actual_replication_keys)) + + # verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, 
actual_primary_keys, + ) + + # verify that primary keys and replication keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_automatic_fields, + actual_automatic_fields) + + # verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields}), + msg="Not all non key properties are set to available in metadata") + + def test_run(self): + + #Discovery test for hourRange period + self.is_day_range = False + self.discovery_test_run() + + #Discovery test for dayRange period + self.is_day_range = True + self.discovery_test_run() \ No newline at end of file From 98ef73ce7562109823dd3b43227dba53e45ccf71 Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Thu, 16 Sep 2021 14:42:52 +0530 Subject: [PATCH 2/7] Moved integration test --- tests/{ => tap_tester}/base.py | 0 tests/{ => tap_tester}/test_discovery.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => tap_tester}/base.py (100%) rename tests/{ => tap_tester}/test_discovery.py (100%) diff --git a/tests/base.py b/tests/tap_tester/base.py similarity index 100% rename from tests/base.py rename to tests/tap_tester/base.py diff --git a/tests/test_discovery.py b/tests/tap_tester/test_discovery.py similarity index 100% rename from tests/test_discovery.py rename to tests/tap_tester/test_discovery.py From 9d296b44f625fee3f9592e576476ac67a0f24f2e Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Wed, 29 Sep 2021 17:49:22 +0530 Subject: [PATCH 3/7] TDL-15317: Updated integration test --- tests/tap_tester/base.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 11b73eb..05840b2 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -81,21 +81,19 @@ def expected_metadata(self): self.REPLICATION_KEYS: {'last_updated_at'} }, "feature_events":{ - self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"} - if self.is_day_range else - {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"}, + self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day" if self.is_day_range else "hour"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "page_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "guide_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"}, @@ -110,7 +108,7 @@ def expected_metadata(self): "track_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day'} if 
self.is_day_range else {'hour'} + self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} }, "metadata_accounts": { self.REPLICATION_METHOD: self.FULL_TABLE, From 8fe015018a354ba6470af90c5b26b0209dfe9b9b Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Mon, 4 Oct 2021 17:04:43 +0530 Subject: [PATCH 4/7] updated readme file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e50c118..85ff2e2 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ This tap: **[feature_events](https://developers.pendo.io/docs/?bash#get-an-account-by-id)** - Endpoint: [https://api/v1/aggregation](https://app.pendo.io/api/v1/aggregation) -- Primary key fields: `visitor_id`, `account_id`, `server`, `remote_ip` +- Primary key fields: `feature_id`, `visitor_id`, `account_id`, `server`, `remote_ip`, `user_agent`, `day` or `hour` - Replication strategy: INCREMENTAL (query filtered) - Bookmark: `day` or `hour` - Transformations: Camel to snake case. From cdd827811a7e76ca91f13e7ee47ebf9747aa67f3 Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Fri, 8 Oct 2021 14:13:52 +0530 Subject: [PATCH 5/7] Resolved internal PR review comments --- .circleci/config.yml | 7 ++++- setup.py | 7 +++-- tests/tap_tester/base.py | 11 +++---- .../test_feature_events_primary_keys.py | 30 +++++++++++++++++++ 4 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 tests/unittests/test_feature_events_primary_keys.py diff --git a/.circleci/config.yml b/.circleci/config.yml index b7e033f..f24bcca 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ jobs: python3 -mvenv /usr/local/share/virtualenvs/tap-pendo source /usr/local/share/virtualenvs/tap-pendo/bin/activate pip install -U pip setuptools - pip install .[dev] + pip install .[test] - run: name: 'JSON Validator' command: | @@ -23,6 +23,11 @@ jobs: source /usr/local/share/virtualenvs/tap-pendo/bin/activate # TODO: Adjust the pylint disables pylint tap_pendo --disable 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,bad-whitespace,missing-class-docstring' + - run: + name: 'Unit Tests' + command: | + source /usr/local/share/virtualenvs/tap-pendo/bin/activate + nosetests tests/unittests - add_ssh_keys - run: name: 'Integration Tests' diff --git a/setup.py b/setup.py index dcf47d4..9aa83c5 100755 --- a/setup.py +++ b/setup.py @@ -17,9 +17,12 @@ 'ijson==3.1.4', ], extras_require={ - 'dev': [ - 'ipdb==0.11', + 'test': [ 'pylint==2.5.3', + 'nose' + ], + 'dev': [ + 'ipdb==0.11' ] }, entry_points=""" diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 05840b2..bbd1797 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -40,6 +40,7 @@ def get_type(): def expected_metadata(self): """The expected streams and metadata about the streams""" + event_replication_key = 'day' if self.is_day_range else 'hour' return { "accounts": { self.PRIMARY_KEYS: {'account_id'}, @@ -81,19 +82,19 @@ def expected_metadata(self): self.REPLICATION_KEYS: {'last_updated_at'} }, "feature_events":{ - self.PRIMARY_KEYS: {"feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day" if self.is_day_range else "hour"}, + self.PRIMARY_KEYS: {"feature_id", "visitor_id", 
"account_id", "server", "remote_ip", "user_agent", event_replication_key}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "page_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "guide_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server_name", "remote_ip"}, @@ -108,7 +109,7 @@ def expected_metadata(self): "track_events": { self.PRIMARY_KEYS: {"visitor_id", "account_id", "server", "remote_ip"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'day' if self.is_day_range else 'hour'} + self.REPLICATION_KEYS: {event_replication_key} }, "metadata_accounts": { self.REPLICATION_METHOD: self.FULL_TABLE, diff --git a/tests/unittests/test_feature_events_primary_keys.py b/tests/unittests/test_feature_events_primary_keys.py new file mode 100644 index 0000000..3c74a5b --- /dev/null +++ b/tests/unittests/test_feature_events_primary_keys.py @@ -0,0 +1,30 @@ +import unittest +from tap_pendo.streams import FeatureEvents + +class TestFeatureEventsPrimaryKeys(unittest.TestCase): + + def test_feature_event_primary_key_with_hourRange(self): + ''' + Verify that primary keys should have expected fields with 'hour' field when period is hourRange + ''' + config = {"period": "hourRange"} # set hourRange as a period + expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"] + feature_event_stream1 = FeatureEvents(config) # Initialize FeatuereEvents object which sets primary keys + + self.assertEqual(feature_event_stream1.key_properties, expected_primary_keys) + + # Reset key properties for other test as it's class variable + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] + + def test_feature_event_primary_key_with_dayRange(self): + ''' + Verify that primary keys should have expected fields with 'day' field when period is dayRange + ''' + config = {"period": "dayRange"} # set dayRange as a period + expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"] + feature_event_stream2 = FeatureEvents(config) # Initialize Events object which sets primary keys + + self.assertEqual(feature_event_stream2.key_properties, expected_primary_keys) + + # Reset key properties for other test as it's class variable + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] \ No newline at end of file From d1c083947513e479a6266436a7457ed4104beaa0 Mon Sep 17 00:00:00 2001 From: savan-chovatiya Date: Thu, 21 Oct 2021 11:36:05 +0530 Subject: [PATCH 6/7] Resolved review comments --- .circleci/config.yml | 8 +++++++- .../unittests/test_feature_events_primary_keys.py | 14 +++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f24bcca..c0899c3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,13 @@ jobs: name: 'Unit Tests' command: | source 
/usr/local/share/virtualenvs/tap-pendo/bin/activate - nosetests tests/unittests + pip install nose coverage + nosetests --with-coverage --cover-erase --cover-package=tap_pendo --cover-html-dir=htmlcov tests/unittests + coverage html + - store_test_results: + path: test_output/report.xml + - store_artifacts: + path: htmlcov - add_ssh_keys - run: name: 'Integration Tests' diff --git a/tests/unittests/test_feature_events_primary_keys.py b/tests/unittests/test_feature_events_primary_keys.py index 3c74a5b..d99b008 100644 --- a/tests/unittests/test_feature_events_primary_keys.py +++ b/tests/unittests/test_feature_events_primary_keys.py @@ -7,24 +7,24 @@ def test_feature_event_primary_key_with_hourRange(self): ''' Verify that primary keys should have expected fields with 'hour' field when period is hourRange ''' + # Reset key properties to default value + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] config = {"period": "hourRange"} # set hourRange as a period expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "hour"] + feature_event_stream1 = FeatureEvents(config) # Initialize FeatuereEvents object which sets primary keys self.assertEqual(feature_event_stream1.key_properties, expected_primary_keys) - # Reset key properties for other test as it's class variable - FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] - def test_feature_event_primary_key_with_dayRange(self): ''' Verify that primary keys should have expected fields with 'day' field when period is dayRange ''' + # Reset key properties to default value + FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] config = {"period": "dayRange"} # set dayRange as a period - expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"] + expected_primary_keys = ["feature_id", "visitor_id", "account_id", "server", "remote_ip", "user_agent", "day"] + feature_event_stream2 = FeatureEvents(config) # Initialize Events object which sets primary keys self.assertEqual(feature_event_stream2.key_properties, expected_primary_keys) - - # Reset key properties for other test as it's class variable - FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent'] \ No newline at end of file From 925c7f73732eb3be130ce5e1a28fbee13405a197 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Dec 2021 14:13:36 +0530 Subject: [PATCH 7/7] run bookmark test with hour and day range --- tests/tap_tester/test_bookmark.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/tap_tester/test_bookmark.py b/tests/tap_tester/test_bookmark.py index d86a7e8..d27ac88 100644 --- a/tests/tap_tester/test_bookmark.py +++ b/tests/tap_tester/test_bookmark.py @@ -9,7 +9,7 @@ class PendoBookMarkTest(TestPendoBase): def name(self): return "pendo_bookmark_test" - def test_run(self): + def run_test(self): """ Verify that for each stream you can do a sync which records bookmarks. That the bookmark is the maximum value sent to the target for the replication key. 
@@ -191,4 +191,13 @@ def test_run(self): # Verify at least 1 record was replicated in the second sync self.assertGreater( - second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) \ No newline at end of file + second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) + + def test_run(self): + # test for hourRange period + self.is_day_range = False + self.run_test() + + # test for dayRange period + self.is_day_range = True + self.run_test()
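
A minimal usage sketch (not part of the patch series), mirroring the unit tests in PATCH 5/7 above: the `period` value in the tap config determines the final field of the FeatureEvents composite primary key. Constructing the stream from a one-key config is an assumption carried over from those tests; a real config would also include credentials such as x_pendo_integration_key.

    from tap_pendo.streams import FeatureEvents

    # With a dayRange period the composite key ends in "day" ...
    day_stream = FeatureEvents({"period": "dayRange"})  # one-key config, as in the unit tests
    assert day_stream.key_properties[-1] == "day"

    # key_properties is a class variable, so reset it before constructing
    # another stream, exactly as the unit tests do.
    FeatureEvents.key_properties = ['feature_id', 'visitor_id', 'account_id', 'server', 'remote_ip', 'user_agent']

    # ... and with an hourRange period it ends in "hour", matching the replication key.
    hour_stream = FeatureEvents({"period": "hourRange"})
    assert hour_stream.key_properties[-1] == "hour"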