From 60fd9d5ab922f65934ab07f84985c3273fdd1ff8 Mon Sep 17 00:00:00 2001 From: Dmytro Date: Mon, 6 Sep 2021 10:11:48 +0300 Subject: [PATCH] :bug: Fix hubspot datetime empty string (#5798) * Fix hubspot datetime empty string --- .../bases/base-python/setup.py | 2 +- .../bases/source-acceptance-test/CHANGELOG.md | 3 ++ .../bases/source-acceptance-test/Dockerfile | 2 +- .../source_acceptance_test/utils/asserts.py | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../integration_tests/integration_test.py | 27 ------------ .../source-hubspot/source_hubspot/api.py | 34 +++++++++++---- .../unit_tests/test_field_type_converting.py | 43 +++++++++++-------- docs/integrations/sources/hubspot.md | 3 +- 9 files changed, 60 insertions(+), 58 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-hubspot/integration_tests/integration_test.py diff --git a/airbyte-integrations/bases/base-python/setup.py b/airbyte-integrations/bases/base-python/setup.py index 8e46a2086f5b..0e5a6fe20c62 100644 --- a/airbyte-integrations/bases/base-python/setup.py +++ b/airbyte-integrations/bases/base-python/setup.py @@ -37,7 +37,7 @@ "PyYAML==5.4", "pydantic==1.6.*", "airbyte-protocol", - "jsonschema==2.6.0", + "jsonschema==3.2.0", "requests", "backoff", "pytest", diff --git a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md index fb9df1d40958..096a3bdae91a 100644 --- a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md +++ b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.18 +Fix checking date-time format againt nullable field. + ## 0.1.17 Fix serialize function for acceptance-tests: https://github.com/airbytehq/airbyte/pull/5738 diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index 3aef6cfc616d..2979e5506ac3 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -9,7 +9,7 @@ COPY setup.py ./ COPY pytest.ini ./ RUN pip install . -LABEL io.airbyte.version=0.1.17 +LABEL io.airbyte.version=0.1.18 LABEL io.airbyte.name=airbyte/source-acceptance-test ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin"] diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py index 83aec4cca464..3044ee305d36 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py @@ -52,7 +52,7 @@ def check_datetime(value: str) -> bool: return valid_format and valid_time def check(self, instance, format): - if format == "date-time": + if instance is not None and format == "date-time": if not self.check_datetime(instance): raise FormatError(f"{instance} has invalid datetime format") else: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 51d773f4faa7..2f18fda5f9c8 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -14,5 +14,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh" -LABEL io.airbyte.version=0.1.11 +LABEL io.airbyte.version=0.1.12 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-hubspot/integration_tests/integration_test.py deleted file mode 100644 index 6d275a2544ab..000000000000 --- a/airbyte-integrations/connectors/source-hubspot/integration_tests/integration_test.py +++ /dev/null @@ -1,27 +0,0 @@ -# -# MIT License -# -# Copyright (c) 2020 Airbyte -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# - - -def test_example(): - assert True diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 9fc56466871a..e0d4818c0f00 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -59,14 +59,14 @@ "phone_number": ("string", None), } -CUSTOM_FIELD_VALUE_TYPE_CAST = { +CUSTOM_FIELD_TYPE_TO_VALUE = { bool: "boolean", str: "string", float: "number", int: "integer", } -CUSTOM_FIELD_VALUE_TYPE_CAST_REVERSED = {v: k for k, v in CUSTOM_FIELD_VALUE_TYPE_CAST.items()} +CUSTOM_FIELD_VALUE_TO_TYPE = {v: k for k, v in CUSTOM_FIELD_TYPE_TO_VALUE.items()} def retry_connection_handler(**kwargs): @@ -259,18 +259,31 @@ def _filter_dynamic_fields(self, records: Iterable) -> Iterable: yield record @staticmethod - def _cast_value(declared_field_types: List, field_name: str, field_value): + def _cast_value(declared_field_types: List, field_name: str, field_value: Any, declared_format: str = None) -> Any: + """ + Convert record's received value according to its declared catalog json schema type / format / attribute name. + :param declared_field_types type from catalog schema + :param field_name value's attribute name + :param field_value actual value to cast + :param declared_format format field value from catalog schema + :return Converted value for record + """ - if field_value is None and "null" in declared_field_types: - return field_value + if "null" in declared_field_types: + if field_value is None: + return field_value + # Sometime hubspot output empty string on field with format set. + # Set it to null to avoid errors on destination' normalization stage. + if declared_format and field_value == "": + return None actual_field_type = type(field_value) - actual_field_type_name = CUSTOM_FIELD_VALUE_TYPE_CAST.get(actual_field_type) + actual_field_type_name = CUSTOM_FIELD_TYPE_TO_VALUE.get(actual_field_type) if actual_field_type_name in declared_field_types: return field_value target_type_name = next(filter(lambda t: t != "null", declared_field_types)) - target_type = CUSTOM_FIELD_VALUE_TYPE_CAST_REVERSED.get(target_type_name) + target_type = CUSTOM_FIELD_VALUE_TO_TYPE.get(target_type_name) if target_type_name == "number": # do not cast numeric IDs into float, use integer instead @@ -300,10 +313,13 @@ def _cast_record_fields_if_needed(self, record: Mapping, properties: Mapping[str properties = properties or self.properties for field_name, field_value in record["properties"].items(): - declared_field_types = properties[field_name].get("type") or [] + declared_field_types = properties[field_name].get("type", []) if not isinstance(declared_field_types, Iterable): declared_field_types = [declared_field_types] - record["properties"][field_name] = self._cast_value(declared_field_types, field_name, field_value) + format = properties[field_name].get("format") + record["properties"][field_name] = self._cast_value( + declared_field_types=declared_field_types, field_name=field_name, field_value=field_value, declared_format=format + ) return record diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py index 1e2ab8f7ecdf..8f1ca6f038e1 100644 --- a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py @@ -67,28 +67,37 @@ def test_bad_field_type_converting(field_type, expected, capsys): @pytest.mark.parametrize( - "declared_field_types,field_name,field_value,casted_value", + "declared_field_types,field_name,field_value,format,casted_value", [ # test for None in field_values - (["null", "string"], "some_field", None, None), - (["null", "number"], "some_field", None, None), - (["null", "integer"], "some_field", None, None), - (["null", "object"], "some_field", None, None), - (["null", "boolean"], "some_field", None, None), + (["null", "string"], "some_field", None, None, None), + (["null", "number"], "some_field", None, None, None), + (["null", "integer"], "some_field", None, None, None), + (["null", "object"], "some_field", None, None, None), + (["null", "boolean"], "some_field", None, None, None), # specific cases - ("string", "some_field", "test", "test"), - (["null", "number"], "some_field", "123.456", 123.456), - (["null", "number"], "user_id", "123", 123), - (["null", "string"], "some_field", "123", "123"), + ("string", "some_field", "test", None, "test"), + (["null", "number"], "some_field", "123.456", None, 123.456), + (["null", "number"], "user_id", "123", None, 123), + (["null", "string"], "some_field", "123", None, "123"), # when string has empty field_value (empty string) - (["null", "string"], "some_field", "", ""), + (["null", "string"], "some_field", "", None, ""), # when NOT string type but has empty sting in field_value, instead of double or null, # we should use None instead, to have it properly casted to the correct type - (["null", "number"], "some_field", "", None), - (["null", "integer"], "some_field", "", None), - (["null", "object"], "some_field", "", None), - (["null", "boolean"], "some_field", "", None), + (["null", "number"], "some_field", "", None, None), + (["null", "integer"], "some_field", "", None, None), + (["null", "object"], "some_field", "", None, None), + (["null", "boolean"], "some_field", "", None, None), + # Test casting fields with format specified + (["null", "string"], "some_field", "", "date-time", None), + (["string"], "some_field", "", "date-time", ""), + (["null", "string"], "some_field", "2020", "date-time", "2020"), ], ) -def test_cast_type_if_needed(declared_field_types, field_name, field_value, casted_value): - assert Stream._cast_value(declared_field_types, field_name, field_value) == casted_value +def test_cast_type_if_needed(declared_field_types, field_name, field_value, format, casted_value): + assert ( + Stream._cast_value( + declared_field_types=declared_field_types, field_name=field_name, field_value=field_value, declared_format=format + ) + == casted_value + ) diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index fcabe61193d4..2d887278f20a 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -73,7 +73,8 @@ This connector supports only authentication with API Key. To obtain API key for | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | -| 0.1.11 | 2021-08-26 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Remove all date-time format from schemas | +| 0.1.12 | 2021-09-02 | [5798](https://github.com/airbytehq/airbyte/pull/5798) | Treat empty string values as None for field with format to fix normalization errors | +| 0.1.11 | 2021-08-26 | [5685](https://github.com/airbytehq/airbyte/pull/5685) | Remove all date-time format from schemas | | 0.1.10 | 2021-08-17 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Fix fail on reading stream using `API Key` without required permissions | | 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype | | 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions |