Skip to content

Commit

Permalink
🐛 Fix hubspot datetime empty string (#5798)
Browse files Browse the repository at this point in the history
* Fix hubspot datetime empty string
  • Loading branch information
avida authored Sep 6, 2021
1 parent 483d68c commit 60fd9d5
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 58 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"PyYAML==5.4",
"pydantic==1.6.*",
"airbyte-protocol",
"jsonschema==2.6.0",
"jsonschema==3.2.0",
"requests",
"backoff",
"pytest",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.18
Fix checking date-time format againt nullable field.

## 0.1.17
Fix serialize function for acceptance-tests: https://github.com/airbytehq/airbyte/pull/5738

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ COPY setup.py ./
COPY pytest.ini ./
RUN pip install .

LABEL io.airbyte.version=0.1.17
LABEL io.airbyte.version=0.1.18
LABEL io.airbyte.name=airbyte/source-acceptance-test

ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin"]
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def check_datetime(value: str) -> bool:
return valid_format and valid_time

def check(self, instance, format):
if format == "date-time":
if instance is not None and format == "date-time":
if not self.check_datetime(instance):
raise FormatError(f"{instance} has invalid datetime format")
else:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-hubspot/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ RUN pip install .

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"

LABEL io.airbyte.version=0.1.11
LABEL io.airbyte.version=0.1.12
LABEL io.airbyte.name=airbyte/source-hubspot

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@
"phone_number": ("string", None),
}

CUSTOM_FIELD_VALUE_TYPE_CAST = {
CUSTOM_FIELD_TYPE_TO_VALUE = {
bool: "boolean",
str: "string",
float: "number",
int: "integer",
}

CUSTOM_FIELD_VALUE_TYPE_CAST_REVERSED = {v: k for k, v in CUSTOM_FIELD_VALUE_TYPE_CAST.items()}
CUSTOM_FIELD_VALUE_TO_TYPE = {v: k for k, v in CUSTOM_FIELD_TYPE_TO_VALUE.items()}


def retry_connection_handler(**kwargs):
Expand Down Expand Up @@ -259,18 +259,31 @@ def _filter_dynamic_fields(self, records: Iterable) -> Iterable:
yield record

@staticmethod
def _cast_value(declared_field_types: List, field_name: str, field_value):
def _cast_value(declared_field_types: List, field_name: str, field_value: Any, declared_format: str = None) -> Any:
"""
Convert record's received value according to its declared catalog json schema type / format / attribute name.
:param declared_field_types type from catalog schema
:param field_name value's attribute name
:param field_value actual value to cast
:param declared_format format field value from catalog schema
:return Converted value for record
"""

if field_value is None and "null" in declared_field_types:
return field_value
if "null" in declared_field_types:
if field_value is None:
return field_value
# Sometime hubspot output empty string on field with format set.
# Set it to null to avoid errors on destination' normalization stage.
if declared_format and field_value == "":
return None

actual_field_type = type(field_value)
actual_field_type_name = CUSTOM_FIELD_VALUE_TYPE_CAST.get(actual_field_type)
actual_field_type_name = CUSTOM_FIELD_TYPE_TO_VALUE.get(actual_field_type)
if actual_field_type_name in declared_field_types:
return field_value

target_type_name = next(filter(lambda t: t != "null", declared_field_types))
target_type = CUSTOM_FIELD_VALUE_TYPE_CAST_REVERSED.get(target_type_name)
target_type = CUSTOM_FIELD_VALUE_TO_TYPE.get(target_type_name)

if target_type_name == "number":
# do not cast numeric IDs into float, use integer instead
Expand Down Expand Up @@ -300,10 +313,13 @@ def _cast_record_fields_if_needed(self, record: Mapping, properties: Mapping[str
properties = properties or self.properties

for field_name, field_value in record["properties"].items():
declared_field_types = properties[field_name].get("type") or []
declared_field_types = properties[field_name].get("type", [])
if not isinstance(declared_field_types, Iterable):
declared_field_types = [declared_field_types]
record["properties"][field_name] = self._cast_value(declared_field_types, field_name, field_value)
format = properties[field_name].get("format")
record["properties"][field_name] = self._cast_value(
declared_field_types=declared_field_types, field_name=field_name, field_value=field_value, declared_format=format
)

return record

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,37 @@ def test_bad_field_type_converting(field_type, expected, capsys):


@pytest.mark.parametrize(
"declared_field_types,field_name,field_value,casted_value",
"declared_field_types,field_name,field_value,format,casted_value",
[
# test for None in field_values
(["null", "string"], "some_field", None, None),
(["null", "number"], "some_field", None, None),
(["null", "integer"], "some_field", None, None),
(["null", "object"], "some_field", None, None),
(["null", "boolean"], "some_field", None, None),
(["null", "string"], "some_field", None, None, None),
(["null", "number"], "some_field", None, None, None),
(["null", "integer"], "some_field", None, None, None),
(["null", "object"], "some_field", None, None, None),
(["null", "boolean"], "some_field", None, None, None),
# specific cases
("string", "some_field", "test", "test"),
(["null", "number"], "some_field", "123.456", 123.456),
(["null", "number"], "user_id", "123", 123),
(["null", "string"], "some_field", "123", "123"),
("string", "some_field", "test", None, "test"),
(["null", "number"], "some_field", "123.456", None, 123.456),
(["null", "number"], "user_id", "123", None, 123),
(["null", "string"], "some_field", "123", None, "123"),
# when string has empty field_value (empty string)
(["null", "string"], "some_field", "", ""),
(["null", "string"], "some_field", "", None, ""),
# when NOT string type but has empty sting in field_value, instead of double or null,
# we should use None instead, to have it properly casted to the correct type
(["null", "number"], "some_field", "", None),
(["null", "integer"], "some_field", "", None),
(["null", "object"], "some_field", "", None),
(["null", "boolean"], "some_field", "", None),
(["null", "number"], "some_field", "", None, None),
(["null", "integer"], "some_field", "", None, None),
(["null", "object"], "some_field", "", None, None),
(["null", "boolean"], "some_field", "", None, None),
# Test casting fields with format specified
(["null", "string"], "some_field", "", "date-time", None),
(["string"], "some_field", "", "date-time", ""),
(["null", "string"], "some_field", "2020", "date-time", "2020"),
],
)
def test_cast_type_if_needed(declared_field_types, field_name, field_value, casted_value):
assert Stream._cast_value(declared_field_types, field_name, field_value) == casted_value
def test_cast_type_if_needed(declared_field_types, field_name, field_value, format, casted_value):
assert (
Stream._cast_value(
declared_field_types=declared_field_types, field_name=field_name, field_value=field_value, declared_format=format
)
== casted_value
)
3 changes: 2 additions & 1 deletion docs/integrations/sources/hubspot.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ This connector supports only authentication with API Key. To obtain API key for

| Version | Date | Pull Request | Subject |
| :------ | :-------- | :----- | :------ |
| 0.1.11 | 2021-08-26 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Remove all date-time format from schemas |
| 0.1.12 | 2021-09-02 | [5798](https://github.com/airbytehq/airbyte/pull/5798) | Treat empty string values as None for field with format to fix normalization errors |
| 0.1.11 | 2021-08-26 | [5685](https://github.com/airbytehq/airbyte/pull/5685) | Remove all date-time format from schemas |
| 0.1.10 | 2021-08-17 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Fix fail on reading stream using `API Key` without required permissions |
| 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype |
| 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions |
Expand Down

0 comments on commit 60fd9d5

Please sign in to comment.