Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Correct unnest_column_name conflict with table_alias #5467

Merged
merged 9 commits into from
Aug 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.1.41
LABEL io.airbyte.version=0.1.42
LABEL io.airbyte.name=airbyte/normalization
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,43 @@
{%- endmacro %}

{#
    BigQuery implementation of json_extract.

    Renders a json_extract(...) call on json_column using normalized_json_path.
    The column is qualified as from_table.json_column unless from_table renders
    to an empty string, in which case the bare column name is used.
    json_path_list is accepted for signature parity with the other adapters
    but is not used here.
#}
{% macro bigquery__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% else %}
json_extract({{ from_table}}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% endif -%}
{%- endmacro %}

{#
    Postgres implementation of json_extract.

    Renders a jsonb_extract_path(...) call on json_column using json_path_list.
    The column is qualified as from_table.json_column unless from_table renders
    to an empty string, in which case the bare column name is used.
    normalized_json_path is accepted for signature parity with the other
    adapters but is not used here.
#}
{% macro postgres__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
{% else %}
jsonb_extract_path({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
{% endif -%}
{%- endmacro %}

{#
    MySQL implementation of json_extract.

    Renders a json_extract(...) call on json_column using normalized_json_path.
    The column is qualified as from_table.json_column unless from_table renders
    to an empty string, in which case the bare column name is used.
    json_path_list is accepted for signature parity with the other adapters
    but is not used here.
#}
{% macro mysql__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% else %}
json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% endif -%}
{%- endmacro %}

{#
    Redshift implementation of json_extract.

    Renders a case expression around json_extract_path_text(..., true): the
    extracted text is returned only when it differs from the empty string;
    otherwise the case expression has no matching branch and yields NULL.
    The column is qualified as from_table.json_column unless from_table renders
    to an empty string, in which case the bare column name is used.
    normalized_json_path is accepted for signature parity with the other
    adapters but is not used here.
#}
{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% else %}
case when json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% endif -%}
{%- endmacro %}

{#
    Snowflake implementation of json_extract.

    Renders get_path(parse_json(json_column), ...) using json_path_list.
    The column is qualified as from_table.json_column unless from_table renders
    to an empty string, in which case the bare column name is used.
    normalized_json_path is accepted for signature parity with the other
    adapters but is not used here.
#}
{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})
{% else %}
get_path(parse_json({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
{% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract(table_alias._airbyte_data, "$['conflict_stream_array']") as conflict_stream_array,
json_extract_array(_airbyte_data, "$['conflict_stream_array']") as conflict_stream_array,
_airbyte_emitted_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_conflict_stream_array as table_alias
-- conflict_stream_array;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ select
cast(id as
string
) as id,
cast(conflict_stream_array as
string
) as conflict_stream_array,
conflict_stream_array,
_airbyte_emitted_at
from `dataline-integration-testing`._airbyte_test_normalization.`conflict_stream_array_ab1`
-- conflict_stream_array;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ select
*,
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(conflict_stream_array as
), ''), '-', coalesce(cast(array_to_string(conflict_stream_array, "|", "") as
string
), '')) as
string
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract(table_alias._airbyte_data, "$['conflict_stream_name']") as conflict_stream_name,

json_extract(table_alias._airbyte_data, "$['conflict_stream_name']")
as conflict_stream_name,
_airbyte_emitted_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_conflict_stream_name as table_alias
-- conflict_stream_name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
select
_airbyte_conflict_stream_name_hashid,
json_extract(table_alias.conflict_stream_name, "$['conflict_stream_name']") as conflict_stream_name,

json_extract(table_alias.conflict_stream_name, "$['conflict_stream_name']")
as conflict_stream_name,
_airbyte_emitted_at
from `dataline-integration-testing`.test_normalization.`conflict_stream_name` as table_alias
where conflict_stream_name is not null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract_scalar(_airbyte_data, "$['date']") as date,
json_extract(table_alias._airbyte_data, "$['partition']") as `partition`,

json_extract(table_alias._airbyte_data, "$['partition']")
as `partition`,
_airbyte_emitted_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names as table_alias
-- nested_stream_with_complex_columns_resulting_into_long_names;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_ab1`
OPTIONS()
as
-- Stage ab1 of unnest_alias: split the raw JSON blob held in _airbyte_data
-- into one output column per top-level field.
select
    -- id is read as a scalar
    json_extract_scalar(_airbyte_data, "$['id']") as id,
    -- children is read with json_extract_array, so it stays an array
    json_extract_array(_airbyte_data, "$['children']") as children,
    _airbyte_emitted_at
from
    `dataline-integration-testing`.test_normalization._airbyte_raw_unnest_alias as table_alias
-- unnest_alias;

Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_ab2`
OPTIONS()
as
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- Stage ab2 of unnest_alias: reads the columns produced by unnest_alias_ab1.
select
    cast(id as
    int64
) as id,
    -- children is passed through without a cast
    children,
    _airbyte_emitted_at
from `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_ab1`
-- unnest_alias;

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_ab3`
OPTIONS()
as
-- Stage ab3 of unnest_alias: keep every column of unnest_alias_ab2 and append
-- _airbyte_unnest_alias_hashid, the hex MD5 of "<id>-<children>".
select
    *,
    to_hex(md5(cast(concat(
        -- a null id contributes an empty string to the hash input
        coalesce(cast(id as string), ''),
        '-',
        -- children is flattened to text with "|" between elements
        coalesce(cast(array_to_string(children, "|", "") as string), '')
    ) as string))) as _airbyte_unnest_alias_hashid
from
    `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_ab2`
-- unnest_alias;

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_ab1`
OPTIONS()
as
-- Stage ab1 of unnest_alias/children: cross join the parent table with its
-- unnested children array and extract each element's fields.
-- NOTE(review): the unnest alias reuses the column name `children`; the
-- select list and where clause presumably resolve to the alias — confirm.
select
    _airbyte_unnest_alias_hashid,
    json_extract_scalar(children, "$['ab_id']") as ab_id,
    -- owner uses json_extract rather than json_extract_scalar
    json_extract(children, "$['owner']") as owner,
    _airbyte_emitted_at
from
    `dataline-integration-testing`.test_normalization.`unnest_alias` as table_alias
    cross join unnest(children) as children
where
    children is not null
-- children at unnest_alias/children;

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_ab2`
OPTIONS()
as
-- Stage ab2 of unnest_alias/children: cast the columns extracted by
-- unnest_alias_children_ab1 to their SQL types.
select
    _airbyte_unnest_alias_hashid,
    cast(ab_id as int64) as ab_id,
    cast(owner as string) as owner,
    _airbyte_emitted_at
from
    `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_ab1`
-- children at unnest_alias/children;

Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_ab3`
OPTIONS()
as
-- Stage ab3 of unnest_alias/children: keep every column of
-- unnest_alias_children_ab2 and append _airbyte_children_hashid, the hex MD5
-- of "<parent hashid>-<ab_id>-<owner>".
select
    *,
    to_hex(md5(cast(concat(
        -- each null input contributes an empty string to the hash input
        coalesce(cast(_airbyte_unnest_alias_hashid as string), ''),
        '-',
        coalesce(cast(ab_id as string), ''),
        '-',
        coalesce(cast(owner as string), '')
    ) as string))) as _airbyte_children_hashid
from
    `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_ab2`
-- children at unnest_alias/children;
-- children at unnest_alias/children;

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_owner_ab1`
OPTIONS()
as
-- Stage ab1 of unnest_alias/children/owner: extract owner_id from the owner
-- JSON column of unnest_alias_children, skipping rows where owner is null.
select
    _airbyte_children_hashid,
    json_extract_scalar(owner, "$['owner_id']") as owner_id,
    _airbyte_emitted_at
from
    `dataline-integration-testing`.test_normalization.`unnest_alias_children` as table_alias
where
    owner is not null
-- owner at unnest_alias/children/owner;

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_owner_ab2`
OPTIONS()
as
-- Stage ab2 of unnest_alias/children/owner: cast owner_id from
-- unnest_alias_children_owner_ab1 to int64.
select
    _airbyte_children_hashid,
    cast(owner_id as int64) as owner_id,
    _airbyte_emitted_at
from
    `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_owner_ab1`
-- owner at unnest_alias/children/owner;
-- owner at unnest_alias/children/owner;

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@


create or replace view `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_owner_ab3`
OPTIONS()
as
-- Stage ab3 of unnest_alias/children/owner: keep every column of
-- unnest_alias_children_owner_ab2 and append _airbyte_owner_hashid, the hex
-- MD5 of "<children hashid>-<owner_id>".
select
    *,
    to_hex(md5(cast(concat(
        -- each null input contributes an empty string to the hash input
        coalesce(cast(_airbyte_children_hashid as string), ''),
        '-',
        coalesce(cast(owner_id as string), '')
    ) as string))) as _airbyte_owner_hashid
from
    `dataline-integration-testing`._airbyte_test_normalization.`unnest_alias_children_owner_ab2`
-- owner at unnest_alias/children/owner;

This file was deleted.

Loading