Skip to content

Commit

Permalink
Merge pull request #57 from ScalefreeCOM/fix_ghost_records_derived_datatypes
Browse files Browse the repository at this point in the history

Fix ghost records derived datatypes
  • Loading branch information
tkirschke authored Jan 25, 2023
2 parents ccae341 + 699dc7e commit 98f9717
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
4 changes: 2 additions & 2 deletions macros/staging/snowflake/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ unknown_values AS (
{%- if datavault4dbt.is_something(derived_columns) -%},
{# Additionally generating Ghost Records for Derived Columns #}
{%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %}
{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }}
{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }}
{%- if not loop.last %},{% endif -%}
{%- endfor -%}

Expand Down Expand Up @@ -402,7 +402,7 @@ error_values AS (
{%- if datavault4dbt.is_something(derived_columns) %},
{# Additionally generating Ghost Records for Derived Columns #}
{%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %}
{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }}
{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }}
{%- if not loop.last %},{% endif %}
{%- endfor -%}

Expand Down
48 changes: 43 additions & 5 deletions macros/supporting/ghost_record_per_datatype.sql
Original file line number Diff line number Diff line change
Expand Up @@ -131,17 +131,55 @@
{%- set beginning_of_all_times = datavault4dbt.beginning_of_all_times() -%}
{%- set end_of_all_times = datavault4dbt.end_of_all_times() -%}
{%- set timestamp_format = datavault4dbt.timestamp_format() -%}
{%- set unknown_value__STRING = var('datavault4dbt.unknown_value__STRING', '(unknown)') -%}
{%- set error_value__STRING = var('datavault4dbt.error_value__STRING', '(error)') -%}
{%- set unknown_value_alt__STRING = var('datavault4dbt.unknown_value_alt__STRING', 'u') -%}
{%- set error_value_alt__STRING = var('datavault4dbt.error_value_alt__STRING', 'e') -%}

{%- if ghost_record_type == 'unknown' -%}
{%- if datatype in ['TIMESTAMP_NTZ','TIMESTAMP'] %}{{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} AS {{ column_name }}
{% elif datatype in ['STRING','VARCHAR'] %}'(unknown)' AS {{ column_name }}
{% elif datatype in ['NUMBER','INT','FLOAT','DECIMAL'] %}0 AS {{ column_name }}
{% elif datatype == 'BOOLEAN' %}CAST('FALSE' AS BOOLEAN) AS {{ column_name }}
{% else %}NULL AS {{ column_name }}
{%- elif datatype in ['STRING', 'VARCHAR'] %}'{{ unknown_value__STRING }}' AS {{ column_name }}
{%- elif datatype == 'CHAR' %}CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }} ) as "{{ column_name }}"
{%- elif datatype.upper().startswith('VARCHAR(') or datatype.upper().startswith('CHAR(') -%}
{%- if col_size is not none -%}
{%- set unknown_dtype_length = col_size | int -%}
{%- if '(' not in datatype -%}
{%- set datatype = datatype ~ "(" ~ (unknown_dtype_length|string) ~ ")" -%}
{%- endif -%}
{%- else -%}
{%- set inside_parenthesis = datatype.split(")")[0] |string -%}
{%- set inside_parenthesis = inside_parenthesis.split("(")[1]-%}
{%- set unknown_dtype_length = inside_parenthesis | int -%}
{%- endif -%}
{%- if unknown_dtype_length < unknown_value__STRING|length -%}
CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }} ) as "{{ column_name }}"
{%- else -%}
CAST('{{ unknown_value__STRING }}' as {{ datatype }} ) as "{{ column_name }}"
{%- endif -%}
{%- elif datatype in ['NUMBER','INT','FLOAT','DECIMAL'] %}0 AS {{ column_name }}
{%- elif datatype == 'BOOLEAN' %}CAST('FALSE' AS BOOLEAN) AS {{ column_name }}
{%- else %}NULL AS {{ column_name }}
{% endif %}
{%- elif ghost_record_type == 'error' -%}
{%- if datatype in ['TIMESTAMP_NTZ','TIMESTAMP'] %}{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} AS {{ column_name }}
{% elif datatype in ['STRING','VARCHAR'] %}'(error)' AS {{ column_name }}
{%- elif datatype in ['STRING','VARCHAR'] %}'{{ error_value__STRING }}' AS {{ column_name }}
{%- elif datatype == 'CHAR' %}CAST('{{ error_value_alt__STRING }}' as {{ datatype }} ) as "{{ column_name }}"
{%- elif datatype.upper().startswith('VARCHAR(') or datatype.upper().startswith('CHAR(') -%}
{%- if col_size is not none -%}
{%- set error_dtype_length = col_size | int -%}
{%- if '(' not in datatype -%}
{%- set datatype = datatype ~ "(" ~ (error_dtype_length|string) ~ ")" -%}
{%- endif -%}
{%- else -%}
{%- set inside_parenthesis = datatype.split(")")[0] |string -%}
{%- set inside_parenthesis = inside_parenthesis.split("(")[1]-%}
{%- set error_dtype_length = inside_parenthesis | int -%}
{%- endif -%}
{%- if error_dtype_length < error_value__STRING|length -%}
CAST('{{ error_value_alt__STRING }}' as {{ datatype }} ) as "{{ column_name }}"
{%- else -%}
CAST('{{ error_value__STRING }}' as {{ datatype }} ) as "{{ column_name }}"
{%- endif -%}
{% elif datatype in ['NUMBER','INT','FLOAT','DECIMAL'] %}-1 AS {{ column_name }}
{% elif datatype == 'BOOLEAN' %}CAST('FALSE' AS BOOLEAN) AS {{ column_name }}
{% else %}NULL AS {{ column_name }}
Expand Down

0 comments on commit 98f9717

Please sign in to comment.