Skip to content

Commit

Permalink
Fix date_diff in Snowflake (#10672)
Browse files Browse the repository at this point in the history
- Closes #10438
- The results are again aligned across backends.
  • Loading branch information
radeusgd authored Jul 25, 2024
1 parent 6189eb3 commit ba8ae45
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -628,16 +628,25 @@ make_date_diff arguments (metadata : Date_Period_Metadata) =
if arguments.length != 2 then Error.throw (Illegal_State.Error "date_diff expects exactly 2 sub expressions. This is a bug in Database library.") else
start = arguments.at 0
end = arguments.at 1
part_with_multiplier = date_period_to_part_with_multiplier metadata.period
date_part = part_with_multiplier.first
multiplier = part_with_multiplier.second
## The SQL type to add as a cast. This is needed, because otherwise this operation is losing type information,
especially if given NULL (Nothing). It would tell that it returns a VARCHAR which is not true.
sql_typ = sql_type_string_for_date_time metadata.input_value_type
diff = SQL_Builder.code "DATEDIFF('" ++ date_part ++ "', (" ++ start ++ ")::" ++ sql_typ ++ ", (" ++ end ++ ")::" ++ sql_typ ++ ")"
if multiplier == 1 then diff else
# We want to return integer, so we truncate any fractional part that did not constitute a full unit.
SQL_Builder.code "TRUNC(" ++ diff ++ " / " ++ multiplier.to_text ++ ")"
case metadata.period of
# Year / Quarter / Month rely on MONTHS_BETWEEN because DATEDIFF looked just at months and years and ignored days, yielding incorrect results.
Date_Period.Year ->
SQL_Builder.code "TRUNC(MONTHS_BETWEEN((" ++ end ++ ")::DATE, (" ++ start ++ ")::DATE) / 12)"
Date_Period.Quarter ->
SQL_Builder.code "TRUNC(MONTHS_BETWEEN((" ++ end ++ ")::DATE, (" ++ start ++ ")::DATE) / 3)"
Date_Period.Month ->
SQL_Builder.code "TRUNC(MONTHS_BETWEEN((" ++ end ++ ")::DATE, (" ++ start ++ ")::DATE))"
_ ->
part_with_multiplier = date_period_to_part_with_multiplier metadata.period
date_part = part_with_multiplier.first
multiplier = part_with_multiplier.second
## The SQL type to add as a cast. This is needed, because otherwise this operation is losing type information,
especially if given NULL (Nothing). It would tell that it returns a VARCHAR which is not true.
sql_typ = sql_type_string_for_date_time metadata.input_value_type
diff = SQL_Builder.code "DATEDIFF('" ++ date_part ++ "', (" ++ start ++ ")::" ++ sql_typ ++ ", (" ++ end ++ ")::" ++ sql_typ ++ ")"
if multiplier == 1 then diff else
# We want to return integer, so we truncate any fractional part that did not constitute a full unit.
SQL_Builder.code "TRUNC(" ++ diff ++ " / " ++ multiplier.to_text ++ ")"

## PRIVATE
make_date_trunc_to_day arguments =
Expand Down
8 changes: 7 additions & 1 deletion test/Snowflake_Tests/src/Snowflake_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,14 @@ with_secret name value callback = case value of
get_configured_connection_details = Panic.rethrow <|
account_name = Environment.get "ENSO_SNOWFLAKE_ACCOUNT"
if account_name.is_nothing then Nothing else
if account_name.is_empty then
Panic.throw (Illegal_Argument.Error "ENSO_SNOWFLAKE_ACCOUNT is set, but empty. Please set all required environment variables.")

get_var name =
Environment.get name if_missing=(Panic.throw (Illegal_State.Error "ENSO_SNOWFLAKE_ACCOUNT is set, but "+name+" is not. Please set all required environment variables."))
value = Environment.get name if_missing=(Panic.throw (Illegal_State.Error "ENSO_SNOWFLAKE_ACCOUNT is set, but "+name+" is not. Please set all required environment variables."))
if value.is_empty then
Panic.throw (Illegal_State.Error "The "+name+" environment variable is set, but it is empty. Please set all required environment variables.")
value
user = get_var "ENSO_SNOWFLAKE_USER"
password = get_var "ENSO_SNOWFLAKE_PASSWORD"
database = get_var "ENSO_SNOWFLAKE_DATABASE"
Expand Down
17 changes: 4 additions & 13 deletions test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,11 @@ add_specs suite_builder setup =

(t1.at "X").date_diff (t1.at "Y") Date_Period.Month . to_vector . should_equal [1]
(t1.at "X").date_diff (Date.new 2020 12 1) Date_Period.Month . to_vector . should_equal [-11]
(t1.at "X").date_diff (Date.new 2021 12 1) Date_Period.Month . to_vector . should_equal [0]

(t1.at "X").date_diff (t1.at "Y") Date_Period.Quarter . to_vector . should_equal [0]
(t1.at "X").date_diff (Date.new 2021 5 1) Date_Period.Quarter . to_vector . should_equal [-2]
(t1.at "X").date_diff (Date.new 2023 7 1) Date_Period.Quarter . to_vector . should_equal [6]

(t1.at "X").date_diff (t1.at "Y") Date_Period.Year . to_vector . should_equal [0]
(t1.at "X").date_diff (Date.new 2021 12 1) Date_Period.Year . to_vector . should_equal [0]
Expand All @@ -315,6 +317,7 @@ add_specs suite_builder setup =
(t1.at "X1").date_diff (Date.new 2021 03 02) Date_Period.Day . to_vector . should_equal [59]
(t1.at "X1").date_diff (Date.new 2021 03 02) Date_Period.Month . to_vector . should_equal [2]
(t1.at "X1").date_diff (Date.new 2021 03 01) Date_Period.Day . to_vector . should_equal [58]
(t1.at "X1").date_diff (Date.new 2021 03 01) Date_Period.Month . to_vector . should_equal [1]

# We do allow the `Time_Period.Day` as a kind of alias for `Date_Period.Day` here.
(t1.at "X").date_diff (t1.at "Y") Time_Period.Day . to_vector . should_equal [32]
Expand All @@ -327,6 +330,7 @@ add_specs suite_builder setup =
(t2.at "X").date_diff (Date_Time.new 2021 11 3 10 15 0 zone=zone) Date_Period.Day . to_vector . should_equal [0]

(t2.at "X").date_diff (t2.at "Y") Date_Period.Month . to_vector . should_equal [1]
(t2.at "X").date_diff (Date_Time.new 2021 12 1 10 15 0 zone=zone) Date_Period.Month . to_vector . should_equal [0]

(t2.at "X").date_diff (t2.at "Y") Date_Period.Year . to_vector . should_equal [0]
(t2.at "X").date_diff (Date_Time.new 2031 12 1 10 15 0 zone=zone) Date_Period.Year . to_vector . should_equal [10]
Expand Down Expand Up @@ -387,19 +391,6 @@ add_specs suite_builder setup =
(t3.at "X").date_diff (t3.at "Y") Time_Period.Nanosecond . should_fail_with Unsupported_Database_Operation
(t3.at "X").date_diff (Time_Of_Day.new 10 15 12 34 56 78) Time_Period.Nanosecond . should_fail_with Unsupported_Database_Operation

## TODO these were extracted from the test above, they may later be merged again, or we may keep them separate depending on resolution of
https://github.com/enso-org/enso/issues/10438
group_builder.specify "date_diff edge cases" pending=(if setup.prefix.contains "Snowflake" then "TODO: https://github.com/enso-org/enso/issues/10438") <|
t1 = dates1.get.select_columns ["d_X", "d_Y", "X1"] reorder=True . rename_columns ["X", "Y", "X1"]
## TODO Snowflake just looks at year and month, but Postgres/in-memory look if it was a full month or not by looking at days
(t1.at "X").date_diff (Date.new 2021 12 1) Date_Period.Month . to_vector . should_equal [0]
(t1.at "X").date_diff (Date.new 2023 7 1) Date_Period.Quarter . to_vector . should_equal [6]

(t1.at "X1").date_diff (Date.new 2021 03 01) Date_Period.Month . to_vector . should_equal [1]

t2 = dates1.get.select_columns ["dt_X", "dt_Y"] reorder=True . rename_columns ["X", "Y"]
(t2.at "X").date_diff (Date_Time.new 2021 12 1 10 15 0 zone=zone) Date_Period.Month . to_vector . should_equal [0]

group_builder.specify "date_diff should return integers" <|
t = table_builder [["X", [Date.new 2021 01 31]], ["Y", [Time_Of_Day.new 12 30 20]], ["Z", [Date_Time.new 2021 12 5 12 30 20]]]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ add_specs suite_builder setup =
t2.set (Simple_Expression.Simple_Expr (Column_Ref.Name "C") (Simple_Calculation.Date_Add 5 Date_Period.Month)) "Z" . at "Z" . to_vector . should_equal [Date.new 2003 5 10, Date.new 2005 6 01]
t2.set (Simple_Expression.Simple_Expr (Column_Ref.Name "D") (Simple_Calculation.Date_Add 15 Time_Period.Hour)) "Z" . at "Z" . to_vector . should_equal [Time_Of_Day.new 03 45, Time_Of_Day.new 16 01]

t2.set (Simple_Expression.Simple_Expr (Column_Ref.Name "C") (Simple_Calculation.Date_Diff (Date.new 2003 01 01) Date_Period.Year)) "Z" . at "Z" . to_vector . should_equal [0, -2]
t2.set (Simple_Expression.Simple_Expr (Column_Ref.Name "D") (Simple_Calculation.Date_Diff (Time_Of_Day.new 13) Time_Period.Minute)) "Z" . at "Z" . to_vector . should_equal [15, 59+(60*11)]

t2.set (Simple_Expression.Simple_Expr (Column_Ref.Name "C") (Simple_Calculation.Date_Part Date_Period.Year)) "Z" . at "Z" . to_vector . should_equal [2002, 2005]
Expand All @@ -106,10 +107,6 @@ add_specs suite_builder setup =
t.set (Simple_Expression.Simple_Expr (Column_Ref.Name "x") (Simple_Calculation.Date_Part Date_Period.Year)) . should_fail_with Invalid_Value_Type
Test.expect_panic Type_Error <| t2.set (Simple_Expression.Simple_Expr 42 (Simple_Calculation.Date_Diff "x" Date_Period.Year))

group_builder.specify "date/time (edge case)" pending=(pending_datetime.if_nothing (if setup.prefix.contains "Snowflake" then "TODO: https://github.com/enso-org/enso/issues/10438")) <|
t2 = table_builder [["C", [Date.new 2002 12 10, Date.new 2005 01 01]], ["D", [Time_Of_Day.new 12 45, Time_Of_Day.new 01 01]]]
t2.set (Simple_Expression.Simple_Expr (Column_Ref.Name "C") (Simple_Calculation.Date_Diff (Date.new 2003 01 01) Date_Period.Year)) "Z" . at "Z" . to_vector . should_equal [0, -2]

group_builder.specify "boolean" <|
t = table_builder [["A", [True, False]], ["T", [True, True]]]

Expand Down

0 comments on commit ba8ae45

Please sign in to comment.