diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index e335c4e097f7..e8d065df8633 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 { fn compute_distance(time_diff: i64, stride: i64) -> i64 { let time_delta = time_diff - (time_diff % stride); - if time_diff < 0 && stride > 1 { + if time_diff < 0 && stride > 1 && time_delta != time_diff { // The origin is later than the source timestamp, round down to the previous bin time_delta - stride } else { @@ -864,4 +864,32 @@ mod tests { assert_eq!(result, expected1, "{source} = {expected}"); }) } + + #[test] + fn test_date_bin_before_epoch() { + let cases = [ + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"), + "1969-12-31T23:30:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00"), + "1969-12-31T23:45:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"), + "1969-12-31T23:45:00", + ), + ]; + + cases.iter().for_each(|((stride, source), expected)| { + let stride = stride.unwrap(); + let stride1 = stride.num_nanoseconds().unwrap(); + let source1 = string_to_timestamp_nanos(source).unwrap(); + + let expected1 = string_to_timestamp_nanos(expected).unwrap(); + let result = date_bin_nanos_interval(stride1, source1, 0); + assert_eq!(result, expected1, "{source} = {expected}"); + }) + } } diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 38c2a6647273..a09a63a791fc 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); ---- 2022-06-01T00:00:00 +# Times before the unix epoch +query P +select date_bin('1 hour', column1) +from (values + (timestamp '1969-01-01 00:00:00'), + (timestamp '1969-01-01 00:15:00'), + (timestamp '1969-01-01 00:30:00'), + (timestamp '1969-01-01 00:45:00'), + (timestamp '1969-01-01 01:00:00') +) as sq +---- +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T01:00:00 + ### ## test date_trunc function ###