Skip to content

Commit

Permalink
fix: date_bin() on timestamps before 1970 (#13204)
Browse files Browse the repository at this point in the history
* fix: date_bin() on timestamps before 1970

The date_bin() function did not work correctly for timestamps before
1970. Specifically, if the input timestamp was exactly the start time of
a bin, it was placed in the previous bin.

The % operator yields a negative (or zero) remainder when the dividend is
negative. This causes the date_bin calculation to round up to the next
bin. To compensate, the size of one interval is subtracted from the result
if the input is negative. This subtraction is no longer performed if the
input is already exactly the start time of a bin.

* fix clippy

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
mhilton and alamb authored Nov 1, 2024
1 parent 592b924 commit a2e5330
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
30 changes: 29 additions & 1 deletion datafusion/functions/src/datetime/date_bin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 {
fn compute_distance(time_diff: i64, stride: i64) -> i64 {
let time_delta = time_diff - (time_diff % stride);

if time_diff < 0 && stride > 1 {
if time_diff < 0 && stride > 1 && time_delta != time_diff {
// The origin is later than the source timestamp, round down to the previous bin
time_delta - stride
} else {
Expand Down Expand Up @@ -864,4 +864,32 @@ mod tests {
assert_eq!(result, expected1, "{source} = {expected}");
})
}

/// Checks `date_bin_nanos_interval` with a 15-minute stride and an origin of 0
/// for sources just before, exactly on, and just after a bin boundary that
/// lies before the Unix epoch.
#[test]
fn test_date_bin_before_epoch() {
    // (stride, source timestamp, expected start of bin)
    let fifteen_minutes = TimeDelta::try_minutes(15);
    let cases = [
        (
            fifteen_minutes,
            "1969-12-31T23:44:59.999999999",
            "1969-12-31T23:30:00",
        ),
        (
            fifteen_minutes,
            "1969-12-31T23:45:00",
            "1969-12-31T23:45:00",
        ),
        (
            fifteen_minutes,
            "1969-12-31T23:45:00.000000001",
            "1969-12-31T23:45:00",
        ),
    ];

    for &(stride, source, expected) in cases.iter() {
        let stride_nanos = stride.unwrap().num_nanoseconds().unwrap();
        let source_nanos = string_to_timestamp_nanos(source).unwrap();
        let expected_nanos = string_to_timestamp_nanos(expected).unwrap();

        let binned = date_bin_nanos_interval(stride_nanos, source_nanos, 0);
        assert_eq!(binned, expected_nanos, "{source} = {expected}");
    }
}
}
17 changes: 17 additions & 0 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z');
----
2022-06-01T00:00:00

# Times before the unix epoch.
# Bins five timestamps from 1969 with a '1 hour' stride. Per the expected rows
# below, inputs that fall exactly on an hour boundary (00:00:00 and 01:00:00)
# are returned unchanged, and the inputs within the hour map to 00:00:00.
query P
select date_bin('1 hour', column1)
from (values
(timestamp '1969-01-01 00:00:00'),
(timestamp '1969-01-01 00:15:00'),
(timestamp '1969-01-01 00:30:00'),
(timestamp '1969-01-01 00:45:00'),
(timestamp '1969-01-01 01:00:00')
) as sq
----
1969-01-01T00:00:00
1969-01-01T00:00:00
1969-01-01T00:00:00
1969-01-01T00:00:00
1969-01-01T01:00:00

###
## test date_trunc function
###
Expand Down

0 comments on commit a2e5330

Please sign in to comment.