From 64a3d63fbb6f834f5f7be8bcabded686ba0f0f28 Mon Sep 17 00:00:00 2001 From: Yu-Kai Lin Date: Tue, 15 Oct 2024 22:04:17 +0800 Subject: [PATCH] Support "nearest" joinAsof strategy (#287) This PR aims to support [`nearest` asof join strategy](https://docs.rs/polars/0.43.1/polars/prelude/enum.AsofStrategy.html#variant.Nearest). In brief, it changes: - Add "nearest" type mapping to AsofStrategy::Nearest - Add "nearest" option to typescript's typing - Add testcase Closes #286 --- __tests__/datelike.test.ts | 8 ++++++-- polars/dataframe.ts | 8 ++++++-- polars/lazy/dataframe.ts | 8 ++++++-- src/lazy/dataframe.rs | 3 ++- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/__tests__/datelike.test.ts b/__tests__/datelike.test.ts index 87bd2fee8..9b20f09f2 100644 --- a/__tests__/datelike.test.ts +++ b/__tests__/datelike.test.ts @@ -8,7 +8,7 @@ describe("datelike", () => { .Series([ "2016-05-25 13:30:00.023", "2016-05-25 13:30:00.023", - "2016-05-25 13:30:00.030", + "2016-05-25 13:30:00.035", "2016-05-25 13:30:00.041", "2016-05-25 13:30:00.048", "2016-05-25 13:30:00.049", @@ -90,7 +90,11 @@ describe("datelike", () => { out = quotes .joinAsof(trades, { on: "dates", strategy: "forward", tolerance: "5ms" }) ["bid_right"].toArray(); - expect(out).toEqual([51.95, 51.95, null, null, 720.77, null, null, null]); + expect(out).toEqual([51.95, 51.95, 51.95, null, 720.77, null, null, null]); + out = quotes + .joinAsof(trades, { on: "dates", strategy: "nearest", tolerance: "5ms" }) + ["bid_right"].toArray(); + expect(out).toEqual([51.95, 51.95, 51.95, 51.95, 98.0, 98.0, null, null]); }); test("asofjoin tolerance grouper", () => { const df1 = pl.DataFrame({ diff --git a/polars/dataframe.ts b/polars/dataframe.ts index 66732287d..3649b695b 100644 --- a/polars/dataframe.ts +++ b/polars/dataframe.ts @@ -771,6 +771,10 @@ export interface DataFrame * - A "forward" search selects the first row in the right DataFrame whose * 'on' key is greater than or equal to the left's key. * + * - A "nearest" search selects the last row in the right DataFrame whose value + * is nearest to the left's key. String keys are not currently supported for a + * nearest search. + * * The default is "backward". * * @param other DataFrame to join with. @@ -779,7 +783,7 @@ export interface DataFrame * @param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined. * @param options.byLeft join on these columns before doing asof join * @param options.byRight join on these columns before doing asof join - * @param options.strategy One of 'forward', 'backward' + * @param options.strategy One of 'forward', 'backward', 'nearest' * @param options.suffix Suffix to append to columns with a duplicate name. * @param options.tolerance * Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. @@ -852,7 +856,7 @@ export interface DataFrame byLeft?: string | string[]; byRight?: string | string[]; by?: string | string[]; - strategy?: "backward" | "forward"; + strategy?: "backward" | "forward" | "nearest"; suffix?: string; tolerance?: number | string; allowParallel?: boolean; diff --git a/polars/lazy/dataframe.ts b/polars/lazy/dataframe.ts index 57c1031f5..a5570c94a 100644 --- a/polars/lazy/dataframe.ts +++ b/polars/lazy/dataframe.ts @@ -253,6 +253,10 @@ export interface LazyDataFrame extends Serialize, GroupByOps { - A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key. + - A "nearest" search selects the last row in the right DataFrame whose value + is nearest to the left's key. String keys are not currently supported for a + nearest search. + The default is "backward". Parameters @@ -263,7 +267,7 @@ export interface LazyDataFrame extends Serialize, GroupByOps { @param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined. @param options.byLeft join on these columns before doing asof join @param options.byRight join on these columns before doing asof join - @param options.strategy One of {'forward', 'backward'} + @param options.strategy One of {'forward', 'backward', 'nearest'} @param options.suffix Suffix to append to columns with a duplicate name. @param options.tolerance Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. @@ -337,7 +341,7 @@ export interface LazyDataFrame extends Serialize, GroupByOps { byLeft?: string | string[]; byRight?: string | string[]; by?: string | string[]; - strategy?: "backward" | "forward"; + strategy?: "backward" | "forward" | "nearest"; suffix?: string; tolerance?: number | string; allowParallel?: boolean; diff --git a/src/lazy/dataframe.rs b/src/lazy/dataframe.rs index b1e127915..490893192 100644 --- a/src/lazy/dataframe.rs +++ b/src/lazy/dataframe.rs @@ -307,7 +307,8 @@ impl JsLazyFrame { let strategy = match strategy.as_ref() { "forward" => AsofStrategy::Forward, "backward" => AsofStrategy::Backward, - _ => panic!("expected one of {{'forward', 'backward'}}"), + "nearest" => AsofStrategy::Nearest, + _ => panic!("expected one of {{'forward', 'backward', 'nearest'}}"), }; let ldf = self.ldf.clone(); let other = other.ldf.clone();