Skip to content

Commit

Permalink
Support "nearest" joinAsof strategy (#287)
Browse files Browse the repository at this point in the history
This PR adds support for the [`nearest` asof join
strategy](https://docs.rs/polars/0.43.1/polars/prelude/enum.AsofStrategy.html#variant.Nearest).
In brief, it makes the following changes:

- Map the "nearest" strategy string to `AsofStrategy::Nearest` in the Rust binding
- Add the "nearest" option to the TypeScript type definitions and doc comments
- Add a test case for the "nearest" strategy

Closes #286
  • Loading branch information
StephLin authored Oct 15, 2024
1 parent 6e4991e commit 64a3d63
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 7 deletions.
8 changes: 6 additions & 2 deletions __tests__/datelike.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ describe("datelike", () => {
.Series([
"2016-05-25 13:30:00.023",
"2016-05-25 13:30:00.023",
"2016-05-25 13:30:00.030",
"2016-05-25 13:30:00.035",
"2016-05-25 13:30:00.041",
"2016-05-25 13:30:00.048",
"2016-05-25 13:30:00.049",
Expand Down Expand Up @@ -90,7 +90,11 @@ describe("datelike", () => {
out = quotes
.joinAsof(trades, { on: "dates", strategy: "forward", tolerance: "5ms" })
["bid_right"].toArray();
expect(out).toEqual([51.95, 51.95, null, null, 720.77, null, null, null]);
expect(out).toEqual([51.95, 51.95, 51.95, null, 720.77, null, null, null]);
out = quotes
.joinAsof(trades, { on: "dates", strategy: "nearest", tolerance: "5ms" })
["bid_right"].toArray();
expect(out).toEqual([51.95, 51.95, 51.95, 51.95, 98.0, 98.0, null, null]);
});
test("asofjoin tolerance grouper", () => {
const df1 = pl.DataFrame({
Expand Down
8 changes: 6 additions & 2 deletions polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,10 @@ export interface DataFrame
* - A "forward" search selects the first row in the right DataFrame whose
* 'on' key is greater than or equal to the left's key.
*
* - A "nearest" search selects the last row in the right DataFrame whose value
* is nearest to the left's key. String keys are not currently supported for a
* nearest search.
*
* The default is "backward".
*
* @param other DataFrame to join with.
Expand All @@ -779,7 +783,7 @@ export interface DataFrame
* @param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined.
* @param options.byLeft join on these columns before doing asof join
* @param options.byRight join on these columns before doing asof join
* @param options.strategy One of 'forward', 'backward'
* @param options.strategy One of 'forward', 'backward', 'nearest'
* @param options.suffix Suffix to append to columns with a duplicate name.
* @param options.tolerance
* Numeric tolerance. By setting this the join will only be done if the near keys are within this distance.
Expand Down Expand Up @@ -852,7 +856,7 @@ export interface DataFrame
byLeft?: string | string[];
byRight?: string | string[];
by?: string | string[];
strategy?: "backward" | "forward";
strategy?: "backward" | "forward" | "nearest";
suffix?: string;
tolerance?: number | string;
allowParallel?: boolean;
Expand Down
8 changes: 6 additions & 2 deletions polars/lazy/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
- A "forward" search selects the first row in the right DataFrame whose
'on' key is greater than or equal to the left's key.
- A "nearest" search selects the last row in the right DataFrame whose value
is nearest to the left's key. String keys are not currently supported for a
nearest search.
The default is "backward".
Parameters
Expand All @@ -263,7 +267,7 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
@param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined.
@param options.byLeft join on these columns before doing asof join
@param options.byRight join on these columns before doing asof join
@param options.strategy One of {'forward', 'backward'}
@param options.strategy One of {'forward', 'backward', 'nearest'}
@param options.suffix Suffix to append to columns with a duplicate name.
@param options.tolerance
Numeric tolerance. By setting this the join will only be done if the near keys are within this distance.
Expand Down Expand Up @@ -337,7 +341,7 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
byLeft?: string | string[];
byRight?: string | string[];
by?: string | string[];
strategy?: "backward" | "forward";
strategy?: "backward" | "forward" | "nearest";
suffix?: string;
tolerance?: number | string;
allowParallel?: boolean;
Expand Down
3 changes: 2 additions & 1 deletion src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ impl JsLazyFrame {
let strategy = match strategy.as_ref() {
"forward" => AsofStrategy::Forward,
"backward" => AsofStrategy::Backward,
_ => panic!("expected one of {{'forward', 'backward'}}"),
"nearest" => AsofStrategy::Nearest,
_ => panic!("expected one of {{'forward', 'backward', 'nearest'}}"),
};
let ldf = self.ldf.clone();
let other = other.ldf.clone();
Expand Down

0 comments on commit 64a3d63

Please sign in to comment.