diff --git a/__tests__/dataframe.test.ts b/__tests__/dataframe.test.ts index 3972d3416..550b6d542 100644 --- a/__tests__/dataframe.test.ts +++ b/__tests__/dataframe.test.ts @@ -2249,7 +2249,6 @@ describe("meta", () => { expect(dfString).toStrictEqual(expected); }); }); - test("Jupyter.display", () => { const df = pl.DataFrame({ os: ["apple", "linux"], @@ -2329,4 +2328,79 @@ describe("additional", () => { const actual = df.toRecords(); expect(actual).toEqual(rows); }); + test("upsample", () => { + const df = pl + .DataFrame({ + date: [ + new Date(2024, 1, 1), + new Date(2024, 3, 1), + new Date(2024, 4, 1), + new Date(2024, 5, 1), + ], + groups: ["A", "B", "A", "B"], + values: [0, 1, 2, 3], + }) + .withColumn(pl.col("date").cast(pl.Date).alias("date")) + .sort("date"); + + let actual = df + .upsample("date", "1mo", "0ns", "groups", true) + .select(pl.col("*").forwardFill()); + + let expected = pl + .DataFrame({ + date: [ + new Date(2024, 1, 1), + new Date(2024, 2, 1), + new Date(2024, 3, 1), + new Date(2024, 4, 1), + new Date(2024, 3, 1), + new Date(2024, 4, 1), + new Date(2024, 5, 1), + ], + groups: ["A", "A", "A", "A", "B", "B", "B"], + values: [0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0], + }) + .withColumn(pl.col("date").cast(pl.Date).alias("date")); + + expect(actual).toFrameEqual(expected); + + actual = df + .upsample({ + timeColumn: "date", + every: "1mo", + offset: "0ns", + by: "groups", + maintainOrder: true, + }) + .select(pl.col("*").forwardFill()); + + expect(actual).toFrameEqual(expected); + + actual = df + .upsample({ timeColumn: "date", every: "1mo" }) + .select(pl.col("*").forwardFill()); + + expected = pl + .DataFrame({ + date: [ + new Date(2024, 1, 1), + new Date(2024, 2, 1), + new Date(2024, 3, 1), + new Date(2024, 4, 1), + new Date(2024, 5, 1), + ], + groups: ["A", "A", "B", "A", "B"], + values: [0.0, 0.0, 1.0, 2.0, 3.0], + }) + .withColumn(pl.col("date").cast(pl.Date).alias("date")); + + expect(actual).toFrameEqual(expected); + + actual = df + .upsample({ timeColumn: "date", every: "1m" }) + .select(pl.col("*").forwardFill()); + + expect(actual.shape).toEqual({ height: 174_241, width: 3 }); + }); }); diff --git a/package.json b/package.json index 3d106fd86..176cd8928 100644 --- a/package.json +++ b/package.json @@ -54,17 +54,17 @@ "precommit": "yarn lint && yarn test" }, "devDependencies": { - "@biomejs/biome": "^1.5.1", - "@napi-rs/cli": "^2.17.0", + "@biomejs/biome": "^1.5.3", + "@napi-rs/cli": "^2.18.0", "@types/chance": "^1.1.6", - "@types/jest": "^29.5.11", - "@types/node": "^20.10.8", + "@types/jest": "^29.5.12", + "@types/node": "^20.11.17", "chance": "^1.1.11", "jest": "^29.7.0", "source-map-support": "^0.5.21", - "ts-jest": "^29.1.1", + "ts-jest": "^29.1.2", "ts-node": "^10.9.2", - "typedoc": "^0.25.7", + "typedoc": "^0.25.8", "typescript": "5.3.3" }, "packageManager": "yarn@4.0.2", diff --git a/polars/dataframe.ts b/polars/dataframe.ts index 0742d8398..fcd19702a 100644 --- a/polars/dataframe.ts +++ b/polars/dataframe.ts @@ -57,12 +57,12 @@ interface WriteMethods { * @param options.sep - Separate CSV fields with this symbol. _defaults to `,`_ * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.writeCSV() + * ... }); + * > df.writeCSV(); * foo,bar,ham * 1,6,a * 2,7,b @@ -81,7 +81,7 @@ interface WriteMethods { * ... callback(null); * ... } * ... }); - * > df.head(1).writeCSV(writeStream, {hasHeader: false}) + * > df.head(1).writeCSV(writeStream, {hasHeader: false}); * writeStream: '1,6,a' * ``` * @category IO @@ -167,9 +167,9 @@ interface WriteMethods { * @example * Constructing a DataFrame from an object : * ``` - * > data = {'a': [1n, 2n], 'b': [3, 4]} - * > df = pl.DataFrame(data) - * > df + * > const data = {'a': [1n, 2n], 'b': [3, 4]}; + * > const df = pl.DataFrame(data); + * > console.log(df.toString()); * shape: (2, 2) * ╭─────┬─────╮ * │ a ┆ b │ @@ -189,10 +189,9 @@ interface WriteMethods { * In order to specify dtypes for your columns, initialize the DataFrame with a list * of Series instead: * ``` - * > data = [pl.Series('col1', [1, 2], pl.Float32), - * ... pl.Series('col2', [3, 4], pl.Int64)] - * > df2 = pl.DataFrame(series) - * > df2 + * > const data = [pl.Series('col1', [1, 2], pl.Float32), pl.Series('col2', [3, 4], pl.Int64)]; + * > const df2 = pl.DataFrame(series); + * > console.log(df2.toString()); * shape: (2, 2) * ╭──────┬──────╮ * │ col1 ┆ col2 │ @@ -207,9 +206,9 @@ interface WriteMethods { * * Constructing a DataFrame from a list of lists, row orientation inferred: * ``` - * > data = [[1, 2, 3], [4, 5, 6]] - * > df4 = pl.DataFrame(data, ['a', 'b', 'c']) - * > df4 + * > const data = [[1, 2, 3], [4, 5, 6]]; + * > const df4 = pl.DataFrame(data, ['a', 'b', 'c']); + * > console.log(df4.toString()); * shape: (2, 3) * ╭─────┬─────┬─────╮ * │ a ┆ b ┆ c │ @@ -250,12 +249,12 @@ export interface DataFrame * ___ * Example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... 'a': [1.0, 2.8, 3.0], * ... 'b': [4, 5, 6], * ... "c": [True, False, True] - * ... }) - * ... df.describe() + * ... }); + * ... df.describe() * shape: (5, 4) * ╭──────────┬───────┬─────┬──────╮ * │ describe ┆ a ┆ b ┆ c │ @@ -283,13 +282,13 @@ export interface DataFrame * @param name * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'], * ... "apple": ['a', 'b', 'c'] - * ... }) - * > df.drop(['ham', 'apple']) + * ... }); + * > console.log(df.drop(['ham', 'apple']).toString()); * shape: (3, 2) * ╭─────┬─────╮ * │ foo ┆ bar │ @@ -314,12 +313,12 @@ export interface DataFrame * ___ * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, null, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.dropNulls() + * ... }); + * > console.log(df.dropNulls().toString()); * shape: (2, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ @@ -341,11 +340,11 @@ export interface DataFrame * @param columns - column or columns to explode * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "letters": ["c", "c", "a", "c", "a", "b"], * ... "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]] - * ... }) - * > df + * ... }); + * > console.log(df.toString()); * shape: (6, 2) * ╭─────────┬────────────╮ * │ letters ┆ nrs │ @@ -438,11 +437,11 @@ export interface DataFrame * @param predicate - Expression that evaluates to a boolean Series. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) + * ... }); * // Filter on one condition * > df.filter(pl.col("foo").lt(3)) * shape: (2, 3) @@ -477,11 +476,11 @@ export interface DataFrame * @param name -Name of the column to find. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) + * ... }); * > df.findIdxByName("ham")) * 2 * ``` @@ -503,11 +502,11 @@ export interface DataFrame * @example * ``` * > // A horizontal sum operation - * > df = pl.DataFrame({ + * > let df = pl.DataFrame({ * ... "a": [2, 1, 3], * ... "b": [1, 2, 3], * ... "c": [1.0, 2.0, 3.0] - * ... }) + * ... }); * > df.fold((s1, s2) => s1.plus(s2)) * Series: 'a' [f64] * [ @@ -520,7 +519,7 @@ export interface DataFrame * ... "a": [2, 1, 3], * ... "b": [1, 2, 3], * ... "c": [1.0, 2.0, 3.0] - * ... }) + * ... }); * > df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2)) * Series: 'a' [f64] * [ @@ -552,12 +551,12 @@ export interface DataFrame * @param options.nullEqual Consider null values as equal. * @example * ``` - * > df1 = pl.DataFrame({ + * > const df1 = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'] * ... }) - * > df2 = pl.DataFrame({ + * > const df2 = pl.DataFrame({ * ... "foo": [3, 2, 1], * ... "bar": [8.0, 7.0, 6.0], * ... "ham": ['c', 'b', 'a'] @@ -604,11 +603,11 @@ export interface DataFrame * @param length - Length of the head. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3, 4, 5], * ... "bar": [6, 7, 8, 9, 10], * ... "ham": ['a', 'b', 'c', 'd','e'] - * ... }) + * ... }); * > df.head(3) * shape: (3, 3) * ╭─────┬─────┬─────╮ @@ -631,12 +630,12 @@ export interface DataFrame * @param inPlace - Modify in place * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > x = pl.Series("apple", [10, 20, 30]) + * ... }); + * > const x = pl.Series("apple", [10, 20, 30]) * > df.hStack([x]) * shape: (3, 4) * ╭─────┬─────┬─────┬───────╮ @@ -688,15 +687,15 @@ export interface DataFrame * @see {@link JoinOptions} * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > otherDF = pl.DataFrame({ + * ... }); + * > const otherDF = pl.DataFrame({ * ... "apple": ['x', 'y', 'z'], * ... "ham": ['a', 'b', 'd'] - * ... }) + * ... }); * > df.join(otherDF, {on: 'ham'}) * shape: (2, 4) * ╭─────┬─────┬─────┬───────╮ @@ -841,11 +840,11 @@ export interface DataFrame * @param axis - either 0 or 1 * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) + * ... }); * > df.max() * shape: (1, 3) * ╭─────┬─────┬──────╮ @@ -876,12 +875,12 @@ export interface DataFrame * ___ * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.median() + * ... }); + * > df.median(); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ @@ -901,13 +900,13 @@ export interface DataFrame * @param valueVars - Values to use as value variables. * @example * ``` - * > df1 = pl.DataFrame({ + * > const df1 = pl.DataFrame({ * ... 'id': [1], * ... 'asset_key_1': ['123'], * ... 'asset_key_2': ['456'], * ... 'asset_key_3': ['abc'], - * ... }) - * > df1.melt('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']) + * ... }); + * > df1.melt('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']); * shape: (3, 3) * ┌─────┬─────────────┬───────┐ * │ id ┆ variable ┆ value │ @@ -929,12 +928,12 @@ export interface DataFrame * @param axis - either 0 or 1 * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.min() + * ... }); + * > df.min(); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ @@ -957,12 +956,12 @@ export interface DataFrame * ___ * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, null, 3], * ... "bar": [6, 7, null], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.nullCount() + * ... }); + * > df.nullCount(); * shape: (1, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ @@ -1006,14 +1005,14 @@ export interface DataFrame * @param options.sortColumns Sort the transposed columns by name. Default is by order of discovery. * @example * ``` - * > df = pl.DataFrame( + * > const df = pl.DataFrame( * ... { * ... "foo": ["one", "one", "one", "two", "two", "two"], * ... "bar": ["A", "B", "C", "A", "B", "C"], * ... "baz": [1, 2, 3, 4, 5, 6], * ... } - * ... ) - * > df.pivot({values:"baz", index:"foo", columns:"bar"}) + * ... ); + * > df.pivot({values:"baz", index:"foo", columns:"bar"}); * shape: (2, 4) * ┌─────┬─────┬─────┬─────┐ * │ foo ┆ A ┆ B ┆ C │ @@ -1071,12 +1070,12 @@ export interface DataFrame * Aggregate the columns of this DataFrame to their quantile value. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.quantile(0.5) + * ... }); + * > df.quantile(0.5); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ @@ -1101,12 +1100,12 @@ export interface DataFrame * @param mapping - Key value pairs that map from old name to new name. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.rename({"foo": "apple"}) + * ... }); + * > df.rename({"foo": "apple"}); * ╭───────┬─────┬─────╮ * │ apple ┆ bar ┆ ham │ * │ --- ┆ --- ┆ --- │ @@ -1128,13 +1127,13 @@ export interface DataFrame * @param newColumn - New column to insert * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > x = pl.Series("apple", [10, 20, 30]) - * > df.replaceAtIdx(0, x) + * ... }); + * > const x = pl.Series("apple", [10, 20, 30]); + * > df.replaceAtIdx(0, x); * shape: (3, 3) * ╭───────┬─────┬─────╮ * │ apple ┆ bar ┆ ham │ @@ -1155,11 +1154,11 @@ export interface DataFrame * @param index - row index * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) + * ... }); * > df.row(2) * [3, 8, 'c'] * ``` @@ -1176,12 +1175,12 @@ export interface DataFrame * @param columns - Column or columns to select. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.select('foo') + * ... }); + * > df.select('foo'); * shape: (3, 1) * ┌─────┐ * │ foo │ @@ -1204,12 +1203,12 @@ export interface DataFrame * @param periods - Number of places to shift (may be negative). * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.shift(1) + * ... }); + * > df.shift(1); * shape: (3, 3) * ┌──────┬──────┬──────┐ * │ foo ┆ bar ┆ ham │ @@ -1248,12 +1247,12 @@ export interface DataFrame * @param opts.fillValue - fill null values with this value. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.shiftAndFill({n:1, fill_value:0}) + * ... }); + * > df.shiftAndFill({n:1, fill_value:0}); * shape: (3, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ @@ -1290,12 +1289,12 @@ export interface DataFrame * @param opts.length - Length of the slice * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6.0, 7.0, 8.0], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.slice(1, 2) // Alternatively `df.slice({offset:1, length:2})` + * ... }); + * > df.slice(1, 2); // Alternatively `df.slice({offset:1, length:2})` * shape: (2, 3) * ┌─────┬─────┬─────┐ * │ foo ┆ bar ┆ ham │ @@ -1335,12 +1334,12 @@ export interface DataFrame * ___ * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] - * ... }) - * > df.std() + * ... }); + * > df.std(); * shape: (1, 3) * ╭─────┬─────┬──────╮ * │ foo ┆ bar ┆ ham │ @@ -1366,11 +1365,11 @@ export interface DataFrame /** * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "letters": ["c", "c", "a", "c", "a", "b"], * ... "nrs": [1, 2, 3, 4, 5, 6] - * ... }) - * > df + * ... }); + * > console.log(df.toString()); * shape: (6, 2) * ╭─────────┬─────╮ * │ letters ┆ nrs │ @@ -1478,11 +1477,11 @@ export interface DataFrame * @param name Name for the struct Series * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "a": [1, 2, 3, 4, 5], * ... "b": ["one", "two", "three", "four", "five"], - * ... }) - * > df.toStruct("nums") + * ... }); + * > df.toStruct("nums"); * shape: (5,) * Series: 'nums' [struct[2]{'a': i64, 'b': str}] * [ @@ -1505,7 +1504,7 @@ export interface DataFrame * @param options.columnNames Optional generator/iterator that yields column names. Will be used to replace the columns in the DataFrame. * * @example - * > df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) + * > const df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}); * > df.transpose({includeHeader:true}) * shape: (2, 4) * ┌────────┬──────────┬──────────┬──────────┐ @@ -1595,14 +1594,14 @@ export interface DataFrame @param names Names of the struct columns that will be decomposed by its fields @example ``` - > df = pl.DataFrame({ + > const df = pl.DataFrame({ ... "int": [1, 2], ... "str": ["a", "b"], ... "bool": [true, null], ... "list": [[1, 2], [3]], ... }) ... .toStruct("my_struct") - ... .toFrame() + ... .toFrame(); > df shape: (2, 1) ┌─────────────────────────────┐ @@ -1632,11 +1631,11 @@ export interface DataFrame * Aggregate the columns of this DataFrame to their variance value. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * > "foo": [1, 2, 3], * > "bar": [6, 7, 8], * > "ham": ['a', 'b', 'c'] - * > }) + * > }); * > df.var() * shape: (1, 3) * ╭─────┬─────┬──────╮ @@ -1654,17 +1653,17 @@ export interface DataFrame * @param df - DataFrame to stack. * @example * ``` - * > df1 = pl.DataFrame({ + * > const df1 = pl.DataFrame({ * ... "foo": [1, 2], * ... "bar": [6, 7], * ... "ham": ['a', 'b'] - * ... }) - * > df2 = pl.DataFrame({ + * ... }); + * > const df2 = pl.DataFrame({ * ... "foo": [3, 4], * ... "bar": [8 , 9], * ... "ham": ['c', 'd'] - * ... }) - * > df1.vstack(df2) + * ... }); + * > df1.vstack(df2); * shape: (4, 3) * ╭─────┬─────┬─────╮ * │ foo ┆ bar ┆ ham │ @@ -1703,6 +1702,91 @@ export interface DataFrame withRowCount(name?: string): DataFrame; /** @see {@link filter} */ where(predicate: any): DataFrame; + /** + Upsample a DataFrame at a regular frequency. + + The `every` and `offset` arguments are created with the following string language: + - 1ns (1 nanosecond) + - 1us (1 microsecond) + - 1ms (1 millisecond) + - 1s (1 second) + - 1m (1 minute) + - 1h (1 hour) + - 1d (1 calendar day) + - 1w (1 calendar week) + - 1mo (1 calendar month) + - 1q (1 calendar quarter) + - 1y (1 calendar year) + - 1i (1 index count) + + Or combine them: + - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds + + By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). + Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". + + Parameters + ---------- + @param timeColumn Time column will be used to determine a date range. + Note that this column has to be sorted for the output to make sense. + @param every Interval will start 'every' duration. + @param offset Change the start of the date range by this offset. + @param by First group by these columns and then upsample for every group. + @param maintainOrder Keep the ordering predictable. This is slower. + + Returns + ------- + DataFrame + Result will be sorted by `timeColumn` (but note that if `by` columns are passed, it will only be sorted within each `by` group). + + Examples + -------- + Upsample a DataFrame by a certain interval. + + >>> const df = pl.DataFrame({ + "date": [ + new Date(2024, 1, 1), + new Date(2024, 3, 1), + new Date(2024, 4, 1), + new Date(2024, 5, 1), + ], + "groups": ["A", "B", "A", "B"], + "values": [0, 1, 2, 3], + }) + .withColumn(pl.col("date").cast(pl.Date).alias("date")) + .sort("date"); + + >>> df.upsample({timeColumn: "date", every: "1mo", offset: "0ns", by: "groups", maintainOrder: true}) + .select(pl.col("*").forwardFill()); +shape: (7, 3) +┌────────────┬────────┬────────┐ +│ date ┆ groups ┆ values │ +│ --- ┆ --- ┆ --- │ +│ date ┆ str ┆ f64 │ +╞════════════╪════════╪════════╡ +│ 2024-02-01 ┆ A ┆ 0.0 │ +│ 2024-03-01 ┆ A ┆ 0.0 │ +│ 2024-04-01 ┆ A ┆ 0.0 │ +│ 2024-05-01 ┆ A ┆ 2.0 │ +│ 2024-04-01 ┆ B ┆ 1.0 │ +│ 2024-05-01 ┆ B ┆ 1.0 │ +│ 2024-06-01 ┆ B ┆ 3.0 │ +└────────────┴────────┴────────┘ + */ + upsample( + timeColumn: string, + every: string, + offset?: string, + by?: string | string[], + maintainOrder?: boolean, + ): DataFrame; + upsample(opts: { + timeColumn: string; + every: string; + offset?: string; + by?: string | string[]; + maintainOrder?: boolean; + }): DataFrame; } function prepareOtherArg(anyValue: any): Series { @@ -1966,6 +2050,30 @@ export const _DataFrame = (_df: any): DataFrame => { by, ); }, + upsample(opts, every?, offset?, by?, maintainOrder?) { + let timeColumn; + if (typeof opts === "string") { + timeColumn = opts; + } else { + timeColumn = opts.timeColumn; + by = opts.by; + offset = opts.offset; + every = opts.every; + maintainOrder = opts.maintainOrder ?? false; + } + + if (typeof by === "string") { + by = [by]; + } else { + by = by ?? []; + } + + offset = offset ?? "0ns"; + + return _DataFrame( + _df.upsample(by, timeColumn, every, offset, maintainOrder), + ); + }, hashRows(obj: any = 0n, k1 = 1n, k2 = 2n, k3 = 3n) { if (typeof obj === "number" || typeof obj === "bigint") { return _Series( diff --git a/polars/lazy/expr/index.ts b/polars/lazy/expr/index.ts index e1e28d3ff..86c88af28 100644 --- a/polars/lazy/expr/index.ts +++ b/polars/lazy/expr/index.ts @@ -66,10 +66,10 @@ export interface Expr * @see {@link Expr.as} * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "a": [1, 2, 3], * ... "b": ["a", "b", None], - * ... }) + * ... }); * > df * shape: (3, 2) * ╭─────┬──────╮ @@ -150,12 +150,12 @@ export interface Expr * @param columns Column(s) to exclude from selection * @example * ``` - * >df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "a": [1, 2, 3], * ... "b": ["a", "b", None], * ... "c": [None, 2, 1], - * ...}) - * >df + * ...}); + * > df * shape: (3, 3) * ╭─────┬──────┬──────╮ * │ a ┆ b ┆ c │ @@ -168,9 +168,9 @@ export interface Expr * ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ * │ 3 ┆ null ┆ 1 │ * ╰─────┴──────┴──────╯ - * >df.select( + * > df.select( * ... pl.col("*").exclude("b"), - * ... ) + * ... ); * shape: (3, 2) * ╭─────┬──────╮ * │ a ┆ c │ @@ -253,13 +253,13 @@ export interface Expr * @returns Expr that evaluates to a Boolean Series. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "sets": [[1, 2, 3], [1, 2], [9, 10]], * ... "optional_members": [1, 2, 3] - * ... }) + * ... }); * > df.select( * ... pl.col("optional_members").isIn("sets").alias("contains") - * ... ) + * ... ); * shape: (3, 1) * ┌──────────┐ * │ contains │ @@ -294,15 +294,15 @@ export interface Expr * With `keepName` we can keep the original name of the column * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "a": [1, 2, 3], * ... "b": ["a", "b", None], - * ... }) + * ... }); * * > df * ... .groupBy("a") * ... .agg(pl.col("b").list()) - * ... .sort({by:"a"}) + * ... .sort({by:"a"}); * * shape: (3, 2) * ╭─────┬────────────╮ @@ -374,13 +374,13 @@ export interface Expr * * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "groups": [1, 1, 2, 2, 1, 2, 3, 3, 1], * ... "values": [1, 2, 3, 4, 5, 6, 7, 8, 8], - * ... }) + * ... }); * > df.select( * ... pl.col("groups").sum().over("groups") - * ... ) + * ... ); * ╭────────┬────────╮ * │ groups ┆ values │ * │ --- ┆ --- │ @@ -416,12 +416,12 @@ export interface Expr * Add a prefix the to root column name of the expression. * @example * ``` - * > df = pl.DataFrame({ + * > const df = pl.DataFrame({ * ... "A": [1, 2, 3, 4, 5], * ... "fruits": ["banana", "banana", "apple", "apple", "banana"], * ... "B": [5, 4, 3, 2, 1], * ... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], - * ... }) + * ... }); * shape: (5, 4) * ╭─────┬──────────┬─────┬──────────╮ * │ A ┆ fruits ┆ B ┆ cars │ @@ -439,7 +439,7 @@ export interface Expr * │ 5 ┆ "banana" ┆ 1 ┆ "beetle" │ * ╰─────┴──────────┴─────┴──────────╯ * > df.select( - * ... pl.all().reverse().prefix("reverse_"), + * ... pl.col("*").reverse().prefix("reverse_"), * ... ) * shape: (5, 8) * ╭───────────┬────────────────┬───────────┬──────────────╮ @@ -528,8 +528,7 @@ export interface Expr Parameters ---------- - @param by - The column(s) used for sorting. + @param by The column(s) used for sorting. @param reverse false -> order from small to large. true -> order from large to small. diff --git a/yarn.lock b/yarn.lock index 57c5028d3..29a979928 100644 --- a/yarn.lock +++ b/yarn.lock @@ -408,18 +408,18 @@ __metadata: languageName: node linkType: hard -"@biomejs/biome@npm:^1.5.1": - version: 1.5.1 - resolution: "@biomejs/biome@npm:1.5.1" - dependencies: - "@biomejs/cli-darwin-arm64": 1.5.1 - "@biomejs/cli-darwin-x64": 1.5.1 - "@biomejs/cli-linux-arm64": 1.5.1 - "@biomejs/cli-linux-arm64-musl": 1.5.1 - "@biomejs/cli-linux-x64": 1.5.1 - "@biomejs/cli-linux-x64-musl": 1.5.1 - "@biomejs/cli-win32-arm64": 1.5.1 - "@biomejs/cli-win32-x64": 1.5.1 +"@biomejs/biome@npm:^1.5.3": + version: 1.5.3 + resolution: "@biomejs/biome@npm:1.5.3" + dependencies: + "@biomejs/cli-darwin-arm64": 1.5.3 + "@biomejs/cli-darwin-x64": 1.5.3 + "@biomejs/cli-linux-arm64": 1.5.3 + "@biomejs/cli-linux-arm64-musl": 1.5.3 + "@biomejs/cli-linux-x64": 1.5.3 + "@biomejs/cli-linux-x64-musl": 1.5.3 + "@biomejs/cli-win32-arm64": 1.5.3 + "@biomejs/cli-win32-x64": 1.5.3 dependenciesMeta: "@biomejs/cli-darwin-arm64": optional: true @@ -439,62 +439,62 @@ __metadata: optional: true bin: biome: bin/biome - checksum: d8dadc2e9b06544a109944b23b812f9da0da456b35d8661518665a295cecd4914940253182f135f34aa7e17d09e4d2d85aa785595ae60aa94fa6aadc8c2ce207 + checksum: 5155dc05ca3a9fd0228eb74df4f21d0ab311e4b02452c6b3dc5615825eab402df6824992014ff19f82bed49fd286e0001ebe608efdba71a41f3f6c06305b1d7c languageName: node linkType: hard -"@biomejs/cli-darwin-arm64@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-darwin-arm64@npm:1.5.1" +"@biomejs/cli-darwin-arm64@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-darwin-arm64@npm:1.5.3" conditions: os=darwin & cpu=arm64 languageName: node linkType: hard -"@biomejs/cli-darwin-x64@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-darwin-x64@npm:1.5.1" +"@biomejs/cli-darwin-x64@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-darwin-x64@npm:1.5.3" conditions: os=darwin & cpu=x64 languageName: node linkType: hard -"@biomejs/cli-linux-arm64-musl@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-linux-arm64-musl@npm:1.5.1" +"@biomejs/cli-linux-arm64-musl@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-linux-arm64-musl@npm:1.5.3" conditions: os=linux & cpu=arm64 & libc=musl languageName: node linkType: hard -"@biomejs/cli-linux-arm64@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-linux-arm64@npm:1.5.1" +"@biomejs/cli-linux-arm64@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-linux-arm64@npm:1.5.3" conditions: os=linux & cpu=arm64 & libc=glibc languageName: node linkType: hard -"@biomejs/cli-linux-x64-musl@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-linux-x64-musl@npm:1.5.1" +"@biomejs/cli-linux-x64-musl@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-linux-x64-musl@npm:1.5.3" conditions: os=linux & cpu=x64 & libc=musl languageName: node linkType: hard -"@biomejs/cli-linux-x64@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-linux-x64@npm:1.5.1" +"@biomejs/cli-linux-x64@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-linux-x64@npm:1.5.3" conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard -"@biomejs/cli-win32-arm64@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-win32-arm64@npm:1.5.1" +"@biomejs/cli-win32-arm64@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-win32-arm64@npm:1.5.3" conditions: os=win32 & cpu=arm64 languageName: node linkType: hard -"@biomejs/cli-win32-x64@npm:1.5.1": - version: 1.5.1 - resolution: "@biomejs/cli-win32-x64@npm:1.5.1" +"@biomejs/cli-win32-x64@npm:1.5.3": + version: 1.5.3 + resolution: "@biomejs/cli-win32-x64@npm:1.5.3" conditions: os=win32 & cpu=x64 languageName: node linkType: hard @@ -824,12 +824,12 @@ __metadata: languageName: node linkType: hard -"@napi-rs/cli@npm:^2.17.0": - version: 2.17.0 - resolution: "@napi-rs/cli@npm:2.17.0" +"@napi-rs/cli@npm:^2.18.0": + version: 2.18.0 + resolution: "@napi-rs/cli@npm:2.18.0" bin: napi: scripts/index.js - checksum: f86f5e2ecea589605dd245e647f5de9e60b533911ad3ef59b8c92cddbb9e127696b1d21255c50a857a8a4212418ba0a91800d6051aeffe4380720cda66961a63 + checksum: eadff1dda564416b66db44f5ea7088712f8cf66f6677082197e6d3ce5a57d9eabeb0d091b4d1685e8a4bd275ff1de684fca1ae84edd0f66dac82cb328acc068c languageName: node linkType: hard @@ -991,13 +991,13 @@ __metadata: languageName: node linkType: hard -"@types/jest@npm:^29.5.11": - version: 29.5.11 - resolution: "@types/jest@npm:29.5.11" +"@types/jest@npm:^29.5.12": + version: 29.5.12 + resolution: "@types/jest@npm:29.5.12" dependencies: expect: ^29.0.0 pretty-format: ^29.0.0 - checksum: f892a06ec9f0afa9a61cd7fa316ec614e21d4df1ad301b5a837787e046fcb40dfdf7f264a55e813ac6b9b633cb9d366bd5b8d1cea725e84102477b366df23fdd + checksum: 19b1efdeed9d9a60a81edc8226cdeae5af7479e493eaed273e01243891c9651f7b8b4c08fc633a7d0d1d379b091c4179bbaa0807af62542325fd72f2dd17ce1c languageName: node linkType: hard @@ -1008,12 +1008,12 @@ __metadata: languageName: node linkType: hard -"@types/node@npm:^20.10.8": - version: 20.10.8 - resolution: "@types/node@npm:20.10.8" +"@types/node@npm:^20.11.17": + version: 20.11.17 + resolution: "@types/node@npm:20.11.17" dependencies: undici-types: ~5.26.4 - checksum: ce9b7ee545b3605f667be2ea900e38ab58d7b561192a7342443e5d7f61c44fd9d016eac48e95d3011f090ceea65a727e83a31d51fabdd9fc20ff9992edcbc682 + checksum: 59c0dde187120adc97da30063c86511664b24b50fe777abfe1f557c217d0a0b84a68aaab5ef8ac44f5c2986b3f9cd605a15fa6e4f31195e594da96bbe9617c20 languageName: node linkType: hard @@ -3087,17 +3087,17 @@ __metadata: version: 0.0.0-use.local resolution: "nodejs-polars@workspace:." dependencies: - "@biomejs/biome": ^1.5.1 - "@napi-rs/cli": ^2.17.0 + "@biomejs/biome": ^1.5.3 + "@napi-rs/cli": ^2.18.0 "@types/chance": ^1.1.6 - "@types/jest": ^29.5.11 - "@types/node": ^20.10.8 + "@types/jest": ^29.5.12 + "@types/node": ^20.11.17 chance: ^1.1.11 jest: ^29.7.0 source-map-support: ^0.5.21 - ts-jest: ^29.1.1 + ts-jest: ^29.1.2 ts-node: ^10.9.2 - typedoc: ^0.25.7 + typedoc: ^0.25.8 typescript: 5.3.3 languageName: unknown linkType: soft @@ -3758,9 +3758,9 @@ __metadata: languageName: node linkType: hard -"ts-jest@npm:^29.1.1": - version: 29.1.1 - resolution: "ts-jest@npm:29.1.1" +"ts-jest@npm:^29.1.2": + version: 29.1.2 + resolution: "ts-jest@npm:29.1.2" dependencies: bs-logger: 0.x fast-json-stable-stringify: 2.x @@ -3787,7 +3787,7 @@ __metadata: optional: true bin: ts-jest: cli.js - checksum: a8c9e284ed4f819526749f6e4dc6421ec666f20ab44d31b0f02b4ed979975f7580b18aea4813172d43e39b29464a71899f8893dd29b06b4a351a3af8ba47b402 + checksum: a0ce0affc1b716c78c9ab55837829c42cb04b753d174a5c796bb1ddf9f0379fc20647b76fbe30edb30d9b23181908138d6b4c51ef2ae5e187b66635c295cefd5 languageName: node linkType: hard @@ -3843,9 +3843,9 @@ __metadata: languageName: node linkType: hard -"typedoc@npm:^0.25.7": - version: 0.25.7 - resolution: "typedoc@npm:0.25.7" +"typedoc@npm:^0.25.8": + version: 0.25.8 + resolution: "typedoc@npm:0.25.8" dependencies: lunr: ^2.3.9 marked: ^4.3.0 @@ -3855,7 +3855,7 @@ __metadata: typescript: 4.6.x || 4.7.x || 4.8.x || 4.9.x || 5.0.x || 5.1.x || 5.2.x || 5.3.x bin: typedoc: bin/typedoc - checksum: 49c3bf923a3c9401b549e5843f8efaaac8fa28f8ec6bd8617187b5d9ba9932a3fa63dc3863b82389507ffc7d92908af0dce33780fffb4970cd0833274f6fa0cf + checksum: c81992d791ef267e5f94ce0bf0ee98452ac772f4a12a96ad3b18520e7a1f9ba5f6908e48d5e73330fdedb7bfd0b9e0a74073afa5ef75775f76f1c6f3f3fce3a3 languageName: node linkType: hard