diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts index 2d9962d53178d..e82e65cc1b930 100644 --- a/js/src/compute/dataframe.ts +++ b/js/src/compute/dataframe.ts @@ -31,6 +31,7 @@ export type NextFunc = (idx: number, batch: RecordBatch) => void; Table.prototype.countBy = function(this: Table, name: Col | string) { return new DataFrame(this.chunks).countBy(name); }; Table.prototype.scan = function(this: Table, next: NextFunc, bind?: BindFunc) { return new DataFrame(this.chunks).scan(next, bind); }; +Table.prototype.scanReverse = function(this: Table, next: NextFunc, bind?: BindFunc) { return new DataFrame(this.chunks).scanReverse(next, bind); }; Table.prototype.filter = function(this: Table, predicate: Predicate): FilteredDataFrame { return new DataFrame(this.chunks).filter(predicate); }; export class DataFrame extends Table { @@ -49,6 +50,18 @@ export class DataFrame extends Tabl } } } + public scanReverse(next: NextFunc, bind?: BindFunc) { + const batches = this.chunks, numBatches = batches.length; + for (let batchIndex = numBatches; --batchIndex >= 0;) { + // load batches + const batch = batches[batchIndex]; + if (bind) { bind(batch); } + // yield all indices + for (let index = batch.length; --index >= 0;) { + next(index, batch); + } + } + } public countBy(name: Col | string) { const batches = this.chunks, numBatches = batches.length; const count_by = typeof name === 'string' ? 
new Col(name) : name as Col; @@ -130,6 +143,23 @@ export class FilteredDataFrame exte } } } + public scanReverse(next: NextFunc, bind?: BindFunc) { + const batches = this._chunks; + const numBatches = batches.length; + for (let batchIndex = numBatches; --batchIndex >= 0;) { + // load batches + const batch = batches[batchIndex]; + // TODO: bind batches lazily + // If predicate doesn't match anything in the batch we don't need + // to bind the callback + if (bind) { bind(batch); } + const predicate = this._predicate.bind(batch); + // yield all indices + for (let index = batch.length; --index >= 0;) { + if (predicate(index, batch)) { next(index, batch); } + } + } + } public count(): number { // inlined version of this: // let sum = 0; diff --git a/js/src/table.ts b/js/src/table.ts index b7cdbe221270a..5c41e14a9f52d 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -44,6 +44,7 @@ export interface Table { clone(chunks?: RecordBatch[], offsets?: Uint32Array): Table; scan(next: import('./compute/dataframe').NextFunc, bind?: import('./compute/dataframe').BindFunc): void; + scanReverse(next: import('./compute/dataframe').NextFunc, bind?: import('./compute/dataframe').BindFunc): void; countBy(name: import('./compute/predicate').Col | string): import('./compute/dataframe').CountByResult; filter(predicate: import('./compute/predicate').Predicate): import('./compute/dataframe').FilteredDataFrame; } diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts index 22c50a7737b8a..ae2f058e7daee 100644 --- a/js/test/unit/table-tests.ts +++ b/js/test/unit/table-tests.ts @@ -326,6 +326,24 @@ describe(`Table`, () => { } }); }); + describe(`scanReverse()`, () => { + test(`yields all values`, () => { + const table = datum.table(); + let expected_idx = values.length; + table.scanReverse((idx, batch) => { + const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!); + expect(columns.map((c) => c.get(idx))).toEqual(values[--expected_idx]); + }); + }); + 
test(`calls bind function with every batch`, () => { + const table = datum.table(); + let bind = jest.fn(); + table.scanReverse(() => { }, bind); + for (let batch of table.chunks) { + expect(bind).toHaveBeenCalledWith(batch); + } + }); + }); test(`count() returns the correct length`, () => { const table = datum.table(); const values = datum.values(); @@ -434,6 +452,26 @@ describe(`Table`, () => { } }); }); + describe(`scanReverse()`, () => { + test(`iterates over expected values in reverse`, () => { + let expected_idx = expected.length; + filtered.scanReverse((idx, batch) => { + const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!); + expect(columns.map((c) => c.get(idx))).toEqual(expected[--expected_idx]); + }); + }); + test(`calls bind function on every batch`, () => { + // Technically, we only need to call bind on + // batches with data that match the predicate, so + // this test may fail in the future if we change + // that - and that's ok! + let bind = jest.fn(); + filtered.scanReverse(() => { }, bind); + for (let batch of table.chunks) { + expect(bind).toHaveBeenCalledWith(batch); + } + }); + }); }); } test(`countBy on dictionary returns the correct counts`, () => {