diff --git a/js/src/predicate.ts b/js/src/predicate.ts index 1553bca7d1903..00aa639663619 100644 --- a/js/src/predicate.ts +++ b/js/src/predicate.ts @@ -61,7 +61,7 @@ export class Col extends Value { } if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); } } - this.vector = batch.columns[this.colidx]; + this.vector = batch.getChildAt(this.colidx); return this.vector.get.bind(this.vector); } diff --git a/js/src/recordbatch.ts b/js/src/recordbatch.ts index 93783fcc6718f..19bd145b15b4d 100644 --- a/js/src/recordbatch.ts +++ b/js/src/recordbatch.ts @@ -18,7 +18,7 @@ import { Schema, Struct } from './type'; import { flatbuffers } from 'flatbuffers'; import { View, Vector, StructVector } from './vector'; -import { Data, NestedData, ChunkedData } from './data'; +import { Data, NestedData } from './data'; import Long = flatbuffers.Long; @@ -32,7 +32,6 @@ export class RecordBatch extends StructVector { public readonly schema: Schema; public readonly length: number; public readonly numCols: number; - public readonly columns: Vector[]; constructor(schema: Schema, data: Data, view: View); constructor(schema: Schema, numRows: Long | number, cols: Data | Vector[]); constructor(...args: any[]) { @@ -42,9 +41,6 @@ export class RecordBatch extends StructVector { this.schema = args[0]; this.length = data.length; this.numCols = this.schema.fields.length; - this.columns = data instanceof ChunkedData - ? data.childVectors - : data.childData.map((col) => Vector.create(col)); } else { const [schema, numRows, cols] = args; const columns: Vector[] = new Array(cols.length); @@ -59,7 +55,6 @@ export class RecordBatch extends StructVector { } super(new NestedData(new Struct(schema.fields), numRows, null, columnsData)); this.schema = schema; - this.columns = columns; this.length = numRows; this.numCols = schema.fields.length; } @@ -72,7 +67,7 @@ export class RecordBatch extends StructVector { const namesToKeep = columnNames.reduce((xs, x) => (xs[x] = true) && xs, Object.create(null)); return new RecordBatch( this.schema.select(...columnNames), this.length, - this.columns.filter((_, index) => namesToKeep[fields[index].name]) + this.childData.filter((_, index) => namesToKeep[fields[index].name]) ); } } diff --git a/js/src/table.ts b/js/src/table.ts index af30bf83c5df3..fb324ab438b38 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -68,7 +68,7 @@ export class Table implements DataFrame { // List of inner RecordBatches public readonly batches: RecordBatch[]; // List of inner Vectors, possibly spanning batches - public readonly columns: Vector[]; + protected readonly _columns: Vector[] = []; // Union of all inner RecordBatches into one RecordBatch, possibly chunked. // If the Table has just one inner RecordBatch, this points to that. // If the Table has multiple inner RecordBatches, then this is a Chunked view @@ -94,10 +94,7 @@ export class Table implements DataFrame { this.schema = schema; this.batches = batches; this.batchesUnion = batches.reduce((union, batch) => union.concat(batch)); - this.columns = batches.slice(1).reduce((columns, batch) => - columns.map((col, idx) => col.concat(batch.columns[idx])), - batches[0].columns - ); + // this.columns = schema.fields.map((_, i) => this.batchesUnion.getChildAt(i)); this.length = this.batchesUnion.length; this.numCols = this.batchesUnion.numCols; } @@ -108,7 +105,8 @@ export class Table implements DataFrame { return this.getColumnAt(this.getColumnIndex(name)); } public getColumnAt(index: number) { - return this.columns[index]; + return this._columns[index] || ( + this._columns[index] = this.batchesUnion.getChildAt(index)); } public getColumnIndex(name: string) { return this.schema.fields.findIndex((f) => f.name === name); @@ -265,7 +263,8 @@ export class CountByResult extends Table implements DataFrame { )); } public toJSON(): Object { - const [values, counts] = this.columns; + const values = this.getColumnAt(0); + const counts = this.getColumnAt(1); const result = {} as { [k: string]: number | null }; for (let i = -1; ++i < this.length;) { result[values.get(i)] = counts.get(i); diff --git a/js/src/vector.ts b/js/src/vector.ts index 60315857ed851..a1b5cb3405017 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -35,6 +35,9 @@ export class Vector implements VectorLike, View, Vi public static create(data: Data): Vector { return createVector(data); } + public static concat(...sources: Vector[]): Vector { + return sources.length === 1 ? sources[0] : sources.reduce((a, b) => a.concat(b)); + } public type: T; public length: number; public readonly data: Data; @@ -84,7 +87,7 @@ export class Vector implements VectorLike, View, Vi const { view } = this; const vecs = !(view instanceof ChunkedView) ? [this, ...others] - : [...view.chunks, ...others]; + : [...view.childVectors, ...others]; const offsets = ChunkedData.computeOffsets(vecs); const chunksLength = offsets[offsets.length - 1]; const chunkedData = new ChunkedData(this.type, chunksLength, vecs, 0, -1, offsets); @@ -377,7 +380,7 @@ export class DictionaryVector extends Vector[]; + const chunks = view.childVectors as DictionaryVector[]; // Assume the last chunk's dictionary data is the most up-to-date, // including data from DictionaryBatches that were marked as deltas this.dictionary = chunks[chunks.length - 1].dictionary; diff --git a/js/src/vector/chunked.ts b/js/src/vector/chunked.ts index 96317fc57350b..9f5141f2cd430 100644 --- a/js/src/vector/chunked.ts +++ b/js/src/vector/chunked.ts @@ -16,31 +16,37 @@ // under the License. import { ChunkedData } from '../data'; -import { View, Vector } from '../vector'; +import { View, Vector, NestedVector } from '../vector'; import { DataType, TypedArray, IterableArrayLike } from '../type'; export class ChunkedView implements View { - public chunks: Vector[]; - public offsets: Uint32Array; + public childVectors: Vector[]; + public childOffsets: Uint32Array; + protected _childColumns: Vector[]; constructor(data: ChunkedData) { - this.chunks = data.childVectors; - this.offsets = data.childOffsets; + this.childVectors = data.childVectors; + this.childOffsets = data.childOffsets; } public clone(data: ChunkedData): this { return new ChunkedView(data) as this; } public *[Symbol.iterator](): IterableIterator { - for (const vector of this.chunks) { + for (const vector of this.childVectors) { yield* vector; } } + public getChildAt(index: number) { + return (this._childColumns || (this._childColumns = []))[index] || ( + this._childColumns[index] = Vector.concat( + ...( this.childVectors as NestedVector[]).map((v) => v.getChildAt(index)))); + } public isValid(index: number): boolean { // binary search to find the child vector and value index offset (inlined for speed) - let offsets = this.offsets, pos = 0; + let offsets = this.childOffsets, pos = 0; let lhs = 0, mid = 0, rhs = offsets.length - 1; while (index < offsets[rhs] && index >= (pos = offsets[lhs])) { if (lhs + 1 === rhs) { - return this.chunks[lhs].isValid(index - pos); + return this.childVectors[lhs].isValid(index - pos); } mid = lhs + ((rhs - lhs) / 2) | 0; index >= offsets[mid] ? (lhs = mid) : (rhs = mid); @@ -49,11 +55,11 @@ export class ChunkedView implements View { } public get(index: number): T['TValue'] | null { // binary search to find the child vector and value index offset (inlined for speed) - let offsets = this.offsets, pos = 0; + let offsets = this.childOffsets, pos = 0; let lhs = 0, mid = 0, rhs = offsets.length - 1; while (index < offsets[rhs] && index >= (pos = offsets[lhs])) { if (lhs + 1 === rhs) { - return this.chunks[lhs].get(index - pos); + return this.childVectors[lhs].get(index - pos); } mid = lhs + ((rhs - lhs) / 2) | 0; index >= offsets[mid] ? (lhs = mid) : (rhs = mid); @@ -62,18 +68,18 @@ export class ChunkedView implements View { } public set(index: number, value: T['TValue'] | null): void { // binary search to find the child vector and value index offset (inlined for speed) - let offsets = this.offsets, pos = 0; + let offsets = this.childOffsets, pos = 0; let lhs = 0, mid = 0, rhs = offsets.length - 1; while (index < offsets[rhs] && index >= (pos = offsets[lhs])) { if (lhs + 1 === rhs) { - return this.chunks[lhs].set(index - pos, value); + return this.childVectors[lhs].set(index - pos, value); } mid = lhs + ((rhs - lhs) / 2) | 0; index >= offsets[mid] ? (lhs = mid) : (rhs = mid); } } public toArray(): IterableArrayLike { - const chunks = this.chunks; + const chunks = this.childVectors; const numChunks = chunks.length; if (numChunks === 1) { return chunks[0].toArray(); diff --git a/js/src/vector/nested.ts b/js/src/vector/nested.ts index 17e98294c835a..78e2a988e6d5d 100644 --- a/js/src/vector/nested.ts +++ b/js/src/vector/nested.ts @@ -16,23 +16,23 @@ // under the License. import { Data } from '../data'; +import { View, Vector } from '../vector'; import { IterableArrayLike } from '../type'; -import { View, Vector, createVector } from '../vector'; import { DataType, NestedType, DenseUnion, SparseUnion, Struct, Map_ } from '../type'; export abstract class NestedView implements View { public length: number; public numChildren: number; public childData: Data[]; - protected children: Vector[]; + protected _childColumns: Vector[]; constructor(data: Data, children?: Vector[]) { this.length = data.length; this.childData = data.childData; this.numChildren = data.childData.length; - this.children = children || new Array(this.numChildren); + this._childColumns = children || new Array(this.numChildren); } public clone(data: Data): this { - return new ( this.constructor)(data, this.children) as this; + return new ( this.constructor)(data, this._childColumns) as this; } public isValid(): boolean { return true; @@ -53,8 +53,8 @@ export abstract class NestedView implements View { protected abstract getNested(self: NestedView, index: number): T['TValue']; protected abstract setNested(self: NestedView, index: number, value: T['TValue']): void; public getChildAt(index: number) { - return this.children[index] || ( - this.children[index] = createVector(this.childData[index])); + return this._childColumns[index] || ( + this._childColumns[index] = Vector.create(this.childData[index])); } public *[Symbol.iterator](): IterableIterator { const get = this.getNested; @@ -120,7 +120,7 @@ export class DenseUnionView extends UnionView { export class StructView extends NestedView { protected getNested(self: StructView, index: number) { - return new RowView(self as any, self.children, index); + return new RowView(self as any, self._childColumns, index); } protected setNested(self: StructView, index: number, value: any): void { let idx = -1, len = self.numChildren; @@ -140,7 +140,7 @@ export class MapView extends NestedView { (xs[x.name] = i) && xs || xs, Object.create(null)); } protected getNested(self: MapView, index: number) { - return new MapRowView(self as any, self.children, index); + return new MapRowView(self as any, self._childColumns, index); } protected setNested(self: MapView, index: number, value: { [k: string]: any }): void { const typeIds = self.typeIds as any; @@ -160,7 +160,7 @@ export class RowView extends UnionView { this.length = data.numChildren; } public clone(data: Data & NestedView): this { - return new ( this.constructor)(data, this.children, this.rowIndex) as this; + return new ( this.constructor)(data, this._childColumns, this.rowIndex) as this; } protected getChildValue(self: RowView, index: number, _typeIds: any, _valueOffsets?: any): any | null { const child = self.getChildAt(index); diff --git a/js/test/integration/validate-tests.ts b/js/test/integration/validate-tests.ts index b9d3b1ba481c4..f81634279c943 100644 --- a/js/test/integration/validate-tests.ts +++ b/js/test/integration/validate-tests.ts @@ -132,8 +132,8 @@ function testReaderIntegration(jsonData: any, arrowBuffer: Uint8Array) { expect(jsonRecordBatch.length).toEqual(binaryRecordBatch.length); expect(jsonRecordBatch.numCols).toEqual(binaryRecordBatch.numCols); for (let i = -1, n = jsonRecordBatch.numCols; ++i < n;) { - (jsonRecordBatch.columns[i] as any).name = jsonRecordBatch.schema.fields[i].name; - (expect(jsonRecordBatch.columns[i]) as any).toEqualVector(binaryRecordBatch.columns[i]); + (jsonRecordBatch.getChildAt(i) as any).name = jsonRecordBatch.schema.fields[i].name; + (expect(jsonRecordBatch.getChildAt(i)) as any).toEqualVector(binaryRecordBatch.getChildAt(i)); } } }); @@ -147,8 +147,8 @@ function testTableFromBuffersIntegration(jsonData: any, arrowBuffer: Uint8Array) expect(jsonTable.length).toEqual(binaryTable.length); expect(jsonTable.numCols).toEqual(binaryTable.numCols); for (let i = -1, n = jsonTable.numCols; ++i < n;) { - (jsonTable.columns[i] as any).name = jsonTable.schema.fields[i].name; - (expect(jsonTable.columns[i]) as any).toEqualVector(binaryTable.columns[i]); + (jsonTable.getColumnAt(i) as any).name = jsonTable.schema.fields[i].name; + (expect(jsonTable.getColumnAt(i)) as any).toEqualVector(binaryTable.getColumnAt(i)); } }); } diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts index ff67c04f85e2a..cdddcd58d3c16 100644 --- a/js/test/unit/table-tests.ts +++ b/js/test/unit/table-tests.ts @@ -143,7 +143,8 @@ describe(`Table`, () => { test(`scans expected values`, () => { let expected_idx = 0; table.scan((idx, batch) => { - expect(batch.columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]); + const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)); + expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]); }); }); test(`count() returns the correct length`, () => { @@ -348,7 +349,8 @@ describe(`Table`, () => { test(`scans expected values`, () => { let expected_idx = 0; table.scan((idx, batch) => { - expect(batch.columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]); + const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)); + expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]); }); }); test(`count() returns the correct length`, () => {