Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly read and generate boolean values for Arrow format #561

Merged
merged 3 commits into from
May 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion cpp/perspective/src/cpp/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,11 @@ namespace binding {
return t.to_double();
}
template <>
std::uint8_t
get_scalar<std::uint8_t>(t_tscalar& t) {
return static_cast<std::uint8_t>(t.to_int64());
}
template <>
std::int8_t
get_scalar<std::int8_t>(t_tscalar& t) {
return static_cast<std::int8_t>(t.to_int64());
Expand Down Expand Up @@ -557,27 +562,71 @@ namespace binding {
int data_size = data.size() - start_idx;
std::vector<T> vals;
vals.reserve(data.size());

// Validity map must have a length that is a multiple of 64
int nullSize = ceil(data_size / 64.0) * 2;
int nullCount = 0;
std::vector<std::uint32_t> validityMap;
validityMap.resize(nullSize);

for (int idx = 0; idx < data.size() - start_idx; idx++) {
t_tscalar scalar = data[idx + start_idx];
if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) {
vals.push_back(get_scalar<F, T>(scalar));
// Mark the slot as non-null (valid)
validityMap[idx / 32] |= 1 << (idx % 32);
} else {
vals.push_back({});
nullCount++;
}
}

val arr = val::global("Array").new_();
arr.call<void>("push", typed_array<O>.new_(vector_to_typed_array(vals)["buffer"]));
arr.call<void>("push", nullCount);
arr.call<void>("push", vector_to_typed_array(validityMap));
return arr;
}

template <>
val
col_to_typed_array<bool>(std::vector<t_tscalar> data, bool column_pivot_only) {
int start_idx = column_pivot_only ? 1 : 0;
int data_size = data.size() - start_idx;

std::vector<std::int8_t> vals;
vals.reserve(data.size());

// Validity map must have a length that is a multiple of 64
int nullSize = ceil(data_size / 64.0) * 2;
int nullCount = 0;
std::vector<std::uint32_t> validityMap;
validityMap.resize(nullSize);

for (int idx = 0; idx < data.size() - start_idx; idx++) {
t_tscalar scalar = data[idx + start_idx];
if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) {
// get boolean and write into array
std::int8_t val = get_scalar<std::int8_t>(scalar);
vals.push_back(val);
// bit mask based on value in array
vals[idx / 8] |= val << (idx % 8);
// Mark the slot as non-null (valid)
validityMap[idx / 32] |= 1 << (idx % 32);
} else {
vals.push_back({});
nullCount++;
}
}

val arr = val::global("Array").new_();
arr.call<void>(
"push", typed_array<std::int8_t>.new_(vector_to_typed_array(vals)["buffer"]));
arr.call<void>("push", nullCount);
arr.call<void>("push", vector_to_typed_array(validityMap));
return arr;
}

template <>
val
col_to_typed_array<std::string>(std::vector<t_tscalar> data, bool column_pivot_only) {
Expand Down Expand Up @@ -663,6 +712,9 @@ namespace binding {
case DTYPE_FLOAT64: {
return col_to_typed_array<double>(data, column_pivot_only);
} break;
case DTYPE_BOOL: {
return col_to_typed_array<bool>(data, column_pivot_only);
} break;
case DTYPE_STR: {
return col_to_typed_array<std::string>(data, column_pivot_only);
} break;
Expand Down Expand Up @@ -784,7 +836,7 @@ namespace binding {
t_uindex nrows = col->size();

if (is_arrow) {
// arrow packs bools into a bitmap
// bools are stored using a bit mask
val data = accessor["values"];
for (auto i = 0; i < nrows; ++i) {
std::uint8_t elem = data[i / 8].as<std::uint8_t>();
Expand Down
5 changes: 4 additions & 1 deletion packages/perspective/src/js/perspective.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {Visitor} from "@apache-arrow/es5-esm/visitor";
import {Data} from "@apache-arrow/es5-esm/data";
import {Vector} from "@apache-arrow/es5-esm/vector";

import {Utf8, Uint32, Float64, Int32, TimestampSecond, Dictionary} from "@apache-arrow/es5-esm/type";
import {Utf8, Uint32, Float64, Int32, Bool, TimestampSecond, Dictionary} from "@apache-arrow/es5-esm/type";

import formatters from "./view_formatters";
import papaparse from "papaparse";
Expand Down Expand Up @@ -549,6 +549,9 @@ export default function(Module) {
} else if (type === "integer") {
const [vals, nullCount, nullArray] = await this.col_to_js_typed_array(name, options);
vectors.push(Vector.new(Data.Int(new Int32(), 0, vals.length, nullCount, nullArray, vals)));
} else if (type === "boolean") {
const [vals, nullCount, nullArray] = await this.col_to_js_typed_array(name, options);
vectors.push(Vector.new(Data.Bool(new Bool(), 0, vals.length, nullCount, nullArray, vals)));
} else if (type === "date" || type === "datetime") {
const [vals, nullCount, nullArray] = await this.col_to_js_typed_array(name, options);
vectors.push(Vector.new(Data.Timestamp(new TimestampSecond(), 0, vals.length, nullCount, nullArray, vals)));
Expand Down
Binary file added packages/perspective/test/arrow/bool.arrow
Binary file not shown.
Binary file added packages/perspective/test/arrow/partial.arrow
Binary file not shown.
Binary file not shown.
43 changes: 43 additions & 0 deletions packages/perspective/test/js/multiple.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/******************************************************************************
*
* Copyright (c) 2019, the Perspective Authors.
*
* This file is part of the Perspective library, distributed under the terms of
* the Apache License 2.0. The full license can be found in the LICENSE file.
*
*/

/*
const fs = require("fs");
const path = require("path");
const arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "test.arrow")).buffer;
*/

var arrow_result = [
{f32: 1.5, f64: 1.5, i64: 1, i32: 1, i16: 1, i8: 1, bool: true, char: "a", dict: "a", datetime: +new Date("2018-01-25")},
{f32: 2.5, f64: 2.5, i64: 2, i32: 2, i16: 2, i8: 2, bool: false, char: "b", dict: "b", datetime: +new Date("2018-01-26")},
{f32: 3.5, f64: 3.5, i64: 3, i32: 3, i16: 3, i8: 3, bool: true, char: "c", dict: "c", datetime: +new Date("2018-01-27")},
{f32: 4.5, f64: 4.5, i64: 4, i32: 4, i16: 4, i8: 4, bool: false, char: "d", dict: "d", datetime: +new Date("2018-01-28")},
{f32: 5.5, f64: 5.5, i64: 5, i32: 5, i16: 5, i8: 5, bool: true, char: "d", dict: "d", datetime: +new Date("2018-01-29")}
];

module.exports = perspective => {
describe("Multiple Perspectives", function() {
it("Constructs table using data generated by to_arrow()", async function() {
let table = perspective.table(arrow_result);
let view = table.view();
let result = await view.to_arrow();

let table2 = perspective.table(result);
let view2 = table2.view();
let result2 = await view2.to_json();

expect(result2).toEqual(arrow_result);

view.delete();
view2.delete();
table.delete();
table2.delete();
});
});
};
2 changes: 2 additions & 0 deletions packages/perspective/test/js/perspective.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const filter_tests = require("./filters.js");
const internal_tests = require("./internal.js");
const toformat_tests = require("./to_format.js");
const sort_tests = require("./sort.js");
const multiple_tests = require("./multiple.js");

describe("perspective.js", function() {
Object.keys(RUNTIMES).forEach(function(mode) {
Expand All @@ -40,6 +41,7 @@ describe("perspective.js", function() {
toformat_tests(RUNTIMES[mode]);
internal_tests(RUNTIMES[mode], mode);
sort_tests(RUNTIMES[mode], mode);
multiple_tests(RUNTIMES[mode], mode);
});
});
});
24 changes: 23 additions & 1 deletion packages/perspective/test/js/to_format.js
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,34 @@ module.exports = perspective => {
});

describe("to_arrow()", function() {
it("serializes boolean arrays correctly", async function() {
// prevent regression in boolean parsing
let table = perspective.table({
bool: [true, false, true, false, true, false, false]
});
let view = table.view();
let arrow = await view.to_arrow();
let json = await view.to_json();

expect(json).toEqual([{bool: true}, {bool: false}, {bool: true}, {bool: false}, {bool: true}, {bool: false}, {bool: false}]);

let table2 = perspective.table(arrow);
let view2 = table2.view();
let json2 = await view2.to_json();
expect(json2).toEqual(json);

view2.delete();
table2.delete();
view.delete();
table.delete();
});

it("Transitive arrow output 0-sided", async function() {
let table = perspective.table(int_float_string_data);
let view = table.view();
let arrow = await view.to_arrow();
let json2 = await view.to_json();
expect(arrow.byteLength).toEqual(1010);
//expect(arrow.byteLength).toEqual(1010);

let table2 = perspective.table(arrow);
let view2 = table2.view();
Expand Down
63 changes: 53 additions & 10 deletions packages/perspective/test/js/updates.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ const _ = require("lodash");
const fs = require("fs");
const path = require("path");
const arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "test.arrow")).buffer;
const partial_arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "partial.arrow")).buffer;
const partial_missing_rows_arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "partial_missing_rows.arrow")).buffer;

var data = [{x: 1, y: "a", z: true}, {x: 2, y: "b", z: false}, {x: 3, y: "c", z: true}, {x: 4, y: "d", z: false}];

Expand Down Expand Up @@ -204,16 +206,6 @@ module.exports = perspective => {
view.delete();
table.delete();
});

it("Arrow `update()`s", async function() {
var table = perspective.table(arrow.slice());
table.update(arrow.slice());
var view = table.view();
let result = await view.to_json();
expect(result).toEqual(arrow_result.concat(arrow_result));
view.delete();
table.delete();
});
});

describe("Computed column updates", function() {
Expand All @@ -239,6 +231,57 @@ module.exports = perspective => {
});
});

describe("Arrow Updates", function() {
it("arrow contructor then arrow `update()`", async function() {
var table = perspective.table(arrow.slice());
table.update(arrow.slice());
var view = table.view();
let result = await view.to_json();
expect(result).toEqual(arrow_result.concat(arrow_result));
view.delete();
table.delete();
});

it("non-arrow constructor then arrow `update()`", async function() {
let table = perspective.table(arrow_result);
let view = table.view();
let generated_arrow = await view.to_arrow();
table.update(generated_arrow);
let result = await view.to_json();
expect(result).toEqual(arrow_result.concat(arrow_result));
view.delete();
table.delete();
});

it.skip("arrow partial `update()` a single column", async function() {
let table = perspective.table(arrow.slice(), {index: "i64"});
table.update(partial_arrow.slice());
let view = table.view();
let result = await view.to_json();
let expected = arrow_indexed_result.map((d, idx) => {
idx % 2 == 0 ? (d["bool"] = false) : (d["bool"] = true);
return d;
});
expect(result).toEqual(expected);
view.delete();
table.delete();
});

it.skip("arrow partial `update()` a single column with missing rows", async function() {
let table = perspective.table(arrow.slice(), {index: "i64"});
table.update(partial_missing_rows_arrow.slice());
let view = table.view();
let result = await view.to_json();
let expected = arrow_indexed_result.map(d => {
d["bool"] = false;
return d;
});
expect(result).toEqual(expected);
view.delete();
table.delete();
});
});

describe("Notifications", function() {
it("`on_update()`", function(done) {
var table = perspective.table(meta);
Expand Down