From bac45f10152103d52e8f2493c92470978e56a366 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Wed, 9 Jan 2019 18:34:11 -0500 Subject: [PATCH 01/11] more python binding --- .../perspective/include/perspective/binding.h | 25 ++++++++++--- python/perspective/src/binding.cpp | 6 +-- python/perspective/table/__init__.py | 2 +- python/perspective/tests/test_column.py | 37 +++++++++++++++++++ python/perspective/tests/test_gnode.py | 33 +++++++++++++++++ python/perspective/tests/test_schema.py | 31 ++++++++++++++++ 6 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 python/perspective/tests/test_column.py create mode 100644 python/perspective/tests/test_gnode.py create mode 100644 python/perspective/tests/test_schema.py diff --git a/python/perspective/include/perspective/binding.h b/python/perspective/include/perspective/binding.h index b39982ed2b..52bbdea395 100644 --- a/python/perspective/include/perspective/binding.h +++ b/python/perspective/include/perspective/binding.h @@ -13,15 +13,14 @@ #include #include #include +#include +#include #ifndef __PSP_BINDING_HPP__ #define __PSP_BINDING_HPP__ -void test(const char* name); - - perspective::t_schema* t_schema_init(py::list& columns, py::list& types); template @@ -49,8 +48,6 @@ BOOST_PYTHON_MODULE(libbinding) np::initialize(true); _import_array(); - py::def("test", test); - py::enum_("t_dtype") .value("NONE", perspective::DTYPE_NONE) .value("INT64", perspective::DTYPE_INT64) @@ -101,12 +98,27 @@ BOOST_PYTHON_MODULE(libbinding) // when returning const, need return_value_policy .def("columns", &perspective::t_schema::columns, py::return_value_policy()) // .def("types", &perspective::t_schema::types, return_value_policy()) + .def("str", &perspective::t_schema::str) ; - //TODO py::class_("t_column", py::init<>()) + .def("pprint", &perspective::t_column::pprint) + .def("size", &perspective::t_column::size) + .def("get_dtype", &perspective::t_column::get_dtype) + ; + + py::class_("t_gnode", + py::init()) + 
.def("init", &perspective::t_gnode::init) + .def("pprint", &perspective::t_gnode::pprint) + // when returning const, need return_value_policy + // .def("get_table", static_cast(&perspective::t_gnode::get_table), py::return_value_policy()) + // when multiple overloading methods, need to static_cast to specify + // .def("get_table", static_cast(&perspective::t_gnode::get_table)) + .def("get_tblschema", &perspective::t_gnode::get_tblschema) + .def("get_pivots", &perspective::t_gnode::get_pivots) ; // need boost:noncopyable for PSP_NON_COPYABLE @@ -126,6 +138,7 @@ BOOST_PYTHON_MODULE(libbinding) // when returning const, need return_value_policy .def("name", &perspective::t_table::name, py::return_value_policy()) .def("get_schema", &perspective::t_table::get_schema, py::return_value_policy()) + .def("make_column", &perspective::t_table::make_column) // when multiple overloading methods, need to static_cast to specify .def("num_rows", static_cast (&perspective::t_table::num_rows)) diff --git a/python/perspective/src/binding.cpp b/python/perspective/src/binding.cpp index a5cbadda4d..537d503e80 100644 --- a/python/perspective/src/binding.cpp +++ b/python/perspective/src/binding.cpp @@ -10,10 +10,6 @@ #include #include -void test(const char* name) { - std::cout << "Hello " << name << "!" 
<< std::endl; -} - perspective::t_schema* t_schema_init(py::list& columns, py::list& types) { std::vector cols; @@ -30,7 +26,7 @@ perspective::t_schema* t_schema_init(py::list& columns, py::list& types) return new perspective::t_schema(cols, ts); } -template +template void _fill_col(std::vector& dcol, std::shared_ptr col) { perspective::t_uindex nrows = col->size(); diff --git a/python/perspective/table/__init__.py b/python/perspective/table/__init__.py index aecc024c46..998940ccf6 100644 --- a/python/perspective/table/__init__.py +++ b/python/perspective/table/__init__.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from .libbinding import t_schema, t_dtype, t_table +from .libbinding import t_schema, t_dtype, t_table, t_column, t_gnode class Perspective(object): diff --git a/python/perspective/tests/test_column.py b/python/perspective/tests/test_column.py new file mode 100644 index 0000000000..6dcb7d592a --- /dev/null +++ b/python/perspective/tests/test_column.py @@ -0,0 +1,37 @@ +# ***************************************************************************** +# +# Copyright (c) 2019, the Perspective Authors. +# +# This file is part of the Perspective library, distributed under the terms of +# the Apache License 2.0. The full license can be found in the LICENSE file. 
+# + +import os +import os.path +import numpy as np +import pandas as pd +from perspective.table import Perspective, t_column, t_table, t_schema + + +class TestColumn(object): + def setUp(self): + pass + + def test_column(self): + col = t_column() + col.pprint() + + def test_make_column(self): + column_names = ['Col1', 'Col2', 'Col3', 'Col4', 'Col5'] + types = [int, str, float, np.int64, np.float64] + + dtypes = [] + for name, _type in zip(column_names, types): + dtypes.append(Perspective._type_to_dtype(_type)) + + assert len(column_names) == len(dtypes) + schema = t_schema(column_names, dtypes) + tt = t_table(schema) + tt.init() + + # col = tt.make_column('Test', Perspective._type_to_dtype(int), True) \ No newline at end of file diff --git a/python/perspective/tests/test_gnode.py b/python/perspective/tests/test_gnode.py new file mode 100644 index 0000000000..5414ff73cd --- /dev/null +++ b/python/perspective/tests/test_gnode.py @@ -0,0 +1,33 @@ +# ***************************************************************************** +# +# Copyright (c) 2019, the Perspective Authors. +# +# This file is part of the Perspective library, distributed under the terms of +# the Apache License 2.0. The full license can be found in the LICENSE file. 
+# + +import os +import os.path +import numpy as np +import pandas as pd +from perspective.table import Perspective +from perspective.table.libbinding import t_schema, t_gnode + + +class TestGnode(object): + def setUp(self): + pass + + def test_gnode(self): + column_names = ['Col1', 'Col2', 'Col3', 'Col4', 'Col5'] + types = [int, str, float, np.int64, np.float64] + + dtypes = [] + for name, _type in zip(column_names, types): + dtypes.append(Perspective._type_to_dtype(_type)) + + assert len(column_names) == len(dtypes) + schema = t_schema(column_names, dtypes) + gnode = t_gnode(schema, schema) + # gnode.pprint() + diff --git a/python/perspective/tests/test_schema.py b/python/perspective/tests/test_schema.py new file mode 100644 index 0000000000..16bb45c71b --- /dev/null +++ b/python/perspective/tests/test_schema.py @@ -0,0 +1,31 @@ +# ***************************************************************************** +# +# Copyright (c) 2019, the Perspective Authors. +# +# This file is part of the Perspective library, distributed under the terms of +# the Apache License 2.0. The full license can be found in the LICENSE file. 
+# + +import os +import os.path +import numpy as np +import pandas as pd +from perspective.table import Perspective +from perspective.table.libbinding import t_schema + + +class TestSchema(object): + def setUp(self): + pass + + def test_schema(self): + column_names = ['Col1', 'Col2', 'Col3', 'Col4', 'Col5'] + types = [int, str, float, np.int64, np.float64] + + dtypes = [] + for name, _type in zip(column_names, types): + dtypes.append(Perspective._type_to_dtype(_type)) + + assert len(column_names) == len(dtypes) + schema = t_schema(column_names, dtypes) + print(schema.str()) \ No newline at end of file From 91fa42535cba87fe12bf832201ace6178f3e2417 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Fri, 18 Jan 2019 08:28:42 -1000 Subject: [PATCH 02/11] starting work on binding merging --- CMakeLists.txt | 9 +- .../perspective/{binding.h => python.h} | 29 +- .../src/{binding.cpp => python.cpp} | 7 +- src/cpp/binding.cpp | 16 + src/cpp/emscripten.cpp | 1750 +++++++++++++++++ src/cpp/main.cpp | 1729 +--------------- src/include/perspective/binding.h | 9 + src/include/perspective/emscripten.h | 338 ++++ 8 files changed, 2133 insertions(+), 1754 deletions(-) rename python/perspective/include/perspective/{binding.h => python.h} (83%) rename python/perspective/src/{binding.cpp => python.cpp} (96%) create mode 100644 src/cpp/binding.cpp create mode 100644 src/cpp/emscripten.cpp create mode 100644 src/include/perspective/binding.h create mode 100644 src/include/perspective/emscripten.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f062a2008c..353f6aef9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,6 +194,7 @@ set (SOURCE_FILES src/cpp/base_impl_linux.cpp src/cpp/base_impl_osx.cpp src/cpp/base_impl_win.cpp + src/cpp/binding.cpp src/cpp/build_filter.cpp #src/cpp/calc_agg_dtype.cpp src/cpp/column.cpp @@ -265,13 +266,13 @@ if (PSP_WASM_BUILD) add_library(psp ${SOURCE_FILES}) set_target_properties(psp PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") - 
add_executable(perspective.async src/cpp/main.cpp) + add_executable(perspective.async src/cpp/emscripten.cpp) target_link_libraries(perspective.async psp "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") set_target_properties(perspective.async PROPERTIES OUTPUT_NAME "psp.async") - add_executable(perspective.sync src/cpp/main.cpp) + add_executable(perspective.sync src/cpp/emscripten.cpp) target_link_libraries(perspective.sync psp "${SYNC_MODE_FLAGS}") set_target_properties(perspective.sync PROPERTIES COMPILE_FLAGS "${SYNC_MODE_FLAGS}") set_target_properties(perspective.sync PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") @@ -279,7 +280,7 @@ if (PSP_WASM_BUILD) add_dependencies(perspective.sync perspective.async) if (NOT DEFINED ENV{PSP_DEBUG}) - add_executable(perspective.asm src/cpp/main.cpp) + add_executable(perspective.asm src/cpp/emscripten.cpp) target_link_libraries(perspective.asm psp "${ASMJS_MODE_FLAGS}") set_target_properties(perspective.asm PROPERTIES COMPILE_FLAGS "${ASMJS_MODE_FLAGS}") set_target_properties(perspective.asm PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") @@ -297,7 +298,7 @@ else() target_link_libraries(psp ${Boost_LIBRARIES} ${PYTHON_LIBRARIES}) set(CMAKE_SHARED_LIBRARY_SUFFIX .so) - add_library(binding SHARED ${CMAKE_SOURCE_DIR}/python/perspective/src/binding.cpp) + add_library(binding SHARED ${CMAKE_SOURCE_DIR}/python/perspective/src/python.cpp) target_link_libraries(binding psp) target_link_libraries(binding tbb) target_link_libraries(binding ${BOOST_PYTHON}) diff --git a/python/perspective/include/perspective/binding.h b/python/perspective/include/perspective/python.h similarity index 83% rename from python/perspective/include/perspective/binding.h rename to python/perspective/include/perspective/python.h index 52bbdea395..2b88580d59 100644 --- 
a/python/perspective/include/perspective/binding.h +++ b/python/perspective/include/perspective/python.h @@ -13,12 +13,13 @@ #include #include #include -#include -#include -#ifndef __PSP_BINDING_HPP__ -#define __PSP_BINDING_HPP__ +#ifndef __PSP_PYTHON_HPP__ +#define __PSP_PYTHON_HPP__ + + +void test(const char* name); perspective::t_schema* t_schema_init(py::list& columns, py::list& types); @@ -48,6 +49,8 @@ BOOST_PYTHON_MODULE(libbinding) np::initialize(true); _import_array(); + py::def("test", test); + py::enum_("t_dtype") .value("NONE", perspective::DTYPE_NONE) .value("INT64", perspective::DTYPE_INT64) @@ -98,27 +101,12 @@ BOOST_PYTHON_MODULE(libbinding) // when returning const, need return_value_policy .def("columns", &perspective::t_schema::columns, py::return_value_policy()) // .def("types", &perspective::t_schema::types, return_value_policy()) - .def("str", &perspective::t_schema::str) ; + //TODO py::class_("t_column", py::init<>()) - .def("pprint", &perspective::t_column::pprint) - .def("size", &perspective::t_column::size) - .def("get_dtype", &perspective::t_column::get_dtype) - ; - - py::class_("t_gnode", - py::init()) - .def("init", &perspective::t_gnode::init) - .def("pprint", &perspective::t_gnode::pprint) - // when returning const, need return_value_policy - // .def("get_table", static_cast(&perspective::t_gnode::get_table), py::return_value_policy()) - // when multiple overloading methods, need to static_cast to specify - // .def("get_table", static_cast(&perspective::t_gnode::get_table)) - .def("get_tblschema", &perspective::t_gnode::get_tblschema) - .def("get_pivots", &perspective::t_gnode::get_pivots) ; // need boost:noncopyable for PSP_NON_COPYABLE @@ -138,7 +126,6 @@ BOOST_PYTHON_MODULE(libbinding) // when returning const, need return_value_policy .def("name", &perspective::t_table::name, py::return_value_policy()) .def("get_schema", &perspective::t_table::get_schema, py::return_value_policy()) - .def("make_column", 
&perspective::t_table::make_column) // when multiple overloading methods, need to static_cast to specify .def("num_rows", static_cast (&perspective::t_table::num_rows)) diff --git a/python/perspective/src/binding.cpp b/python/perspective/src/python.cpp similarity index 96% rename from python/perspective/src/binding.cpp rename to python/perspective/src/python.cpp index 537d503e80..66b9dc5121 100644 --- a/python/perspective/src/binding.cpp +++ b/python/perspective/src/python.cpp @@ -8,8 +8,13 @@ */ #ifdef PSP_ENABLE_PYTHON #include +#include #include +void test(const char* name) { + std::cout << "Hello " << name << "!" << std::endl; +} + perspective::t_schema* t_schema_init(py::list& columns, py::list& types) { std::vector cols; @@ -26,7 +31,7 @@ perspective::t_schema* t_schema_init(py::list& columns, py::list& types) return new perspective::t_schema(cols, ts); } -template +template void _fill_col(std::vector& dcol, std::shared_ptr col) { perspective::t_uindex nrows = col->size(); diff --git a/src/cpp/binding.cpp b/src/cpp/binding.cpp new file mode 100644 index 0000000000..984bb0e0c3 --- /dev/null +++ b/src/cpp/binding.cpp @@ -0,0 +1,16 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + */ +#include +#include + +namespace perspective { +namespace binding { + +} +} diff --git a/src/cpp/emscripten.cpp b/src/cpp/emscripten.cpp new file mode 100644 index 0000000000..7b8985db33 --- /dev/null +++ b/src/cpp/emscripten.cpp @@ -0,0 +1,1750 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. 
The full license can be found in the LICENSE file. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace emscripten; +using namespace perspective; + +namespace perspective { +namespace binding { + +/****************************************************************************** + * + * Data Loading + */ +template +std::vector _get_sort(T j_sortby) { + std::vector svec{}; + std::vector sortbys = vecFromJSArray(j_sortby); + for (auto idx = 0; idx < sortbys.size(); ++idx) { + std::vector sortby = vecFromJSArray(sortbys[idx]); + t_sorttype sorttype; + switch (sortby[1]) { + case 0: + sorttype = SORTTYPE_ASCENDING; + break; + case 1: + sorttype = SORTTYPE_DESCENDING; + break; + case 2: + sorttype = SORTTYPE_NONE; + break; + case 3: + sorttype = SORTTYPE_ASCENDING_ABS; + break; + case 4: + sorttype = SORTTYPE_DESCENDING_ABS; + break; + } + svec.push_back(t_sortspec(sortby[0], sorttype)); + } + return svec; +} + + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +std::vector +_get_fterms(t_schema schema, val j_filters) { + std::vector fvec{}; + std::vector filters = vecFromJSArray(j_filters); + for (auto fidx = 0; fidx < filters.size(); ++fidx) { + std::vector filter = vecFromJSArray(filters[fidx]); + std::string coln = filter[0].as(); + t_filter_op comp = filter[1].as(); + + switch (comp) { + case FILTER_OP_NOT_IN: + case FILTER_OP_IN: { + std::vector terms{}; + std::vector j_terms = vecFromJSArray(filter[2]); + for (auto jidx = 0; jidx < j_terms.size(); ++jidx) { + terms.push_back(mktscalar(get_interned_cstr(j_terms[jidx].c_str()))); + } + fvec.push_back(t_fterm(coln, comp, mktscalar(0), terms)); + } break; + default: { + t_tscalar term; + switch (schema.get_dtype(coln)) { + case DTYPE_INT32: + term = mktscalar(filter[2].as()); + break; + case DTYPE_INT64: + case DTYPE_FLOAT64: + term = mktscalar(filter[2].as()); + break; + case DTYPE_BOOL: + term = 
mktscalar(filter[2].as()); + break; + case DTYPE_DATE: + term = mktscalar(t_date(filter[2].as())); + break; + case DTYPE_TIME: + term = mktscalar(t_time(static_cast( + filter[2].call("getTime").as()))); + break; + default: { + term + = mktscalar(get_interned_cstr(filter[2].as().c_str())); + } + } + + fvec.push_back(t_fterm(coln, comp, term, std::vector())); + } + } + } + return fvec; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +std::vector +_get_aggspecs(val j_aggs) { + std::vector aggs = vecFromJSArray(j_aggs); + std::vector aggspecs; + for (auto idx = 0; idx < aggs.size(); ++idx) { + std::vector agg_row = vecFromJSArray(aggs[idx]); + std::string name = agg_row[0].as(); + t_aggtype aggtype = agg_row[1].as(); + + std::vector dependencies; + std::vector deps = vecFromJSArray(agg_row[2]); + for (auto didx = 0; didx < deps.size(); ++didx) { + if (deps[didx].isUndefined()) { + continue; + } + std::string dep = deps[didx].as(); + dependencies.push_back(t_dep(dep, DEPTYPE_COLUMN)); + } + if (aggtype == AGGTYPE_FIRST || aggtype == AGGTYPE_LAST) { + if (dependencies.size() == 1) { + dependencies.push_back(t_dep("psp_pkey", DEPTYPE_COLUMN)); + } + aggspecs.push_back( + t_aggspec(name, name, aggtype, dependencies, SORTTYPE_ASCENDING)); + } else { + aggspecs.push_back(t_aggspec(name, aggtype, dependencies)); + } + } + return aggspecs; +} + +// Date parsing +t_date +jsdate_to_t_date(val date) { + return t_date(date.call("getFullYear").as(), + date.call("getMonth").as(), + date.call("getDate").as()); +} + +val +t_date_to_jsdate(t_date date) { + val jsdate = val::global("Date").new_(); + jsdate.call("setYear", date.year()); + jsdate.call("setMonth", date.month()); + jsdate.call("setDate", date.day()); + jsdate.call("setHours", 0); + jsdate.call("setMinutes", 0); + jsdate.call("setSeconds", 0); + jsdate.call("setMilliseconds", 0); + return jsdate; +} + +/** + * Converts a scalar value to its JS representation. 
+ * + * Params + * ------ + * t_tscalar scalar + * + * Returns + * ------- + * val + */ +template<> +val scalar_to_val(const t_tscalar scalar) { + if (!scalar.is_valid()) { + return val::null(); + } + switch (scalar.get_dtype()) { + case DTYPE_BOOL: { + if (scalar) { + return val(true); + } else { + return val(false); + } + } + case DTYPE_TIME: + case DTYPE_FLOAT64: + case DTYPE_FLOAT32: { + return val(scalar.to_double()); + } + case DTYPE_DATE: { + return t_date_to_jsdate(scalar.get()).call("getTime"); + } + case DTYPE_UINT8: + case DTYPE_UINT16: + case DTYPE_UINT32: + case DTYPE_INT8: + case DTYPE_INT16: + case DTYPE_INT32: { + return val(static_cast(scalar.to_int64())); + } + case DTYPE_UINT64: + case DTYPE_INT64: { + // This could potentially lose precision + return val(static_cast(scalar.to_int64())); + } + case DTYPE_NONE: { + return val::null(); + } + case DTYPE_STR: + default: { + std::wstring_convert converter("", L""); + return val(converter.from_bytes(scalar.to_string())); + } + } +} + +template <> +val scalar_vec_to_val(const std::vector& scalars, std::uint32_t idx) { + return scalar_to_val(scalars[idx]); +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ + +namespace arrow { + +void +vecFromTypedArray( + const val& typedArray, void* data, std::int32_t length, const char* destType = nullptr) { + val memory = val::module_property("buffer"); + if (destType == nullptr) { + val memoryView = typedArray["constructor"].new_( + memory, reinterpret_cast(data), length); + memoryView.call("set", typedArray.call("slice", 0, length)); + } else { + val memoryView = val::global(destType).new_( + memory, reinterpret_cast(data), length); + memoryView.call("set", typedArray.call("slice", 0, length)); + } +} + +void +fill_col_valid(val dcol, std::shared_ptr col) { + // dcol should be the Uint8Array containing the null bitmap + t_uindex nrows = col->size(); + + // arrow packs bools into a bitmap + for (auto i = 0; i < nrows; ++i) { + 
std::uint8_t elem = dcol[i / 8].as(); + bool v = elem & (1 << (i % 8)); + col->set_valid(i, v); + } +} + +void +fill_col_dict(val dictvec, std::shared_ptr col) { + // ptaylor: This assumes the dictionary is either a Binary or Utf8 Vector. Should it support + // other Vector types? + val vdata = dictvec["values"]; + std::int32_t vsize = vdata["length"].as(); + std::vector data; + data.reserve(vsize); + data.resize(vsize); + vecFromTypedArray(vdata, data.data(), vsize); + + val voffsets = dictvec["valueOffsets"]; + std::int32_t osize = voffsets["length"].as(); + std::vector offsets; + offsets.reserve(osize); + offsets.resize(osize); + vecFromTypedArray(voffsets, offsets.data(), osize); + + // Get number of dictionary entries + std::uint32_t dsize = dictvec["length"].as(); + + t_vocab* vocab = col->_get_vocab(); + std::string elem; + + for (std::uint32_t i = 0; i < dsize; ++i) { + std::int32_t bidx = offsets[i]; + std::size_t es = offsets[i + 1] - bidx; + elem.assign(reinterpret_cast(data.data()) + bidx, es); + t_uindex idx = vocab->get_interned(elem); + // Make sure there are no duplicates in the arrow dictionary + assert(idx == i); + } +} +} // namespace arrow + + +namespace js_typed_array { + val ArrayBuffer = val::global("ArrayBuffer"); + val Int8Array = val::global("Int8Array"); + val Int16Array = val::global("Int16Array"); + val Int32Array = val::global("Int32Array"); + val Float32Array = val::global("Float32Array"); + val Float64Array = val::global("Float64Array"); +} // namespace js_typed_array + + +// Given a column index, serialize data to TypedArray +template +val col_to_js_typed_array(T ctx, t_index idx) { + std::vector data = ctx->get_data(0, ctx->get_row_count(), idx, idx + 1); + auto dtype = ctx->get_column_dtype(idx); + int data_size = data.size(); + val constructor = val::undefined(); + val sentinel = val::undefined(); + + switch (dtype) { + case DTYPE_INT8: { + data_size *= sizeof(std::int8_t); + sentinel = val(std::numeric_limits::lowest()); + 
constructor = js_typed_array::Int8Array; + } break; + case DTYPE_INT16: { + data_size *= sizeof(std::int16_t); + sentinel = val(std::numeric_limits::lowest()); + constructor = js_typed_array::Int16Array; + } break; + case DTYPE_INT32: + case DTYPE_INT64: { + // scalar_to_val converts int64 into int32 + data_size *= sizeof(std::int32_t); + sentinel = val(std::numeric_limits::lowest()); + constructor = js_typed_array::Int32Array; + } break; + case DTYPE_FLOAT32: { + data_size *= sizeof(float); + sentinel = val(std::numeric_limits::lowest()); + constructor = js_typed_array::Float32Array; + } break; + case DTYPE_TIME: + case DTYPE_FLOAT64: { + sentinel = val(std::numeric_limits::lowest()); + data_size *= sizeof(double); + constructor = js_typed_array::Float64Array; + } break; + default: + return constructor; + } + + val buffer = js_typed_array::ArrayBuffer.new_(data_size); + val arr = constructor.new_(buffer); + + for (int idx = 0; idx < data.size(); idx++) { + t_tscalar scalar = data[idx]; + if (scalar.get_dtype() == DTYPE_NONE) { + arr.call("fill", sentinel, idx, idx + 1); + } else { + arr.call("fill", scalar_to_val(scalar), idx, idx + 1); + } + } + + return arr; +} + +void +_fill_col_numeric(val accessor, t_table& tbl, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, + bool is_arrow) { + t_uindex nrows = col->size(); + + if (is_arrow) { + val data = accessor["values"]; + + switch (type) { + case DTYPE_INT8: { + arrow::vecFromTypedArray(data, col->get_nth(0), nrows); + } break; + case DTYPE_INT16: { + arrow::vecFromTypedArray(data, col->get_nth(0), nrows); + } break; + case DTYPE_INT32: { + arrow::vecFromTypedArray(data, col->get_nth(0), nrows); + } break; + case DTYPE_FLOAT32: { + arrow::vecFromTypedArray(data, col->get_nth(0), nrows); + } break; + case DTYPE_FLOAT64: { + arrow::vecFromTypedArray(data, col->get_nth(0), nrows); + } break; + default: + break; + } + } else { + for (auto i = 0; i < nrows; ++i) { + val item = 
accessor.call("marshal", cidx, i, type); + + if (item.isUndefined()) + continue; + + if (item.isNull()) { + col->unset(i); + continue; + } + + switch (type) { + case DTYPE_INT8: { + col->set_nth(i, item.as()); + } break; + case DTYPE_INT16: { + col->set_nth(i, item.as()); + } break; + case DTYPE_INT32: { + // This handles cases where a long sequence of e.g. 0 precedes a clearly + // float value in an inferred column. Would not be needed if the type + // inference checked the entire column/we could reset parsing. + double fval = item.as(); + if (fval > 2147483647 || fval < -2147483648) { + tbl.promote_column(name, DTYPE_FLOAT64, i); + col = tbl.get_column(name); + type = DTYPE_FLOAT64; + col->set_nth(i, fval); + } else { + col->set_nth(i, static_cast(fval)); + } + } break; + case DTYPE_FLOAT32: { + col->set_nth(i, item.as()); + } break; + case DTYPE_FLOAT64: { + col->set_nth(i, item.as()); + } break; + default: + break; + } + } + } +} + +void +_fill_col_int64(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, + bool is_arrow) { + t_uindex nrows = col->size(); + + if (is_arrow) { + val data = accessor["values"]; + // arrow packs 64 bit into two 32 bit ints + arrow::vecFromTypedArray(data, col->get_nth(0), nrows * 2); + } else { + PSP_COMPLAIN_AND_ABORT( + "Unreachable - can't have DTYPE_INT64 column from non-arrow data"); + } +} + +void +_fill_col_time(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, + bool is_arrow) { + t_uindex nrows = col->size(); + + if (is_arrow) { + val data = accessor["values"]; + // arrow packs 64 bit into two 32 bit ints + arrow::vecFromTypedArray(data, col->get_nth(0), nrows * 2); + + std::int8_t unit = accessor["type"]["unit"].as(); + if (unit != /* Arrow.enum_.TimeUnit.MILLISECOND */ 1) { + // Slow path - need to convert each value + std::int64_t factor = 1; + if (unit == /* Arrow.enum_.TimeUnit.NANOSECOND */ 3) { + factor = 1e6; + } else if (unit == /* 
Arrow.enum_.TimeUnit.MICROSECOND */ 2) { + factor = 1e3; + } + for (auto i = 0; i < nrows; ++i) { + col->set_nth(i, *(col->get_nth(i)) / factor); + } + } + } else { + for (auto i = 0; i < nrows; ++i) { + val item = accessor.call("marshal", cidx, i, type); + + if (item.isUndefined()) + continue; + + if (item.isNull()) { + col->unset(i); + continue; + } + + auto elem = static_cast( + item.call("getTime").as()); // dcol[i].as(); + col->set_nth(i, elem); + } + } +} + +void +_fill_col_date(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, + bool is_arrow) { + t_uindex nrows = col->size(); + + if (is_arrow) { + // val data = dcol["values"]; + // // arrow packs 64 bit into two 32 bit ints + // arrow::vecFromTypedArray(data, col->get_nth(0), nrows * 2); + + // std::int8_t unit = dcol["type"]["unit"].as(); + // if (unit != /* Arrow.enum_.TimeUnit.MILLISECOND */ 1) { + // // Slow path - need to convert each value + // std::int64_t factor = 1; + // if (unit == /* Arrow.enum_.TimeUnit.NANOSECOND */ 3) { + // factor = 1e6; + // } else if (unit == /* Arrow.enum_.TimeUnit.MICROSECOND */ 2) { + // factor = 1e3; + // } + // for (auto i = 0; i < nrows; ++i) { + // col->set_nth(i, *(col->get_nth(i)) / factor); + // } + // } + } else { + for (auto i = 0; i < nrows; ++i) { + val item = accessor.call("marshal", cidx, i, type); + + if (item.isUndefined()) + continue; + + if (item.isNull()) { + col->unset(i); + continue; + } + + col->set_nth(i, jsdate_to_t_date(item)); + } + } +} + +void +_fill_col_bool(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, + bool is_arrow) { + t_uindex nrows = col->size(); + + if (is_arrow) { + // arrow packs bools into a bitmap + val data = accessor["values"]; + for (auto i = 0; i < nrows; ++i) { + std::uint8_t elem = data[i / 8].as(); + bool v = elem & (1 << (i % 8)); + col->set_nth(i, v); + } + } else { + for (auto i = 0; i < nrows; ++i) { + val item = accessor.call("marshal", cidx, i, 
type); + + if (item.isUndefined()) + continue; + + if (item.isNull()) { + col->unset(i); + continue; + } + + auto elem = item.as(); + col->set_nth(i, elem); + } + } +} + +void +_fill_col_string(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, + bool is_arrow) { + + t_uindex nrows = col->size(); + + if (is_arrow) { + if (accessor["constructor"]["name"].as() == "DictionaryVector") { + + val dictvec = accessor["dictionary"]; + arrow::fill_col_dict(dictvec, col); + + // Now process index into dictionary + + // Perspective stores string indices in a 32bit unsigned array + // Javascript's typed arrays handle copying from various bitwidth arrays properly + val vkeys = accessor["indices"]["values"]; + arrow::vecFromTypedArray(vkeys, col->get_nth(0), nrows, "Uint32Array"); + + } else if (accessor["constructor"]["name"].as() == "Utf8Vector" + || accessor["constructor"]["name"].as() == "BinaryVector") { + + val vdata = accessor["values"]; + std::int32_t vsize = vdata["length"].as(); + std::vector data; + data.reserve(vsize); + data.resize(vsize); + arrow::vecFromTypedArray(vdata, data.data(), vsize); + + val voffsets = accessor["valueOffsets"]; + std::int32_t osize = voffsets["length"].as(); + std::vector offsets; + offsets.reserve(osize); + offsets.resize(osize); + arrow::vecFromTypedArray(voffsets, offsets.data(), osize); + + std::string elem; + + for (std::int32_t i = 0; i < nrows; ++i) { + std::int32_t bidx = offsets[i]; + std::size_t es = offsets[i + 1] - bidx; + elem.assign(reinterpret_cast(data.data()) + bidx, es); + col->set_nth(i, elem); + } + } + } else { + for (auto i = 0; i < nrows; ++i) { + val item = accessor.call("marshal", cidx, i, type); + + if (item.isUndefined()) + continue; + + if (item.isNull()) { + col->unset(i); + continue; + } + + std::wstring welem = item.as(); + std::wstring_convert converter; + std::string elem = converter.to_bytes(welem); + col->set_nth(i, elem); + } + } +} + +/** + * Fills the table with data 
from Javascript. + * + * Params + * ------ + * tbl - pointer to the table object + * ocolnames - vector of column names + * accessor - the JS data accessor interface + * odt - vector of data types + * offset + * is_arrow - flag for arrow data + * + * Returns + * ------- + * + */ +void +_fill_data(t_table& tbl, std::vector ocolnames, val accessor, + std::vector odt, std::uint32_t offset, bool is_arrow) { + + for (auto cidx = 0; cidx < ocolnames.size(); ++cidx) { + auto name = ocolnames[cidx]; + auto col = tbl.get_column(name); + auto col_type = odt[cidx]; + + val dcol = val::undefined(); + + if (is_arrow) { + dcol = accessor["cdata"][cidx]; + } else { + dcol = accessor; + } + + switch (col_type) { + case DTYPE_INT64: { + _fill_col_int64(dcol, col, name, cidx, col_type, is_arrow); + } break; + case DTYPE_BOOL: { + _fill_col_bool(dcol, col, name, cidx, col_type, is_arrow); + } break; + case DTYPE_DATE: { + _fill_col_date(dcol, col, name, cidx, col_type, is_arrow); + } break; + case DTYPE_TIME: { + _fill_col_time(dcol, col, name, cidx, col_type, is_arrow); + } break; + case DTYPE_STR: { + _fill_col_string(dcol, col, name, cidx, col_type, is_arrow); + } break; + case DTYPE_NONE: { + break; + } + default: + _fill_col_numeric(dcol, tbl, col, name, cidx, col_type, is_arrow); + } + + if (is_arrow) { + // Fill validity bitmap + std::uint32_t null_count = dcol["nullCount"].as(); + + if (null_count == 0) { + col->valid_raw_fill(); + } else { + val validity = dcol["nullBitmap"]; + arrow::fill_col_valid(validity, col); + } + } + } +} + +/****************************************************************************** + * + * Public + */ +template <> +void set_column_nth(t_column* col, t_uindex idx, val value) { + + // Check if the value is a javascript null + if (value.isNull()) { + col->unset(idx); + return; + } + + switch (col->get_dtype()) { + case DTYPE_BOOL: { + col->set_nth(idx, value.as(), STATUS_VALID); + break; + } + case DTYPE_FLOAT64: { + col->set_nth(idx, value.as(), 
STATUS_VALID); + break; + } + case DTYPE_FLOAT32: { + col->set_nth(idx, value.as(), STATUS_VALID); + break; + } + case DTYPE_UINT32: { + col->set_nth(idx, value.as(), STATUS_VALID); + break; + } + case DTYPE_UINT64: { + col->set_nth(idx, value.as(), STATUS_VALID); + break; + } + case DTYPE_INT32: { + col->set_nth(idx, value.as(), STATUS_VALID); + break; + } + case DTYPE_INT64: { + col->set_nth(idx, value.as(), STATUS_VALID); + break; + } + case DTYPE_STR: { + std::wstring welem = value.as(); + + std::wstring_convert converter; + std::string elem = converter.to_bytes(welem); + col->set_nth(idx, elem, STATUS_VALID); + break; + } + case DTYPE_DATE: { + col->set_nth(idx, jsdate_to_t_date(value), STATUS_VALID); + break; + } + case DTYPE_TIME: { + col->set_nth( + idx, static_cast(value.as()), STATUS_VALID); + break; + } + case DTYPE_UINT8: + case DTYPE_UINT16: + case DTYPE_INT8: + case DTYPE_INT16: + default: { + // Other types not implemented + } + } +} + +/** + * Helper function for computed columns + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template <> +void table_add_computed_column(t_table& table, val computed_defs) { + auto vcomputed_defs = vecFromJSArray(computed_defs); + for (auto i = 0; i < vcomputed_defs.size(); ++i) { + val coldef = vcomputed_defs[i]; + std::string name = coldef["column"].as(); + val inputs = coldef["inputs"]; + val func = coldef["func"]; + val type = coldef["type"]; + + std::string stype; + + if (type.isUndefined()) { + stype = "string"; + } else { + stype = type.as(); + } + + t_dtype dtype; + if (stype == "integer") { + dtype = DTYPE_INT32; + } else if (stype == "float") { + dtype = DTYPE_FLOAT64; + } else if (stype == "boolean") { + dtype = DTYPE_BOOL; + } else if (stype == "date") { + dtype = DTYPE_DATE; + } else if (stype == "datetime") { + dtype = DTYPE_TIME; + } else { + dtype = DTYPE_STR; + } + + // Get list of input column names + auto icol_names = vecFromJSArray(inputs); + + // Get t_column* for all input 
columns + std::vector icols; + for (const auto& cc : icol_names) { + icols.push_back(table._get_column(cc)); + } + + int arity = icols.size(); + + // Add new column + t_column* out = table.add_column(name, dtype, true); + + val i1 = val::undefined(), i2 = val::undefined(), i3 = val::undefined(), + i4 = val::undefined(); + + t_uindex size = table.size(); + for (t_uindex ridx = 0; ridx < size; ++ridx) { + val value = val::undefined(); + + switch (arity) { + case 0: { + value = func(); + break; + } + case 1: { + i1 = scalar_to_val(icols[0]->get_scalar(ridx)); + if (!i1.isNull()) { + value = func(i1); + } + break; + } + case 2: { + i1 = scalar_to_val(icols[0]->get_scalar(ridx)); + i2 = scalar_to_val(icols[1]->get_scalar(ridx)); + if (!i1.isNull() && !i2.isNull()) { + value = func(i1, i2); + } + break; + } + case 3: { + i1 = scalar_to_val(icols[0]->get_scalar(ridx)); + i2 = scalar_to_val(icols[1]->get_scalar(ridx)); + i3 = scalar_to_val(icols[2]->get_scalar(ridx)); + if (!i1.isNull() && !i2.isNull() && !i3.isNull()) { + value = func(i1, i2, i3); + } + break; + } + case 4: { + i1 = scalar_to_val(icols[0]->get_scalar(ridx)); + i2 = scalar_to_val(icols[1]->get_scalar(ridx)); + i3 = scalar_to_val(icols[2]->get_scalar(ridx)); + i4 = scalar_to_val(icols[3]->get_scalar(ridx)); + if (!i1.isNull() && !i2.isNull() && !i3.isNull() && !i4.isNull()) { + value = func(i1, i2, i3, i4); + } + break; + } + default: { + // Don't handle other arity values + break; + } + } + + if (!value.isUndefined()) { + set_column_nth(out, ridx, value); + } + } + } +} + +/** + * DataAccessor + * + * parses and converts input data into a canonical format for + * interfacing with Perspective. 
+ */ + +// Name parsing +std::vector +column_names(val data, std::int32_t format) { + std::vector names; + val Object = val::global("Object"); + + if (format == 0) { + std::int32_t max_check = 50; + val data_names = Object.call("keys", data[0]); + std::int32_t check_index = std::min(max_check, data["length"].as()); + + for (auto ix = 0; ix < check_index; ix++) { + val next = Object.call("keys", data[ix]); + if (data_names["length"] != next["length"]) { + if (max_check == 50) { + std::cout << "Data parse warning: Array data has inconsistent rows" + << std::endl; + } + + std::cout << boost::format("Extending from %d to %d") + % data_names["length"].as() + % next["length"].as() + << std::endl; + data_names = next; + max_check *= 2; + } + + names = vecFromJSArray(data_names); + } + } else if (format == 1 || format == 2) { + names = vecFromJSArray(Object.call("keys", data)); + } + + return names; +} + +// Type inferrence for fill_col and data_types +t_dtype +infer_type(val x, val date_validator) { + std::string jstype = x.typeOf().as(); + t_dtype t = t_dtype::DTYPE_STR; + + // Unwrap numbers inside strings + val x_number = val::global("Number").call("call", val::object(), x); + bool number_in_string = (jstype == "string") + && (x["length"].as() != 0) + && (!val::global("isNaN") + .call("call", val::object(), x_number)); + + if (x.isNull()) { + t = t_dtype::DTYPE_NONE; + } else if (jstype == "number" || number_in_string) { + if (number_in_string) { + x = x_number; + } + double x_float64 = x.as(); + if ((std::fmod(x_float64, 1.0) == 0.0) && (x_float64 < 10000.0) && (x_float64 != 0.0)) { + t = t_dtype::DTYPE_INT32; + } else { + t = t_dtype::DTYPE_FLOAT64; + } + } else if (jstype == "boolean") { + t = t_dtype::DTYPE_BOOL; + } else if (x.instanceof (val::global("Date"))) { + std::int32_t hours = x.call("getHours").as(); + std::int32_t minutes = x.call("getMinutes").as(); + std::int32_t seconds = x.call("getSeconds").as(); + std::int32_t milliseconds = 
x.call("getMilliseconds").as(); + + if (hours == 0 && minutes == 0 && seconds == 0 && milliseconds == 0) { + t = t_dtype::DTYPE_DATE; + } else { + t = t_dtype::DTYPE_TIME; + } + } else if (jstype == "string") { + if (date_validator.call("call", val::object(), x).as()) { + t = t_dtype::DTYPE_TIME; + } else { + std::string lower = x.call("toLowerCase").as(); + if (lower == "true" || lower == "false") { + t = t_dtype::DTYPE_BOOL; + } else { + t = t_dtype::DTYPE_STR; + } + } + } + + return t; +} + +t_dtype +get_data_type(val data, std::int32_t format, std::string name, val date_validator) { + std::int32_t i = 0; + boost::optional inferredType; + + if (format == 0) { + // loop parameters differ slightly so rewrite the loop + while (!inferredType.is_initialized() && i < 100 + && i < data["length"].as()) { + if (data[i].call("hasOwnProperty", name).as() == true) { + if (!data[i][name].isNull()) { + inferredType = infer_type(data[i][name], date_validator); + } else { + inferredType = t_dtype::DTYPE_STR; + } + } + + i++; + } + } else if (format == 1) { + while (!inferredType.is_initialized() && i < 100 + && i < data[name]["length"].as()) { + if (!data[name][i].isNull()) { + inferredType = infer_type(data[name][i], date_validator); + } else { + inferredType = t_dtype::DTYPE_STR; + } + + i++; + } + } + + if (!inferredType.is_initialized()) { + return t_dtype::DTYPE_STR; + } else { + return inferredType.get(); + } +} + +std::vector +data_types(val data, std::int32_t format, std::vector names, val date_validator) { + if (names.size() == 0) { + PSP_COMPLAIN_AND_ABORT("Cannot determine data types without column names!"); + } + + std::vector types; + + if (format == 2) { + std::vector data_names + = vecFromJSArray(val::global("Object").call("keys", data)); + + for (std::vector::iterator name = data_names.begin(); + name != data_names.end(); ++name) { + std::string value = data[*name].as(); + t_dtype type; + + if (value == "integer") { + type = t_dtype::DTYPE_INT32; + } else if 
(value == "float") { + type = t_dtype::DTYPE_FLOAT64; + } else if (value == "string") { + type = t_dtype::DTYPE_STR; + } else if (value == "boolean") { + type = t_dtype::DTYPE_BOOL; + } else if (value == "datetime") { + type = t_dtype::DTYPE_TIME; + } else if (value == "date") { + type = t_dtype::DTYPE_DATE; + } else { + PSP_COMPLAIN_AND_ABORT("Unknown type '" + value + "' for key '" + *name + "'"); + } + + types.push_back(type); + } + + return types; + } else { + for (std::vector::iterator name = names.begin(); name != names.end(); + ++name) { + t_dtype type = get_data_type(data, format, *name, date_validator); + types.push_back(type); + } + } + + return types; +} + +/** + * Create a default gnode. + * + * Params + * ------ + * j_colnames - a JS Array of column names. + * j_dtypes - a JS Array of column types. + * + * Returns + * ------- + * A gnode. + */ +std::shared_ptr +make_gnode(const t_table& table) { + auto iscm = table.get_schema(); + + std::vector ocolnames(iscm.columns()); + std::vector odt(iscm.types()); + + if (iscm.has_column("psp_pkey")) { + t_uindex idx = iscm.get_colidx("psp_pkey"); + ocolnames.erase(ocolnames.begin() + idx); + odt.erase(odt.begin() + idx); + } + + if (iscm.has_column("psp_op")) { + t_uindex idx = iscm.get_colidx("psp_op"); + ocolnames.erase(ocolnames.begin() + idx); + odt.erase(odt.begin() + idx); + } + + t_schema oscm(ocolnames, odt); + + // Create a gnode + auto gnode = std::make_shared(oscm, iscm); + gnode->init(); + + return gnode; +} + +/** + * Create a populated table. + * + * Params + * ------ + * chunk - a JS object containing parsed data and associated metadata + * offset + * limit + * index + * is_delete - sets the table operation + * + * Returns + * ------- + * a populated table. 
+ */ +template<> +std::shared_ptr +make_table(t_pool* pool, val gnode, val accessor, val computed, std::uint32_t offset, + std::uint32_t limit, std::string index, bool is_update, bool is_delete, bool is_arrow) { + std::uint32_t size = accessor["row_count"].as(); + + std::vector colnames; + std::vector dtypes; + + // Determine metadata + if (is_arrow || (is_update || is_delete)) { + // TODO: fully remove intermediate passed-through JS arrays for non-arrow data + colnames = vecFromJSArray(accessor["names"]); + dtypes = vecFromJSArray(accessor["types"]); + } else { + // Infer names and types + val data = accessor["data"]; + std::int32_t format = accessor["format"].as(); + colnames = column_names(data, format); + dtypes = data_types(data, format, colnames, accessor["date_validator"]); + } + + // Check if index is valid after getting column names + bool valid_index = std::find(colnames.begin(), colnames.end(), index) != colnames.end(); + if (index != "" && !valid_index) { + PSP_COMPLAIN_AND_ABORT("Specified index '" + index + "' does not exist in data.") + } + + // Create the table + // TODO assert size > 0 + t_table tbl(t_schema(colnames, dtypes)); + tbl.init(); + tbl.extend(size); + + _fill_data(tbl, colnames, accessor, dtypes, offset, is_arrow); + + // Set up pkey and op columns + if (is_delete) { + auto op_col = tbl.add_column("psp_op", DTYPE_UINT8, false); + op_col->raw_fill(OP_DELETE); + } else { + auto op_col = tbl.add_column("psp_op", DTYPE_UINT8, false); + op_col->raw_fill(OP_INSERT); + } + + if (index == "") { + // If user doesn't specify an column to use as the pkey index, just use + // row number + auto key_col = tbl.add_column("psp_pkey", DTYPE_INT32, true); + auto okey_col = tbl.add_column("psp_okey", DTYPE_INT32, true); + + for (auto ridx = 0; ridx < tbl.size(); ++ridx) { + key_col->set_nth(ridx, (ridx + offset) % limit); + okey_col->set_nth(ridx, (ridx + offset) % limit); + } + } else { + tbl.clone_column(index, "psp_pkey"); + tbl.clone_column(index, 
"psp_okey"); + } + + std::shared_ptr new_gnode; + + if (gnode.isUndefined()) { + new_gnode = make_gnode(tbl); + pool->register_gnode(new_gnode.get()); + } else { + new_gnode = gnode.as>(); + } + + if (!computed.isUndefined()) { + table_add_computed_column(tbl, computed); + } + + pool->send(new_gnode->get_id(), 0, tbl); + pool->_process(); + + return new_gnode; +} + +/** + * Copies the internal table from a gnode + * + * Params + * ------ + * + * Returns + * ------- + * A gnode. + */ +template <> +std::shared_ptr +clone_gnode_table(t_pool* pool, std::shared_ptr gnode, val computed) { + t_table* tbl = gnode->_get_pkeyed_table(); + table_add_computed_column(*tbl, computed); + std::shared_ptr new_gnode = make_gnode(*tbl); + pool->register_gnode(new_gnode.get()); + pool->send(new_gnode->get_id(), 0, *tbl); + pool->_process(); + return new_gnode; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template <> +std::shared_ptr +make_context_zero(t_schema schema, t_filter_op combiner, val j_filters, val j_columns, + val j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { + auto columns = vecFromJSArray(j_columns); + auto fvec = _get_fterms(schema, j_filters); + auto svec = _get_sort(j_sortby); + auto cfg = t_config(columns, combiner, fvec); + auto ctx0 = std::make_shared(schema, cfg); + ctx0->init(); + ctx0->sort_by(svec); + pool->register_context(gnode->get_id(), name, ZERO_SIDED_CONTEXT, + reinterpret_cast(ctx0.get())); + return ctx0; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template <> +std::shared_ptr +make_context_one(t_schema schema, val j_pivots, t_filter_op combiner, val j_filters, val j_aggs, + val j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { + auto fvec = _get_fterms(schema, j_filters); + auto aggspecs = _get_aggspecs(j_aggs); + auto pivots = vecFromJSArray(j_pivots); + auto svec = _get_sort(j_sortby); + + auto cfg = t_config(pivots, aggspecs, combiner, 
fvec); + auto ctx1 = std::make_shared(schema, cfg); + + ctx1->init(); + ctx1->sort_by(svec); + pool->register_context( + gnode->get_id(), name, ONE_SIDED_CONTEXT, reinterpret_cast(ctx1.get())); + return ctx1; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template <> +std::shared_ptr +make_context_two(t_schema schema, val j_rpivots, val j_cpivots, t_filter_op combiner, + val j_filters, val j_aggs, bool show_totals, t_pool* pool, std::shared_ptr gnode, + std::string name) { + auto fvec = _get_fterms(schema, j_filters); + auto aggspecs = _get_aggspecs(j_aggs); + auto rpivots = vecFromJSArray(j_rpivots); + auto cpivots = vecFromJSArray(j_cpivots); + t_totals total = show_totals ? TOTALS_BEFORE : TOTALS_HIDDEN; + + auto cfg = t_config(rpivots, cpivots, aggspecs, total, combiner, fvec); + auto ctx2 = std::make_shared(schema, cfg); + + ctx2->init(); + pool->register_context( + gnode->get_id(), name, TWO_SIDED_CONTEXT, reinterpret_cast(ctx2.get())); + return ctx2; +} + +template <> +void sort(std::shared_ptr ctx2, val j_sortby, val j_column_sortby) { + auto svec = _get_sort(j_sortby); + if (svec.size() > 0) { + ctx2->sort_by(svec); + } + ctx2->column_sort_by(_get_sort(j_column_sortby)); +} + +template <> +val get_column_data(std::shared_ptr table, std::string colname) { + val arr = val::array(); + auto col = table->get_column(colname); + for (auto idx = 0; idx < col->size(); ++idx) { + arr.set(idx, scalar_to_val(col->get_scalar(idx))); + } + return arr; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +val get_data(T ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col) { + auto slice = ctx->get_data(start_row, end_row, start_col, end_col); + val arr = val::array(); + for (auto idx = 0; idx < slice.size(); ++idx) { + arr.set(idx, scalar_to_val(slice[idx])); + } + return arr; +} + +template <> +val get_data_two_skip_headers(std::shared_ptr ctx, 
std::uint32_t depth, + std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col) { + auto col_length = ctx->unity_get_column_count(); + std::vector col_nums; + col_nums.push_back(0); + for (t_uindex i = 0; i < col_length; ++i) { + if (ctx->unity_get_column_path(i + 1).size() == depth) { + col_nums.push_back(i + 1); + } + } + col_nums = std::vector(col_nums.begin() + start_col, + col_nums.begin() + std::min(end_col, (std::uint32_t)col_nums.size())); + auto slice = ctx->get_data(start_row, end_row, col_nums.front(), col_nums.back() + 1); + val arr = val::array(); + t_uindex i = 0; + auto iter = slice.begin(); + while (iter != slice.end()) { + t_uindex prev = col_nums.front(); + for (auto idx = col_nums.begin(); idx != col_nums.end(); idx++, i++) { + t_uindex col_num = *idx; + iter += col_num - prev; + prev = col_num; + arr.set(i, scalar_to_val(*iter)); + } + if (iter != slice.end()) + iter++; + } + return arr; +} + +} +} + + + +using namespace perspective::binding; + +/** + * Main + */ +int +main(int argc, char** argv) { + std::cout << "Perspective initialized successfully" << std::endl; + + // clang-format off + EM_ASM({ + + if (typeof self !== "undefined") { + if (self.dispatchEvent && !self._perspective_initialized && self.document) { + self._perspective_initialized = true; + var event = self.document.createEvent("Event"); + event.initEvent("perspective-ready", false, true); + self.dispatchEvent(event); + } else if (!self.document && self.postMessage) { + self.postMessage({}); + } + } + + }); + // clang-format on +} + +/****************************************************************************** + * + * Embind + */ + +EMSCRIPTEN_BINDINGS(perspective) { + class_("t_column") + .smart_ptr>("shared_ptr") + .function("set_scalar", &t_column::set_scalar); + + class_("t_table") + .constructor() + .smart_ptr>("shared_ptr") + .function("add_column", &t_table::add_column, allow_raw_pointers()) + .function("pprint", &t_table::pprint) 
+ .function( + "size", reinterpret_cast(&t_table::size)); + + class_("t_schema") + .function&>( + "columns", &t_schema::columns, allow_raw_pointers()) + .function>("types", &t_schema::types, allow_raw_pointers()); + + class_("t_gnode") + .constructor&, + const std::vector&, const std::vector&>() + .smart_ptr>("shared_ptr") + .function( + "get_id", reinterpret_cast(&t_gnode::get_id)) + .function("get_tblschema", &t_gnode::get_tblschema) + .function("get_table", &t_gnode::get_table, allow_raw_pointers()); + + class_("t_ctx0") + .constructor() + .smart_ptr>("shared_ptr") + .function("sidedness", &t_ctx0::sidedness) + .function("get_row_count", + reinterpret_cast(&t_ctx0::get_row_count)) + .function("get_column_count", + reinterpret_cast(&t_ctx0::get_column_count)) + .function>("get_data", &t_ctx0::get_data) + .function("get_step_delta", &t_ctx0::get_step_delta) + .function>("get_cell_delta", &t_ctx0::get_cell_delta) + .function>("get_column_names", &t_ctx0::get_column_names) + // .function>("get_min_max", &t_ctx0::get_min_max) + // .function("set_minmax_enabled", &t_ctx0::set_minmax_enabled) + .function>("unity_get_row_data", &t_ctx0::unity_get_row_data) + .function>( + "unity_get_column_data", &t_ctx0::unity_get_column_data) + .function>("unity_get_row_path", &t_ctx0::unity_get_row_path) + .function>( + "unity_get_column_path", &t_ctx0::unity_get_column_path) + .function("unity_get_row_depth", &t_ctx0::unity_get_row_depth) + .function("unity_get_column_depth", &t_ctx0::unity_get_column_depth) + .function("unity_get_column_name", &t_ctx0::unity_get_column_name) + .function( + "unity_get_column_display_name", &t_ctx0::unity_get_column_display_name) + .function>( + "unity_get_column_names", &t_ctx0::unity_get_column_names) + .function>( + "unity_get_column_display_names", &t_ctx0::unity_get_column_display_names) + .function("unity_get_column_count", &t_ctx0::unity_get_column_count) + .function("unity_get_row_count", &t_ctx0::unity_get_row_count) + 
.function("unity_get_row_expanded", &t_ctx0::unity_get_row_expanded) + .function("unity_get_column_expanded", &t_ctx0::unity_get_column_expanded) + .function("unity_init_load_step_end", &t_ctx0::unity_init_load_step_end); + + class_("t_ctx1") + .constructor() + .smart_ptr>("shared_ptr") + .function("sidedness", &t_ctx1::sidedness) + .function("get_row_count", + reinterpret_cast(&t_ctx1::get_row_count)) + .function("get_column_count", + reinterpret_cast(&t_ctx1::get_column_count)) + .function>("get_data", &t_ctx1::get_data) + .function("get_step_delta", &t_ctx1::get_step_delta) + .function>("get_cell_delta", &t_ctx1::get_cell_delta) + .function("set_depth", &t_ctx1::set_depth) + .function("open", select_overload(&t_ctx1::open)) + .function("close", select_overload(&t_ctx1::close)) + .function("get_trav_depth", &t_ctx1::get_trav_depth) + .function>("get_column_names", &t_ctx1::get_aggregates) + .function>("unity_get_row_data", &t_ctx1::unity_get_row_data) + .function>( + "unity_get_column_data", &t_ctx1::unity_get_column_data) + .function>("unity_get_row_path", &t_ctx1::unity_get_row_path) + .function>( + "unity_get_column_path", &t_ctx1::unity_get_column_path) + .function("unity_get_row_depth", &t_ctx1::unity_get_row_depth) + .function("unity_get_column_depth", &t_ctx1::unity_get_column_depth) + .function("unity_get_column_name", &t_ctx1::unity_get_column_name) + .function( + "unity_get_column_display_name", &t_ctx1::unity_get_column_display_name) + .function>( + "unity_get_column_names", &t_ctx1::unity_get_column_names) + .function>( + "unity_get_column_display_names", &t_ctx1::unity_get_column_display_names) + .function("unity_get_column_count", &t_ctx1::unity_get_column_count) + .function("unity_get_row_count", &t_ctx1::unity_get_row_count) + .function("unity_get_row_expanded", &t_ctx1::unity_get_row_expanded) + .function("unity_get_column_expanded", &t_ctx1::unity_get_column_expanded) + .function("unity_init_load_step_end", &t_ctx1::unity_init_load_step_end); + 
+ class_("t_ctx2") + .constructor() + .smart_ptr>("shared_ptr") + .function("sidedness", &t_ctx2::sidedness) + .function("get_row_count", + reinterpret_cast( + select_overload(&t_ctx2::get_row_count))) + .function("get_column_count", + reinterpret_cast(&t_ctx2::get_column_count)) + .function>("get_data", &t_ctx2::get_data) + .function("get_step_delta", &t_ctx2::get_step_delta) + //.function>("get_cell_delta", &t_ctx2::get_cell_delta) + .function("set_depth", &t_ctx2::set_depth) + .function("open", select_overload(&t_ctx2::open)) + .function("close", select_overload(&t_ctx2::close)) + .function>("get_column_names", &t_ctx2::get_aggregates) + .function>("unity_get_row_data", &t_ctx2::unity_get_row_data) + .function>( + "unity_get_column_data", &t_ctx2::unity_get_column_data) + .function>("unity_get_row_path", &t_ctx2::unity_get_row_path) + .function>( + "unity_get_column_path", &t_ctx2::unity_get_column_path) + .function("unity_get_row_depth", &t_ctx2::unity_get_row_depth) + .function("unity_get_column_depth", &t_ctx2::unity_get_column_depth) + .function("unity_get_column_name", &t_ctx2::unity_get_column_name) + .function( + "unity_get_column_display_name", &t_ctx2::unity_get_column_display_name) + .function>( + "unity_get_column_names", &t_ctx2::unity_get_column_names) + .function>( + "unity_get_column_display_names", &t_ctx2::unity_get_column_display_names) + .function("unity_get_column_count", &t_ctx2::unity_get_column_count) + .function("unity_get_row_count", &t_ctx2::unity_get_row_count) + .function("unity_get_row_expanded", &t_ctx2::unity_get_row_expanded) + .function("unity_get_column_expanded", &t_ctx2::unity_get_column_expanded) + .function("unity_init_load_step_end", &t_ctx2::unity_init_load_step_end) + .function("get_totals", &t_ctx2::get_totals) + .function>( + "get_column_path_userspace", &t_ctx2::get_column_path_userspace) + .function("unity_init_load_step_end", &t_ctx2::unity_init_load_step_end); + + class_("t_pool") + .constructor<>() + 
.smart_ptr>("shared_ptr") + .function("register_gnode", &t_pool::register_gnode, allow_raw_pointers()) + .function("process", &t_pool::_process) + .function("send", &t_pool::send) + .function("epoch", &t_pool::epoch) + .function("unregister_gnode", &t_pool::unregister_gnode) + .function("set_update_delegate", &t_pool::set_update_delegate) + .function("register_context", &t_pool::register_context) + .function("unregister_context", &t_pool::unregister_context) + .function>( + "get_contexts_last_updated", &t_pool::get_contexts_last_updated) + .function>( + "get_gnodes_last_updated", &t_pool::get_gnodes_last_updated) + .function("get_gnode", &t_pool::get_gnode, allow_raw_pointers()); + + class_("t_aggspec").function("name", &t_aggspec::name); + + class_("t_tscalar"); + + value_object("t_updctx") + .field("gnode_id", &t_updctx::m_gnode_id) + .field("ctx_name", &t_updctx::m_ctx); + + value_object("t_cellupd") + .field("row", &t_cellupd::row) + .field("column", &t_cellupd::column) + .field("old_value", &t_cellupd::old_value) + .field("new_value", &t_cellupd::new_value); + + value_object("t_stepdelta") + .field("rows_changed", &t_stepdelta::rows_changed) + .field("columns_changed", &t_stepdelta::columns_changed) + .field("cells", &t_stepdelta::cells); + + register_vector("std::vector"); + register_vector("std::vector"); + register_vector("std::vector"); + register_vector("std::vector"); + register_vector("std::vector"); + register_vector("std::vector"); + register_vector("std::vector"); + + enum_("t_header") + .value("HEADER_ROW", HEADER_ROW) + .value("HEADER_COLUMN", HEADER_COLUMN); + + enum_("t_ctx_type") + .value("ZERO_SIDED_CONTEXT", ZERO_SIDED_CONTEXT) + .value("ONE_SIDED_CONTEXT", ONE_SIDED_CONTEXT) + .value("TWO_SIDED_CONTEXT", TWO_SIDED_CONTEXT) + .value("GROUPED_ZERO_SIDED_CONTEXT", GROUPED_ZERO_SIDED_CONTEXT) + .value("GROUPED_PKEY_CONTEXT", GROUPED_PKEY_CONTEXT) + .value("GROUPED_COLUMNS_CONTEXT", GROUPED_COLUMNS_CONTEXT); + + enum_("t_filter_op") + 
.value("FILTER_OP_LT", FILTER_OP_LT) + .value("FILTER_OP_LTEQ", FILTER_OP_LTEQ) + .value("FILTER_OP_GT", FILTER_OP_GT) + .value("FILTER_OP_GTEQ", FILTER_OP_GTEQ) + .value("FILTER_OP_EQ", FILTER_OP_EQ) + .value("FILTER_OP_NE", FILTER_OP_NE) + .value("FILTER_OP_BEGINS_WITH", FILTER_OP_BEGINS_WITH) + .value("FILTER_OP_ENDS_WITH", FILTER_OP_ENDS_WITH) + .value("FILTER_OP_CONTAINS", FILTER_OP_CONTAINS) + .value("FILTER_OP_OR", FILTER_OP_OR) + .value("FILTER_OP_IN", FILTER_OP_IN) + .value("FILTER_OP_NOT_IN", FILTER_OP_NOT_IN) + .value("FILTER_OP_AND", FILTER_OP_AND) + .value("FILTER_OP_IS_NAN", FILTER_OP_IS_NAN) + .value("FILTER_OP_IS_NOT_NAN", FILTER_OP_IS_NOT_NAN) + .value("FILTER_OP_IS_VALID", FILTER_OP_IS_VALID) + .value("FILTER_OP_IS_NOT_VALID", FILTER_OP_IS_NOT_VALID); + + enum_("t_dtype") + .value("DTYPE_NONE", DTYPE_NONE) + .value("DTYPE_INT64", DTYPE_INT64) + .value("DTYPE_INT32", DTYPE_INT32) + .value("DTYPE_INT16", DTYPE_INT16) + .value("DTYPE_INT8", DTYPE_INT8) + .value("DTYPE_UINT64", DTYPE_UINT64) + .value("DTYPE_UINT32", DTYPE_UINT32) + .value("DTYPE_UINT16", DTYPE_UINT16) + .value("DTYPE_UINT8", DTYPE_UINT8) + .value("DTYPE_FLOAT64", DTYPE_FLOAT64) + .value("DTYPE_FLOAT32", DTYPE_FLOAT32) + .value("DTYPE_BOOL", DTYPE_BOOL) + .value("DTYPE_TIME", DTYPE_TIME) + .value("DTYPE_DATE", DTYPE_DATE) + .value("DTYPE_ENUM", DTYPE_ENUM) + .value("DTYPE_OID", DTYPE_OID) + .value("DTYPE_PTR", DTYPE_PTR) + .value("DTYPE_F64PAIR", DTYPE_F64PAIR) + .value("DTYPE_USER_FIXED", DTYPE_USER_FIXED) + .value("DTYPE_STR", DTYPE_STR) + .value("DTYPE_USER_VLEN", DTYPE_USER_VLEN) + .value("DTYPE_LAST_VLEN", DTYPE_LAST_VLEN) + .value("DTYPE_LAST", DTYPE_LAST); + + enum_("t_aggtype") + .value("AGGTYPE_SUM", AGGTYPE_SUM) + .value("AGGTYPE_MUL", AGGTYPE_MUL) + .value("AGGTYPE_COUNT", AGGTYPE_COUNT) + .value("AGGTYPE_MEAN", AGGTYPE_MEAN) + .value("AGGTYPE_WEIGHTED_MEAN", AGGTYPE_WEIGHTED_MEAN) + .value("AGGTYPE_UNIQUE", AGGTYPE_UNIQUE) + .value("AGGTYPE_ANY", AGGTYPE_ANY) + 
.value("AGGTYPE_MEDIAN", AGGTYPE_MEDIAN) + .value("AGGTYPE_JOIN", AGGTYPE_JOIN) + .value("AGGTYPE_SCALED_DIV", AGGTYPE_SCALED_DIV) + .value("AGGTYPE_SCALED_ADD", AGGTYPE_SCALED_ADD) + .value("AGGTYPE_SCALED_MUL", AGGTYPE_SCALED_MUL) + .value("AGGTYPE_DOMINANT", AGGTYPE_DOMINANT) + .value("AGGTYPE_FIRST", AGGTYPE_FIRST) + .value("AGGTYPE_LAST", AGGTYPE_LAST) + .value("AGGTYPE_PY_AGG", AGGTYPE_PY_AGG) + .value("AGGTYPE_AND", AGGTYPE_AND) + .value("AGGTYPE_OR", AGGTYPE_OR) + .value("AGGTYPE_LAST_VALUE", AGGTYPE_LAST_VALUE) + .value("AGGTYPE_HIGH_WATER_MARK", AGGTYPE_HIGH_WATER_MARK) + .value("AGGTYPE_LOW_WATER_MARK", AGGTYPE_LOW_WATER_MARK) + .value("AGGTYPE_UDF_COMBINER", AGGTYPE_UDF_COMBINER) + .value("AGGTYPE_UDF_REDUCER", AGGTYPE_UDF_REDUCER) + .value("AGGTYPE_SUM_ABS", AGGTYPE_SUM_ABS) + .value("AGGTYPE_SUM_NOT_NULL", AGGTYPE_SUM_NOT_NULL) + .value("AGGTYPE_MEAN_BY_COUNT", AGGTYPE_MEAN_BY_COUNT) + .value("AGGTYPE_IDENTITY", AGGTYPE_IDENTITY) + .value("AGGTYPE_DISTINCT_COUNT", AGGTYPE_DISTINCT_COUNT) + .value("AGGTYPE_DISTINCT_LEAF", AGGTYPE_DISTINCT_LEAF) + .value("AGGTYPE_PCT_SUM_PARENT", AGGTYPE_PCT_SUM_PARENT) + .value("AGGTYPE_PCT_SUM_GRAND_TOTAL", AGGTYPE_PCT_SUM_GRAND_TOTAL); + + enum_("t_totals") + .value("TOTALS_BEFORE", TOTALS_BEFORE) + .value("TOTALS_HIDDEN", TOTALS_HIDDEN) + .value("TOTALS_AFTER", TOTALS_AFTER); + + function("sort", &sort); + function("make_table", &make_table, allow_raw_pointers()); + function("make_gnode", &make_gnode); + function("clone_gnode_table", &clone_gnode_table, allow_raw_pointers()); + function("make_context_zero", &make_context_zero, allow_raw_pointers()); + function("make_context_one", &make_context_one, allow_raw_pointers()); + function("make_context_two", &make_context_two, allow_raw_pointers()); + function("scalar_to_val", &scalar_to_val); + function("scalar_vec_to_val", &scalar_vec_to_val); + function("table_add_computed_column", &table_add_computed_column); + function("set_column_nth", &set_column_nth, 
allow_raw_pointers()); + function("get_data_zero", &get_data>); + function("get_data_one", &get_data>); + function("get_data_two", &get_data>); + function("get_data_two_skip_headers", &get_data_two_skip_headers); + function("col_to_js_typed_array_zero", &col_to_js_typed_array>); + function("col_to_js_typed_array_one", &col_to_js_typed_array>); + function("col_to_js_typed_array_two", &col_to_js_typed_array>); +} diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 9d8cd197ea..89069d885b 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -7,1738 +7,11 @@ * */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace perspective; -using namespace emscripten; - -typedef std::codecvt_utf8 utf8convert_type; -typedef std::codecvt_utf8_utf16 utf16convert_type; - -/****************************************************************************** - * - * Data Loading - */ - -std::vector -_get_sort(val j_sortby) { - std::vector svec{}; - std::vector sortbys = vecFromJSArray(j_sortby); - for (auto idx = 0; idx < sortbys.size(); ++idx) { - std::vector sortby = vecFromJSArray(sortbys[idx]); - t_sorttype sorttype; - switch (sortby[1]) { - case 0: - sorttype = SORTTYPE_ASCENDING; - break; - case 1: - sorttype = SORTTYPE_DESCENDING; - break; - case 2: - sorttype = SORTTYPE_NONE; - break; - case 3: - sorttype = SORTTYPE_ASCENDING_ABS; - break; - case 4: - sorttype = SORTTYPE_DESCENDING_ABS; - break; - } - svec.push_back(t_sortspec(sortby[0], sorttype)); - } - return svec; -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -std::vector -_get_fterms(t_schema schema, val j_filters) { - std::vector fvec{}; - std::vector filters = vecFromJSArray(j_filters); - for (auto fidx = 0; fidx < filters.size(); ++fidx) { - std::vector filter = vecFromJSArray(filters[fidx]); - std::string coln = filter[0].as(); - t_filter_op comp = 
filter[1].as(); - - switch (comp) { - case FILTER_OP_NOT_IN: - case FILTER_OP_IN: { - std::vector terms{}; - std::vector j_terms = vecFromJSArray(filter[2]); - for (auto jidx = 0; jidx < j_terms.size(); ++jidx) { - terms.push_back(mktscalar(get_interned_cstr(j_terms[jidx].c_str()))); - } - fvec.push_back(t_fterm(coln, comp, mktscalar(0), terms)); - } break; - default: { - t_tscalar term; - switch (schema.get_dtype(coln)) { - case DTYPE_INT32: - term = mktscalar(filter[2].as()); - break; - case DTYPE_INT64: - case DTYPE_FLOAT64: - term = mktscalar(filter[2].as()); - break; - case DTYPE_BOOL: - term = mktscalar(filter[2].as()); - break; - case DTYPE_DATE: - term = mktscalar(t_date(filter[2].as())); - break; - case DTYPE_TIME: - term = mktscalar(t_time(static_cast( - filter[2].call("getTime").as()))); - break; - default: { - term - = mktscalar(get_interned_cstr(filter[2].as().c_str())); - } - } - - fvec.push_back(t_fterm(coln, comp, term, std::vector())); - } - } - } - return fvec; -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -std::vector -_get_aggspecs(val j_aggs) { - std::vector aggs = vecFromJSArray(j_aggs); - std::vector aggspecs; - for (auto idx = 0; idx < aggs.size(); ++idx) { - std::vector agg_row = vecFromJSArray(aggs[idx]); - std::string name = agg_row[0].as(); - t_aggtype aggtype = agg_row[1].as(); - - std::vector dependencies; - std::vector deps = vecFromJSArray(agg_row[2]); - for (auto didx = 0; didx < deps.size(); ++didx) { - if (deps[didx].isUndefined()) { - continue; - } - std::string dep = deps[didx].as(); - dependencies.push_back(t_dep(dep, DEPTYPE_COLUMN)); - } - if (aggtype == AGGTYPE_FIRST || aggtype == AGGTYPE_LAST) { - if (dependencies.size() == 1) { - dependencies.push_back(t_dep("psp_pkey", DEPTYPE_COLUMN)); - } - aggspecs.push_back( - t_aggspec(name, name, aggtype, dependencies, SORTTYPE_ASCENDING)); - } else { - aggspecs.push_back(t_aggspec(name, aggtype, dependencies)); - } - } - return aggspecs; -} - -// 
Date parsing -t_date -jsdate_to_t_date(val date) { - return t_date(date.call("getFullYear").as(), - date.call("getMonth").as(), - date.call("getDate").as()); -} - -val -t_date_to_jsdate(t_date date) { - val jsdate = val::global("Date").new_(); - jsdate.call("setYear", date.year()); - jsdate.call("setMonth", date.month()); - jsdate.call("setDate", date.day()); - jsdate.call("setHours", 0); - jsdate.call("setMinutes", 0); - jsdate.call("setSeconds", 0); - jsdate.call("setMilliseconds", 0); - return jsdate; -} - -/** - * Converts a scalar value to its JS representation. - * - * Params - * ------ - * t_tscalar scalar - * - * Returns - * ------- - * val - */ -val -scalar_to_val(const t_tscalar scalar) { - if (!scalar.is_valid()) { - return val::null(); - } - switch (scalar.get_dtype()) { - case DTYPE_BOOL: { - if (scalar) { - return val(true); - } else { - return val(false); - } - } - case DTYPE_TIME: - case DTYPE_FLOAT64: - case DTYPE_FLOAT32: { - return val(scalar.to_double()); - } - case DTYPE_DATE: { - return t_date_to_jsdate(scalar.get()).call("getTime"); - } - case DTYPE_UINT8: - case DTYPE_UINT16: - case DTYPE_UINT32: - case DTYPE_INT8: - case DTYPE_INT16: - case DTYPE_INT32: { - return val(static_cast(scalar.to_int64())); - } - case DTYPE_UINT64: - case DTYPE_INT64: { - // This could potentially lose precision - return val(static_cast(scalar.to_int64())); - } - case DTYPE_NONE: { - return val::null(); - } - case DTYPE_STR: - default: { - std::wstring_convert converter("", L""); - return val(converter.from_bytes(scalar.to_string())); - } - } -} - -val -scalar_vec_to_val(const std::vector& scalars, std::uint32_t idx) { - return scalar_to_val(scalars[idx]); -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ - -namespace arrow { - -void -vecFromTypedArray( - const val& typedArray, void* data, std::int32_t length, const char* destType = nullptr) { - val memory = val::module_property("buffer"); - if (destType == nullptr) { - val 
memoryView = typedArray["constructor"].new_( - memory, reinterpret_cast(data), length); - memoryView.call("set", typedArray.call("slice", 0, length)); - } else { - val memoryView = val::global(destType).new_( - memory, reinterpret_cast(data), length); - memoryView.call("set", typedArray.call("slice", 0, length)); - } -} - -void -fill_col_valid(val dcol, std::shared_ptr col) { - // dcol should be the Uint8Array containing the null bitmap - t_uindex nrows = col->size(); - - // arrow packs bools into a bitmap - for (auto i = 0; i < nrows; ++i) { - std::uint8_t elem = dcol[i / 8].as(); - bool v = elem & (1 << (i % 8)); - col->set_valid(i, v); - } -} - -void -fill_col_dict(val dictvec, std::shared_ptr col) { - // ptaylor: This assumes the dictionary is either a Binary or Utf8 Vector. Should it support - // other Vector types? - val vdata = dictvec["values"]; - std::int32_t vsize = vdata["length"].as(); - std::vector data; - data.reserve(vsize); - data.resize(vsize); - vecFromTypedArray(vdata, data.data(), vsize); - - val voffsets = dictvec["valueOffsets"]; - std::int32_t osize = voffsets["length"].as(); - std::vector offsets; - offsets.reserve(osize); - offsets.resize(osize); - vecFromTypedArray(voffsets, offsets.data(), osize); - - // Get number of dictionary entries - std::uint32_t dsize = dictvec["length"].as(); - - t_vocab* vocab = col->_get_vocab(); - std::string elem; - - for (std::uint32_t i = 0; i < dsize; ++i) { - std::int32_t bidx = offsets[i]; - std::size_t es = offsets[i + 1] - bidx; - elem.assign(reinterpret_cast(data.data()) + bidx, es); - t_uindex idx = vocab->get_interned(elem); - // Make sure there are no duplicates in the arrow dictionary - assert(idx == i); - } -} -} // namespace arrow - -namespace js_typed_array { -val ArrayBuffer = val::global("ArrayBuffer"); -val Int8Array = val::global("Int8Array"); -val Int16Array = val::global("Int16Array"); -val Int32Array = val::global("Int32Array"); -val Float32Array = val::global("Float32Array"); -val 
Float64Array = val::global("Float64Array"); -} // namespace js_typed_array - -// Given a column index, serialize data to TypedArray -template -val -col_to_js_typed_array(T ctx, t_index idx) { - std::vector data = ctx->get_data(0, ctx->get_row_count(), idx, idx + 1); - auto dtype = ctx->get_column_dtype(idx); - int data_size = data.size(); - val constructor = val::undefined(); - val sentinel = val::undefined(); - - switch (dtype) { - case DTYPE_INT8: { - data_size *= sizeof(std::int8_t); - sentinel = val(std::numeric_limits::lowest()); - constructor = js_typed_array::Int8Array; - } break; - case DTYPE_INT16: { - data_size *= sizeof(std::int16_t); - sentinel = val(std::numeric_limits::lowest()); - constructor = js_typed_array::Int16Array; - } break; - case DTYPE_INT32: - case DTYPE_INT64: { - // scalar_to_val converts int64 into int32 - data_size *= sizeof(std::int32_t); - sentinel = val(std::numeric_limits::lowest()); - constructor = js_typed_array::Int32Array; - } break; - case DTYPE_FLOAT32: { - data_size *= sizeof(float); - sentinel = val(std::numeric_limits::lowest()); - constructor = js_typed_array::Float32Array; - } break; - case DTYPE_TIME: - case DTYPE_FLOAT64: { - sentinel = val(std::numeric_limits::lowest()); - data_size *= sizeof(double); - constructor = js_typed_array::Float64Array; - } break; - default: - return constructor; - } - - val buffer = js_typed_array::ArrayBuffer.new_(data_size); - val arr = constructor.new_(buffer); - - for (int idx = 0; idx < data.size(); idx++) { - t_tscalar scalar = data[idx]; - if (scalar.get_dtype() == DTYPE_NONE) { - arr.call("fill", sentinel, idx, idx + 1); - } else { - arr.call("fill", scalar_to_val(scalar), idx, idx + 1); - } - } - - return arr; -} - -void -_fill_col_numeric(val accessor, t_table& tbl, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, - bool is_arrow) { - t_uindex nrows = col->size(); - - if (is_arrow) { - val data = accessor["values"]; - - switch (type) { - case DTYPE_INT8: { - 
arrow::vecFromTypedArray(data, col->get_nth(0), nrows); - } break; - case DTYPE_INT16: { - arrow::vecFromTypedArray(data, col->get_nth(0), nrows); - } break; - case DTYPE_INT32: { - arrow::vecFromTypedArray(data, col->get_nth(0), nrows); - } break; - case DTYPE_FLOAT32: { - arrow::vecFromTypedArray(data, col->get_nth(0), nrows); - } break; - case DTYPE_FLOAT64: { - arrow::vecFromTypedArray(data, col->get_nth(0), nrows); - } break; - default: - break; - } - } else { - for (auto i = 0; i < nrows; ++i) { - val item = accessor.call("marshal", cidx, i, type); - - if (item.isUndefined()) - continue; - - if (item.isNull()) { - col->unset(i); - continue; - } - - switch (type) { - case DTYPE_INT8: { - col->set_nth(i, item.as()); - } break; - case DTYPE_INT16: { - col->set_nth(i, item.as()); - } break; - case DTYPE_INT32: { - // This handles cases where a long sequence of e.g. 0 precedes a clearly - // float value in an inferred column. Would not be needed if the type - // inference checked the entire column/we could reset parsing. 
- double fval = item.as(); - if (fval > 2147483647 || fval < -2147483648) { - tbl.promote_column(name, DTYPE_FLOAT64, i); - col = tbl.get_column(name); - type = DTYPE_FLOAT64; - col->set_nth(i, fval); - } else { - col->set_nth(i, static_cast(fval)); - } - } break; - case DTYPE_FLOAT32: { - col->set_nth(i, item.as()); - } break; - case DTYPE_FLOAT64: { - col->set_nth(i, item.as()); - } break; - default: - break; - } - } - } -} - -void -_fill_col_int64(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, - bool is_arrow) { - t_uindex nrows = col->size(); - - if (is_arrow) { - val data = accessor["values"]; - // arrow packs 64 bit into two 32 bit ints - arrow::vecFromTypedArray(data, col->get_nth(0), nrows * 2); - } else { - PSP_COMPLAIN_AND_ABORT( - "Unreachable - can't have DTYPE_INT64 column from non-arrow data"); - } -} - -void -_fill_col_time(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, - bool is_arrow) { - t_uindex nrows = col->size(); - - if (is_arrow) { - val data = accessor["values"]; - // arrow packs 64 bit into two 32 bit ints - arrow::vecFromTypedArray(data, col->get_nth(0), nrows * 2); - - std::int8_t unit = accessor["type"]["unit"].as(); - if (unit != /* Arrow.enum_.TimeUnit.MILLISECOND */ 1) { - // Slow path - need to convert each value - std::int64_t factor = 1; - if (unit == /* Arrow.enum_.TimeUnit.NANOSECOND */ 3) { - factor = 1e6; - } else if (unit == /* Arrow.enum_.TimeUnit.MICROSECOND */ 2) { - factor = 1e3; - } - for (auto i = 0; i < nrows; ++i) { - col->set_nth(i, *(col->get_nth(i)) / factor); - } - } - } else { - for (auto i = 0; i < nrows; ++i) { - val item = accessor.call("marshal", cidx, i, type); - - if (item.isUndefined()) - continue; - - if (item.isNull()) { - col->unset(i); - continue; - } - - auto elem = static_cast( - item.call("getTime").as()); // dcol[i].as(); - col->set_nth(i, elem); - } - } -} - -void -_fill_col_date(val accessor, std::shared_ptr col, 
std::string name, std::int32_t cidx, t_dtype type, - bool is_arrow) { - t_uindex nrows = col->size(); - - if (is_arrow) { - // val data = dcol["values"]; - // // arrow packs 64 bit into two 32 bit ints - // arrow::vecFromTypedArray(data, col->get_nth(0), nrows * 2); - - // std::int8_t unit = dcol["type"]["unit"].as(); - // if (unit != /* Arrow.enum_.TimeUnit.MILLISECOND */ 1) { - // // Slow path - need to convert each value - // std::int64_t factor = 1; - // if (unit == /* Arrow.enum_.TimeUnit.NANOSECOND */ 3) { - // factor = 1e6; - // } else if (unit == /* Arrow.enum_.TimeUnit.MICROSECOND */ 2) { - // factor = 1e3; - // } - // for (auto i = 0; i < nrows; ++i) { - // col->set_nth(i, *(col->get_nth(i)) / factor); - // } - // } - } else { - for (auto i = 0; i < nrows; ++i) { - val item = accessor.call("marshal", cidx, i, type); - - if (item.isUndefined()) - continue; - - if (item.isNull()) { - col->unset(i); - continue; - } - - col->set_nth(i, jsdate_to_t_date(item)); - } - } -} - -void -_fill_col_bool(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, - bool is_arrow) { - t_uindex nrows = col->size(); - - if (is_arrow) { - // arrow packs bools into a bitmap - val data = accessor["values"]; - for (auto i = 0; i < nrows; ++i) { - std::uint8_t elem = data[i / 8].as(); - bool v = elem & (1 << (i % 8)); - col->set_nth(i, v); - } - } else { - for (auto i = 0; i < nrows; ++i) { - val item = accessor.call("marshal", cidx, i, type); - - if (item.isUndefined()) - continue; - - if (item.isNull()) { - col->unset(i); - continue; - } - - auto elem = item.as(); - col->set_nth(i, elem); - } - } -} - -void -_fill_col_string(val accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, - bool is_arrow) { - - t_uindex nrows = col->size(); - - if (is_arrow) { - if (accessor["constructor"]["name"].as() == "DictionaryVector") { - - val dictvec = accessor["dictionary"]; - arrow::fill_col_dict(dictvec, col); - - // Now process 
index into dictionary - - // Perspective stores string indices in a 32bit unsigned array - // Javascript's typed arrays handle copying from various bitwidth arrays properly - val vkeys = accessor["indices"]["values"]; - arrow::vecFromTypedArray(vkeys, col->get_nth(0), nrows, "Uint32Array"); - - } else if (accessor["constructor"]["name"].as() == "Utf8Vector" - || accessor["constructor"]["name"].as() == "BinaryVector") { - - val vdata = accessor["values"]; - std::int32_t vsize = vdata["length"].as(); - std::vector data; - data.reserve(vsize); - data.resize(vsize); - arrow::vecFromTypedArray(vdata, data.data(), vsize); - - val voffsets = accessor["valueOffsets"]; - std::int32_t osize = voffsets["length"].as(); - std::vector offsets; - offsets.reserve(osize); - offsets.resize(osize); - arrow::vecFromTypedArray(voffsets, offsets.data(), osize); - - std::string elem; - - for (std::int32_t i = 0; i < nrows; ++i) { - std::int32_t bidx = offsets[i]; - std::size_t es = offsets[i + 1] - bidx; - elem.assign(reinterpret_cast(data.data()) + bidx, es); - col->set_nth(i, elem); - } - } - } else { - for (auto i = 0; i < nrows; ++i) { - val item = accessor.call("marshal", cidx, i, type); - - if (item.isUndefined()) - continue; - - if (item.isNull()) { - col->unset(i); - continue; - } - - std::wstring welem = item.as(); - std::wstring_convert converter; - std::string elem = converter.to_bytes(welem); - col->set_nth(i, elem); - } - } -} - -/** - * Fills the table with data from Javascript. 
- * - * Params - * ------ - * tbl - pointer to the table object - * ocolnames - vector of column names - * accessor - the JS data accessor interface - * odt - vector of data types - * offset - * is_arrow - flag for arrow data - * - * Returns - * ------- - * - */ -void -_fill_data(t_table& tbl, std::vector ocolnames, val accessor, - std::vector odt, std::uint32_t offset, bool is_arrow) { - - for (auto cidx = 0; cidx < ocolnames.size(); ++cidx) { - auto name = ocolnames[cidx]; - auto col = tbl.get_column(name); - auto col_type = odt[cidx]; - - val dcol = val::undefined(); - - if (is_arrow) { - dcol = accessor["cdata"][cidx]; - } else { - dcol = accessor; - } - - switch (col_type) { - case DTYPE_INT64: { - _fill_col_int64(dcol, col, name, cidx, col_type, is_arrow); - } break; - case DTYPE_BOOL: { - _fill_col_bool(dcol, col, name, cidx, col_type, is_arrow); - } break; - case DTYPE_DATE: { - _fill_col_date(dcol, col, name, cidx, col_type, is_arrow); - } break; - case DTYPE_TIME: { - _fill_col_time(dcol, col, name, cidx, col_type, is_arrow); - } break; - case DTYPE_STR: { - _fill_col_string(dcol, col, name, cidx, col_type, is_arrow); - } break; - case DTYPE_NONE: { - break; - } - default: - _fill_col_numeric(dcol, tbl, col, name, cidx, col_type, is_arrow); - } - - if (is_arrow) { - // Fill validity bitmap - std::uint32_t null_count = dcol["nullCount"].as(); - - if (null_count == 0) { - col->valid_raw_fill(); - } else { - val validity = dcol["nullBitmap"]; - arrow::fill_col_valid(validity, col); - } - } - } -} - -/****************************************************************************** - * - * Public - */ - -void -set_column_nth(t_column* col, t_uindex idx, val value) { - - // Check if the value is a javascript null - if (value.isNull()) { - col->unset(idx); - return; - } - - switch (col->get_dtype()) { - case DTYPE_BOOL: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } - case DTYPE_FLOAT64: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } 
- case DTYPE_FLOAT32: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } - case DTYPE_UINT32: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } - case DTYPE_UINT64: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } - case DTYPE_INT32: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } - case DTYPE_INT64: { - col->set_nth(idx, value.as(), STATUS_VALID); - break; - } - case DTYPE_STR: { - std::wstring welem = value.as(); - - std::wstring_convert converter; - std::string elem = converter.to_bytes(welem); - col->set_nth(idx, elem, STATUS_VALID); - break; - } - case DTYPE_DATE: { - col->set_nth(idx, jsdate_to_t_date(value), STATUS_VALID); - break; - } - case DTYPE_TIME: { - col->set_nth( - idx, static_cast(value.as()), STATUS_VALID); - break; - } - case DTYPE_UINT8: - case DTYPE_UINT16: - case DTYPE_INT8: - case DTYPE_INT16: - default: { - // Other types not implemented - } - } -} - -/** - * Helper function for computed columns - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -void -table_add_computed_column(t_table& table, val computed_defs) { - auto vcomputed_defs = vecFromJSArray(computed_defs); - for (auto i = 0; i < vcomputed_defs.size(); ++i) { - val coldef = vcomputed_defs[i]; - std::string name = coldef["column"].as(); - val inputs = coldef["inputs"]; - val func = coldef["func"]; - val type = coldef["type"]; - - std::string stype; - - if (type.isUndefined()) { - stype = "string"; - } else { - stype = type.as(); - } - - t_dtype dtype; - if (stype == "integer") { - dtype = DTYPE_INT32; - } else if (stype == "float") { - dtype = DTYPE_FLOAT64; - } else if (stype == "boolean") { - dtype = DTYPE_BOOL; - } else if (stype == "date") { - dtype = DTYPE_DATE; - } else if (stype == "datetime") { - dtype = DTYPE_TIME; - } else { - dtype = DTYPE_STR; - } - - // Get list of input column names - auto icol_names = vecFromJSArray(inputs); - - // Get t_column* for all input columns - std::vector icols; - for (const 
auto& cc : icol_names) { - icols.push_back(table._get_column(cc)); - } - - int arity = icols.size(); - - // Add new column - t_column* out = table.add_column(name, dtype, true); - - val i1 = val::undefined(), i2 = val::undefined(), i3 = val::undefined(), - i4 = val::undefined(); - - t_uindex size = table.size(); - for (t_uindex ridx = 0; ridx < size; ++ridx) { - val value = val::undefined(); - - switch (arity) { - case 0: { - value = func(); - break; - } - case 1: { - i1 = scalar_to_val(icols[0]->get_scalar(ridx)); - if (!i1.isNull()) { - value = func(i1); - } - break; - } - case 2: { - i1 = scalar_to_val(icols[0]->get_scalar(ridx)); - i2 = scalar_to_val(icols[1]->get_scalar(ridx)); - if (!i1.isNull() && !i2.isNull()) { - value = func(i1, i2); - } - break; - } - case 3: { - i1 = scalar_to_val(icols[0]->get_scalar(ridx)); - i2 = scalar_to_val(icols[1]->get_scalar(ridx)); - i3 = scalar_to_val(icols[2]->get_scalar(ridx)); - if (!i1.isNull() && !i2.isNull() && !i3.isNull()) { - value = func(i1, i2, i3); - } - break; - } - case 4: { - i1 = scalar_to_val(icols[0]->get_scalar(ridx)); - i2 = scalar_to_val(icols[1]->get_scalar(ridx)); - i3 = scalar_to_val(icols[2]->get_scalar(ridx)); - i4 = scalar_to_val(icols[3]->get_scalar(ridx)); - if (!i1.isNull() && !i2.isNull() && !i3.isNull() && !i4.isNull()) { - value = func(i1, i2, i3, i4); - } - break; - } - default: { - // Don't handle other arity values - break; - } - } - - if (!value.isUndefined()) { - set_column_nth(out, ridx, value); - } - } - } -} - -/** - * DataAccessor - * - * parses and converts input data into a canonical format for - * interfacing with Perspective. 
- */ - -// Name parsing -std::vector -column_names(val data, std::int32_t format) { - std::vector names; - val Object = val::global("Object"); - - if (format == 0) { - std::int32_t max_check = 50; - val data_names = Object.call("keys", data[0]); - std::int32_t check_index = std::min(max_check, data["length"].as()); - - for (auto ix = 0; ix < check_index; ix++) { - val next = Object.call("keys", data[ix]); - if (data_names["length"] != next["length"]) { - if (max_check == 50) { - std::cout << "Data parse warning: Array data has inconsistent rows" - << std::endl; - } - - std::cout << boost::format("Extending from %d to %d") - % data_names["length"].as() - % next["length"].as() - << std::endl; - data_names = next; - max_check *= 2; - } - - names = vecFromJSArray(data_names); - } - } else if (format == 1 || format == 2) { - names = vecFromJSArray(Object.call("keys", data)); - } - - return names; -} - -// Type inferrence for fill_col and data_types -t_dtype -infer_type(val x, val date_validator) { - std::string jstype = x.typeOf().as(); - t_dtype t = t_dtype::DTYPE_STR; - - // Unwrap numbers inside strings - val x_number = val::global("Number").call("call", val::object(), x); - bool number_in_string = (jstype == "string") - && (x["length"].as() != 0) - && (!val::global("isNaN") - .call("call", val::object(), x_number)); - - if (x.isNull()) { - t = t_dtype::DTYPE_NONE; - } else if (jstype == "number" || number_in_string) { - if (number_in_string) { - x = x_number; - } - double x_float64 = x.as(); - if ((std::fmod(x_float64, 1.0) == 0.0) && (x_float64 < 10000.0) && (x_float64 != 0.0)) { - t = t_dtype::DTYPE_INT32; - } else { - t = t_dtype::DTYPE_FLOAT64; - } - } else if (jstype == "boolean") { - t = t_dtype::DTYPE_BOOL; - } else if (x.instanceof (val::global("Date"))) { - std::int32_t hours = x.call("getHours").as(); - std::int32_t minutes = x.call("getMinutes").as(); - std::int32_t seconds = x.call("getSeconds").as(); - std::int32_t milliseconds = 
x.call("getMilliseconds").as(); - - if (hours == 0 && minutes == 0 && seconds == 0 && milliseconds == 0) { - t = t_dtype::DTYPE_DATE; - } else { - t = t_dtype::DTYPE_TIME; - } - } else if (jstype == "string") { - if (date_validator.call("call", val::object(), x).as()) { - t = t_dtype::DTYPE_TIME; - } else { - std::string lower = x.call("toLowerCase").as(); - if (lower == "true" || lower == "false") { - t = t_dtype::DTYPE_BOOL; - } else { - t = t_dtype::DTYPE_STR; - } - } - } - - return t; -} - -t_dtype -get_data_type(val data, std::int32_t format, std::string name, val date_validator) { - std::int32_t i = 0; - boost::optional inferredType; - - if (format == 0) { - // loop parameters differ slightly so rewrite the loop - while (!inferredType.is_initialized() && i < 100 - && i < data["length"].as()) { - if (data[i].call("hasOwnProperty", name).as() == true) { - if (!data[i][name].isNull()) { - inferredType = infer_type(data[i][name], date_validator); - } else { - inferredType = t_dtype::DTYPE_STR; - } - } - - i++; - } - } else if (format == 1) { - while (!inferredType.is_initialized() && i < 100 - && i < data[name]["length"].as()) { - if (!data[name][i].isNull()) { - inferredType = infer_type(data[name][i], date_validator); - } else { - inferredType = t_dtype::DTYPE_STR; - } - - i++; - } - } - - if (!inferredType.is_initialized()) { - return t_dtype::DTYPE_STR; - } else { - return inferredType.get(); - } -} - -std::vector -data_types(val data, std::int32_t format, std::vector names, val date_validator) { - if (names.size() == 0) { - PSP_COMPLAIN_AND_ABORT("Cannot determine data types without column names!"); - } - - std::vector types; - - if (format == 2) { - std::vector data_names - = vecFromJSArray(val::global("Object").call("keys", data)); - - for (std::vector::iterator name = data_names.begin(); - name != data_names.end(); ++name) { - std::string value = data[*name].as(); - t_dtype type; - - if (value == "integer") { - type = t_dtype::DTYPE_INT32; - } else if 
(value == "float") { - type = t_dtype::DTYPE_FLOAT64; - } else if (value == "string") { - type = t_dtype::DTYPE_STR; - } else if (value == "boolean") { - type = t_dtype::DTYPE_BOOL; - } else if (value == "datetime") { - type = t_dtype::DTYPE_TIME; - } else if (value == "date") { - type = t_dtype::DTYPE_DATE; - } else { - PSP_COMPLAIN_AND_ABORT("Unknown type '" + value + "' for key '" + *name + "'"); - } - - types.push_back(type); - } - - return types; - } else { - for (std::vector::iterator name = names.begin(); name != names.end(); - ++name) { - t_dtype type = get_data_type(data, format, *name, date_validator); - types.push_back(type); - } - } - - return types; -} - -/** - * Create a default gnode. - * - * Params - * ------ - * j_colnames - a JS Array of column names. - * j_dtypes - a JS Array of column types. - * - * Returns - * ------- - * A gnode. - */ -std::shared_ptr -make_gnode(const t_table& table) { - auto iscm = table.get_schema(); - - std::vector ocolnames(iscm.columns()); - std::vector odt(iscm.types()); - - if (iscm.has_column("psp_pkey")) { - t_uindex idx = iscm.get_colidx("psp_pkey"); - ocolnames.erase(ocolnames.begin() + idx); - odt.erase(odt.begin() + idx); - } - - if (iscm.has_column("psp_op")) { - t_uindex idx = iscm.get_colidx("psp_op"); - ocolnames.erase(ocolnames.begin() + idx); - odt.erase(odt.begin() + idx); - } - - t_schema oscm(ocolnames, odt); - - // Create a gnode - auto gnode = std::make_shared(oscm, iscm); - gnode->init(); - - return gnode; -} -/** - * Create a populated table. - * - * Params - * ------ - * chunk - a JS object containing parsed data and associated metadata - * offset - * limit - * index - * is_delete - sets the table operation - * - * Returns - * ------- - * a populated table. 
- */ -std::shared_ptr -make_table(t_pool* pool, val gnode, val accessor, val computed, std::uint32_t offset, - std::uint32_t limit, std::string index, bool is_update, bool is_delete, bool is_arrow) { - std::uint32_t size = accessor["row_count"].as(); - - std::vector colnames; - std::vector dtypes; - - // Determine metadata - if (is_arrow || (is_update || is_delete)) { - // TODO: fully remove intermediate passed-through JS arrays for non-arrow data - colnames = vecFromJSArray(accessor["names"]); - dtypes = vecFromJSArray(accessor["types"]); - } else { - // Infer names and types - val data = accessor["data"]; - std::int32_t format = accessor["format"].as(); - colnames = column_names(data, format); - dtypes = data_types(data, format, colnames, accessor["date_validator"]); - } - - // Check if index is valid after getting column names - bool valid_index = std::find(colnames.begin(), colnames.end(), index) != colnames.end(); - if (index != "" && !valid_index) { - PSP_COMPLAIN_AND_ABORT("Specified index '" + index + "' does not exist in data.") - } - - // Create the table - // TODO assert size > 0 - t_table tbl(t_schema(colnames, dtypes)); - tbl.init(); - tbl.extend(size); - - _fill_data(tbl, colnames, accessor, dtypes, offset, is_arrow); - - // Set up pkey and op columns - if (is_delete) { - auto op_col = tbl.add_column("psp_op", DTYPE_UINT8, false); - op_col->raw_fill(OP_DELETE); - } else { - auto op_col = tbl.add_column("psp_op", DTYPE_UINT8, false); - op_col->raw_fill(OP_INSERT); - } - - if (index == "") { - // If user doesn't specify an column to use as the pkey index, just use - // row number - auto key_col = tbl.add_column("psp_pkey", DTYPE_INT32, true); - auto okey_col = tbl.add_column("psp_okey", DTYPE_INT32, true); - - for (auto ridx = 0; ridx < tbl.size(); ++ridx) { - key_col->set_nth(ridx, (ridx + offset) % limit); - okey_col->set_nth(ridx, (ridx + offset) % limit); - } - } else { - tbl.clone_column(index, "psp_pkey"); - tbl.clone_column(index, "psp_okey"); - 
} - - std::shared_ptr new_gnode; - - if (gnode.isUndefined()) { - new_gnode = make_gnode(tbl); - pool->register_gnode(new_gnode.get()); - } else { - new_gnode = gnode.as>(); - } - - if (!computed.isUndefined()) { - table_add_computed_column(tbl, computed); - } - - pool->send(new_gnode->get_id(), 0, tbl); - pool->_process(); - - return new_gnode; -} - -/** - * Copies the internal table from a gnode - * - * Params - * ------ - * - * Returns - * ------- - * A gnode. - */ -std::shared_ptr -clone_gnode_table(t_pool* pool, std::shared_ptr gnode, val computed) { - t_table* tbl = gnode->_get_pkeyed_table(); - table_add_computed_column(*tbl, computed); - std::shared_ptr new_gnode = make_gnode(*tbl); - pool->register_gnode(new_gnode.get()); - pool->send(new_gnode->get_id(), 0, *tbl); - pool->_process(); - return new_gnode; -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -std::shared_ptr -make_context_zero(t_schema schema, t_filter_op combiner, val j_filters, val j_columns, - val j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { - auto columns = vecFromJSArray(j_columns); - auto fvec = _get_fterms(schema, j_filters); - auto svec = _get_sort(j_sortby); - auto cfg = t_config(columns, combiner, fvec); - auto ctx0 = std::make_shared(schema, cfg); - ctx0->init(); - ctx0->sort_by(svec); - pool->register_context(gnode->get_id(), name, ZERO_SIDED_CONTEXT, - reinterpret_cast(ctx0.get())); - return ctx0; -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -std::shared_ptr -make_context_one(t_schema schema, val j_pivots, t_filter_op combiner, val j_filters, val j_aggs, - val j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { - auto fvec = _get_fterms(schema, j_filters); - auto aggspecs = _get_aggspecs(j_aggs); - auto pivots = vecFromJSArray(j_pivots); - auto svec = _get_sort(j_sortby); - - auto cfg = t_config(pivots, aggspecs, combiner, fvec); - auto ctx1 = std::make_shared(schema, cfg); - - 
ctx1->init(); - ctx1->sort_by(svec); - pool->register_context( - gnode->get_id(), name, ONE_SIDED_CONTEXT, reinterpret_cast(ctx1.get())); - return ctx1; -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -std::shared_ptr -make_context_two(t_schema schema, val j_rpivots, val j_cpivots, t_filter_op combiner, - val j_filters, val j_aggs, bool show_totals, t_pool* pool, std::shared_ptr gnode, - std::string name) { - auto fvec = _get_fterms(schema, j_filters); - auto aggspecs = _get_aggspecs(j_aggs); - auto rpivots = vecFromJSArray(j_rpivots); - auto cpivots = vecFromJSArray(j_cpivots); - t_totals total = show_totals ? TOTALS_BEFORE : TOTALS_HIDDEN; - - auto cfg = t_config(rpivots, cpivots, aggspecs, total, combiner, fvec); - auto ctx2 = std::make_shared(schema, cfg); - - ctx2->init(); - pool->register_context( - gnode->get_id(), name, TWO_SIDED_CONTEXT, reinterpret_cast(ctx2.get())); - return ctx2; -} - -void -sort(std::shared_ptr ctx2, val j_sortby, val j_column_sortby) { - auto svec = _get_sort(j_sortby); - if (svec.size() > 0) { - ctx2->sort_by(svec); - } - ctx2->column_sort_by(_get_sort(j_column_sortby)); -} - -val -get_column_data(std::shared_ptr table, std::string colname) { - val arr = val::array(); - auto col = table->get_column(colname); - for (auto idx = 0; idx < col->size(); ++idx) { - arr.set(idx, scalar_to_val(col->get_scalar(idx))); - } - return arr; -} - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -val -get_data(T ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, - std::uint32_t end_col) { - auto slice = ctx->get_data(start_row, end_row, start_col, end_col); - val arr = val::array(); - for (auto idx = 0; idx < slice.size(); ++idx) { - arr.set(idx, scalar_to_val(slice[idx])); - } - return arr; -} - -val -get_data_two_skip_headers(std::shared_ptr ctx, std::uint32_t depth, - std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, - 
std::uint32_t end_col) { - auto col_length = ctx->unity_get_column_count(); - std::vector col_nums; - col_nums.push_back(0); - for (t_uindex i = 0; i < col_length; ++i) { - if (ctx->unity_get_column_path(i + 1).size() == depth) { - col_nums.push_back(i + 1); - } - } - col_nums = std::vector(col_nums.begin() + start_col, - col_nums.begin() + std::min(end_col, (std::uint32_t)col_nums.size())); - auto slice = ctx->get_data(start_row, end_row, col_nums.front(), col_nums.back() + 1); - val arr = val::array(); - t_uindex i = 0; - auto iter = slice.begin(); - while (iter != slice.end()) { - t_uindex prev = col_nums.front(); - for (auto idx = col_nums.begin(); idx != col_nums.end(); idx++, i++) { - t_uindex col_num = *idx; - iter += col_num - prev; - prev = col_num; - arr.set(i, scalar_to_val(*iter)); - } - if (iter != slice.end()) - iter++; - } - return arr; -} /** * Main */ int main(int argc, char** argv) { std::cout << "Perspective initialized successfully" << std::endl; - - // clang-format off - EM_ASM({ - - if (typeof self !== "undefined") { - if (self.dispatchEvent && !self._perspective_initialized && self.document) { - self._perspective_initialized = true; - var event = self.document.createEvent("Event"); - event.initEvent("perspective-ready", false, true); - self.dispatchEvent(event); - } else if (!self.document && self.postMessage) { - self.postMessage({}); - } - } - - }); - // clang-format on -} - -/****************************************************************************** - * - * Embind - */ - -EMSCRIPTEN_BINDINGS(perspective) { - class_("t_column") - .smart_ptr>("shared_ptr") - .function("set_scalar", &t_column::set_scalar); - - class_("t_table") - .constructor() - .smart_ptr>("shared_ptr") - .function("add_column", &t_table::add_column, allow_raw_pointers()) - .function("pprint", &t_table::pprint) - .function( - "size", reinterpret_cast(&t_table::size)); - - class_("t_schema") - .function&>( - "columns", &t_schema::columns, allow_raw_pointers()) - 
.function>("types", &t_schema::types, allow_raw_pointers()); - - class_("t_gnode") - .constructor&, - const std::vector&, const std::vector&>() - .smart_ptr>("shared_ptr") - .function( - "get_id", reinterpret_cast(&t_gnode::get_id)) - .function("get_tblschema", &t_gnode::get_tblschema) - .function("get_table", &t_gnode::get_table, allow_raw_pointers()); - - class_("t_ctx0") - .constructor() - .smart_ptr>("shared_ptr") - .function("sidedness", &t_ctx0::sidedness) - .function("get_row_count", - reinterpret_cast(&t_ctx0::get_row_count)) - .function("get_column_count", - reinterpret_cast(&t_ctx0::get_column_count)) - .function>("get_data", &t_ctx0::get_data) - .function("get_step_delta", &t_ctx0::get_step_delta) - .function>("get_cell_delta", &t_ctx0::get_cell_delta) - .function>("get_column_names", &t_ctx0::get_column_names) - // .function>("get_min_max", &t_ctx0::get_min_max) - // .function("set_minmax_enabled", &t_ctx0::set_minmax_enabled) - .function>("unity_get_row_data", &t_ctx0::unity_get_row_data) - .function>( - "unity_get_column_data", &t_ctx0::unity_get_column_data) - .function>("unity_get_row_path", &t_ctx0::unity_get_row_path) - .function>( - "unity_get_column_path", &t_ctx0::unity_get_column_path) - .function("unity_get_row_depth", &t_ctx0::unity_get_row_depth) - .function("unity_get_column_depth", &t_ctx0::unity_get_column_depth) - .function("unity_get_column_name", &t_ctx0::unity_get_column_name) - .function( - "unity_get_column_display_name", &t_ctx0::unity_get_column_display_name) - .function>( - "unity_get_column_names", &t_ctx0::unity_get_column_names) - .function>( - "unity_get_column_display_names", &t_ctx0::unity_get_column_display_names) - .function("unity_get_column_count", &t_ctx0::unity_get_column_count) - .function("unity_get_row_count", &t_ctx0::unity_get_row_count) - .function("unity_get_row_expanded", &t_ctx0::unity_get_row_expanded) - .function("unity_get_column_expanded", &t_ctx0::unity_get_column_expanded) - 
.function("unity_init_load_step_end", &t_ctx0::unity_init_load_step_end); - - class_("t_ctx1") - .constructor() - .smart_ptr>("shared_ptr") - .function("sidedness", &t_ctx1::sidedness) - .function("get_row_count", - reinterpret_cast(&t_ctx1::get_row_count)) - .function("get_column_count", - reinterpret_cast(&t_ctx1::get_column_count)) - .function>("get_data", &t_ctx1::get_data) - .function("get_step_delta", &t_ctx1::get_step_delta) - .function>("get_cell_delta", &t_ctx1::get_cell_delta) - .function("set_depth", &t_ctx1::set_depth) - .function("open", select_overload(&t_ctx1::open)) - .function("close", select_overload(&t_ctx1::close)) - .function("get_trav_depth", &t_ctx1::get_trav_depth) - .function>("get_column_names", &t_ctx1::get_aggregates) - .function>("unity_get_row_data", &t_ctx1::unity_get_row_data) - .function>( - "unity_get_column_data", &t_ctx1::unity_get_column_data) - .function>("unity_get_row_path", &t_ctx1::unity_get_row_path) - .function>( - "unity_get_column_path", &t_ctx1::unity_get_column_path) - .function("unity_get_row_depth", &t_ctx1::unity_get_row_depth) - .function("unity_get_column_depth", &t_ctx1::unity_get_column_depth) - .function("unity_get_column_name", &t_ctx1::unity_get_column_name) - .function( - "unity_get_column_display_name", &t_ctx1::unity_get_column_display_name) - .function>( - "unity_get_column_names", &t_ctx1::unity_get_column_names) - .function>( - "unity_get_column_display_names", &t_ctx1::unity_get_column_display_names) - .function("unity_get_column_count", &t_ctx1::unity_get_column_count) - .function("unity_get_row_count", &t_ctx1::unity_get_row_count) - .function("unity_get_row_expanded", &t_ctx1::unity_get_row_expanded) - .function("unity_get_column_expanded", &t_ctx1::unity_get_column_expanded) - .function("unity_init_load_step_end", &t_ctx1::unity_init_load_step_end); - - class_("t_ctx2") - .constructor() - .smart_ptr>("shared_ptr") - .function("sidedness", &t_ctx2::sidedness) - .function("get_row_count", - 
reinterpret_cast( - select_overload(&t_ctx2::get_row_count))) - .function("get_column_count", - reinterpret_cast(&t_ctx2::get_column_count)) - .function>("get_data", &t_ctx2::get_data) - .function("get_step_delta", &t_ctx2::get_step_delta) - //.function>("get_cell_delta", &t_ctx2::get_cell_delta) - .function("set_depth", &t_ctx2::set_depth) - .function("open", select_overload(&t_ctx2::open)) - .function("close", select_overload(&t_ctx2::close)) - .function>("get_column_names", &t_ctx2::get_aggregates) - .function>("unity_get_row_data", &t_ctx2::unity_get_row_data) - .function>( - "unity_get_column_data", &t_ctx2::unity_get_column_data) - .function>("unity_get_row_path", &t_ctx2::unity_get_row_path) - .function>( - "unity_get_column_path", &t_ctx2::unity_get_column_path) - .function("unity_get_row_depth", &t_ctx2::unity_get_row_depth) - .function("unity_get_column_depth", &t_ctx2::unity_get_column_depth) - .function("unity_get_column_name", &t_ctx2::unity_get_column_name) - .function( - "unity_get_column_display_name", &t_ctx2::unity_get_column_display_name) - .function>( - "unity_get_column_names", &t_ctx2::unity_get_column_names) - .function>( - "unity_get_column_display_names", &t_ctx2::unity_get_column_display_names) - .function("unity_get_column_count", &t_ctx2::unity_get_column_count) - .function("unity_get_row_count", &t_ctx2::unity_get_row_count) - .function("unity_get_row_expanded", &t_ctx2::unity_get_row_expanded) - .function("unity_get_column_expanded", &t_ctx2::unity_get_column_expanded) - .function("unity_init_load_step_end", &t_ctx2::unity_init_load_step_end) - .function("get_totals", &t_ctx2::get_totals) - .function>( - "get_column_path_userspace", &t_ctx2::get_column_path_userspace) - .function("unity_init_load_step_end", &t_ctx2::unity_init_load_step_end); - - class_("t_pool") - .constructor<>() - .smart_ptr>("shared_ptr") - .function("register_gnode", &t_pool::register_gnode, allow_raw_pointers()) - .function("process", &t_pool::_process) - 
.function("send", &t_pool::send) - .function("epoch", &t_pool::epoch) - .function("unregister_gnode", &t_pool::unregister_gnode) - .function("set_update_delegate", &t_pool::set_update_delegate) - .function("register_context", &t_pool::register_context) - .function("unregister_context", &t_pool::unregister_context) - .function>( - "get_contexts_last_updated", &t_pool::get_contexts_last_updated) - .function>( - "get_gnodes_last_updated", &t_pool::get_gnodes_last_updated) - .function("get_gnode", &t_pool::get_gnode, allow_raw_pointers()); - - class_("t_aggspec").function("name", &t_aggspec::name); - - class_("t_tscalar"); - - value_object("t_updctx") - .field("gnode_id", &t_updctx::m_gnode_id) - .field("ctx_name", &t_updctx::m_ctx); - - value_object("t_cellupd") - .field("row", &t_cellupd::row) - .field("column", &t_cellupd::column) - .field("old_value", &t_cellupd::old_value) - .field("new_value", &t_cellupd::new_value); - - value_object("t_stepdelta") - .field("rows_changed", &t_stepdelta::rows_changed) - .field("columns_changed", &t_stepdelta::columns_changed) - .field("cells", &t_stepdelta::cells); - - register_vector("std::vector"); - register_vector("std::vector"); - register_vector("std::vector"); - register_vector("std::vector"); - register_vector("std::vector"); - register_vector("std::vector"); - register_vector("std::vector"); - - enum_("t_header") - .value("HEADER_ROW", HEADER_ROW) - .value("HEADER_COLUMN", HEADER_COLUMN); - - enum_("t_ctx_type") - .value("ZERO_SIDED_CONTEXT", ZERO_SIDED_CONTEXT) - .value("ONE_SIDED_CONTEXT", ONE_SIDED_CONTEXT) - .value("TWO_SIDED_CONTEXT", TWO_SIDED_CONTEXT) - .value("GROUPED_ZERO_SIDED_CONTEXT", GROUPED_ZERO_SIDED_CONTEXT) - .value("GROUPED_PKEY_CONTEXT", GROUPED_PKEY_CONTEXT) - .value("GROUPED_COLUMNS_CONTEXT", GROUPED_COLUMNS_CONTEXT); - - enum_("t_filter_op") - .value("FILTER_OP_LT", FILTER_OP_LT) - .value("FILTER_OP_LTEQ", FILTER_OP_LTEQ) - .value("FILTER_OP_GT", FILTER_OP_GT) - .value("FILTER_OP_GTEQ", 
FILTER_OP_GTEQ) - .value("FILTER_OP_EQ", FILTER_OP_EQ) - .value("FILTER_OP_NE", FILTER_OP_NE) - .value("FILTER_OP_BEGINS_WITH", FILTER_OP_BEGINS_WITH) - .value("FILTER_OP_ENDS_WITH", FILTER_OP_ENDS_WITH) - .value("FILTER_OP_CONTAINS", FILTER_OP_CONTAINS) - .value("FILTER_OP_OR", FILTER_OP_OR) - .value("FILTER_OP_IN", FILTER_OP_IN) - .value("FILTER_OP_NOT_IN", FILTER_OP_NOT_IN) - .value("FILTER_OP_AND", FILTER_OP_AND) - .value("FILTER_OP_IS_NAN", FILTER_OP_IS_NAN) - .value("FILTER_OP_IS_NOT_NAN", FILTER_OP_IS_NOT_NAN) - .value("FILTER_OP_IS_VALID", FILTER_OP_IS_VALID) - .value("FILTER_OP_IS_NOT_VALID", FILTER_OP_IS_NOT_VALID); - - enum_("t_dtype") - .value("DTYPE_NONE", DTYPE_NONE) - .value("DTYPE_INT64", DTYPE_INT64) - .value("DTYPE_INT32", DTYPE_INT32) - .value("DTYPE_INT16", DTYPE_INT16) - .value("DTYPE_INT8", DTYPE_INT8) - .value("DTYPE_UINT64", DTYPE_UINT64) - .value("DTYPE_UINT32", DTYPE_UINT32) - .value("DTYPE_UINT16", DTYPE_UINT16) - .value("DTYPE_UINT8", DTYPE_UINT8) - .value("DTYPE_FLOAT64", DTYPE_FLOAT64) - .value("DTYPE_FLOAT32", DTYPE_FLOAT32) - .value("DTYPE_BOOL", DTYPE_BOOL) - .value("DTYPE_TIME", DTYPE_TIME) - .value("DTYPE_DATE", DTYPE_DATE) - .value("DTYPE_ENUM", DTYPE_ENUM) - .value("DTYPE_OID", DTYPE_OID) - .value("DTYPE_PTR", DTYPE_PTR) - .value("DTYPE_F64PAIR", DTYPE_F64PAIR) - .value("DTYPE_USER_FIXED", DTYPE_USER_FIXED) - .value("DTYPE_STR", DTYPE_STR) - .value("DTYPE_USER_VLEN", DTYPE_USER_VLEN) - .value("DTYPE_LAST_VLEN", DTYPE_LAST_VLEN) - .value("DTYPE_LAST", DTYPE_LAST); - - enum_("t_aggtype") - .value("AGGTYPE_SUM", AGGTYPE_SUM) - .value("AGGTYPE_MUL", AGGTYPE_MUL) - .value("AGGTYPE_COUNT", AGGTYPE_COUNT) - .value("AGGTYPE_MEAN", AGGTYPE_MEAN) - .value("AGGTYPE_WEIGHTED_MEAN", AGGTYPE_WEIGHTED_MEAN) - .value("AGGTYPE_UNIQUE", AGGTYPE_UNIQUE) - .value("AGGTYPE_ANY", AGGTYPE_ANY) - .value("AGGTYPE_MEDIAN", AGGTYPE_MEDIAN) - .value("AGGTYPE_JOIN", AGGTYPE_JOIN) - .value("AGGTYPE_SCALED_DIV", AGGTYPE_SCALED_DIV) - 
.value("AGGTYPE_SCALED_ADD", AGGTYPE_SCALED_ADD) - .value("AGGTYPE_SCALED_MUL", AGGTYPE_SCALED_MUL) - .value("AGGTYPE_DOMINANT", AGGTYPE_DOMINANT) - .value("AGGTYPE_FIRST", AGGTYPE_FIRST) - .value("AGGTYPE_LAST", AGGTYPE_LAST) - .value("AGGTYPE_PY_AGG", AGGTYPE_PY_AGG) - .value("AGGTYPE_AND", AGGTYPE_AND) - .value("AGGTYPE_OR", AGGTYPE_OR) - .value("AGGTYPE_LAST_VALUE", AGGTYPE_LAST_VALUE) - .value("AGGTYPE_HIGH_WATER_MARK", AGGTYPE_HIGH_WATER_MARK) - .value("AGGTYPE_LOW_WATER_MARK", AGGTYPE_LOW_WATER_MARK) - .value("AGGTYPE_UDF_COMBINER", AGGTYPE_UDF_COMBINER) - .value("AGGTYPE_UDF_REDUCER", AGGTYPE_UDF_REDUCER) - .value("AGGTYPE_SUM_ABS", AGGTYPE_SUM_ABS) - .value("AGGTYPE_SUM_NOT_NULL", AGGTYPE_SUM_NOT_NULL) - .value("AGGTYPE_MEAN_BY_COUNT", AGGTYPE_MEAN_BY_COUNT) - .value("AGGTYPE_IDENTITY", AGGTYPE_IDENTITY) - .value("AGGTYPE_DISTINCT_COUNT", AGGTYPE_DISTINCT_COUNT) - .value("AGGTYPE_DISTINCT_LEAF", AGGTYPE_DISTINCT_LEAF) - .value("AGGTYPE_PCT_SUM_PARENT", AGGTYPE_PCT_SUM_PARENT) - .value("AGGTYPE_PCT_SUM_GRAND_TOTAL", AGGTYPE_PCT_SUM_GRAND_TOTAL); - - enum_("t_totals") - .value("TOTALS_BEFORE", TOTALS_BEFORE) - .value("TOTALS_HIDDEN", TOTALS_HIDDEN) - .value("TOTALS_AFTER", TOTALS_AFTER); - - function("sort", &sort); - function("make_table", &make_table, allow_raw_pointers()); - function("make_gnode", &make_gnode); - function("clone_gnode_table", &clone_gnode_table, allow_raw_pointers()); - function("make_context_zero", &make_context_zero, allow_raw_pointers()); - function("make_context_one", &make_context_one, allow_raw_pointers()); - function("make_context_two", &make_context_two, allow_raw_pointers()); - function("scalar_to_val", &scalar_to_val); - function("scalar_vec_to_val", &scalar_vec_to_val); - function("table_add_computed_column", &table_add_computed_column); - function("set_column_nth", &set_column_nth, allow_raw_pointers()); - function("get_data_zero", &get_data>); - function("get_data_one", &get_data>); - function("get_data_two", &get_data>); - 
function("get_data_two_skip_headers", &get_data_two_skip_headers); - function("col_to_js_typed_array_zero", &col_to_js_typed_array>); - function("col_to_js_typed_array_one", &col_to_js_typed_array>); - function("col_to_js_typed_array_two", &col_to_js_typed_array>); -} +} \ No newline at end of file diff --git a/src/include/perspective/binding.h b/src/include/perspective/binding.h new file mode 100644 index 0000000000..d5e4da1e7b --- /dev/null +++ b/src/include/perspective/binding.h @@ -0,0 +1,9 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + */ +#pragma once diff --git a/src/include/perspective/emscripten.h b/src/include/perspective/emscripten.h new file mode 100644 index 0000000000..eecb729d10 --- /dev/null +++ b/src/include/perspective/emscripten.h @@ -0,0 +1,338 @@ +/****************************************************************************** + * + * Copyright (c) 2019, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. 
+ * + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef PSP_ENABLE_WASM +#include +#endif + +#ifdef PSP_ENABLE_PYTHON +#include +#endif + +typedef std::codecvt_utf8 utf8convert_type; +typedef std::codecvt_utf8_utf16 utf16convert_type; + +namespace perspective { +namespace binding { + + + +/****************************************************************************** + * + * Data Loading + */ +template +std::vector _get_sort(T j_sortby); + + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::vector _get_fterms(t_schema schema, T j_filters); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::vector _get_aggspecs(T j_aggs); + +// Date parsing +template +t_date jsdate_to_t_date(T date); + +template +T t_date_to_jsdate(t_date date); + +/** + * Converts a scalar value to its JS representation. + * + * Params + * ------ + * t_tscalar scalar + * + * Returns + * ------- + * val + */ +template +T scalar_to_val(const t_tscalar scalar); + +template +T scalar_vec_to_val(const std::vector& scalars, std::uint32_t idx); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +namespace arrow { + +template +void vecFromTypedArray(const T& typedArray, void* data, std::int32_t length, const char* destType = nullptr); + +template +void fill_col_valid(T dcol, std::shared_ptr col); + +template +void fill_col_dict(T dictvec, std::shared_ptr col); + +} // namespace arrow + +template +void _fill_col_numeric(T accessor, t_table& tbl, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_int64(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_time(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool 
is_arrow); + +template +void _fill_col_date(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_bool(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_string(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +/** + * Fills the table with data from Javascript. + * + * Params + * ------ + * tbl - pointer to the table object + * ocolnames - vector of column names + * accessor - the JS data accessor interface + * odt - vector of data types + * offset + * is_arrow - flag for arrow data + * + * Returns + * ------- + * + */ +template +void _fill_data(t_table& tbl, std::vector ocolnames, T accessor, + std::vector odt, std::uint32_t offset, bool is_arrow); + + +/****************************************************************************** + ****************************************************************************** + ****************************************************************************** + ****************************************************************************** + ****************************************************************************** + * + * Public + */ + +template +void set_column_nth(t_column* col, t_uindex idx, T value); + + +/** + * Helper function for computed columns + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +void table_add_computed_column(t_table& table, T computed_defs); + +/** + * DataAccessor + * + * parses and converts input data into a canonical format for + * interfacing with Perspective. 
+ */ + +// Name parsing +template +std::vector column_names(T data, std::int32_t format); + +// Type inferrence for fill_col and data_types +template +t_dtype infer_type(T x, U date_validator); + +template +t_dtype get_data_type(T data, std::int32_t format, std::string name, U date_validator); + +template +std::vector data_types(T data, std::int32_t format, std::vector names, U date_validator); + + +/** + * Create a default gnode. + * + * Params + * ------ + * j_colnames - a JS Array of column names. + * j_dtypes - a JS Array of column types. + * + * Returns + * ------- + * A gnode. + */ +std::shared_ptr make_gnode(const t_table& table); + +/** + * Create a populated table. + * + * Params + * ------ + * chunk - a JS object containing parsed data and associated metadata + * offset + * limit + * index + * is_delete - sets the table operation + * + * Returns + * ------- + * a populated table. + */ +template +std::shared_ptr +make_table(t_pool* pool, T gnode, T accessor, T computed, std::uint32_t offset, + std::uint32_t limit, std::string index, bool is_update, bool is_delete, bool is_arrow); + +/** + * Copies the internal table from a gnode + * + * Params + * ------ + * + * Returns + * ------- + * A gnode. 
+ */ +template +std::shared_ptr +clone_gnode_table(t_pool* pool, std::shared_ptr gnode, T computed); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_zero(t_schema schema, t_filter_op combiner, T j_filters, T j_columns, + T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_one(t_schema schema, T j_pivots, t_filter_op combiner, T j_filters, T j_aggs, + T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_two(t_schema schema, T j_rpivots, T j_cpivots, t_filter_op combiner, + T j_filters, T j_aggs, bool show_totals, t_pool* pool, std::shared_ptr gnode, + std::string name); + +template +void sort(std::shared_ptr ctx2, T j_sortby, T j_column_sortby); + +template +T get_column_data(std::shared_ptr table, std::string colname); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +T get_data(U ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col); + +template +T get_data_two_skip_headers(std::shared_ptr ctx, std::uint32_t depth, + std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col); + + +} +} From 2e416a101a989b774ef07543daf36012cabd98a5 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Fri, 18 Jan 2019 10:32:32 -1000 Subject: [PATCH 03/11] centralize binding interface in binding.h --- CMakeLists.txt | 9 +- src/cpp/binding.cpp | 32 ++- src/cpp/emscripten.cpp | 78 ++++--- src/include/perspective/binding.h | 317 +++++++++++++++++++++++++++ src/include/perspective/emscripten.h | 270 +---------------------- 5 files changed, 409 insertions(+), 297 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
84188c0155..9229eadb23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -221,7 +221,6 @@ set (SOURCE_FILES src/cpp/base_impl_osx.cpp src/cpp/base_impl_wasm.cpp src/cpp/base_impl_win.cpp - src/cpp/binding.cpp src/cpp/build_filter.cpp #src/cpp/calc_agg_dtype.cpp src/cpp/column.cpp @@ -293,13 +292,13 @@ if (PSP_WASM_BUILD) add_library(psp ${SOURCE_FILES}) set_target_properties(psp PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") - add_executable(perspective.async src/cpp/emscripten.cpp) + add_executable(perspective.async src/cpp/binding.cpp src/cpp/emscripten.cpp) target_link_libraries(perspective.async psp "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") set_target_properties(perspective.async PROPERTIES OUTPUT_NAME "psp.async") - add_executable(perspective.sync src/cpp/emscripten.cpp) + add_executable(perspective.sync src/cpp/binding.cpp src/cpp/emscripten.cpp) target_link_libraries(perspective.sync psp "${SYNC_MODE_FLAGS}") set_target_properties(perspective.sync PROPERTIES COMPILE_FLAGS "${SYNC_MODE_FLAGS}") set_target_properties(perspective.sync PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") @@ -307,7 +306,7 @@ if (PSP_WASM_BUILD) add_dependencies(perspective.sync perspective.async) if (NOT CMAKE_BUILD_TYPE_LOWER STREQUAL debug) - add_executable(perspective.asm src/cpp/emscripten.cpp) + add_executable(perspective.asm src/cpp/binding.cpp src/cpp/emscripten.cpp) target_link_libraries(perspective.asm psp "${ASMJS_MODE_FLAGS}") set_target_properties(perspective.asm PROPERTIES COMPILE_FLAGS "${ASMJS_MODE_FLAGS}") set_target_properties(perspective.asm PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") @@ -325,7 +324,7 @@ else() target_link_libraries(psp ${Boost_LIBRARIES} ${PYTHON_LIBRARIES}) set(CMAKE_SHARED_LIBRARY_SUFFIX .so) - add_library(binding SHARED ${CMAKE_SOURCE_DIR}/python/perspective/src/python.cpp) + 
add_library(binding SHARED src/cpp/binding.cpp ${CMAKE_SOURCE_DIR}/python/perspective/src/python.cpp) target_link_libraries(binding psp) target_link_libraries(binding tbb) target_link_libraries(binding ${BOOST_PYTHON}) diff --git a/src/cpp/binding.cpp b/src/cpp/binding.cpp index 984bb0e0c3..7db9d407f8 100644 --- a/src/cpp/binding.cpp +++ b/src/cpp/binding.cpp @@ -6,11 +6,41 @@ * the Apache License 2.0. The full license can be found in the LICENSE file. * */ + +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef PSP_ENABLE_WASM +#include +#include +#include +#include +using namespace emscripten; +#endif + +#ifdef PSP_ENABLE_PYTHON + +#endif + +using namespace perspective; namespace perspective { namespace binding { + + + } } diff --git a/src/cpp/emscripten.cpp b/src/cpp/emscripten.cpp index 8039cfc87f..39a053a6de 100644 --- a/src/cpp/emscripten.cpp +++ b/src/cpp/emscripten.cpp @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -36,12 +37,12 @@ namespace binding { * Data Loading */ -template -std::vector _get_sort(T j_sortby) { +template <> +std::vector _get_sort(val j_sortby) { std::vector svec{}; - std::vector sortbys = vecFromJSArray(j_sortby); + std::vector sortbys = vecFromArray(j_sortby); for (auto idx = 0; idx < sortbys.size(); ++idx) { - std::vector sortby = vecFromJSArray(sortbys[idx]); + std::vector sortby = vecFromArray(sortbys[idx]); t_sorttype sorttype; switch (sortby[1]) { case 0: @@ -76,12 +77,13 @@ std::vector _get_sort(T j_sortby) { * ------- * */ +template <> std::vector _get_fterms(t_schema schema, val j_filters) { std::vector fvec{}; - std::vector filters = vecFromJSArray(j_filters); + std::vector filters = vecFromArray(j_filters); for (auto fidx = 0; fidx < filters.size(); ++fidx) { - std::vector filter = vecFromJSArray(filters[fidx]); + std::vector filter = vecFromArray(filters[fidx]); std::string coln = filter[0].as(); 
t_filter_op comp = filter[1].as(); @@ -89,7 +91,7 @@ _get_fterms(t_schema schema, val j_filters) { case FILTER_OP_NOT_IN: case FILTER_OP_IN: { std::vector terms{}; - std::vector j_terms = vecFromJSArray(filter[2]); + std::vector j_terms = vecFromArray(filter[2]); for (auto jidx = 0; jidx < j_terms.size(); ++jidx) { terms.push_back(mktscalar(get_interned_cstr(j_terms[jidx].c_str()))); } @@ -141,15 +143,15 @@ _get_fterms(t_schema schema, val j_filters) { */ std::vector _get_aggspecs(val j_aggs) { - std::vector aggs = vecFromJSArray(j_aggs); + std::vector aggs = vecFromArray(j_aggs); std::vector aggspecs; for (auto idx = 0; idx < aggs.size(); ++idx) { - std::vector agg_row = vecFromJSArray(aggs[idx]); + std::vector agg_row = vecFromArray(aggs[idx]); std::string name = agg_row[0].as(); t_aggtype aggtype = agg_row[1].as(); std::vector dependencies; - std::vector deps = vecFromJSArray(agg_row[2]); + std::vector deps = vecFromArray(agg_row[2]); for (auto didx = 0; didx < deps.size(); ++didx) { if (deps[didx].isUndefined()) { continue; @@ -202,7 +204,7 @@ t_date_to_jsdate(t_date date) { * ------- * val */ -val scalar_to_val(const t_tscalar scalar) { +val scalar_to_val(const t_tscalar& scalar) { if (!scalar.is_valid()) { return val::null(); } @@ -250,6 +252,21 @@ val scalar_vec_to_val(const std::vector& scalars, std::uint32_t idx) return scalar_to_val(scalars[idx]); } +template +std::vector vecFromArray(T& arr) { + return vecFromJSArray(arr); +} + +template <> +val scalar_to(const t_tscalar& scalar) { + return scalar_to_val(scalar); +} + + +template <> +val scalar_vec_to(const std::vector& scalars, std::uint32_t idx) { + return scalar_vec_to_val(scalars, idx); +} /** * * @@ -264,8 +281,9 @@ val scalar_vec_to_val(const std::vector& scalars, std::uint32_t idx) namespace arrow { -void -vecFromTypedArray( + +template <> +void vecFromTypedArray( const val& typedArray, void* data, std::int32_t length, const char* destType) { val memory = val::module_property("buffer"); if 
(destType == nullptr) { @@ -279,8 +297,8 @@ vecFromTypedArray( } } -void -fill_col_valid(val dcol, std::shared_ptr col) { +template <> +void fill_col_valid(val dcol, std::shared_ptr col) { // dcol should be the Uint8Array containing the null bitmap t_uindex nrows = col->size(); @@ -292,8 +310,8 @@ fill_col_valid(val dcol, std::shared_ptr col) { } } -void -fill_col_dict(val dictvec, std::shared_ptr col) { +template <> +void fill_col_dict(val dictvec, std::shared_ptr col) { // ptaylor: This assumes the dictionary is either a Binary or Utf8 Vector. Should it support // other Vector types? val vdata = dictvec["values"]; @@ -813,7 +831,7 @@ set_column_nth(t_column* col, t_uindex idx, val value) { */ template <> void table_add_computed_column(t_table& table, val computed_defs) { - auto vcomputed_defs = vecFromJSArray(computed_defs); + auto vcomputed_defs = vecFromArray(computed_defs); for (auto i = 0; i < vcomputed_defs.size(); ++i) { val coldef = vcomputed_defs[i]; std::string name = coldef["column"].as(); @@ -845,7 +863,7 @@ void table_add_computed_column(t_table& table, val computed_defs) { } // Get list of input column names - auto icol_names = vecFromJSArray(inputs); + auto icol_names = vecFromArray(inputs); // Get t_column* for all input columns std::vector icols; @@ -949,10 +967,11 @@ column_names(val data, std::int32_t format) { max_check *= 2; } - names = vecFromJSArray(data_names); + names = vecFromArray(data_names); } } else if (format == 1 || format == 2) { - names = vecFromJSArray(Object.call("keys", data)); + val keys = Object.call("keys", data); + names = vecFromArray(keys); } return names; @@ -1058,8 +1077,9 @@ data_types(val data, std::int32_t format, std::vector names, val da std::vector types; if (format == 2) { + val keys = val::global("Object").template call("keys", data); std::vector data_names - = vecFromJSArray(val::global("Object").call("keys", data)); + = vecFromArray(keys); for (std::vector::iterator name = data_names.begin(); name != 
data_names.end(); ++name) { @@ -1166,8 +1186,10 @@ make_table(t_pool* pool, val gnode, val accessor, val computed, std::uint32_t of // Determine metadata if (is_arrow || (is_update || is_delete)) { // TODO: fully remove intermediate passed-through JS arrays for non-arrow data - colnames = vecFromJSArray(accessor["names"]); - dtypes = vecFromJSArray(accessor["types"]); + val names = accessor["names"]; + val types = accessor["types"]; + colnames = vecFromArray(names); + dtypes = vecFromArray(types); } else { // Infer names and types val data = accessor["data"]; @@ -1270,7 +1292,7 @@ template <> std::shared_ptr make_context_zero(t_schema schema, t_filter_op combiner, val j_filters, val j_columns, val j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { - auto columns = vecFromJSArray(j_columns); + auto columns = vecFromArray(j_columns); auto fvec = _get_fterms(schema, j_filters); auto svec = _get_sort(j_sortby); auto cfg = t_config(columns, combiner, fvec); @@ -1299,7 +1321,7 @@ make_context_one(t_schema schema, val j_pivots, t_filter_op combiner, val j_filt val j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { auto fvec = _get_fterms(schema, j_filters); auto aggspecs = _get_aggspecs(j_aggs); - auto pivots = vecFromJSArray(j_pivots); + auto pivots = vecFromArray(j_pivots); auto svec = _get_sort(j_sortby); auto cfg = t_config(pivots, aggspecs, combiner, fvec); @@ -1330,8 +1352,8 @@ make_context_two(t_schema schema, val j_rpivots, val j_cpivots, t_filter_op comb std::string name) { auto fvec = _get_fterms(schema, j_filters); auto aggspecs = _get_aggspecs(j_aggs); - auto rpivots = vecFromJSArray(j_rpivots); - auto cpivots = vecFromJSArray(j_cpivots); + auto rpivots = vecFromArray(j_rpivots); + auto cpivots = vecFromArray(j_cpivots); t_totals total = show_totals ? 
TOTALS_BEFORE : TOTALS_HIDDEN; auto cfg = t_config(rpivots, cpivots, aggspecs, total, combiner, fvec); diff --git a/src/include/perspective/binding.h b/src/include/perspective/binding.h index d5e4da1e7b..16107b988a 100644 --- a/src/include/perspective/binding.h +++ b/src/include/perspective/binding.h @@ -7,3 +7,320 @@ * */ #pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +typedef std::codecvt_utf8 utf8convert_type; +typedef std::codecvt_utf8_utf16 utf16convert_type; + +namespace perspective { +namespace binding { + + +/****************************************************************************** + * + * Utility + */ +template +std::vector vecFromArray(T& arr); + +/****************************************************************************** + * + * Data Loading + */ +template +std::vector _get_sort(T j_sortby); + + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::vector _get_fterms(t_schema schema, T j_filters); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::vector _get_aggspecs(T j_aggs); + + +/** + * Converts a scalar value to its language-specific representation. 
+ * + * Params + * ------ + * t_tscalar scalar + * + * Returns + * ------- + * T + */ +template +T scalar_to(const t_tscalar& scalar); + +template +T scalar_vec_to(const std::vector& scalars, std::uint32_t idx); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +namespace arrow { + +template +void vecFromTypedArray(const T& typedArray, void* data, std::int32_t length, const char* destType = nullptr); + +template +void fill_col_valid(T dcol, std::shared_ptr col); + +template +void fill_col_dict(T dictvec, std::shared_ptr col); + +} // namespace arrow + +template +void _fill_col_numeric(T accessor, t_table& tbl, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_int64(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_time(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_date(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_bool(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +template +void _fill_col_string(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); + +/** + * Fills the table with data from language. 
+ * + * Params + * ------ + * tbl - reference to the table object + * ocolnames - vector of column names + * accessor - the data accessor interface + * odt - vector of data types + * offset + * is_arrow - flag for arrow data + * + * Returns + * ------- + * + */ +template +void _fill_data(t_table& tbl, std::vector ocolnames, T accessor, + std::vector odt, std::uint32_t offset, bool is_arrow); + + +/****************************************************************************** + * + * Public + */ + +template +void set_column_nth(t_column* col, t_uindex idx, T value); + + +/** + * Helper function for computed columns + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +void table_add_computed_column(t_table& table, T computed_defs); + +/** + * DataAccessor + * + * parses and converts input data into a canonical format for + * interfacing with Perspective. + */ + +// Name parsing +template +std::vector column_names(T data, std::int32_t format); + +// Type inference for fill_col and data_types +template +t_dtype infer_type(T x, U date_validator); + +template +t_dtype get_data_type(T data, std::int32_t format, std::string name, U date_validator); + +template +std::vector data_types(T data, std::int32_t format, std::vector names, U date_validator); + + +/** + * Create a default gnode. + * + * Params + * ------ + * table - the table the gnode is created for; this is the + * only argument (no column-name or dtype arrays). + * + * Returns + * ------- + * A gnode. + */ +std::shared_ptr make_gnode(const t_table& table); + +/** + * Create a populated table. + * + * Params + * ------ + * chunk - a JS object containing parsed data and associated metadata + * offset + * limit + * index + * is_delete - sets the table operation + * + * Returns + * ------- + * a populated table.
+ */ +template +std::shared_ptr +make_table(t_pool* pool, T gnode, T accessor, T computed, std::uint32_t offset, + std::uint32_t limit, std::string index, bool is_update, bool is_delete, bool is_arrow); + +/** + * Copies the internal table from a gnode + * + * Params + * ------ + * + * Returns + * ------- + * A gnode. + */ +template +std::shared_ptr +clone_gnode_table(t_pool* pool, std::shared_ptr gnode, T computed); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_zero(t_schema schema, t_filter_op combiner, T j_filters, T j_columns, + T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_one(t_schema schema, T j_pivots, t_filter_op combiner, T j_filters, T j_aggs, + T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_two(t_schema schema, T j_rpivots, T j_cpivots, t_filter_op combiner, + T j_filters, T j_aggs, bool show_totals, t_pool* pool, std::shared_ptr gnode, + std::string name); + +template +void sort(std::shared_ptr ctx2, T j_sortby, T j_column_sortby); + +template +T get_column_data(std::shared_ptr table, std::string colname); + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +T get_data(U ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col); + +template +T get_data_two_skip_headers(std::shared_ptr ctx, std::uint32_t depth, + std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col); + +} +} diff --git a/src/include/perspective/emscripten.h b/src/include/perspective/emscripten.h index 6b6e772ef3..b19c48197c 100644 --- a/src/include/perspective/emscripten.h +++ b/src/include/perspective/emscripten.h 
@@ -22,51 +22,9 @@ #include #include - -typedef std::codecvt_utf8 utf8convert_type; -typedef std::codecvt_utf8_utf16 utf16convert_type; - namespace perspective { namespace binding { - - -/****************************************************************************** - * - * Data Loading - */ -template -std::vector _get_sort(T j_sortby); - - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -std::vector _get_fterms(t_schema schema, T j_filters); - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -std::vector _get_aggspecs(T j_aggs); - // Date parsing t_date jsdate_to_t_date(emscripten::val date); emscripten::val t_date_to_jsdate(t_date date); @@ -82,222 +40,13 @@ emscripten::val t_date_to_jsdate(t_date date); * ------- * val */ -emscripten::val scalar_to_val(const t_tscalar scalar); -emscripten::val scalar_vec_to_val(const std::vector& scalars, std::uint32_t idx); - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -namespace arrow { - -void vecFromTypedArray(const emscripten::val& typedArray, void* data, std::int32_t length, const char* destType = nullptr); - -template -void fill_col_valid(T dcol, std::shared_ptr col); - -template -void fill_col_dict(T dictvec, std::shared_ptr col); - -} // namespace arrow - -template -void _fill_col_numeric(T accessor, t_table& tbl, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); - -template -void _fill_col_int64(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); - -template -void _fill_col_time(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); - -template -void _fill_col_date(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); - -template -void _fill_col_bool(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool 
is_arrow); - -template -void _fill_col_string(T accessor, std::shared_ptr col, std::string name, std::int32_t cidx, t_dtype type, bool is_arrow); - -/** - * Fills the table with data from Javascript. - * - * Params - * ------ - * tbl - pointer to the table object - * ocolnames - vector of column names - * accessor - the JS data accessor interface - * odt - vector of data types - * offset - * is_arrow - flag for arrow data - * - * Returns - * ------- - * - */ -template -void _fill_data(t_table& tbl, std::vector ocolnames, T accessor, - std::vector odt, std::uint32_t offset, bool is_arrow); - - -/****************************************************************************** - ****************************************************************************** - ****************************************************************************** - ****************************************************************************** - ****************************************************************************** - * - * Public - */ - -template -void set_column_nth(t_column* col, t_uindex idx, T value); - - -/** - * Helper function for computed columns - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -void table_add_computed_column(t_table& table, T computed_defs); - -/** - * DataAccessor - * - * parses and converts input data into a canonical format for - * interfacing with Perspective. - */ - -// Name parsing -template -std::vector column_names(T data, std::int32_t format); - -// Type inferrence for fill_col and data_types -template -t_dtype infer_type(T x, U date_validator); - -template -t_dtype get_data_type(T data, std::int32_t format, std::string name, U date_validator); - -template -std::vector data_types(T data, std::int32_t format, std::vector names, U date_validator); - - -/** - * Create a default gnode. - * - * Params - * ------ - * j_colnames - a JS Array of column names. - * j_dtypes - a JS Array of column types. 
- * - * Returns - * ------- - * A gnode. - */ -std::shared_ptr make_gnode(const t_table& table); - -/** - * Create a populated table. - * - * Params - * ------ - * chunk - a JS object containing parsed data and associated metadata - * offset - * limit - * index - * is_delete - sets the table operation - * - * Returns - * ------- - * a populated table. - */ -template -std::shared_ptr -make_table(t_pool* pool, T gnode, T accessor, T computed, std::uint32_t offset, - std::uint32_t limit, std::string index, bool is_update, bool is_delete, bool is_arrow); - -/** - * Copies the internal table from a gnode - * - * Params - * ------ - * - * Returns - * ------- - * A gnode. - */ -template -std::shared_ptr -clone_gnode_table(t_pool* pool, std::shared_ptr gnode, T computed); +template <> +emscripten::val scalar_to(const t_tscalar& scalar); +emscripten::val scalar_to_val(const t_tscalar& scalar); -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -std::shared_ptr -make_context_zero(t_schema schema, t_filter_op combiner, T j_filters, T j_columns, - T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name); - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -std::shared_ptr -make_context_one(t_schema schema, T j_pivots, t_filter_op combiner, T j_filters, T j_aggs, - T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name); - -/** - * - * - * Params - * ------ - * - * - * Returns - * ------- - * - */ -template -std::shared_ptr -make_context_two(t_schema schema, T j_rpivots, T j_cpivots, t_filter_op combiner, - T j_filters, T j_aggs, bool show_totals, t_pool* pool, std::shared_ptr gnode, - std::string name); - -template -void sort(std::shared_ptr ctx2, T j_sortby, T j_column_sortby); - -template -T get_column_data(std::shared_ptr table, std::string colname); +template <> +emscripten::val scalar_vec_to(const std::vector& scalars, std::uint32_t idx); +emscripten::val scalar_vec_to_val(const 
std::vector& scalars, std::uint32_t idx); /** * @@ -310,13 +59,8 @@ T get_column_data(std::shared_ptr table, std::string colname); * ------- * */ -template -T get_data(U ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, - std::uint32_t end_col); - template -T get_data_two_skip_headers(std::shared_ptr ctx, std::uint32_t depth, - std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, +emscripten::val get_data_js(T ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col); From 15b470638cb9abdcf4e3f634c05b2ff99ec68658 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Fri, 18 Jan 2019 10:43:14 -1000 Subject: [PATCH 04/11] accidentally cloberred some python bindings, readding them --- python/perspective/include/perspective/python.h | 12 ++++++------ python/perspective/src/python.cpp | 3 --- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/python/perspective/include/perspective/python.h b/python/perspective/include/perspective/python.h index 2b88580d59..d949fcc748 100644 --- a/python/perspective/include/perspective/python.h +++ b/python/perspective/include/perspective/python.h @@ -13,15 +13,14 @@ #include #include #include +#include +#include #ifndef __PSP_PYTHON_HPP__ #define __PSP_PYTHON_HPP__ -void test(const char* name); - - perspective::t_schema* t_schema_init(py::list& columns, py::list& types); template @@ -49,8 +48,6 @@ BOOST_PYTHON_MODULE(libbinding) np::initialize(true); _import_array(); - py::def("test", test); - py::enum_("t_dtype") .value("NONE", perspective::DTYPE_NONE) .value("INT64", perspective::DTYPE_INT64) @@ -109,9 +106,12 @@ BOOST_PYTHON_MODULE(libbinding) py::init<>()) ; + py::class_("t_gnode", py::init()) + .def("pprint", static_cast(&perspective::t_gnode::pprint)) + ; + // need boost:noncopyable for PSP_NON_COPYABLE py::class_("t_table", py::init()) - .def("init", &perspective::t_table::init) .def("clear", &perspective::t_table::clear) .def("reset", 
&perspective::t_table::reset) diff --git a/python/perspective/src/python.cpp b/python/perspective/src/python.cpp index 66b9dc5121..adb8b05a29 100644 --- a/python/perspective/src/python.cpp +++ b/python/perspective/src/python.cpp @@ -11,9 +11,6 @@ #include #include -void test(const char* name) { - std::cout << "Hello " << name << "!" << std::endl; -} perspective::t_schema* t_schema_init(py::list& columns, py::list& types) { From f75e7845d96094f611f6c2494860bd36ce3ad746 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Fri, 18 Jan 2019 10:47:31 -1000 Subject: [PATCH 05/11] accidentally cloberred some python bindings, readding them --- python/perspective/include/perspective/python.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/perspective/include/perspective/python.h b/python/perspective/include/perspective/python.h index d949fcc748..1d23b37249 100644 --- a/python/perspective/include/perspective/python.h +++ b/python/perspective/include/perspective/python.h @@ -94,6 +94,8 @@ BOOST_PYTHON_MODULE(libbinding) .def("get_recipe", &perspective::t_schema::get_recipe) .def("has_column", &perspective::t_schema::has_column) .def("get_table_context", &perspective::t_schema::get_table_context) + .def("get_table_context", &perspective::t_schema::get_table_context) + .def("str", &perspective::t_schema::str) // when returning const, need return_value_policy .def("columns", &perspective::t_schema::columns, py::return_value_policy()) @@ -102,8 +104,8 @@ BOOST_PYTHON_MODULE(libbinding) //TODO - py::class_("t_column", - py::init<>()) + py::class_("t_column", py::init<>()) + .def("pprint", static_cast(&perspective::t_column::pprint)) ; py::class_("t_gnode", py::init()) From e9cca79e8372421cabbc9fcfc4fbd8fff9728d5b Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Fri, 18 Jan 2019 19:54:59 -1000 Subject: [PATCH 06/11] mirror JS binding to python --- .../perspective/include/perspective/numpy.h | 3 +- .../perspective/include/perspective/python.h | 452 
+++++++++++++++--- python/perspective/src/python.cpp | 447 +++++++++++++++++ src/cpp/emscripten.cpp | 87 +++- src/cpp/pool.cpp | 1 - src/include/perspective/pool.h | 3 +- 6 files changed, 922 insertions(+), 71 deletions(-) diff --git a/python/perspective/include/perspective/numpy.h b/python/perspective/include/perspective/numpy.h index 17295388f7..fcab90d711 100644 --- a/python/perspective/include/perspective/numpy.h +++ b/python/perspective/include/perspective/numpy.h @@ -6,10 +6,9 @@ * the Apache License 2.0. The full license can be found in the LICENSE file. * */ - +#pragma once #if defined(PSP_ENABLE_PYTHON) -#pragma once #include #include #include diff --git a/python/perspective/include/perspective/python.h b/python/perspective/include/perspective/python.h index 1d23b37249..42bc44d308 100644 --- a/python/perspective/include/perspective/python.h +++ b/python/perspective/include/perspective/python.h @@ -7,20 +7,19 @@ * */ +#pragma once #ifdef PSP_ENABLE_PYTHON -#include - #include #include +#include #include #include #include - - -#ifndef __PSP_PYTHON_HPP__ -#define __PSP_PYTHON_HPP__ +#include +namespace perspective { +namespace binding { perspective::t_schema* t_schema_init(py::list& columns, py::list& types); template @@ -41,41 +40,74 @@ void _fill_data_single_column_np(perspective::t_table& tbl, perspective::t_dtype col_type); np::ndarray _get_as_numpy(perspective::t_table& tbl, const std::string& colname_i); +} +} + +/****************************************************************************** + * + * Boost Python binding + */ +using namespace perspective::binding; + BOOST_PYTHON_MODULE(libbinding) { np::initialize(true); _import_array(); - py::enum_("t_dtype") - .value("NONE", perspective::DTYPE_NONE) - .value("INT64", perspective::DTYPE_INT64) - .value("INT32", perspective::DTYPE_INT32) - .value("INT16", perspective::DTYPE_INT16) - .value("INT8", perspective::DTYPE_INT8) - .value("UINT64", perspective::DTYPE_UINT64) - .value("UINT32", 
perspective::DTYPE_UINT32) - .value("UINT16", perspective::DTYPE_UINT16) - .value("UINT8", perspective::DTYPE_UINT8) - .value("FLOAT64", perspective::DTYPE_FLOAT64) - .value("FLOAT32", perspective::DTYPE_FLOAT32) - .value("BOOL", perspective::DTYPE_BOOL) - .value("TIME", perspective::DTYPE_TIME) - .value("DATE", perspective::DTYPE_DATE) - .value("ENUM", perspective::DTYPE_ENUM) - .value("OID", perspective::DTYPE_OID) - .value("PTR", perspective::DTYPE_PTR) - .value("F64PAIR", perspective::DTYPE_F64PAIR) - .value("USER_FIXED", perspective::DTYPE_USER_FIXED) - .value("STR", perspective::DTYPE_STR) - .value("USER_VLEN", perspective::DTYPE_USER_VLEN) - .value("LAST_VLEN", perspective::DTYPE_LAST_VLEN) - .value("LAST", perspective::DTYPE_LAST) + /****************************************************************************** + * + * t_column + */ + py::class_("t_column", py::init<>()) + // when multiple overloading methods, need to static_cast to specify + .def("pprint", static_cast(&perspective::t_column::pprint)) + .def("set_scalar", &perspective::t_column::set_scalar) ; + /****************************************************************************** + * + * t_table + */ + // need boost:noncopyable for PSP_NON_COPYABLE + py::class_("t_table", py::init()) + .def("init", &perspective::t_table::init) + .def("clear", &perspective::t_table::clear) + .def("reset", &perspective::t_table::reset) + .def("size", &perspective::t_table::size) + .def("reserve", &perspective::t_table::reserve) + .def("extend", &perspective::t_table::extend) + .def("set_size", &perspective::t_table::set_size) + .def("num_columns", &perspective::t_table::num_columns) + .def("get_capacity", &perspective::t_table::get_capacity) + + // when returning const, need return_value_policy + .def("name", &perspective::t_table::name, py::return_value_policy()) + .def("get_schema", &perspective::t_table::get_schema, py::return_value_policy()) + + // when multiple overloading methods, need to static_cast to 
specify + .def("num_rows", static_cast (&perspective::t_table::num_rows)) + + .def("pprint", static_cast(&perspective::t_table::pprint)) + .def("pprint", static_cast(&perspective::t_table::pprint)) + .def("pprint", static_cast(&perspective::t_table::pprint)) + .def("pprint", static_cast&) const>(&perspective::t_table::pprint)) + + + // custom add ins + // .def("load_column", _fill_data_single_column) + .def("load_column", static_cast(_fill_data_single_column)) + .def("load_column", static_cast(_fill_data_single_column_np)) + .def("get_column", _get_as_numpy) + .def("add_column", &perspective::t_table::add_column, py::return_value_policy()) + ; + /****************************************************************************** + * + * t_schema + */ py::class_("t_schema", py::init, std::vector >()) .def(py::init<>()) @@ -99,54 +131,346 @@ BOOST_PYTHON_MODULE(libbinding) // when returning const, need return_value_policy .def("columns", &perspective::t_schema::columns, py::return_value_policy()) - // .def("types", &perspective::t_schema::types, return_value_policy()) + .def("types", &perspective::t_schema::types) ; + /****************************************************************************** + * + * t_gnode + */ + py::class_("t_gnode", py::init()) + .def(py::init< + perspective::t_gnode_processing_mode, + const perspective::t_schema&, + const std::vector&, + const std::vector&, + const std::vector + >()) + .def("pprint", static_cast(&perspective::t_gnode::pprint)) + .def("get_id", &perspective::t_gnode::get_id) + .def("get_tblschema", &perspective::t_gnode::get_tblschema) + .def("get_table", static_cast<perspective::t_table* (perspective::t_gnode::*)()>(&perspective::t_gnode::get_table), py::return_value_policy<py::reference_existing_object>()) // non-const overload; Python gets a non-owning reference + ; - //TODO - py::class_("t_column", py::init<>()) - .def("pprint", static_cast(&perspective::t_column::pprint)) + + /****************************************************************************** + * + * t_ctx0 + */ + py::class_("t_ctx0", py::init()) + .def("sidedness", &perspective::t_ctx0::sidedness) + .def("get_row_count",
&perspective::t_ctx0::get_row_count) + .def("get_column_count", &perspective::t_ctx0::get_column_count) + .def("get_data", &perspective::t_ctxbase::get_data) + .def("get_step_delta", &perspective::t_ctx0::get_step_delta) + .def("get_cell_delta", &perspective::t_ctx0::get_cell_delta) + .def("get_column_names", &perspective::t_ctx0::get_column_names) + .def("unity_get_row_data", &perspective::t_ctx0::unity_get_row_data) + .def("unity_get_column_data", &perspective::t_ctx0::unity_get_column_data) + .def("unity_get_row_path", &perspective::t_ctx0::unity_get_row_path) + .def("unity_get_column_path", &perspective::t_ctx0::unity_get_column_path) + .def("unity_get_row_depth", &perspective::t_ctx0::unity_get_row_depth) + .def("unity_get_column_depth", &perspective::t_ctx0::unity_get_column_depth) + .def("unity_get_column_name", &perspective::t_ctx0::unity_get_column_name) + .def("unity_get_column_display_name", &perspective::t_ctx0::unity_get_column_display_name) + .def("unity_get_column_names", &perspective::t_ctx0::unity_get_column_names) + .def("unity_get_column_display_names", &perspective::t_ctx0::unity_get_column_display_names) + .def("unity_get_column_count", &perspective::t_ctx0::unity_get_column_count) + .def("unity_get_row_count", &perspective::t_ctx0::unity_get_row_count) + .def("unity_get_row_expanded", &perspective::t_ctx0::unity_get_row_expanded) + .def("unity_get_column_expanded", &perspective::t_ctx0::unity_get_column_expanded) + .def("unity_init_load_step_end", &perspective::t_ctx0::unity_init_load_step_end) ; - py::class_("t_gnode", py::init()) - .def("pprint", static_cast(&perspective::t_gnode::pprint)) + /****************************************************************************** + * + * t_ctx1 + */ + py::class_("t_ctx1", py::init()) + .def("sidedness", &perspective::t_ctx1::sidedness) + .def("get_row_count", &perspective::t_ctx1::get_row_count) + .def("get_column_count", &perspective::t_ctx1::get_column_count) + .def("get_data", 
&perspective::t_ctxbase::get_data) + .def("get_step_delta", &perspective::t_ctx1::get_step_delta) + .def("get_cell_delta", &perspective::t_ctx1::get_cell_delta) + .def("set_depth", &perspective::t_ctx1::set_depth) + // .def("open", &perspective::t_ctx1::open) + // .def("close", &perspective::t_ctx1::close) + .def("get_trav_depth", &perspective::t_ctx1::get_trav_depth) + .def("get_column_names", &perspective::t_ctx1::get_aggregates) + .def("unity_get_row_data", &perspective::t_ctx1::unity_get_row_data) + .def("unity_get_column_data", &perspective::t_ctx1::unity_get_column_data) + .def("unity_get_row_path", &perspective::t_ctx1::unity_get_row_path) + .def("unity_get_column_path", &perspective::t_ctx1::unity_get_column_path) + .def("unity_get_row_depth", &perspective::t_ctx1::unity_get_row_depth) + .def("unity_get_column_depth", &perspective::t_ctx1::unity_get_column_depth) + .def("unity_get_column_name", &perspective::t_ctx1::unity_get_column_name) + .def("unity_get_column_display_name", &perspective::t_ctx1::unity_get_column_display_name) + .def("unity_get_column_names", &perspective::t_ctx1::unity_get_column_names) + .def("unity_get_column_display_names", &perspective::t_ctx1::unity_get_column_display_names) + .def("unity_get_column_count", &perspective::t_ctx1::unity_get_column_count) + .def("unity_get_row_count", &perspective::t_ctx1::unity_get_row_count) + .def("unity_get_row_expanded", &perspective::t_ctx1::unity_get_row_expanded) + .def("unity_get_column_expanded", &perspective::t_ctx1::unity_get_column_expanded) + .def("unity_init_load_step_end", &perspective::t_ctx1::unity_init_load_step_end) ; - // need boost:noncopyable for PSP_NON_COPYABLE - py::class_("t_table", py::init()) - .def("init", &perspective::t_table::init) - .def("clear", &perspective::t_table::clear) - .def("reset", &perspective::t_table::reset) - .def("size", &perspective::t_table::size) - .def("reserve", &perspective::t_table::reserve) - .def("extend", &perspective::t_table::extend) - 
.def("set_size", &perspective::t_table::set_size) + /****************************************************************************** + * + * t_ctx2 + */ + py::class_("t_ctx2", py::init()) + .def("sidedness", &perspective::t_ctx2::sidedness) + .def("get_row_count", &perspective::t_ctx2::get_row_count) + .def("get_column_count", &perspective::t_ctx2::get_column_count) + .def("get_data", &perspective::t_ctxbase::get_data) + .def("get_step_delta", &perspective::t_ctx2::get_step_delta) + .def("set_depth", &perspective::t_ctx2::set_depth) + // .def("open", &perspective::t_ctx2::open) + // .def("close", &perspective::t_ctx2::close) + .def("get_column_names", &perspective::t_ctx2::get_aggregates) + .def("unity_get_row_data", &perspective::t_ctx2::unity_get_row_data) + .def("unity_get_column_data", &perspective::t_ctx2::unity_get_column_data) + .def("unity_get_row_path", &perspective::t_ctx2::unity_get_row_path) + .def("unity_get_column_path", &perspective::t_ctx2::unity_get_column_path) + .def("unity_get_row_depth", &perspective::t_ctx2::unity_get_row_depth) + .def("unity_get_column_depth", &perspective::t_ctx2::unity_get_column_depth) + .def("unity_get_column_name", &perspective::t_ctx2::unity_get_column_name) + .def("unity_get_column_display_name", &perspective::t_ctx2::unity_get_column_display_name) + .def("unity_get_column_names", &perspective::t_ctx2::unity_get_column_names) + .def("unity_get_column_display_names", &perspective::t_ctx2::unity_get_column_display_names) + .def("unity_get_column_count", &perspective::t_ctx2::unity_get_column_count) + .def("unity_get_row_count", &perspective::t_ctx2::unity_get_row_count) + .def("unity_get_row_expanded", &perspective::t_ctx2::unity_get_row_expanded) + .def("unity_get_column_expanded", &perspective::t_ctx2::unity_get_column_expanded) + .def("unity_init_load_step_end", &perspective::t_ctx2::unity_init_load_step_end) + .def("get_totals", &perspective::t_ctx2::get_totals) + .def("get_column_path_userspace", 
&perspective::t_ctx2::get_column_path_userspace) + ; - .def("num_columns", &perspective::t_table::num_columns) - .def("get_capacity", &perspective::t_table::get_capacity) - // when returning const, need return_value_policy - .def("name", &perspective::t_table::name, py::return_value_policy()) - .def("get_schema", &perspective::t_table::get_schema, py::return_value_policy()) + /****************************************************************************** + * + * t_pool + */ + py::class_("t_pool", py::no_init) + .def("register_gnode", &perspective::t_pool::register_gnode) + .def("process", &perspective::t_pool::_process) + .def("send", &perspective::t_pool::send) + .def("epoch", &perspective::t_pool::epoch) + .def("unregister_gnode", &perspective::t_pool::unregister_gnode) + // .def("set_update_delegate", &perspective::t_pool::set_update_delegate) + .def("register_context", &perspective::t_pool::register_context) + .def("unregister_context", &perspective::t_pool::unregister_context) + .def("get_contexts_last_updated", &perspective::t_pool::get_contexts_last_updated) + .def("get_gnodes_last_updated", &perspective::t_pool::get_gnodes_last_updated) + .def("get_gnode", &perspective::t_pool::get_gnode, py::return_value_policy()) + ; - // when multiple overloading methods, need to static_cast to specify - .def("num_rows", static_cast (&perspective::t_table::num_rows)) - - .def("pprint", static_cast(&perspective::t_table::pprint)) - .def("pprint", static_cast(&perspective::t_table::pprint)) - .def("pprint", static_cast(&perspective::t_table::pprint)) - .def("pprint", static_cast&) const>(&perspective::t_table::pprint)) + /****************************************************************************** + * + * t_aggspec + */ + py::class_("t_aggspec", py::init<>()) + .def("name", &perspective::t_aggspec::name) + ; - // custom add ins - // .def("load_column", _fill_data_single_column) - .def("load_column", static_cast(_fill_data_single_column)) - .def("load_column", 
static_cast(_fill_data_single_column_np)) - .def("get_column", _get_as_numpy) + /****************************************************************************** + * + * t_tscalar + */ + py::class_("t_tscalar", py::init<>()) ; -} + /****************************************************************************** + * + * t_updctx + */ + // TODO + // value_object("t_updctx") + // .field("gnode_id", &t_updctx::m_gnode_id) + // .field("ctx_name", &t_updctx::m_ctx); + + /****************************************************************************** + * + * t_cellupd + */ + // TODO + // value_object("t_cellupd") + // .field("row", &t_cellupd::row) + // .field("column", &t_cellupd::column) + // .field("old_value", &t_cellupd::old_value) + // .field("new_value", &t_cellupd::new_value); + + /****************************************************************************** + * + * t_stepdelta + */ + // TODO + // value_object("t_stepdelta") + // .field("rows_changed", &t_stepdelta::rows_changed) + // .field("columns_changed", &t_stepdelta::columns_changed) + // .field("cells", &t_stepdelta::cells); + + /****************************************************************************** + * + * vector + */ + // TODO + // register_vector("std::vector"); + // register_vector("std::vector"); + // register_vector("std::vector"); + // register_vector("std::vector"); + // register_vector("std::vector"); + // register_vector("std::vector"); + // register_vector("std::vector"); + + /****************************************************************************** + * + * t_header + */ + py::enum_("t_header") + .value("HEADER_ROW", perspective::HEADER_ROW) + .value("HEADER_COLUMN", perspective::HEADER_COLUMN); + + /****************************************************************************** + * + * t_ctx_type + */ + py::enum_("t_ctx_type") + .value("ZERO_SIDED_CONTEXT", perspective::ZERO_SIDED_CONTEXT) + .value("ONE_SIDED_CONTEXT", perspective::ONE_SIDED_CONTEXT) + .value("TWO_SIDED_CONTEXT", 
perspective::TWO_SIDED_CONTEXT) + .value("GROUPED_ZERO_SIDED_CONTEXT", perspective::GROUPED_ZERO_SIDED_CONTEXT) + .value("GROUPED_PKEY_CONTEXT", perspective::GROUPED_PKEY_CONTEXT) + .value("GROUPED_COLUMNS_CONTEXT", perspective::GROUPED_COLUMNS_CONTEXT); + + /****************************************************************************** + * + * t_filter_op + */ + py::enum_("t_filter_op") + .value("FILTER_OP_LT", perspective::FILTER_OP_LT) + .value("FILTER_OP_LTEQ", perspective::FILTER_OP_LTEQ) + .value("FILTER_OP_GT", perspective::FILTER_OP_GT) + .value("FILTER_OP_GTEQ", perspective::FILTER_OP_GTEQ) + .value("FILTER_OP_EQ", perspective::FILTER_OP_EQ) + .value("FILTER_OP_NE", perspective::FILTER_OP_NE) + .value("FILTER_OP_BEGINS_WITH", perspective::FILTER_OP_BEGINS_WITH) + .value("FILTER_OP_ENDS_WITH", perspective::FILTER_OP_ENDS_WITH) + .value("FILTER_OP_CONTAINS", perspective::FILTER_OP_CONTAINS) + .value("FILTER_OP_OR", perspective::FILTER_OP_OR) + .value("FILTER_OP_IN", perspective::FILTER_OP_IN) + .value("FILTER_OP_NOT_IN", perspective::FILTER_OP_NOT_IN) + .value("FILTER_OP_AND", perspective::FILTER_OP_AND) + .value("FILTER_OP_IS_NAN", perspective::FILTER_OP_IS_NAN) + .value("FILTER_OP_IS_NOT_NAN", perspective::FILTER_OP_IS_NOT_NAN) + .value("FILTER_OP_IS_VALID", perspective::FILTER_OP_IS_VALID) + .value("FILTER_OP_IS_NOT_VALID", perspective::FILTER_OP_IS_NOT_VALID); + + /****************************************************************************** + * + * t_dtype + */ + py::enum_("t_dtype") + .value("NONE", perspective::DTYPE_NONE) + .value("INT64", perspective::DTYPE_INT64) + .value("INT32", perspective::DTYPE_INT32) + .value("INT16", perspective::DTYPE_INT16) + .value("INT8", perspective::DTYPE_INT8) + .value("UINT64", perspective::DTYPE_UINT64) + .value("UINT32", perspective::DTYPE_UINT32) + .value("UINT16", perspective::DTYPE_UINT16) + .value("UINT8", perspective::DTYPE_UINT8) + .value("FLOAT64", perspective::DTYPE_FLOAT64) + .value("FLOAT32", 
perspective::DTYPE_FLOAT32) + .value("BOOL", perspective::DTYPE_BOOL) + .value("TIME", perspective::DTYPE_TIME) + .value("DATE", perspective::DTYPE_DATE) + .value("ENUM", perspective::DTYPE_ENUM) + .value("OID", perspective::DTYPE_OID) + .value("PTR", perspective::DTYPE_PTR) + .value("F64PAIR", perspective::DTYPE_F64PAIR) + .value("USER_FIXED", perspective::DTYPE_USER_FIXED) + .value("STR", perspective::DTYPE_STR) + .value("USER_VLEN", perspective::DTYPE_USER_VLEN) + .value("LAST_VLEN", perspective::DTYPE_LAST_VLEN) + .value("LAST", perspective::DTYPE_LAST) + ; + + /****************************************************************************** + * + * t_aggtype + */ + py::enum_("t_aggtype") + .value("AGGTYPE_SUM", perspective::AGGTYPE_SUM) + .value("AGGTYPE_MUL", perspective::AGGTYPE_MUL) + .value("AGGTYPE_COUNT", perspective::AGGTYPE_COUNT) + .value("AGGTYPE_MEAN", perspective::AGGTYPE_MEAN) + .value("AGGTYPE_WEIGHTED_MEAN", perspective::AGGTYPE_WEIGHTED_MEAN) + .value("AGGTYPE_UNIQUE", perspective::AGGTYPE_UNIQUE) + .value("AGGTYPE_ANY", perspective::AGGTYPE_ANY) + .value("AGGTYPE_MEDIAN", perspective::AGGTYPE_MEDIAN) + .value("AGGTYPE_JOIN", perspective::AGGTYPE_JOIN) + .value("AGGTYPE_SCALED_DIV", perspective::AGGTYPE_SCALED_DIV) + .value("AGGTYPE_SCALED_ADD", perspective::AGGTYPE_SCALED_ADD) + .value("AGGTYPE_SCALED_MUL", perspective::AGGTYPE_SCALED_MUL) + .value("AGGTYPE_DOMINANT", perspective::AGGTYPE_DOMINANT) + .value("AGGTYPE_FIRST", perspective::AGGTYPE_FIRST) + .value("AGGTYPE_LAST", perspective::AGGTYPE_LAST) + .value("AGGTYPE_PY_AGG", perspective::AGGTYPE_PY_AGG) + .value("AGGTYPE_AND", perspective::AGGTYPE_AND) + .value("AGGTYPE_OR", perspective::AGGTYPE_OR) + .value("AGGTYPE_LAST_VALUE", perspective::AGGTYPE_LAST_VALUE) + .value("AGGTYPE_HIGH_WATER_MARK", perspective::AGGTYPE_HIGH_WATER_MARK) + .value("AGGTYPE_LOW_WATER_MARK", perspective::AGGTYPE_LOW_WATER_MARK) + .value("AGGTYPE_UDF_COMBINER", perspective::AGGTYPE_UDF_COMBINER) + 
.value("AGGTYPE_UDF_REDUCER", perspective::AGGTYPE_UDF_REDUCER) + .value("AGGTYPE_SUM_ABS", perspective::AGGTYPE_SUM_ABS) + .value("AGGTYPE_SUM_NOT_NULL", perspective::AGGTYPE_SUM_NOT_NULL) + .value("AGGTYPE_MEAN_BY_COUNT", perspective::AGGTYPE_MEAN_BY_COUNT) + .value("AGGTYPE_IDENTITY", perspective::AGGTYPE_IDENTITY) + .value("AGGTYPE_DISTINCT_COUNT", perspective::AGGTYPE_DISTINCT_COUNT) + .value("AGGTYPE_DISTINCT_LEAF", perspective::AGGTYPE_DISTINCT_LEAF) + .value("AGGTYPE_PCT_SUM_PARENT", perspective::AGGTYPE_PCT_SUM_PARENT) + .value("AGGTYPE_PCT_SUM_GRAND_TOTAL", perspective::AGGTYPE_PCT_SUM_GRAND_TOTAL); + + /****************************************************************************** + * + * t_totals + */ + py::enum_("t_totals") + .value("TOTALS_BEFORE", perspective::TOTALS_BEFORE) + .value("TOTALS_HIDDEN", perspective::TOTALS_HIDDEN) + .value("TOTALS_AFTER", perspective::TOTALS_AFTER); + + /****************************************************************************** + * + * assorted functions + */ + py::def("sort", sort); + py::def("make_table", make_table); + py::def("make_gnode", make_gnode); + py::def("clone_gnode_table", clone_gnode_table); + py::def("make_context_zero", make_context_zero); + py::def("make_context_one", make_context_one); + py::def("make_context_two", make_context_two); + // py::def("scalar_to_val", scalar_to_val); + // py::def("scalar_vec_to_val", scalar_vec_to_val); + py::def("table_add_computed_column", table_add_computed_column); + py::def("set_column_nth", set_column_nth); + // py::def("get_data_zero", get_data>); + // py::def("get_data_one", get_data>); + // py::def("get_data_two", get_data>); + // py::def("get_data_two_skip_headers", get_data_two_skip_headers); + // py::def("col_to_js_typed_array_zero", col_to_js_typed_array>); + // py::def("col_to_js_typed_array_one", col_to_js_typed_array>); + // py::def("col_to_js_typed_array_two", col_to_js_typed_array>); + + 
+/******************************************************************************/ +} -#endif #endif \ No newline at end of file diff --git a/python/perspective/src/python.cpp b/python/perspective/src/python.cpp index adb8b05a29..4d2d1fe420 100644 --- a/python/perspective/src/python.cpp +++ b/python/perspective/src/python.cpp @@ -7,10 +7,26 @@ * */ #ifdef PSP_ENABLE_PYTHON + +#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +namespace perspective { +namespace binding { perspective::t_schema* t_schema_init(py::list& columns, py::list& types) { @@ -165,4 +181,435 @@ np::ndarray _get_as_numpy(perspective::t_table& tbl, const std::string& colname_ + + + + + + +template +std::vector vecFromArray(T& arr){ + //TODO + std::vector ret; + return ret; +} + + +/****************************************************************************** + * + * Data Loading + */ +template <> +std::vector _get_sort(py::object j_sortby) { + // TODO + std::vector svec{}; + return svec; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template <> +std::vector +_get_fterms(t_schema schema, py::object j_filters) { + // TODO + std::vector fvec{}; + return fvec; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +std::vector +_get_aggspecs(py::object j_aggs) { + // TODO + std::vector aggspecs; + return aggspecs; +} + +/** + * Converts a scalar value to its Python representation. 
+ * + * Params + * ------ + * t_tscalar scalar + * + * Returns + * ------- + * py::object + */ +template <> +py::object scalar_to(const t_tscalar& scalar) { + if (!scalar.is_valid()) { + return py::object(); //None + } + switch (scalar.get_dtype()) { + case DTYPE_BOOL: { + if (scalar) { + return py::object(true); + } else { + return py::object(false); + } + } + case DTYPE_TIME: + case DTYPE_FLOAT64: + case DTYPE_FLOAT32: { + return py::object(scalar.to_double()); + } + case DTYPE_DATE: { + // TODO + // return t_date_to_jsdate(scalar.get()).call("getTime"); + } + case DTYPE_UINT8: + case DTYPE_UINT16: + case DTYPE_UINT32: + case DTYPE_INT8: + case DTYPE_INT16: + case DTYPE_INT32: { + return py::object(static_cast(scalar.to_int64())); + } + case DTYPE_UINT64: + case DTYPE_INT64: { + // This could potentially lose precision + return py::object(static_cast(scalar.to_int64())); + } + case DTYPE_NONE: { + return py::object(); //None + } + case DTYPE_STR: + default: { + std::wstring_convert converter("", L""); + return py::str(converter.from_bytes(scalar.to_string())); + } + } +} + + +/** + * Fills the table with data from Javascript. 
+ * + * Params + * ------ + * tbl - pointer to the table object + * ocolnames - vector of column names + * accessor - the JS data accessor interface + * odt - vector of data types + * offset + * is_arrow - flag for arrow data + * + * Returns + * ------- + * + */ +void +_fill_data(t_table& tbl, std::vector ocolnames, py::object accessor, + std::vector odt, std::uint32_t offset, bool is_arrow) { + //TODO +} + +/****************************************************************************** + * + * Public + */ +template +void set_column_nth(t_column* col, t_uindex idx, T value) { + //TODO +} + +/** + * Helper function for computed columns + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +void table_add_computed_column(t_table& table, T computed_defs) { + //TODO +} + +/** + * DataAccessor + * + * parses and converts input data into a canonical format for + * interfacing with Perspective. + */ + +// Name parsing +std::vector +column_names(py::object data, std::int32_t format) { + //TODO + std::vector names; + return names; +} + +// Type inferrence for fill_col and data_types +t_dtype +infer_type(py::object x, py::object date_validator) { + //TODO + t_dtype t = t_dtype::DTYPE_STR; + return t; +} + +t_dtype +get_data_type(py::object data, std::int32_t format, std::string name, py::object date_validator) { + //TODO + return t_dtype::DTYPE_STR; +} + +std::vector +data_types(py::object data, std::int32_t format, std::vector names, py::object date_validator) { + //TODO + if (names.size() == 0) { + PSP_COMPLAIN_AND_ABORT("Cannot determine data types without column names!"); + } + std::vector types; + return types; +} + +/** + * Create a default gnode. + * + * Params + * ------ + * j_colnames - a JS Array of column names. + * j_dtypes - a JS Array of column types. + * + * Returns + * ------- + * A gnode. 
+ */ +std::shared_ptr +make_gnode(const t_table& table) { + auto iscm = table.get_schema(); + + std::vector ocolnames(iscm.columns()); + std::vector odt(iscm.types()); + + if (iscm.has_column("psp_pkey")) { + t_uindex idx = iscm.get_colidx("psp_pkey"); + ocolnames.erase(ocolnames.begin() + idx); + odt.erase(odt.begin() + idx); + } + + if (iscm.has_column("psp_op")) { + t_uindex idx = iscm.get_colidx("psp_op"); + ocolnames.erase(ocolnames.begin() + idx); + odt.erase(odt.begin() + idx); + } + + t_schema oscm(ocolnames, odt); + + // Create a gnode + auto gnode = std::make_shared(oscm, iscm); + gnode->init(); + + return gnode; +} + + + +/** + * Create a populated table. + * + * Params + * ------ + * chunk - a JS object containing parsed data and associated metadata + * offset + * limit + * index + * is_delete - sets the table operation + * + * Returns + * ------- + * a populated table. + */ +template +std::shared_ptr +make_table(t_pool* pool, T gnode, T accessor, T computed, std::uint32_t offset, + std::uint32_t limit, std::string index, bool is_update, bool is_delete, bool is_arrow) { + + std::vector colnames; + std::vector dtypes; + // Create the table + t_table tbl(t_schema(colnames, dtypes)); + tbl.init(); + tbl.extend(0); + std::shared_ptr new_gnode; + return new_gnode; +} + +/** + * Copies the internal table from a gnode + * + * Params + * ------ + * + * Returns + * ------- + * A gnode. 
+ */ +template +std::shared_ptr +clone_gnode_table(t_pool* pool, std::shared_ptr gnode, T computed) { + t_table* tbl = gnode->_get_pkeyed_table(); + table_add_computed_column(*tbl, computed); + std::shared_ptr new_gnode = make_gnode(*tbl); + pool->register_gnode(new_gnode.get()); + pool->send(new_gnode->get_id(), 0, *tbl); + pool->_process(); + return new_gnode; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_zero(t_schema schema, t_filter_op combiner, T j_filters, T j_columns, + T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { + auto columns = std::vector(); + auto fvec = _get_fterms(schema, j_filters); + auto svec = _get_sort(j_sortby); + auto cfg = t_config(columns, combiner, fvec); + auto ctx0 = std::make_shared(schema, cfg); + ctx0->init(); + ctx0->sort_by(svec); + pool->register_context(gnode->get_id(), name, ZERO_SIDED_CONTEXT, + reinterpret_cast(ctx0.get())); + return ctx0; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_one(t_schema schema, T j_pivots, t_filter_op combiner, T j_filters, T j_aggs, + T j_sortby, t_pool* pool, std::shared_ptr gnode, std::string name) { + auto fvec = _get_fterms(schema, j_filters); + auto aggspecs = _get_aggspecs(j_aggs); + auto pivots = vecFromArray(j_pivots); + auto svec = _get_sort(j_sortby); + + auto cfg = t_config(pivots, aggspecs, combiner, fvec); + auto ctx1 = std::make_shared(schema, cfg); + + ctx1->init(); + ctx1->sort_by(svec); + pool->register_context( + gnode->get_id(), name, ONE_SIDED_CONTEXT, reinterpret_cast(ctx1.get())); + return ctx1; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +std::shared_ptr +make_context_two(t_schema schema, T j_rpivots, T j_cpivots, t_filter_op combiner, + T j_filters, T j_aggs, bool show_totals, t_pool* pool, std::shared_ptr gnode, + std::string name) { + auto fvec = 
_get_fterms(schema, j_filters); + auto aggspecs = _get_aggspecs(j_aggs); + auto rpivots = vecFromArray(j_rpivots); + auto cpivots = vecFromArray(j_cpivots); + t_totals total = show_totals ? TOTALS_BEFORE : TOTALS_HIDDEN; + + auto cfg = t_config(rpivots, cpivots, aggspecs, total, combiner, fvec); + auto ctx2 = std::make_shared(schema, cfg); + + ctx2->init(); + pool->register_context( + gnode->get_id(), name, TWO_SIDED_CONTEXT, reinterpret_cast(ctx2.get())); + return ctx2; +} + +template +void sort(std::shared_ptr ctx2, T j_sortby, T j_column_sortby) { + +} + +template <> +py::object get_column_data(std::shared_ptr table, std::string colname) { + py::list arr; + return arr; +} + +/** + * + * + * Params + * ------ + * + * + * Returns + * ------- + * + */ +template +py::object get_data(T ctx, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col) { + py::list arr; + return arr; +} + +template <> +py::object get_data_two_skip_headers(std::shared_ptr ctx, std::uint32_t depth, + std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, + std::uint32_t end_col) { + py::list arr; + return arr; +} + + + + +} +} + #endif \ No newline at end of file diff --git a/src/cpp/emscripten.cpp b/src/cpp/emscripten.cpp index 39a053a6de..a9a21868de 100644 --- a/src/cpp/emscripten.cpp +++ b/src/cpp/emscripten.cpp @@ -1476,10 +1476,18 @@ main(int argc, char** argv) { */ EMSCRIPTEN_BINDINGS(perspective) { + /****************************************************************************** + * + * t_column + */ class_("t_column") .smart_ptr>("shared_ptr") .function("set_scalar", &t_column::set_scalar); + /****************************************************************************** + * + * t_table + */ class_("t_table") .constructor() .smart_ptr>("shared_ptr") @@ -1488,11 +1496,19 @@ EMSCRIPTEN_BINDINGS(perspective) { .function( "size", reinterpret_cast(&t_table::size)); + 
/****************************************************************************** + * + * t_schema + */ class_("t_schema") .function&>( "columns", &t_schema::columns, allow_raw_pointers()) .function>("types", &t_schema::types, allow_raw_pointers()); + /****************************************************************************** + * + * t_gnode + */ class_("t_gnode") .constructor&, const std::vector&, const std::vector&>() @@ -1502,6 +1518,10 @@ EMSCRIPTEN_BINDINGS(perspective) { .function("get_tblschema", &t_gnode::get_tblschema) .function("get_table", &t_gnode::get_table, allow_raw_pointers()); + /****************************************************************************** + * + * t_ctx0 + */ class_("t_ctx0") .constructor() .smart_ptr>("shared_ptr") @@ -1537,6 +1557,10 @@ EMSCRIPTEN_BINDINGS(perspective) { .function("unity_get_column_expanded", &t_ctx0::unity_get_column_expanded) .function("unity_init_load_step_end", &t_ctx0::unity_init_load_step_end); + /****************************************************************************** + * + * t_ctx1 + */ class_("t_ctx1") .constructor() .smart_ptr>("shared_ptr") @@ -1574,6 +1598,10 @@ EMSCRIPTEN_BINDINGS(perspective) { .function("unity_get_column_expanded", &t_ctx1::unity_get_column_expanded) .function("unity_init_load_step_end", &t_ctx1::unity_init_load_step_end); + /****************************************************************************** + * + * t_ctx2 + */ class_("t_ctx2") .constructor() .smart_ptr>("shared_ptr") @@ -1609,12 +1637,15 @@ EMSCRIPTEN_BINDINGS(perspective) { .function("unity_get_row_count", &t_ctx2::unity_get_row_count) .function("unity_get_row_expanded", &t_ctx2::unity_get_row_expanded) .function("unity_get_column_expanded", &t_ctx2::unity_get_column_expanded) - .function("unity_init_load_step_end", &t_ctx2::unity_init_load_step_end) .function("get_totals", &t_ctx2::get_totals) .function>( "get_column_path_userspace", &t_ctx2::get_column_path_userspace) .function("unity_init_load_step_end", 
&t_ctx2::unity_init_load_step_end); + /****************************************************************************** + * + * t_pool + */ class_("t_pool") .constructor<>() .smart_ptr>("shared_ptr") @@ -1623,7 +1654,7 @@ EMSCRIPTEN_BINDINGS(perspective) { .function("send", &t_pool::send) .function("epoch", &t_pool::epoch) .function("unregister_gnode", &t_pool::unregister_gnode) - .function("set_update_delegate", &t_pool::set_update_delegate) + // .function("set_update_delegate", &t_pool::set_update_delegate) .function("register_context", &t_pool::register_context) .function("unregister_context", &t_pool::unregister_context) .function>( @@ -1632,25 +1663,49 @@ EMSCRIPTEN_BINDINGS(perspective) { "get_gnodes_last_updated", &t_pool::get_gnodes_last_updated) .function("get_gnode", &t_pool::get_gnode, allow_raw_pointers()); + /****************************************************************************** + * + * t_aggspec + */ class_("t_aggspec").function("name", &t_aggspec::name); + /****************************************************************************** + * + * t_tscalar + */ class_("t_tscalar"); + /****************************************************************************** + * + * t_updctx + */ value_object("t_updctx") .field("gnode_id", &t_updctx::m_gnode_id) .field("ctx_name", &t_updctx::m_ctx); + /****************************************************************************** + * + * t_cellupd + */ value_object("t_cellupd") .field("row", &t_cellupd::row) .field("column", &t_cellupd::column) .field("old_value", &t_cellupd::old_value) .field("new_value", &t_cellupd::new_value); + /****************************************************************************** + * + * t_stepdelta + */ value_object("t_stepdelta") .field("rows_changed", &t_stepdelta::rows_changed) .field("columns_changed", &t_stepdelta::columns_changed) .field("cells", &t_stepdelta::cells); + /****************************************************************************** + * + * vector + */ 
register_vector("std::vector"); register_vector("std::vector"); register_vector("std::vector"); @@ -1659,10 +1714,18 @@ EMSCRIPTEN_BINDINGS(perspective) { register_vector("std::vector"); register_vector("std::vector"); + /****************************************************************************** + * + * t_header + */ enum_("t_header") .value("HEADER_ROW", HEADER_ROW) .value("HEADER_COLUMN", HEADER_COLUMN); + /****************************************************************************** + * + * t_ctx_type + */ enum_("t_ctx_type") .value("ZERO_SIDED_CONTEXT", ZERO_SIDED_CONTEXT) .value("ONE_SIDED_CONTEXT", ONE_SIDED_CONTEXT) @@ -1671,6 +1734,10 @@ EMSCRIPTEN_BINDINGS(perspective) { .value("GROUPED_PKEY_CONTEXT", GROUPED_PKEY_CONTEXT) .value("GROUPED_COLUMNS_CONTEXT", GROUPED_COLUMNS_CONTEXT); + /****************************************************************************** + * + * t_filter_op + */ enum_("t_filter_op") .value("FILTER_OP_LT", FILTER_OP_LT) .value("FILTER_OP_LTEQ", FILTER_OP_LTEQ) @@ -1690,6 +1757,10 @@ EMSCRIPTEN_BINDINGS(perspective) { .value("FILTER_OP_IS_VALID", FILTER_OP_IS_VALID) .value("FILTER_OP_IS_NOT_VALID", FILTER_OP_IS_NOT_VALID); + /****************************************************************************** + * + * t_dtype + */ enum_("t_dtype") .value("DTYPE_NONE", DTYPE_NONE) .value("DTYPE_INT64", DTYPE_INT64) @@ -1715,6 +1786,10 @@ EMSCRIPTEN_BINDINGS(perspective) { .value("DTYPE_LAST_VLEN", DTYPE_LAST_VLEN) .value("DTYPE_LAST", DTYPE_LAST); + /****************************************************************************** + * + * t_aggtype + */ enum_("t_aggtype") .value("AGGTYPE_SUM", AGGTYPE_SUM) .value("AGGTYPE_MUL", AGGTYPE_MUL) @@ -1748,11 +1823,19 @@ EMSCRIPTEN_BINDINGS(perspective) { .value("AGGTYPE_PCT_SUM_PARENT", AGGTYPE_PCT_SUM_PARENT) .value("AGGTYPE_PCT_SUM_GRAND_TOTAL", AGGTYPE_PCT_SUM_GRAND_TOTAL); + /****************************************************************************** + * + * t_totals + */ 
enum_("t_totals") .value("TOTALS_BEFORE", TOTALS_BEFORE) .value("TOTALS_HIDDEN", TOTALS_HIDDEN) .value("TOTALS_AFTER", TOTALS_AFTER); + /****************************************************************************** + * + * assorted functions + */ function("sort", &sort); function("make_table", &make_table, allow_raw_pointers()); function("make_gnode", &make_gnode); diff --git a/src/cpp/pool.cpp b/src/cpp/pool.cpp index 333c97b797..f3e9154687 100644 --- a/src/cpp/pool.cpp +++ b/src/cpp/pool.cpp @@ -174,7 +174,6 @@ void t_pool::set_update_delegate(emscripten::val ud) { m_update_delegate = ud; } - #endif #ifdef PSP_ENABLE_WASM diff --git a/src/include/perspective/pool.h b/src/include/perspective/pool.h index 06b0da8010..095f661c57 100644 --- a/src/include/perspective/pool.h +++ b/src/include/perspective/pool.h @@ -48,8 +48,7 @@ class PERSPECTIVE_EXPORT t_pool { t_uindex gnode_id, const std::string& name, t_ctx_type type, std::int64_t ptr); void py_notify_userspace(); #endif - t_pool(const t_pool& p) = delete; - t_pool& operator=(const t_pool& p) = delete; + PSP_NON_COPYABLE(t_pool); ~t_pool(); From 526b0878dce8780a62dbee3f4ca55ed76ea17746 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Sat, 19 Jan 2019 19:50:30 -1000 Subject: [PATCH 07/11] print in background so travis doesnt complain --- scripts/test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test.js b/scripts/test.js index e21e49d674..fd67ff8fbc 100644 --- a/scripts/test.js +++ b/scripts/test.js @@ -93,7 +93,7 @@ try { } else { if (args.indexOf("--quiet") > -1) { console.log("-- Running test suite in quiet mode"); - execSync(`output=$(${jest()}); ret=$?; echo "\${output}"; exit $ret`, {stdio: "inherit"}); + execSync(`p=1; while [ $p -eq 1 ] ; do printf "." 
&& sleep 5; done & output=$(${jest()}); ret=$?; p=0; echo "\${output}"; exit $ret`, {stdio: "inherit"}); } else if (process.env.PACKAGE) { console.log("-- Running test suite in individual mode"); let cmd = "node_modules/.bin/lerna exec --concurrency 1 --no-bail"; From b3ce427cf223e492437bad4ffa5a01e9dcf3530d Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Sun, 20 Jan 2019 09:28:11 -1000 Subject: [PATCH 08/11] Revert "print in background so travis doesnt complain" This reverts commit 526b0878dce8780a62dbee3f4ca55ed76ea17746. --- scripts/test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test.js b/scripts/test.js index fd67ff8fbc..e21e49d674 100644 --- a/scripts/test.js +++ b/scripts/test.js @@ -93,7 +93,7 @@ try { } else { if (args.indexOf("--quiet") > -1) { console.log("-- Running test suite in quiet mode"); - execSync(`p=1; while [ $p -eq 1 ] ; do printf "." && sleep 5; done & output=$(${jest()}); ret=$?; p=0; echo "\${output}"; exit $ret`, {stdio: "inherit"}); + execSync(`output=$(${jest()}); ret=$?; echo "\${output}"; exit $ret`, {stdio: "inherit"}); } else if (process.env.PACKAGE) { console.log("-- Running test suite in individual mode"); let cmd = "node_modules/.bin/lerna exec --concurrency 1 --no-bail"; From a0dde07243be30c2c07fc2f58e8f2c9222f4e7e5 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Sun, 20 Jan 2019 09:36:33 -1000 Subject: [PATCH 09/11] accidentally dropped a function --- src/cpp/emscripten.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/emscripten.cpp b/src/cpp/emscripten.cpp index a9a21868de..b6bf7ee995 100644 --- a/src/cpp/emscripten.cpp +++ b/src/cpp/emscripten.cpp @@ -1654,7 +1654,7 @@ EMSCRIPTEN_BINDINGS(perspective) { .function("send", &t_pool::send) .function("epoch", &t_pool::epoch) .function("unregister_gnode", &t_pool::unregister_gnode) - // .function("set_update_delegate", &t_pool::set_update_delegate) + .function("set_update_delegate", &t_pool::set_update_delegate) 
.function("register_context", &t_pool::register_context) .function("unregister_context", &t_pool::unregister_context) .function>( From 6bedba49fa6d41cc9e1d234d6e0c1544748050d4 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Sun, 20 Jan 2019 10:25:35 -1000 Subject: [PATCH 10/11] include binding in build behind ifdef --- CMakeLists.txt | 9 +++++---- src/cpp/binding.cpp | 28 ---------------------------- src/include/perspective/binding.h | 4 ++++ 3 files changed, 9 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9229eadb23..84188c0155 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -221,6 +221,7 @@ set (SOURCE_FILES src/cpp/base_impl_osx.cpp src/cpp/base_impl_wasm.cpp src/cpp/base_impl_win.cpp + src/cpp/binding.cpp src/cpp/build_filter.cpp #src/cpp/calc_agg_dtype.cpp src/cpp/column.cpp @@ -292,13 +293,13 @@ if (PSP_WASM_BUILD) add_library(psp ${SOURCE_FILES}) set_target_properties(psp PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") - add_executable(perspective.async src/cpp/binding.cpp src/cpp/emscripten.cpp) + add_executable(perspective.async src/cpp/emscripten.cpp) target_link_libraries(perspective.async psp "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") set_target_properties(perspective.async PROPERTIES OUTPUT_NAME "psp.async") - add_executable(perspective.sync src/cpp/binding.cpp src/cpp/emscripten.cpp) + add_executable(perspective.sync src/cpp/emscripten.cpp) target_link_libraries(perspective.sync psp "${SYNC_MODE_FLAGS}") set_target_properties(perspective.sync PROPERTIES COMPILE_FLAGS "${SYNC_MODE_FLAGS}") set_target_properties(perspective.sync PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") @@ -306,7 +307,7 @@ if (PSP_WASM_BUILD) add_dependencies(perspective.sync perspective.async) if (NOT CMAKE_BUILD_TYPE_LOWER STREQUAL debug) - add_executable(perspective.asm 
src/cpp/binding.cpp src/cpp/emscripten.cpp) + add_executable(perspective.asm src/cpp/emscripten.cpp) target_link_libraries(perspective.asm psp "${ASMJS_MODE_FLAGS}") set_target_properties(perspective.asm PROPERTIES COMPILE_FLAGS "${ASMJS_MODE_FLAGS}") set_target_properties(perspective.asm PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") @@ -324,7 +325,7 @@ else() target_link_libraries(psp ${Boost_LIBRARIES} ${PYTHON_LIBRARIES}) set(CMAKE_SHARED_LIBRARY_SUFFIX .so) - add_library(binding SHARED src/cpp/binding.cpp ${CMAKE_SOURCE_DIR}/python/perspective/src/python.cpp) + add_library(binding SHARED ${CMAKE_SOURCE_DIR}/python/perspective/src/python.cpp) target_link_libraries(binding psp) target_link_libraries(binding tbb) target_link_libraries(binding ${BOOST_PYTHON}) diff --git a/src/cpp/binding.cpp b/src/cpp/binding.cpp index 7db9d407f8..11c37c0add 100644 --- a/src/cpp/binding.cpp +++ b/src/cpp/binding.cpp @@ -6,36 +6,8 @@ * the Apache License 2.0. The full license can be found in the LICENSE file. 
* */ - -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef PSP_ENABLE_WASM -#include -#include -#include -#include -using namespace emscripten; -#endif - -#ifdef PSP_ENABLE_PYTHON - -#endif - using namespace perspective; - namespace perspective { namespace binding { diff --git a/src/include/perspective/binding.h b/src/include/perspective/binding.h index 16107b988a..7a36ee5a4c 100644 --- a/src/include/perspective/binding.h +++ b/src/include/perspective/binding.h @@ -8,6 +8,8 @@ */ #pragma once +#if defined(PSP_ENABLE_WASM) || defined(PSP_ENABLE_PYTHON) + #include #include #include @@ -324,3 +326,5 @@ T get_data_two_skip_headers(std::shared_ptr ctx, std::uint32_t depth, } } + +#endif \ No newline at end of file From cc214cbe8c4edccec774b9a486663aa38bf45310 Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Sun, 20 Jan 2019 10:55:33 -1000 Subject: [PATCH 11/11] include base and first --- src/cpp/binding.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpp/binding.cpp b/src/cpp/binding.cpp index 11c37c0add..249ff659a5 100644 --- a/src/cpp/binding.cpp +++ b/src/cpp/binding.cpp @@ -6,6 +6,8 @@ * the Apache License 2.0. The full license can be found in the LICENSE file. * */ +#include +#include #include using namespace perspective; namespace perspective {