From f7d65bf2ef422a2a84c1c58697447a0fbb104b00 Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Mon, 12 Oct 2020 18:15:33 -0400 Subject: [PATCH 1/5] Add and implement unit context in JS Add unit context - ctx0 with no configuration at all that reads straight from gstate WIP: use unit ctx in JS, indexed updates/removes still broken WIP: fix JS tests WIP: get_pkeys no longer push_back --- cpp/perspective/CMakeLists.txt | 1 + cpp/perspective/src/cpp/context_handle.cpp | 3 + cpp/perspective/src/cpp/context_unit.cpp | 407 ++++++++++++++++++ cpp/perspective/src/cpp/data_slice.cpp | 1 + cpp/perspective/src/cpp/emscripten.cpp | 74 ++++ cpp/perspective/src/cpp/gnode.cpp | 45 +- cpp/perspective/src/cpp/gnode_state.cpp | 70 ++- cpp/perspective/src/cpp/view.cpp | 91 +++- cpp/perspective/src/cpp/view_config.cpp | 15 + .../src/include/perspective/base.h | 1 + .../src/include/perspective/binding.h | 1 + .../src/include/perspective/config.h | 5 +- .../src/include/perspective/context_unit.h | 148 +++++++ .../src/include/perspective/data_slice.h | 1 + .../src/include/perspective/gnode.h | 2 + .../src/include/perspective/gnode_state.h | 51 ++- .../src/include/perspective/view.h | 1 + .../src/include/perspective/view_config.h | 13 + .../bench/perspective.benchmark.js | 36 ++ packages/perspective/src/js/perspective.js | 55 ++- packages/perspective/test/js/to_format.js | 168 ++++++++ scripts/build_js.js | 2 +- 22 files changed, 1151 insertions(+), 40 deletions(-) create mode 100644 cpp/perspective/src/cpp/context_unit.cpp create mode 100644 cpp/perspective/src/include/perspective/context_unit.h diff --git a/cpp/perspective/CMakeLists.txt b/cpp/perspective/CMakeLists.txt index 2a51a69192..8ddafe236e 100644 --- a/cpp/perspective/CMakeLists.txt +++ b/cpp/perspective/CMakeLists.txt @@ -493,6 +493,7 @@ set (SOURCE_FILES ${PSP_CPP_SRC}/src/cpp/context_one.cpp ${PSP_CPP_SRC}/src/cpp/context_two.cpp ${PSP_CPP_SRC}/src/cpp/context_zero.cpp + ${PSP_CPP_SRC}/src/cpp/context_unit.cpp 
${PSP_CPP_SRC}/src/cpp/custom_column.cpp ${PSP_CPP_SRC}/src/cpp/data.cpp ${PSP_CPP_SRC}/src/cpp/data_slice.cpp diff --git a/cpp/perspective/src/cpp/context_handle.cpp b/cpp/perspective/src/cpp/context_handle.cpp index 4097c237e3..91edc7a1dd 100644 --- a/cpp/perspective/src/cpp/context_handle.cpp +++ b/cpp/perspective/src/cpp/context_handle.cpp @@ -32,6 +32,9 @@ t_ctx_handle::get_type_descr() const { case ZERO_SIDED_CONTEXT: { return "ZERO_SIDED_CONTEXT"; } break; + case UNIT_CONTEXT: { + return "UNIT_CONTEXT"; + } break; case GROUPED_PKEY_CONTEXT: { return "GROUPED_PKEY_CONTEXT"; } break; diff --git a/cpp/perspective/src/cpp/context_unit.cpp b/cpp/perspective/src/cpp/context_unit.cpp new file mode 100644 index 0000000000..d03b04ec3b --- /dev/null +++ b/cpp/perspective/src/cpp/context_unit.cpp @@ -0,0 +1,407 @@ +/****************************************************************************** + * + * Copyright (c) 2017, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace perspective { + +t_ctxunit::t_ctxunit() {} + +t_ctxunit::t_ctxunit(const t_schema& schema, const t_config& config) + : t_ctxbase(schema, config) + , m_has_delta(false) +{} + +t_ctxunit::~t_ctxunit() {} + +void +t_ctxunit::init() { + m_init = true; +} + + +std::string +t_ctxunit::repr() const { + std::stringstream ss; + ss << "t_ctxunit<" << this << ">"; + return ss.str(); +} + +void +t_ctxunit::step_begin() { + if (!m_init) + return; + + m_delta_pkeys.clear(); + m_rows_changed = false; + m_columns_changed = false; +} + +void +t_ctxunit::step_end() {} + +t_index +t_ctxunit::get_row_count() const { + return m_gstate->num_rows(); +} + +t_index +t_ctxunit::get_column_count() const { + return m_config.get_num_columns(); +} + +std::vector +t_ctxunit::unity_get_row_path(t_uindex idx) const { + return std::vector(mktscalar(idx)); +} + +std::vector +t_ctxunit::unity_get_column_path(t_uindex idx) const { + return std::vector(); +} + +t_uindex +t_ctxunit::unity_get_row_depth(t_uindex ridx) const { + return 0; +} + +t_uindex +t_ctxunit::unity_get_column_depth(t_uindex cidx) const { + return 0; +} + +std::vector +t_ctxunit::unity_get_column_names() const { + return get_column_names(); +} + +t_uindex +t_ctxunit::unity_get_column_count() const { + return get_column_count(); +} + +t_uindex +t_ctxunit::unity_get_row_count() const { + return get_row_count(); +} + +bool +t_ctxunit::unity_get_row_expanded(t_uindex idx) const { + return false; +} + +bool +t_ctxunit::unity_get_column_expanded(t_uindex idx) const { + return false; +} + +/** + * @brief Given a start/end row and column index, return the underlying data + * for the requested subset. 
+ * + * @param start_row + * @param end_row + * @param start_col + * @param end_col + * @return std::vector + */ +std::vector +t_ctxunit::get_data(t_index start_row, t_index end_row, t_index start_col, t_index end_col) const { + t_uindex ctx_nrows = get_row_count(); + t_uindex ctx_ncols = get_column_count(); + + auto ext = sanitize_get_data_extents( + ctx_nrows, ctx_ncols, start_row, end_row, start_col, end_col); + + t_index num_rows = ext.m_erow - ext.m_srow; + t_index stride = ext.m_ecol - ext.m_scol; + std::vector values(num_rows * stride); + + auto none = mknone(); + + for (t_index cidx = ext.m_scol; cidx < ext.m_ecol; ++cidx) { + const std::string& colname = m_config.col_at(cidx); + + std::vector out_data(num_rows); + + m_gstate->read_column(colname, start_row, end_row, out_data); + + for (t_index ridx = ext.m_srow; ridx < ext.m_erow; ++ridx) { + auto v = out_data[ridx - ext.m_srow]; + + // todo: fix null handling + if (!v.is_valid()) + v.set(none); + + values[(ridx - ext.m_srow) * stride + (cidx - ext.m_scol)] = v; + } + } + + return values; +} + +/** + * @brief Given a vector of row indices, which may not be contiguous, return the underlying data + * for these rows. 
+ * + * @param rows a vector of row indices + * @return std::vector a vector of scalars containing the underlying data + */ +std::vector +t_ctxunit::get_data(const std::vector& rows) const { + t_uindex stride = get_column_count(); + std::vector values(rows.size() * stride); + + auto none = mknone(); + + for (t_uindex cidx = 0; cidx < stride; ++cidx) { + std::vector out_data(rows.size()); + m_gstate->read_column(m_config.col_at(cidx), rows, out_data); + + for (t_uindex ridx = 0; ridx < rows.size(); ++ridx) { + auto v = out_data[ridx]; + + if (!v.is_valid()) + v.set(none); + + values[(ridx) * stride + (cidx)] = v; + } + } + + return values; +} + +std::vector +t_ctxunit::get_data(const std::vector& pkeys) const { + t_uindex stride = get_column_count(); + std::vector values(pkeys.size() * stride); + + auto none = mknone(); + + for (t_uindex cidx = 0; cidx < stride; ++cidx) { + std::vector out_data(pkeys.size()); + m_gstate->read_column(m_config.col_at(cidx), pkeys, out_data); + + for (t_uindex ridx = 0; ridx < pkeys.size(); ++ridx) { + auto v = out_data[ridx]; + + if (!v.is_valid()) + v.set(none); + + values[(ridx) * stride + (cidx)] = v; + } + } + + return values; +} + +t_tscalar +t_ctxunit::get_column_name(t_index idx) { + std::string empty(""); + + if (idx >= get_column_count()) + return m_symtable.get_interned_tscalar(empty.c_str()); + + return m_symtable.get_interned_tscalar(m_config.col_at(idx).c_str()); +} + +std::vector +t_ctxunit::get_pkeys(const std::vector>& cells) const { + // Validate cells + t_index num_rows = get_row_count(); + + for (t_index idx = 0, loop_end = cells.size(); idx < loop_end; ++idx) { + t_index ridx = cells[idx].first; + if (ridx >= num_rows) + return {}; + } + + std::set all_rows; + + for (t_index idx = 0, loop_end = cells.size(); idx < loop_end; ++idx) { + all_rows.insert(cells[idx].first); + } + + std::shared_ptr master_table = m_gstate->get_table(); + std::shared_ptr pkey_sptr = master_table->get_const_column("psp_pkey"); + + 
std::vector rval(all_rows.size()); + + t_uindex i = 0; + for (auto ridx : all_rows) { + rval[i] = pkey_sptr->get_scalar(ridx); + i++; + } + + return rval; +} + +/** + * @brief Returns a `t_rowdelta` struct containing data from updated rows + * and the updated row indices. + * + * @return t_rowdelta + */ +t_rowdelta +t_ctxunit::get_row_delta() { + bool rows_changed = m_rows_changed; + tsl::hopscotch_set pkeys = get_delta_pkeys(); + std::vector pkey_vector(pkeys.begin(), pkeys.end()); + + // Sort pkeys - they will always be integers >= 0, as the table has + // no index set. + std::sort(pkey_vector.begin(), pkey_vector.end()); + + std::vector data = get_data(pkey_vector); + t_rowdelta rval(rows_changed, pkey_vector.size(), data); + clear_deltas(); + + return rval; +} + +const tsl::hopscotch_set& +t_ctxunit::get_delta_pkeys() const { + return m_delta_pkeys; +} + +std::vector +t_ctxunit::get_column_names() const { + return m_schema.columns(); +} + +void +t_ctxunit::reset() { + m_has_delta = false; +} + +bool +t_ctxunit::get_deltas_enabled() const { + return true; +} + +void +t_ctxunit::set_deltas_enabled(bool enabled_state) {} + +t_index +t_ctxunit::sidedness() const { + return 0; +} + +/** + * @brief Notify the context with new data when the `t_gstate` master table is + * not empty, and being updated with new data. 
+ * + * @param flattened + * @param delta + * @param prev + * @param curr + * @param transitions + * @param existed + */ +void +t_ctxunit::notify(const t_data_table& flattened, const t_data_table& delta, + const t_data_table& prev, const t_data_table& curr, const t_data_table& transitions, + const t_data_table& existed) { + t_uindex nrecs = flattened.size(); + + std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); + std::shared_ptr op_sptr = flattened.get_const_column("psp_op"); + const t_column* pkey_col = pkey_sptr.get(); + const t_column* op_col = op_sptr.get(); + + bool delete_encountered = false; + + // Context does not have filters applied + for (t_uindex idx = 0; idx < nrecs; ++idx) { + t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); + std::uint8_t op_ = *(op_col->get_nth(idx)); + t_op op = static_cast(op_); + + switch (op) { + case OP_INSERT: {} + break; + case OP_DELETE: { + delete_encountered = true; + } break; + default: { PSP_COMPLAIN_AND_ABORT("Unexpected OP"); } break; + } + + // add the pkey for row delta + add_delta_pkey(pkey); + } + + m_has_delta = m_delta_pkeys.size() > 0 || delete_encountered; +} + +/** + * @brief Notify the context with new data after the `t_gstate`'s master table + * has been updated for the first time with data. + * + * @param flattened + */ +void +t_ctxunit::notify(const t_data_table& flattened) { + t_uindex nrecs = flattened.size(); + std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); + const t_column* pkey_col = pkey_sptr.get(); + + m_has_delta = true; + + for (t_uindex idx = 0; idx < nrecs; ++idx) { + t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); + + // Add primary key to track row delta + add_delta_pkey(pkey); + } +} + +/** + * @brief Mark a primary key as updated by adding it to the tracking set. 
+ * + * @param pkey + */ +void +t_ctxunit::add_delta_pkey(t_tscalar pkey) { + m_delta_pkeys.insert(pkey); +} + +bool +t_ctxunit::has_deltas() const { + return m_has_delta; +} + +void +t_ctxunit::pprint() const {} + +t_dtype +t_ctxunit::get_column_dtype(t_uindex idx) const { + if (idx >= static_cast(get_column_count())) + return DTYPE_NONE; + + auto cname = m_config.col_at(idx); + + if (!m_schema.has_column(cname)) + return DTYPE_NONE; + + return m_schema.get_dtype(cname); +} + +void +t_ctxunit::clear_deltas() { + m_has_delta = false; +} + +} // end namespace perspective diff --git a/cpp/perspective/src/cpp/data_slice.cpp b/cpp/perspective/src/cpp/data_slice.cpp index 51b77cef99..07dd3bff23 100644 --- a/cpp/perspective/src/cpp/data_slice.cpp +++ b/cpp/perspective/src/cpp/data_slice.cpp @@ -176,6 +176,7 @@ t_data_slice::get_slice_idx(t_uindex ridx, t_uindex cidx) const { } // Explicitly instantiate data slice for each context +template class t_data_slice; template class t_data_slice; template class t_data_slice; template class t_data_slice; diff --git a/cpp/perspective/src/cpp/emscripten.cpp b/cpp/perspective/src/cpp/emscripten.cpp index 10a799a961..18a350c530 100644 --- a/cpp/perspective/src/cpp/emscripten.cpp +++ b/cpp/perspective/src/cpp/emscripten.cpp @@ -1511,6 +1511,31 @@ namespace binding { * * Context API */ + template <> + std::shared_ptr + make_context(std::shared_ptr table, std::shared_ptr schema, + std::shared_ptr view_config, const std::string& name) { + auto columns = view_config->get_columns(); + auto filter_op = view_config->get_filter_op(); + auto fterm = view_config->get_fterm(); + auto sortspec = view_config->get_sortspec(); + auto computed_columns = view_config->get_computed_columns(); + + auto cfg = t_config(columns, fterm, filter_op, computed_columns); + auto ctx_unit = std::make_shared(*(schema.get()), cfg); + ctx_unit->init(); + + auto pool = table->get_pool(); + auto gnode = table->get_gnode(); + + pool->register_context( + gnode->get_id(), + 
name, + UNIT_CONTEXT, + reinterpret_cast(ctx_unit.get())); + + return ctx_unit; + } template <> std::shared_ptr @@ -1763,6 +1788,34 @@ EMSCRIPTEN_BINDINGS(perspective) { */ // Bind a View for each context type + class_>("View_ctxunit") + .constructor< + std::shared_ptr
, + std::shared_ptr, + const std::string&, + const std::string&, + std::shared_ptr>() + .smart_ptr>>("shared_ptr") + .function("sides", &View::sides) + .function("num_rows", &View::num_rows) + .function("num_columns", &View::num_columns) + .function("get_row_expanded", &View::get_row_expanded) + .function("schema", &View::schema) + .function("computed_schema", &View::computed_schema) + .function("column_names", &View::column_names) + .function("column_paths", &View::column_paths) + .function("_get_deltas_enabled", &View::_get_deltas_enabled) + .function("_set_deltas_enabled", &View::_set_deltas_enabled) + .function("get_context", &View::get_context, allow_raw_pointers()) + .function("get_row_pivots", &View::get_row_pivots) + .function("get_column_pivots", &View::get_column_pivots) + .function("get_aggregates", &View::get_aggregates) + .function("get_filter", &View::get_filter) + .function("get_sort", &View::get_sort) + .function("get_step_delta", &View::get_step_delta) + .function("get_column_dtype", &View::get_column_dtype) + .function("is_column_only", &View::is_column_only); + class_>("View_ctx0") .constructor< std::shared_ptr
, @@ -1903,6 +1956,15 @@ EMSCRIPTEN_BINDINGS(perspective) { * * t_data_slice */ + class_>("t_data_slice_ctxunit") + .smart_ptr>>("shared_ptr>>") + .function( + "get_column_slice", &t_data_slice::get_column_slice) + .function("get_slice", &t_data_slice::get_slice) + .function("get_pkeys", &t_data_slice::get_pkeys) + .function( + "get_column_names", &t_data_slice::get_column_names); + class_>("t_data_slice_ctx0") .smart_ptr>>("shared_ptr>>") .function( @@ -1931,6 +1993,13 @@ EMSCRIPTEN_BINDINGS(perspective) { .function( "get_column_names", &t_data_slice::get_column_names) .function("get_row_path", &t_data_slice::get_row_path); + + + /****************************************************************************** + * + * t_ctxunit + */ + class_("t_ctxunit").smart_ptr>("shared_ptr"); /****************************************************************************** * @@ -2115,18 +2184,23 @@ EMSCRIPTEN_BINDINGS(perspective) { */ function("make_table", &make_table); function("col_to_js_typed_array", &col_to_js_typed_array); + function("make_view_unit", &make_view); function("make_view_zero", &make_view); function("make_view_one", &make_view); function("make_view_two", &make_view); + function("get_data_slice_unit", &get_data_slice, allow_raw_pointers()); + function("get_from_data_slice_unit", &get_from_data_slice, allow_raw_pointers()); function("get_data_slice_zero", &get_data_slice, allow_raw_pointers()); function("get_from_data_slice_zero", &get_from_data_slice, allow_raw_pointers()); function("get_data_slice_one", &get_data_slice, allow_raw_pointers()); function("get_from_data_slice_one", &get_from_data_slice, allow_raw_pointers()); function("get_data_slice_two", &get_data_slice, allow_raw_pointers()); function("get_from_data_slice_two", &get_from_data_slice, allow_raw_pointers()); + function("to_arrow_unit", &to_arrow); function("to_arrow_zero", &to_arrow); function("to_arrow_one", &to_arrow); function("to_arrow_two", &to_arrow); + function("get_row_delta_unit", 
&get_row_delta); function("get_row_delta_zero", &get_row_delta); function("get_row_delta_one", &get_row_delta); function("get_row_delta_two", &get_row_delta); diff --git a/cpp/perspective/src/cpp/gnode.cpp b/cpp/perspective/src/cpp/gnode.cpp index 4ddb751ff9..f59962d07c 100644 --- a/cpp/perspective/src/cpp/gnode.cpp +++ b/cpp/perspective/src/cpp/gnode.cpp @@ -8,9 +8,10 @@ */ #include +#include +#include #include #include -#include #include #include #include @@ -717,6 +718,11 @@ t_gnode::_update_contexts_from_state(std::shared_ptr tbl) { ctx->reset(); update_context_from_state(ctx, tbl); } break; + case UNIT_CONTEXT: { + auto ctx = static_cast(ctxh.m_ctx); + ctx->reset(); + update_context_from_state(ctx, tbl); + } break; case GROUPED_PKEY_CONTEXT: { auto ctx = static_cast(ctxh.m_ctx); ctx->reset(); @@ -750,6 +756,10 @@ t_gnode::get_registered_contexts() const { auto ctx = static_cast(ctxh.m_ctx); ss << ctx->repr() << ")"; } break; + case UNIT_CONTEXT: { + auto ctx = static_cast(ctxh.m_ctx); + ss << ctx->repr() << ")"; + } break; case GROUPED_PKEY_CONTEXT: { auto ctx = static_cast(ctxh.m_ctx); ss << ctx->repr() << ")"; @@ -825,6 +835,15 @@ t_gnode::_register_context(const std::string& name, t_ctx_type type, std::int64_ update_context_from_state(ctx, pkeyed_table); } } break; + case UNIT_CONTEXT: { + set_ctx_state(ptr_); + t_ctxunit* ctx = static_cast(ptr_); + ctx->reset(); + + if (should_update) { + update_context_from_state(ctx, pkeyed_table); + } + } break; case GROUPED_PKEY_CONTEXT: { set_ctx_state(ptr_); auto ctx = static_cast(ptr_); @@ -861,6 +880,8 @@ t_gnode::_unregister_context(const std::string& name) { std::vector computed_column_names; switch (type) { + // No computed columns to remove + case UNIT_CONTEXT: break; case TWO_SIDED_CONTEXT: { t_ctx2* ctx = static_cast(ctxh.m_ctx); auto computed_columns = ctx->get_config().get_computed_columns(); @@ -932,6 +953,9 @@ t_gnode::notify_contexts(const t_data_table& flattened) { case ZERO_SIDED_CONTEXT: { 
notify_context(flattened, ctxh); } break; + case UNIT_CONTEXT: { + notify_context(flattened, ctxh); + } break; case GROUPED_PKEY_CONTEXT: { notify_context(flattened, ctxh); } break; @@ -1122,6 +1146,7 @@ t_gnode::get_pivots() const { auto pivots = ctx->get_pivots(); rval.insert(std::end(rval), std::begin(pivots), std::end(pivots)); } break; + case UNIT_CONTEXT: case ZERO_SIDED_CONTEXT: case GROUPED_PKEY_CONTEXT: { // no pivots @@ -1150,6 +1175,9 @@ t_gnode::get_trees() { auto& ctxh = kv.second; switch (ctxh.m_ctx_type) { + // `get_trees()` not implemented, as unit contexts have no + // traversal of their own. + case UNIT_CONTEXT: break; case TWO_SIDED_CONTEXT: { auto ctx = reinterpret_cast(ctxh.m_ctx); auto trees = ctx->get_trees(); @@ -1236,6 +1264,12 @@ t_gnode::get_contexts_last_updated() const { rval.push_back(kv.first); } } break; + case UNIT_CONTEXT: { + auto ctx = reinterpret_cast(ctxh.m_ctx); + if (ctx->has_deltas()) { + rval.push_back(kv.first); + } + } break; case GROUPED_PKEY_CONTEXT: { auto ctx = reinterpret_cast(ctxh.m_ctx); if (ctx->has_deltas()) { @@ -1280,6 +1314,10 @@ t_gnode::reset() { auto ctx = reinterpret_cast(ctxh.m_ctx); ctx->reset(); } break; + case UNIT_CONTEXT: { + auto ctx = reinterpret_cast(ctxh.m_ctx); + ctx->reset(); + } break; case GROUPED_PKEY_CONTEXT: { auto ctx = reinterpret_cast(ctxh.m_ctx); ctx->reset(); @@ -1360,6 +1398,11 @@ t_gnode::set_event_loop_thread_id(std::thread::id id) { } #endif +void +t_gnode::register_context(const std::string& name, std::shared_ptr ctx) { + _register_context(name, UNIT_CONTEXT, reinterpret_cast(ctx.get())); +} + void t_gnode::register_context(const std::string& name, std::shared_ptr ctx) { _register_context(name, ZERO_SIDED_CONTEXT, reinterpret_cast(ctx.get())); diff --git a/cpp/perspective/src/cpp/gnode_state.cpp b/cpp/perspective/src/cpp/gnode_state.cpp index 022433840d..d5301e87c7 100644 --- a/cpp/perspective/src/cpp/gnode_state.cpp +++ b/cpp/perspective/src/cpp/gnode_state.cpp @@ -8,9 +8,10 @@ 
*/ #include +#include +#include #include #include -#include #include #include #include @@ -179,7 +180,7 @@ t_gstate::fill_master_table(const t_data_table* flattened) { void t_gstate::update_master_table(const t_data_table* flattened) { - if (size() == 0) { + if (num_rows() == 0) { fill_master_table(flattened); return; } @@ -373,12 +374,12 @@ t_gstate::get_table() const { void t_gstate::read_column(const std::string& colname, const std::vector& pkeys, std::vector& out_data) const { - t_index num = pkeys.size(); + t_index num_rows = pkeys.size(); std::shared_ptr col = m_table->get_const_column(colname); const t_column* col_ = col.get(); - std::vector rval(num); + std::vector rval(num_rows); - for (t_index idx = 0; idx < num; ++idx) { + for (t_index idx = 0; idx < num_rows; ++idx) { t_mapping::const_iterator iter = m_mapping.find(pkeys[idx]); if (iter != m_mapping.end()) { rval[idx].set(col_->get_scalar(iter->second)); @@ -397,13 +398,13 @@ t_gstate::read_column(const std::string& colname, const std::vector& void t_gstate::read_column(const std::string& colname, const std::vector& pkeys, std::vector& out_data, bool include_nones) const { - t_index num = pkeys.size(); + t_index num_rows = pkeys.size(); std::shared_ptr col = m_table->get_const_column(colname); const t_column* col_ = col.get(); std::vector rval; - rval.reserve(num); - for (t_index idx = 0; idx < num; ++idx) { + rval.reserve(num_rows); + for (t_index idx = 0; idx < num_rows; ++idx) { t_mapping::const_iterator iter = m_mapping.find(pkeys[idx]); if (iter != m_mapping.end()) { auto tscalar = col_->get_scalar(iter->second); @@ -416,19 +417,51 @@ t_gstate::read_column(const std::string& colname, const std::vector& } void -t_gstate::read_column(const std::string& colname, std::vector& out_data) const { +t_gstate::read_column( + const std::string& colname, + t_uindex start_idx, + t_uindex end_idx, + std::vector& out_data) const { + t_index num_rows = end_idx - start_idx; + + // Don't read invalid row indices. 
+ if (num_rows <= 0) { + return; + } + std::shared_ptr col = m_table->get_const_column(colname); const t_column* col_ = col.get(); - t_uindex col_size = col_->size(); - std::vector rval(col_size); - for (t_index idx = 0; idx < col_size; ++idx) { - rval[idx].set(col_->get_scalar(idx)); + std::vector rval(num_rows); + + t_uindex i = 0; + for (t_uindex idx = start_idx; idx < end_idx; ++idx) { + rval[i] = col_->get_scalar(idx); + i++; } std::swap(rval, out_data); } +void +t_gstate::read_column( + const std::string& colname, + const std::vector& row_indices, + std::vector& out_data) const { + std::shared_ptr col = m_table->get_const_column(colname); + const t_column* col_ = col.get(); + + t_index num_rows = row_indices.size(); + std::vector rval(num_rows); + + t_uindex i = 0; + for (auto idx : row_indices) { + rval[i] = col_->get_scalar(idx); + i++; + } + + std::swap(rval, out_data); +} t_tscalar t_gstate::get(t_tscalar pkey, const std::string& colname) const { @@ -571,7 +604,7 @@ t_gstate::_get_pkeyed_table(const std::vector& pkeys) const { t_data_table* t_gstate::_get_pkeyed_table(const t_schema& schema, const std::vector& pkeys) const { - t_mask mask(size()); + t_mask mask(num_rows()); for (const auto& pkey : pkeys) { auto lk = lookup(pkey); @@ -698,8 +731,13 @@ t_gstate::_get_pkeyed_table(const t_schema& schema, const t_mask& mask) const { } t_uindex -t_gstate::size() const { - return m_table->size(); +t_gstate::num_rows() const { + return m_table->num_rows(); +} + +t_uindex +t_gstate::num_columns() const { + return m_table->num_columns(); } std::vector diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index 98d7c066fa..db2f6d31a8 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -88,6 +88,12 @@ View::get_view_config() const { return m_view_config; } +template <> +std::int32_t +View::sides() const { + return 0; +} + template <> std::int32_t View::sides() const { @@ -199,6 +205,24 @@ 
View::column_names(bool skip, std::int32_t depth) const { return names; } +template <> +std::vector> +View::column_names(bool skip, std::int32_t depth) const { + std::vector> names; + + for (t_uindex key = 0, max = m_ctx->unity_get_column_count(); key != max; ++key) { + t_tscalar name = m_ctx->get_column_name(key); + if (name.to_string() == "psp_okey") { + continue; + }; + std::vector col_path; + col_path.push_back(name); + names.push_back(col_path); + } + + return names; +} + template std::vector> View::column_paths() const { @@ -263,7 +287,7 @@ View::schema() const { template <> std::map -View::schema() const { +View::schema() const { t_schema schema = m_ctx->get_schema(); std::vector _types = schema.types(); std::vector names = schema.columns(); @@ -287,6 +311,31 @@ View::schema() const { return new_schema; } +template <> +std::map +View::schema() const { + t_schema schema = m_ctx->get_schema(); + std::vector _types = schema.types(); + std::vector names = schema.columns(); + + std::map types; + for (std::size_t i = 0, max = names.size(); i != max; ++i) { + types[names[i]] = _types[i]; + } + + std::vector> cols = column_names(false); + std::map new_schema; + + for (std::size_t i = 0, max = cols.size(); i != max; ++i) { + std::string name = cols[i].back().to_string(); + if (name == "psp_okey") { + continue; + } + new_schema[name] = dtype_to_str(types[name]); + } + + return new_schema; +} template std::map @@ -314,6 +363,12 @@ View::computed_schema() const { return new_schema; } +template <> +std::map +View::computed_schema() const { + return {}; +} + template <> std::map View::computed_schema() const { @@ -335,6 +390,17 @@ View::computed_schema() const { return new_schema; } +template <> +std::shared_ptr> +View::get_data( + t_uindex start_row, t_uindex end_row, t_uindex start_col, t_uindex end_col) const { + std::vector slice = m_ctx->get_data(start_row, end_row, start_col, end_col); + auto col_names = column_names(); + auto data_slice_ptr = 
std::make_shared>(m_ctx, start_row, end_row, + start_col, end_col, m_row_offset, m_col_offset, slice, col_names); + return data_slice_ptr; +} + template <> std::shared_ptr> View::get_data( @@ -626,6 +692,12 @@ View::get_row_expanded(std::int32_t ridx) const { return m_ctx->unity_get_row_expanded(ridx); } +template <> +t_index +View::expand(std::int32_t ridx, std::int32_t row_pivot_length) { + return ridx; +} + template <> t_index View::expand(std::int32_t ridx, std::int32_t row_pivot_length) { @@ -666,6 +738,10 @@ View::collapse(std::int32_t ridx) { return m_ctx->close(t_header::HEADER_ROW, ridx); } +template <> +void +View::set_depth(std::int32_t depth, std::int32_t row_pivot_length) {} + template <> void View::set_depth(std::int32_t depth, std::int32_t row_pivot_length) {} @@ -739,6 +815,12 @@ View::get_row_path(t_uindex idx) const { return std::vector(); } +template <> +std::vector +View::get_row_path(t_uindex idx) const { + return std::vector(); +} + template std::vector View::get_row_path(t_uindex idx) const { @@ -751,6 +833,12 @@ View::get_step_delta(t_index bidx, t_index eidx) const { return m_ctx->get_step_delta(bidx, eidx); } +template <> +t_stepdelta +View::get_step_delta(t_index bidx, t_index eidx) const { + return t_stepdelta(); +} + template std::shared_ptr> View::get_row_delta() const { @@ -840,6 +928,7 @@ View::_find_hidden_sort(const std::vector& sort) { } // Explicitly instantiate View for each context +template class View; template class View; template class View; template class View; diff --git a/cpp/perspective/src/cpp/view_config.cpp b/cpp/perspective/src/cpp/view_config.cpp index f9750146f1..d17290f7bb 100644 --- a/cpp/perspective/src/cpp/view_config.cpp +++ b/cpp/perspective/src/cpp/view_config.cpp @@ -281,4 +281,19 @@ t_view_config::get_aggregate_index(const std::string& column) const { return t_index(); } +bool +t_view_config::is_unit_config( + const std::vector& table_columns) const { + if (m_row_pivots.size() == 0 && + 
m_column_pivots.size() == 0 && + m_filter.size() == 0 && + m_sort.size() == 0 && + m_computed_columns.size() == 0 && + m_columns == table_columns) { + return true; + } else { + return false; + } +} + } // end namespace perspective \ No newline at end of file diff --git a/cpp/perspective/src/include/perspective/base.h b/cpp/perspective/src/include/perspective/base.h index dc050b690c..c6088f2016 100644 --- a/cpp/perspective/src/include/perspective/base.h +++ b/cpp/perspective/src/include/perspective/base.h @@ -325,6 +325,7 @@ PERSPECTIVE_EXPORT std::string _get_default_aggregate_string(t_dtype dtype); enum t_totals { TOTALS_BEFORE, TOTALS_HIDDEN, TOTALS_AFTER }; enum t_ctx_type { + UNIT_CONTEXT, ZERO_SIDED_CONTEXT, ONE_SIDED_CONTEXT, TWO_SIDED_CONTEXT, diff --git a/cpp/perspective/src/include/perspective/binding.h b/cpp/perspective/src/include/perspective/binding.h index b9a1bc54b5..2053f1542f 100644 --- a/cpp/perspective/src/include/perspective/binding.h +++ b/cpp/perspective/src/include/perspective/binding.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/perspective/src/include/perspective/config.h b/cpp/perspective/src/include/perspective/config.h index 33911661a3..f0a438494e 100644 --- a/cpp/perspective/src/include/perspective/config.h +++ b/cpp/perspective/src/include/perspective/config.h @@ -97,6 +97,9 @@ class PERSPECTIVE_EXPORT t_config { const std::vector& computed_columns, bool column_only); + // An empty config, used for the unit context. + t_config(); + // Constructors used for C++ tests, not exposed to other parts of the engine t_config(const std::vector& row_pivots, const std::vector& col_pivots, const std::vector& aggregates); @@ -113,8 +116,6 @@ class PERSPECTIVE_EXPORT t_config { t_config(const std::vector& row_pivots, const t_aggspec& agg); t_config(const std::vector& detail_columns); - - t_config(); /** * @brief For each column in the config's `detail_columns` (i.e. 
visible diff --git a/cpp/perspective/src/include/perspective/context_unit.h b/cpp/perspective/src/include/perspective/context_unit.h new file mode 100644 index 0000000000..6d338c6f91 --- /dev/null +++ b/cpp/perspective/src/include/perspective/context_unit.h @@ -0,0 +1,148 @@ +/****************************************************************************** + * + * Copyright (c) 2017, the Perspective Authors. + * + * This file is part of the Perspective library, distributed under the terms of + * the Apache License 2.0. The full license can be found in the LICENSE file. + * + */ + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace perspective { + +class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { +public: + t_ctxunit(); + + t_ctxunit(const t_schema& schema, const t_config& config); + + ~t_ctxunit(); + + perspective::t_index get_row_count() const; + + perspective::t_index get_column_count() const; + + using t_ctxbase::get_data; + + std::vector get_data( + t_index start_row, t_index end_row, t_index start_col, t_index end_col) const; + + std::vector get_data(const std::vector& rows) const; + + std::vector get_data(const std::vector& pkeys) const; + + void sort_by(const std::vector& sortby); + + void reset_sortby(); + + // will only work on empty contexts + void notify(const t_data_table& flattened); + + void notify(const t_data_table& flattened, const t_data_table& delta, const t_data_table& prev, + const t_data_table& current, const t_data_table& transitions, const t_data_table& existed); + + void step_begin(); + + void step_end(); + + std::string repr() const; + + void init(); + + void reset(); + + t_index sidedness() const; + + bool get_deltas_enabled() const; + void set_deltas_enabled(bool enabled_state); + + std::vector get_pkeys(const std::vector>& cells) const; + + std::vector get_cell_data( + const std::vector>& cells) const; + + t_stepdelta get_step_delta(t_index bidx, t_index eidx); + + 
t_rowdelta get_row_delta(); + + std::vector get_rows_changed(); + + std::vector get_cell_delta(t_index bidx, t_index eidx) const; + + void clear_deltas(); + + void reset_step_state(); + + void disable(); + + void enable(); + + std::vector get_trees(); + + bool has_deltas() const; + + void pprint() const; + + t_dtype get_column_dtype(t_uindex idx) const; + + std::shared_ptr get_table() const; + + t_tscalar get_column_name(t_index idx); + + std::vector get_column_names() const; + + const tsl::hopscotch_set& get_delta_pkeys() const; + + // Unity api + std::vector unity_get_row_path(t_uindex idx) const; + std::vector unity_get_column_path(t_uindex idx) const; + t_uindex unity_get_row_depth(t_uindex ridx) const; + t_uindex unity_get_column_depth(t_uindex cidx) const; + std::vector unity_get_column_names() const; + std::vector unity_get_column_display_names() const; + std::string unity_get_column_name(t_uindex idx) const; + std::string unity_get_column_display_name(t_uindex idx) const; + t_uindex unity_get_column_count() const; + t_uindex unity_get_row_count() const; + t_data_table unity_get_table() const; + bool unity_get_row_expanded(t_uindex idx) const; + bool unity_get_column_expanded(t_uindex idx) const; + void unity_init_load_step_end(); + +protected: + void add_delta_pkey(t_tscalar pkey); + + void add_row(t_tscalar pkey, t_uindex idx); + + void update_row(t_tscalar pkey); + + void delete_row(t_tscalar pkey); + +private: + /** + * The unit context does not contain `m_traversal`, as it does not need to + * keep track of a subset of the `gnode_state`'s master table or perform + * translations between row/column indices and primary keys. Instead, data + * is read from the master table using row/column indices that map directly + * to the master table's data. + */ + tsl::hopscotch_set m_delta_pkeys; + + // A mapping of integer row indices to scalar primary keys. 
+ tsl::hopscotch_map m_pkey_index; + + t_symtable m_symtable; + bool m_has_delta; +}; + +} // end namespace perspective diff --git a/cpp/perspective/src/include/perspective/data_slice.h b/cpp/perspective/src/include/perspective/data_slice.h index 07fada8878..2a86f0ba0f 100644 --- a/cpp/perspective/src/include/perspective/data_slice.h +++ b/cpp/perspective/src/include/perspective/data_slice.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/perspective/src/include/perspective/gnode.h b/cpp/perspective/src/include/perspective/gnode.h index 3ffe6a55d2..1b8591231f 100644 --- a/cpp/perspective/src/include/perspective/gnode.h +++ b/cpp/perspective/src/include/perspective/gnode.h @@ -44,6 +44,7 @@ PERSPECTIVE_EXPORT t_tscalar calc_newer( PERSPECTIVE_EXPORT t_tscalar calc_negate(t_tscalar val); +class t_ctxunit; class t_ctx0; class t_ctx1; class t_ctx2; @@ -185,6 +186,7 @@ class PERSPECTIVE_EXPORT t_gnode { void promote_column(const std::string& name, t_dtype new_type); // Gnode will steal a reference to the context + void register_context(const std::string& name, std::shared_ptr ctx); void register_context(const std::string& name, std::shared_ptr ctx); void register_context(const std::string& name, std::shared_ptr ctx); void register_context(const std::string& name, std::shared_ptr ctx); diff --git a/cpp/perspective/src/include/perspective/gnode_state.h b/cpp/perspective/src/include/perspective/gnode_state.h index 74c18b4074..ca767dc064 100644 --- a/cpp/perspective/src/include/perspective/gnode_state.h +++ b/cpp/perspective/src/include/perspective/gnode_state.h @@ -100,22 +100,48 @@ class PERSPECTIVE_EXPORT t_gstate { * @param pkeys * @param out_data */ - void read_column(const std::string& colname, const std::vector& pkeys, + void read_column( + const std::string& colname, + const std::vector& pkeys, std::vector& out_data) const; - void read_column(const std::string& colname, const std::vector& pkeys, + void read_column( 
+ const std::string& colname, + const std::vector& pkeys, std::vector& out_data) const; - void read_column(const std::string& colname, const std::vector& pkeys, - std::vector& out_data, bool include_nones) const; + void read_column( + const std::string& colname, + const std::vector& pkeys, + std::vector& out_data, + bool include_nones) const; /** - * @brief Read the entirety of a column into `out_data`. + * @brief Read a column from `start_idx` to `end_idx` into `out_data`. * * @param colname + * @param start_idx + * @param end_idx * @param out_data */ - void read_column(const std::string& colname, std::vector& out_data) const; + void read_column( + const std::string& colname, + t_uindex start_idx, + t_uindex end_idx, + std::vector& out_data) const; + + /** + * @brief Read a column using `row_indices` (not necessarily contiguous) + * into `out_data`. + * + * @param colname + * @param row_indices + * @param out_data + */ + void read_column( + const std::string& colname, + const std::vector& row_indices, + std::vector& out_data) const; /** * @brief Apply the lambda `fn` to each primary-keyed value in the column, @@ -177,11 +203,18 @@ class PERSPECTIVE_EXPORT t_gstate { t_tscalar get_value(const t_tscalar& pkey, const std::string& colname) const; /** - * @brief Return the size of the underlying `t_data_table`. + * @brief Return the number of rows on the master `t_data_table`. + * + * @return t_uindex + */ + t_uindex num_rows() const; + + /** + * @brief Return the number of columns on the master `t_data_table`. * * @return t_uindex */ - t_uindex size() const; + t_uindex num_columns() const; /** * @brief Returns the size of the underlying primary key map. @@ -191,7 +224,7 @@ class PERSPECTIVE_EXPORT t_gstate { t_uindex mapping_size() const; /** - * @brief Resets the gnode state and its underlying `t_data_table` and + * @brief Resets the gnode state and its master `t_data_table` and * mapping. 
* */ diff --git a/cpp/perspective/src/include/perspective/view.h b/cpp/perspective/src/include/perspective/view.h index 25d9410573..4b2abf3923 100644 --- a/cpp/perspective/src/include/perspective/view.h +++ b/cpp/perspective/src/include/perspective/view.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/perspective/src/include/perspective/view_config.h b/cpp/perspective/src/include/perspective/view_config.h index d866a0d548..8bc4f76419 100644 --- a/cpp/perspective/src/include/perspective/view_config.h +++ b/cpp/perspective/src/include/perspective/view_config.h @@ -92,6 +92,19 @@ class PERSPECTIVE_EXPORT t_view_config { bool is_column_only() const; + /** + * @brief Whether this config can be used for a `t_ctxunit` - the config + * must have no pivots, sorts, filters, or computed columns, and the + * ordering of columns must be equal to `table_columns`. + * + * @param table_columns a vector of column names from the `Table` for + * comparison. 
+ * + * @return true + * @return false + */ + bool is_unit_config(const std::vector& table_columns) const; + std::int32_t get_row_pivot_depth() const; std::int32_t get_column_pivot_depth() const; diff --git a/packages/perspective-bench/bench/perspective.benchmark.js b/packages/perspective-bench/bench/perspective.benchmark.js index bf0eb44945..d9b4d1f6b1 100644 --- a/packages/perspective-bench/bench/perspective.benchmark.js +++ b/packages/perspective-bench/bench/perspective.benchmark.js @@ -322,6 +322,42 @@ describe("View", async () => { await table.delete(); }); + describe("mixed", async () => { + describe("ctx_unit", async () => { + let view; + + afterEach(async () => { + await view.delete(); + }); + + benchmark(`view`, async () => { + view = table.view(); + await view.schema(); + }); + }); + }); + + describe("mixed", async () => { + describe("ctx_unit", async () => { + let view; + + beforeAll(async () => { + view = table.view(); + await view.schema(); + }); + + afterAll(async () => { + await view.delete(); + }); + + for (const format of ["json", "columns", "arrow"]) { + benchmark(format, async () => { + await view[`to_${format}`](); + }); + } + }); + }); + for (const aggregate of AGG_OPTIONS) { for (const row_pivot of ROW_PIVOT_OPTIONS) { for (const column_pivot of COLUMN_PIVOT_OPTIONS) { diff --git a/packages/perspective/src/js/perspective.js b/packages/perspective/src/js/perspective.js index c76fb1c7f4..23be2c2b0b 100644 --- a/packages/perspective/src/js/perspective.js +++ b/packages/perspective/src/js/perspective.js @@ -7,6 +7,9 @@ * */ +// TODO remove this when is_unit_config is fully in c++ +import {isEqual} from "underscore"; + import * as defaults from "./config/constants.js"; import {get_type_config} from "./config/index.js"; import {DataAccessor} from "./data_accessor"; @@ -143,10 +146,25 @@ export default function(Module) { function view(table, sides, config, view_config, name) { this.name = name; this._View = undefined; + this.table = table; + 
this.config = config || {}; this.view_config = view_config || new view_config(); - if (sides === 0) { + const num_columns = this.table.columns().length; + + this.is_unit_config = + this.table.index === "" && + sides === 0 && + isEqual(this.view_config.columns.length, num_columns) && + this.view_config.row_pivots.length === 0 && + this.view_config.column_pivots.length === 0 && + this.view_config.filter.length === 0 && + this.view_config.sort.length === 0; + + if (this.is_unit_config) { + this._View = __MODULE__.make_view_unit(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); + } else if (sides === 0) { this._View = __MODULE__.make_view_zero(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); } else if (sides === 1) { this._View = __MODULE__.make_view_one(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); @@ -154,7 +172,6 @@ export default function(Module) { this._View = __MODULE__.make_view_two(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); } - this.table = table; this.ctx = this._View.get_context(); this.column_only = this._View.is_column_only(); this.update_callbacks = this.table.update_callbacks; @@ -346,9 +363,13 @@ export default function(Module) { }; view.prototype.get_data_slice = function(start_row, end_row, start_col, end_col) { - const num_sides = this.sides(); - const nidx = SIDES[num_sides]; - return __MODULE__[`get_data_slice_${nidx}`](this._View, start_row, end_row, start_col, end_col); + if (this.is_unit_config) { + return __MODULE__.get_data_slice_unit(this._View, start_row, end_row, start_col, end_col); + } else { + const num_sides = this.sides(); + const nidx = SIDES[num_sides]; + return __MODULE__[`get_data_slice_${nidx}`](this._View, start_row, end_row, start_col, end_col); + } }; /** @@ -407,6 +428,14 @@ export default function(Module) { const has_row_path = num_sides !== 0 && !this.column_only; const nidx = SIDES[num_sides]; + let 
get_from_data_slice; + + if (this.is_unit_config) { + get_from_data_slice = __MODULE__.get_from_data_slice_unit; + } else { + get_from_data_slice = __MODULE__[`get_from_data_slice_${nidx}`]; + } + const slice = this.get_data_slice(start_row, end_row, start_col, end_col); const ns = slice.get_column_names(); const col_names = extract_vector_scalar(ns).map(x => x.join(defaults.COLUMN_SEPARATOR_STRING)); @@ -445,7 +474,7 @@ export default function(Module) { // these. continue; } else { - let value = __MODULE__[`get_from_data_slice_${nidx}`](slice, ridx, cidx); + let value = get_from_data_slice(slice, ridx, cidx); if ((col_type === "datetime" || col_type === "date") && value !== undefined) { if (date_format) { value = new Date(value); @@ -678,7 +707,9 @@ export default function(Module) { const end_col = options.end_col; const sides = this.sides(); - if (sides === 0) { + if (this.is_unit_config) { + return __MODULE__.to_arrow_unit(this._View, start_row, end_row, start_col, end_col); + } else if (sides === 0) { return __MODULE__.to_arrow_zero(this._View, start_row, end_row, start_col, end_col); } else if (sides === 1) { return __MODULE__.to_arrow_one(this._View, start_row, end_row, start_col, end_col); @@ -794,9 +825,13 @@ export default function(Module) { * @private */ view.prototype._get_row_delta = async function() { - const sides = this.sides(); - const nidx = SIDES[sides]; - return __MODULE__[`get_row_delta_${nidx}`](this._View); + if (this.is_unit_config) { + return __MODULE__.get_row_delta_unit(this._View); + } else { + const sides = this.sides(); + const nidx = SIDES[sides]; + return __MODULE__[`get_row_delta_${nidx}`](this._View); + } }; /** diff --git a/packages/perspective/test/js/to_format.js b/packages/perspective/test/js/to_format.js index cc218dbbc8..81361488cb 100644 --- a/packages/perspective/test/js/to_format.js +++ b/packages/perspective/test/js/to_format.js @@ -46,6 +46,65 @@ module.exports = perspective => { table.delete(); }); + it("should filter 
out invalid start rows & columns", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view(); + let json = await view.to_json({ + start_row: 5, + start_col: 5 + }); + expect(json).toEqual([]); + view.delete(); + table.delete(); + }); + + it("should filter out invalid start rows based on view", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + filter: [["float", ">", 3.5]] + }); + + // valid on view() but not this filtered view + let json = await view.to_json({ + start_row: 3 + }); + + expect(json).toEqual([]); + + view.delete(); + table.delete(); + }); + + it("should filter out invalid start columns based on view", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + columns: ["float", "int"] + }); + + let json = await view.to_json({ + start_col: 2 + }); + + expect(json).toEqual([{}, {}, {}, {}]); + view.delete(); + table.delete(); + }); + + it("should filter out invalid start rows & columns based on view", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + columns: ["float", "int"], + filter: [["float", ">", 3.5]] + }); + let json = await view.to_json({ + start_row: 5, + start_col: 5 + }); + expect(json).toEqual([]); + view.delete(); + table.delete(); + }); + it("should respect start/end rows", async function() { let table = perspective.table(int_float_string_data); let view = table.view(); @@ -763,6 +822,115 @@ module.exports = perspective => { }); }); + describe("0-sided sorted", function() { + it("should return correct pkey for unindexed table", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + sort: [["float", "desc"]] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{float: 5.25, __INDEX__: [3]}]); + view.delete(); + 
table.delete(); + }); + + it("should return correct pkey for float indexed table", async function() { + let table = perspective.table(int_float_string_data, {index: "float"}); + let view = table.view({ + sort: [["float", "desc"]] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{float: 5.25, __INDEX__: [5.25]}]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for string indexed table", async function() { + let table = perspective.table(int_float_string_data, {index: "string"}); + let view = table.view({ + sort: [["float", "desc"]] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{float: 5.25, __INDEX__: ["d"]}]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for date indexed table", async function() { + // default data generates the same datetime for each row, + // thus pkeys get collapsed + const data = [ + {int: 200, datetime: new Date()}, + {int: 100, datetime: new Date()} + ]; + data[1].datetime.setDate(data[1].datetime.getDate() + 1); + let table = perspective.table(data, {index: "datetime"}); + let view = table.view({ + sort: [["int", "desc"]] + }); + let json = await view.to_json({ + index: true + }); + expect(json).toEqual([ + {int: 200, datetime: data[0].datetime.getTime(), __INDEX__: [data[0].datetime.getTime()]}, + {int: 100, datetime: data[1].datetime.getTime(), __INDEX__: [data[1].datetime.getTime()]} + ]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for all rows + columns on an unindexed table", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + sort: [["float", "asc"]] + }); + let json = await view.to_json({ + index: true + }); + + for (let i = 0; i < json.length; i++) { + expect(json[i].__INDEX__).toEqual([i]); + } + 
view.delete(); + table.delete(); + }); + + it("should return correct pkey for all rows + columns on an indexed table", async function() { + let table = perspective.table(int_float_string_data, {index: "string"}); + let view = table.view({ + sort: [["float", "desc"]] + }); + let json = await view.to_json({ + index: true + }); + + let pkeys = ["d", "c", "b", "a"]; + for (let i = 0; i < json.length; i++) { + expect(json[i].__INDEX__).toEqual([pkeys[i]]); + } + view.delete(); + table.delete(); + }); + }); + describe("1-sided", function() { it("should generate pkeys of aggregated rows for 1-sided", async function() { let table = perspective.table(int_float_string_data); diff --git a/scripts/build_js.js b/scripts/build_js.js index 1e274c6a31..a1e2f1f8cf 100644 --- a/scripts/build_js.js +++ b/scripts/build_js.js @@ -97,7 +97,7 @@ function compileCPP(packageName) { const base_dir = path.join(__dirname, "..", "packages", packageName, "build", dir_name); mkdirp.sync(base_dir); const cmd = bash` - emcmake cmake ../../../../cpp/perspective ${process.env.PSP_DEBUG}-DCMAKE_BUILD_TYPE + emcmake cmake ../../../../cpp/perspective -DCMAKE_BUILD_TYPE=${dir_name} && emmake make -j${process.env.PSP_CPU_COUNT || os.cpus().length} `; if (process.env.PSP_DOCKER) { From 957a970b31a7ca5ee4855970882f1db7f14ea910 Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Tue, 13 Oct 2020 19:46:04 -0400 Subject: [PATCH 2/5] Create unit context regardless of column order column order no longer matters for unit context as long as num_columns == table.num_columns more tests, print inside traversal::step_end --- cpp/perspective/src/cpp/flat_traversal.cpp | 46 +++++- .../src/include/perspective/flat_traversal.h | 11 +- packages/perspective/src/js/perspective.js | 11 +- packages/perspective/test/js/constructors.js | 114 +++++++++++--- packages/perspective/test/js/delta.js | 149 ++++++++++++++++++ packages/perspective/test/js/multiple.js | 25 +++ packages/perspective/test/js/updates.js | 144 ++++++++++++++++- 7 
files changed, 466 insertions(+), 34 deletions(-) diff --git a/cpp/perspective/src/cpp/flat_traversal.cpp b/cpp/perspective/src/cpp/flat_traversal.cpp index 4f17bc97a1..9525359f29 100644 --- a/cpp/perspective/src/cpp/flat_traversal.cpp +++ b/cpp/perspective/src/cpp/flat_traversal.cpp @@ -250,46 +250,82 @@ t_ftrav::step_end() { t_uindex i = 0; t_multisorter sorter(get_sort_orders(m_sortby)); + std::vector new_rows; new_rows.reserve(m_new_elems.size()); - for (t_pkmselem_map::const_iterator pkelem_iter = m_new_elems.begin(); - pkelem_iter != m_new_elems.end(); ++pkelem_iter) { + std::cout << "pkeys in new_rows" << std::endl; + + for ( + tsl::hopscotch_map::const_iterator pkelem_iter = m_new_elems.begin(); + pkelem_iter != m_new_elems.end(); + ++pkelem_iter) { + std::cout << pkelem_iter->first << ", "; new_rows.push_back(pkelem_iter->second); } + + std::cout << std::endl; + + for (const auto& sort : m_sortby) { + std::cout << "sort: " << sort.m_colname << std::endl; + } + std::sort(new_rows.begin(), new_rows.end(), sorter); + std::cout << "pkeys in new_rows (sorted)" << std::endl; + for (const auto& r : new_rows) { + std::cout << r.m_pkey << ", "; + } + + std::cout << std::endl << "new_index" << std::endl; + for (auto it = new_rows.begin(); it != new_rows.end(); ++it) { const t_mselem& new_elem = *it; while (i < m_index->size()) { const t_mselem& old_elem = (*m_index)[i]; + std::cout << old_elem.m_pkey; if (old_elem.m_deleted) { + std::cout << " deleted, "; i++; m_pkeyidx.erase(old_elem.m_pkey); } else if (old_elem.m_updated) { + std::cout << " updated, "; i++; } else if (sorter(old_elem, new_elem)) { + std::cout << " sorted, "; m_pkeyidx[old_elem.m_pkey] = new_index->size(); new_index->push_back(old_elem); i++; } else { + std::cout << " noop, "; break; } } m_pkeyidx[new_elem.m_pkey] = new_index->size(); + std::cout << new_elem.m_pkey << ": " << new_index->size() << ", "; new_index->push_back(new_elem); } + std::cout << std::endl; + std::cout << "i: " << i << 
std::endl; + std::cout << "reconciling new_index" << std::endl; + + // reconcile old rows that are marked as removed or updated. while (i < m_index->size()) { const t_mselem& old_elem = (*m_index)[i++]; + std::cout << old_elem.m_pkey; if (old_elem.m_deleted) { + std::cout << " deleted, "; m_pkeyidx.erase(old_elem.m_pkey); } else if (!old_elem.m_updated) { m_pkeyidx[old_elem.m_pkey] = new_index->size(); + std::cout << " not updated - new size: " << new_index->size() << ", "; new_index->push_back(old_elem); + } else { + std::cout << ", "; } } - + std::cout << std::endl; std::swap(new_index, m_index); m_new_elems.clear(); } @@ -308,7 +344,7 @@ t_ftrav::update_row( std::shared_ptr gstate, const t_config& config, t_tscalar pkey) { if (m_sortby.empty()) return; - t_pkeyidx_map::iterator pkiter = m_pkeyidx.find(pkey); + auto pkiter = m_pkeyidx.find(pkey); if (pkiter == m_pkeyidx.end()) { add_row(gstate, config, pkey); return; @@ -321,7 +357,7 @@ t_ftrav::update_row( void t_ftrav::delete_row(t_tscalar pkey) { - t_pkeyidx_map::iterator pkiter = m_pkeyidx.find(pkey); + auto pkiter = m_pkeyidx.find(pkey); if (pkiter == m_pkeyidx.end()) return; (*m_index)[pkiter->second].m_deleted = true; diff --git a/cpp/perspective/src/include/perspective/flat_traversal.h b/cpp/perspective/src/include/perspective/flat_traversal.h index d1172efb07..397a477336 100644 --- a/cpp/perspective/src/include/perspective/flat_traversal.h +++ b/cpp/perspective/src/include/perspective/flat_traversal.h @@ -27,8 +27,6 @@ namespace perspective { class PERSPECTIVE_EXPORT t_ftrav { - typedef tsl::hopscotch_map t_pkeyidx_map; - typedef tsl::hopscotch_map t_pkmselem_map; public: t_ftrav(); @@ -96,8 +94,13 @@ class PERSPECTIVE_EXPORT t_ftrav { private: t_index m_step_deletes; t_index m_step_inserts; - t_pkeyidx_map m_pkeyidx; - t_pkmselem_map m_new_elems; + + // map primary keys to row indices + tsl::hopscotch_map m_pkeyidx; + + // map primary keys to sort items + tsl::hopscotch_map m_new_elems; + std::vector 
m_sortby; std::shared_ptr> m_index; t_symtable m_symtable; diff --git a/packages/perspective/src/js/perspective.js b/packages/perspective/src/js/perspective.js index 23be2c2b0b..2de2d92f07 100644 --- a/packages/perspective/src/js/perspective.js +++ b/packages/perspective/src/js/perspective.js @@ -6,10 +6,6 @@ * the Apache License 2.0. The full license can be found in the LICENSE file. * */ - -// TODO remove this when is_unit_config is fully in c++ -import {isEqual} from "underscore"; - import * as defaults from "./config/constants.js"; import {get_type_config} from "./config/index.js"; import {DataAccessor} from "./data_accessor"; @@ -153,14 +149,17 @@ export default function(Module) { const num_columns = this.table.columns().length; + // TODO: verify that a different order of the same num_cols as the + // table is valid as a unit context. this.is_unit_config = this.table.index === "" && sides === 0 && - isEqual(this.view_config.columns.length, num_columns) && + this.view_config.columns.length === num_columns && this.view_config.row_pivots.length === 0 && this.view_config.column_pivots.length === 0 && this.view_config.filter.length === 0 && - this.view_config.sort.length === 0; + this.view_config.sort.length === 0 && + this.view_config.computed_columns.length === 0; if (this.is_unit_config) { this._View = __MODULE__.make_view_unit(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); diff --git a/packages/perspective/test/js/constructors.js b/packages/perspective/test/js/constructors.js index 14a937ff2a..b084faed9b 100644 --- a/packages/perspective/test/js/constructors.js +++ b/packages/perspective/test/js/constructors.js @@ -10,51 +10,51 @@ const papaparse = require("papaparse"); const moment = require("moment"); const arrows = require("./test_arrows.js"); -var data = [ +const data = [ {x: 1, y: "a", z: true}, {x: 2, y: "b", z: false}, {x: 3, y: "c", z: true}, {x: 4, y: "d", z: false} ]; -var col_data = { +const col_data = { x: [1, 2, 3, 
4], y: ["a", "b", "c", "d"], z: [true, false, true, false] }; -var meta = { +const meta = { x: "integer", y: "string", z: "boolean" }; -var data_3 = [ +const data_3 = [ {w: 1.5, x: 1, y: "a", z: true}, {w: 2.5, x: 2, y: "b", z: false}, {w: 3.5, x: 3, y: "c", z: true}, {w: 4.5, x: 4, y: "d", z: false} ]; -var data_7 = { +const data_7 = { w: [1.5, 2.5, 3.5, 4.5], x: [1, 2, 3, 4], y: ["a", "b", "c", "d"], z: [true, false, true, false] }; -var int_in_string = [{a: "1"}, {a: "2"}, {a: "12345"}]; +const int_in_string = [{a: "1"}, {a: "2"}, {a: "12345"}]; -var float_in_string = [{a: "1.5"}, {a: "2.5"}, {a: "12345.56789"}]; +const float_in_string = [{a: "1.5"}, {a: "2.5"}, {a: "12345.56789"}]; -var meta_3 = { +const meta_3 = { w: "float", x: "integer", y: "string", z: "boolean" }; -var arrow_result = [ +const arrow_result = [ { f32: 1.5, f64: 1.5, @@ -267,36 +267,52 @@ for (const k in arrow_date_data) { arrow_date_data[k] = arrow_date_data[k].map(d => (d ? new Date(d).getTime() : null)); } -var dt = () => { +const dt = () => { let dt = new Date(); dt.setHours(4); dt.setMinutes(12); return dt; }; -var data_4 = [{v: dt()}]; +const data_4 = [{v: dt()}]; -var data_5 = [{v: "11-09-2017"}]; +const data_5 = [{v: "11-09-2017"}]; -var meta_4 = {v: "datetime"}; +const meta_4 = {v: "datetime"}; -var csv = "x,y,z\n1,a,true\n2,b,false\n3,c,true\n4,d,false"; +const csv = "x,y,z\n1,a,true\n2,b,false\n3,c,true\n4,d,false"; -var data_6 = [{x: "Å¡"}]; +const data_6 = [{x: "Å¡"}]; -var int_float_data = [ +const int_float_data = [ {int: 1, float: 2.25}, {int: 2, float: 3.5}, {int: 3, float: 4.75}, {int: 4, float: 5.25} ]; -var int_float_string_data = [ +const int_float_string_data = [ {int: 1, float: 2.25, string: "a"}, {int: 2, float: 3.5, string: "b"}, {int: 3, float: 4.75, string: "c"}, {int: 4, float: 5.25, string: "d"} ]; -var datetime_data = [ + +// out of order to make sure we can read out of perspective in insertion +// order, not pkey order. 
+const all_types_data = [ + {int: 4, float: 5.25, string: "d", date: new Date(2020, 3, 15), datetime: new Date(2020, 0, 15, 23, 30), boolean: true}, + {int: 3, float: 4.75, string: "c", date: new Date(2020, 2, 15), datetime: new Date(2020, 0, 15, 18, 30), boolean: false}, + {int: 2, float: 3.5, string: "b", date: new Date(2020, 1, 15), datetime: new Date(2020, 0, 15, 12, 30), boolean: true}, + {int: 1, float: 2.25, string: "a", date: new Date(2020, 0, 15), datetime: new Date(2020, 0, 15, 6, 30), boolean: false}, + // values above should be replaced with these values below, due to + // indexing + {int: 4, float: 5.25, string: "d", date: new Date(2020, 3, 15), datetime: new Date(2020, 0, 15, 23, 30), boolean: true}, + {int: 3, float: 4.75, string: "c", date: new Date(2020, 2, 15), datetime: new Date(2020, 0, 15, 18, 30), boolean: false}, + {int: 2, float: 3.5, string: "b", date: new Date(2020, 1, 15), datetime: new Date(2020, 0, 15, 12, 30), boolean: true}, + {int: 1, float: 2.25, string: "a", date: new Date(2020, 0, 15), datetime: new Date(2020, 0, 15, 6, 30), boolean: false} +]; + +const datetime_data = [ {datetime: new Date(), int: 1}, {datetime: new Date(), int: 1}, {datetime: new Date(), int: 2}, @@ -1126,5 +1142,67 @@ module.exports = perspective => { expect(await table.get_limit()).toEqual(2); }); }); + + describe("Indexed table constructors", function() { + it("Should index on an integer column", async function() { + const table = perspective.table(all_types_data, {index: "int"}); + const view = table.view(); + expect(await table.size()).toEqual(4); + expect(await view.to_json()).toEqual([ + {int: 1, float: 2.25, string: "a", date: 1579046400000, datetime: 1579069800000, boolean: false}, + {int: 2, float: 3.5, string: "b", date: 1581724800000, datetime: 1579091400000, boolean: true}, + {int: 3, float: 4.75, string: "c", date: 1584230400000, datetime: 1579113000000, boolean: false}, + {int: 4, float: 5.25, string: "d", date: 1586908800000, datetime: 
1579131000000, boolean: true} + ]); + }); + + it("Should index on a float column", async function() { + const table = perspective.table(all_types_data, {index: "float"}); + const view = table.view(); + expect(await table.size()).toEqual(4); + expect(await view.to_json()).toEqual([ + {int: 1, float: 2.25, string: "a", date: 1579046400000, datetime: 1579069800000, boolean: false}, + {int: 2, float: 3.5, string: "b", date: 1581724800000, datetime: 1579091400000, boolean: true}, + {int: 3, float: 4.75, string: "c", date: 1584230400000, datetime: 1579113000000, boolean: false}, + {int: 4, float: 5.25, string: "d", date: 1586908800000, datetime: 1579131000000, boolean: true} + ]); + }); + + it("Should index on a string column", async function() { + const table = perspective.table(all_types_data, {index: "string"}); + const view = table.view(); + expect(await table.size()).toEqual(4); + expect(await view.to_json()).toEqual([ + {int: 1, float: 2.25, string: "a", date: 1579046400000, datetime: 1579069800000, boolean: false}, + {int: 2, float: 3.5, string: "b", date: 1581724800000, datetime: 1579091400000, boolean: true}, + {int: 3, float: 4.75, string: "c", date: 1584230400000, datetime: 1579113000000, boolean: false}, + {int: 4, float: 5.25, string: "d", date: 1586908800000, datetime: 1579131000000, boolean: true} + ]); + }); + + it("Should index on a date column", async function() { + const table = perspective.table(all_types_data, {index: "date"}); + const view = table.view(); + expect(await table.size()).toEqual(4); + expect(await view.to_json()).toEqual([ + {int: 1, float: 2.25, string: "a", date: 1579046400000, datetime: 1579069800000, boolean: false}, + {int: 2, float: 3.5, string: "b", date: 1581724800000, datetime: 1579091400000, boolean: true}, + {int: 3, float: 4.75, string: "c", date: 1584230400000, datetime: 1579113000000, boolean: false}, + {int: 4, float: 5.25, string: "d", date: 1586908800000, datetime: 1579131000000, boolean: true} + ]); + }); + + 
it("Should index on a datetime column", async function() { + const table = perspective.table(all_types_data, {index: "datetime"}); + const view = table.view(); + expect(await table.size()).toEqual(4); + expect(await view.to_json()).toEqual([ + {int: 1, float: 2.25, string: "a", date: 1579046400000, datetime: 1579069800000, boolean: false}, + {int: 2, float: 3.5, string: "b", date: 1581724800000, datetime: 1579091400000, boolean: true}, + {int: 3, float: 4.75, string: "c", date: 1584230400000, datetime: 1579113000000, boolean: false}, + {int: 4, float: 5.25, string: "d", date: 1586908800000, datetime: 1579131000000, boolean: true} + ]); + }); + }); }); }; diff --git a/packages/perspective/test/js/delta.js b/packages/perspective/test/js/delta.js index 4e738165f6..5816f61997 100644 --- a/packages/perspective/test/js/delta.js +++ b/packages/perspective/test/js/delta.js @@ -7,6 +7,8 @@ * */ +const _ = require("underscore"); + let data = [ {x: 1, y: "a", z: true}, {x: 2, y: "b", z: false}, @@ -249,6 +251,153 @@ module.exports = perspective => { }); }); + describe("0-sided row delta, randomized column order", function() { + it("returns changed rows", async function(done) { + let table = perspective.table(data, {index: "x"}); + let columns = _.shuffle(await table.columns()); + let view = table.view({ + columns: columns + }); + view.on_update( + async function(updated) { + const expected = [ + {x: 1, y: "string1", z: true}, + {x: 2, y: "string2", z: false} + ]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(partial_change_y); + }); + + it("returns changed rows from schema", async function(done) { + let table = perspective.table( + { + x: "integer", + y: "string", + z: "boolean" + }, + {index: "x"} + ); + let columns = _.shuffle(await table.columns()); + let view = table.view({ + columns: columns + }); + view.on_update( + async function(updated) { + const expected = [ + {x: 1, 
y: "d", z: false}, + {x: 2, y: "b", z: false}, + {x: 3, y: "c", z: true} + ]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update([ + {x: 1, y: "a", z: true}, + {x: 2, y: "b", z: false}, + {x: 3, y: "c", z: true}, + {x: 1, y: "d", z: false} + ]); + }); + + it("returns added rows", async function(done) { + let table = perspective.table(data); + let columns = _.shuffle(await table.columns()); + let view = table.view({ + columns: columns + }); + view.on_update( + async function(updated) { + const expected = [ + {x: 1, y: "string1", z: null}, + {x: 2, y: "string2", z: null} + ]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(partial_change_y); + }); + + it("returns added rows from schema", async function(done) { + let table = perspective.table({ + x: "integer", + y: "string", + z: "boolean" + }); + let columns = _.shuffle(await table.columns()); + let view = table.view({ + columns: columns + }); + view.on_update( + async function(updated) { + await match_delta(perspective, updated.delta, data); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(data); + }); + + it("returns deleted columns", async function(done) { + let table = perspective.table(data, {index: "x"}); + let columns = _.shuffle(await table.columns()); + let view = table.view({ + columns: columns + }); + view.on_update( + async function(updated) { + const expected = [ + {x: 1, y: null, z: true}, + {x: 4, y: null, z: false} + ]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update([ + {x: 1, y: null}, + {x: 4, y: null} + ]); + }); + + it("returns changed rows in non-sequential update", async function(done) { + let table = perspective.table(data, {index: "x"}); + let columns = _.shuffle(await 
table.columns()); + let view = table.view({ + columns: columns + }); + view.on_update( + async function(updated) { + const expected = partial_change_nonseq; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(partial_change_nonseq); + }); + }); + describe("1-sided row delta", function() { it("returns changed rows", async function(done) { let table = perspective.table(data, {index: "x"}); diff --git a/packages/perspective/test/js/multiple.js b/packages/perspective/test/js/multiple.js index fed0499ddf..4e0335350c 100644 --- a/packages/perspective/test/js/multiple.js +++ b/packages/perspective/test/js/multiple.js @@ -21,6 +21,8 @@ var arrow_result = [ {f32: 5.5, f64: 5.5, i64: 5, i32: 5, i16: 5, i8: 5, bool: true, char: "d", dict: "d", datetime: +new Date("2018-01-29")} ]; +const _ = require("underscore"); + module.exports = perspective => { describe("Multiple Perspectives", function() { it("Constructs table using data generated by to_arrow()", async function() { @@ -39,5 +41,28 @@ module.exports = perspective => { table.delete(); table2.delete(); }); + + it("Constructs table using data in random column order generated by to_arrow()", async function() { + let table = perspective.table(arrow_result); + let columns = _.shuffle(await table.columns()); + let view = table.view({ + columns: columns + }); + let result = await view.to_arrow(); + + let table2 = perspective.table(result); + let columns2 = _.shuffle(await table2.columns()); + let view2 = table2.view({ + columns: columns2 + }); + let result2 = await view2.to_json(); + + expect(result2).toEqual(arrow_result); + + view.delete(); + view2.delete(); + table.delete(); + table2.delete(); + }); }); }; diff --git a/packages/perspective/test/js/updates.js b/packages/perspective/test/js/updates.js index ab20de2edc..49743d4b2c 100644 --- a/packages/perspective/test/js/updates.js +++ b/packages/perspective/test/js/updates.js @@ -98,6 
+98,7 @@ module.exports = perspective => { const view = table.view(); table.remove([1, 2]); const result = await view.to_json(); + expect(await view.num_rows()).toEqual(2); expect(result.length).toEqual(2); expect(result).toEqual(data.slice(2, 4)); // expect(await table.size()).toEqual(2); @@ -110,6 +111,7 @@ module.exports = perspective => { const view = table.view(); table.remove([1, 2]); const result = await view.to_json(); + expect(await view.num_rows()).toEqual(2); expect(result.length).toEqual(2); expect(result).toEqual(data.slice(2, 4)); // expect(await table.size()).toEqual(2); @@ -117,6 +119,115 @@ module.exports = perspective => { table.delete(); }); + it("after an `update()`, string pkey", async function() { + const table = perspective.table(meta, {index: "y"}); + table.update(data); + const view = table.view(); + table.remove(["a", "b"]); + const result = await view.to_json(); + expect(await view.num_rows()).toEqual(2); + expect(result.length).toEqual(2); + expect(result).toEqual(data.slice(2, 4)); + // expect(await table.size()).toEqual(2); + view.delete(); + table.delete(); + }); + + it("after a regular data load, string pkey", async function() { + const table = perspective.table(data, {index: "y"}); + const view = table.view(); + table.remove(["a", "b"]); + const result = await view.to_json(); + expect(await view.num_rows()).toEqual(2); + expect(result.length).toEqual(2); + expect(result).toEqual(data.slice(2, 4)); + // expect(await table.size()).toEqual(2); + view.delete(); + table.delete(); + }); + + it("after an update, date pkey", async function() { + const datetimes = [new Date(2020, 0, 15), new Date(2020, 1, 15), new Date(2020, 2, 15), new Date(2020, 3, 15)]; + const table = perspective.table( + { + x: "integer", + y: "date", + z: "float" + }, + {index: "y"} + ); + table.update({ + x: [1, 2, 3, 4], + y: datetimes, + z: [1.5, 2.5, 3.5, 4.5] + }); + const view = table.view(); + table.remove(datetimes.slice(0, 2)); + const result = await 
view.to_columns(); + expect(await view.num_rows()).toEqual(2); + expect(result).toEqual({ + x: [3, 4], + y: [1584230400000, 1586908800000], + z: [3.5, 4.5] + }); + // expect(await table.size()).toEqual(2); + view.delete(); + table.delete(); + }); + + it("after an update, datetime pkey", async function() { + const datetimes = [new Date(2020, 0, 15), new Date(2020, 1, 15), new Date(2020, 2, 15), new Date(2020, 3, 15)]; + const table = perspective.table( + { + x: "integer", + y: "datetime", + z: "float" + }, + {index: "y"} + ); + table.update({ + x: [1, 2, 3, 4], + y: datetimes, + z: [1.5, 2.5, 3.5, 4.5] + }); + const view = table.view(); + table.remove(datetimes.slice(0, 2)); + const result = await view.to_columns(); + expect(await view.num_rows()).toEqual(2); + expect(result).toEqual({ + x: [3, 4], + y: [1584230400000, 1586908800000], + z: [3.5, 4.5] + }); + // expect(await table.size()).toEqual(2); + view.delete(); + table.delete(); + }); + + it("after a regular data load, datetime pkey", async function() { + const datetimes = [new Date(2020, 0, 15), new Date(2020, 1, 15), new Date(2020, 2, 15), new Date(2020, 3, 15)]; + const table = perspective.table( + { + x: [1, 2, 3, 4], + y: datetimes, + z: [1.5, 2.5, 3.5, 4.5] + }, + {index: "y"} + ); + const view = table.view(); + table.remove(datetimes.slice(0, 2)); + const result = await view.to_columns(); + expect(await view.num_rows()).toEqual(2); + expect(result).toEqual({ + x: [3, 4], + y: [1584230400000, 1586908800000], + z: [3.5, 4.5] + }); + // expect(await table.size()).toEqual(2); + view.delete(); + table.delete(); + }); + it("multiple single element removes", async function() { const table = perspective.table(meta, {index: "x"}); for (let i = 0; i < 100; i++) { @@ -971,7 +1082,7 @@ module.exports = perspective => { table.delete(); }); - it("multiple updates on {index: 'x'}", async function() { + it("multiple updates on int {index: 'x'}", async function() { var table = perspective.table(data, {index: "x"}); var 
view = table.view(); table.update(data); @@ -983,6 +1094,37 @@ module.exports = perspective => { table.delete(); }); + it("multiple updates on str {index: 'y'}", async function() { + var table = perspective.table(data, {index: "y"}); + var view = table.view(); + table.update(data); + table.update(data); + table.update(data); + let result = await view.to_json(); + expect(result).toEqual(data); + view.delete(); + table.delete(); + }); + + it("multiple updates on str {index: 'y'} with new, old, null pkey", async function() { + var table = perspective.table(data, {index: "y"}); + var view = table.view(); + table.update([{x: 1, y: "a", z: true}]); + table.update([{x: 100, y: null, z: true}]); + table.update([{x: 123, y: "abc", z: true}]); + let result = await view.to_json(); + expect(result).toEqual([ + {x: 100, y: null, z: true}, + {x: 1, y: "a", z: true}, + {x: 123, y: "abc", z: true}, + {x: 2, y: "b", z: false}, + {x: 3, y: "c", z: true}, + {x: 4, y: "d", z: false} + ]); + view.delete(); + table.delete(); + }); + it("{index: 'x'} with overlap", async function() { var table = perspective.table(data, {index: "x"}); var view = table.view(); From 6ca1db140e7074c33ba2c5ee51ee3f7728a43d60 Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Thu, 15 Oct 2020 14:49:15 -0400 Subject: [PATCH 3/5] Unit context can be created with an arbitrary subset of columns unit context = no pivot/sort/filter/computed, any column order/num of columns read m_delta_pkeys instead of get_delta_pkeys() cleanup --- cpp/perspective/src/cpp/config.cpp | 68 +- cpp/perspective/src/cpp/context_unit.cpp | 272 +++---- cpp/perspective/src/cpp/context_zero.cpp | 714 +++++++++--------- cpp/perspective/src/cpp/emscripten.cpp | 2 +- cpp/perspective/src/cpp/flat_traversal.cpp | 41 +- cpp/perspective/src/cpp/view_config.cpp | 15 - .../src/include/perspective/config.h | 57 +- .../src/include/perspective/context_unit.h | 16 - .../src/include/perspective/view_config.h | 13 - .../bench/perspective.benchmark.js | 36 -
packages/perspective/src/js/perspective.js | 5 - packages/perspective/test/js/delta.js | 127 ++++ packages/perspective/test/js/to_format.js | 256 +++++++ packages/perspective/test/js/updates.js | 31 + 14 files changed, 1000 insertions(+), 653 deletions(-) diff --git a/cpp/perspective/src/cpp/config.cpp b/cpp/perspective/src/cpp/config.cpp index 12bc8455c2..cb27eddf61 100644 --- a/cpp/perspective/src/cpp/config.cpp +++ b/cpp/perspective/src/cpp/config.cpp @@ -12,27 +12,6 @@ namespace perspective { -// Construct view config -t_config::t_config(const std::vector& row_pivots, - const std::vector& col_pivots, const std::vector& aggregates, - const std::vector& sortspecs, const std::vector& col_sortspecs, - t_filter_op combiner, const std::vector& fterms, - const std::vector& col_names, bool column_only) - : m_column_only(column_only) - , m_sortspecs(sortspecs) - , m_col_sortspecs(col_sortspecs) - , m_aggregates(aggregates) - , m_detail_columns(col_names) // this should be the columns property - , m_fterms(fterms) - , m_combiner(combiner) { - for (const auto& p : row_pivots) { - m_row_pivots.push_back(t_pivot(p)); - } - for (const auto& p : col_pivots) { - m_col_pivots.push_back(t_pivot(p)); - } -}; - // t_ctx0 t_config::t_config( const std::vector& detail_columns, @@ -41,10 +20,22 @@ t_config::t_config( const std::vector& computed_columns) : m_detail_columns(detail_columns) , m_fterms(fterms) - , m_combiner(combiner) , m_computed_columns(computed_columns) + , m_combiner(combiner) , m_fmode(FMODE_SIMPLE_CLAUSES) { setup(m_detail_columns); + if (m_row_pivots.empty() && + m_col_pivots.empty() && + m_sortby.empty() && + m_sortspecs.empty() && + m_col_sortspecs.empty() && + m_detail_columns.empty() && + m_fterms.empty() && + m_computed_columns.empty()) { + m_is_trivial_config = true; + } else { + m_is_trivial_config = false; + } } // t_ctx1 @@ -55,10 +46,11 @@ t_config::t_config( t_filter_op combiner, const std::vector& computed_columns) : m_aggregates(aggregates) - , 
m_totals(TOTALS_BEFORE) , m_fterms(fterms) - , m_combiner(combiner) , m_computed_columns(computed_columns) + , m_combiner(combiner) + , m_is_trivial_config(false) + , m_totals(TOTALS_BEFORE) , m_fmode(FMODE_SIMPLE_CLAUSES) { for (const auto& p : row_pivots) { m_row_pivots.push_back(t_pivot(p)); @@ -76,12 +68,13 @@ t_config::t_config( t_filter_op combiner, const std::vector& computed_columns, bool column_only) - : m_column_only(column_only) - , m_aggregates(aggregates) - , m_totals(totals) + : m_aggregates(aggregates) , m_fterms(fterms) - , m_combiner(combiner) , m_computed_columns(computed_columns) + , m_combiner(combiner) + , m_column_only(column_only) + , m_is_trivial_config(false) + , m_totals(totals) , m_fmode(FMODE_SIMPLE_CLAUSES) { for (const auto& p : row_pivots) { m_row_pivots.push_back(t_pivot(p)); @@ -101,10 +94,11 @@ t_config::t_config(const std::vector& row_pivots, const std::vector& col_pivots, const std::vector& aggregates, const t_totals totals, t_filter_op combiner, const std::vector& fterms) : m_aggregates(aggregates) - , m_totals(totals) , m_fterms(fterms) , m_combiner(combiner) - , m_fmode(FMODE_SIMPLE_CLAUSES) { + , m_is_trivial_config(false) + , m_totals(totals) + , m_fmode(FMODE_SIMPLE_CLAUSES){ for (const auto& p : row_pivots) { m_row_pivots.push_back(t_pivot(p)); } @@ -120,6 +114,7 @@ t_config::t_config( const std::vector& row_pivots, const std::vector& aggregates) : m_row_pivots(row_pivots) , m_aggregates(aggregates) + , m_is_trivial_config(false) , m_fmode(FMODE_SIMPLE_CLAUSES) { setup(m_detail_columns, std::vector{}, std::vector{}); } @@ -127,9 +122,10 @@ t_config::t_config( t_config::t_config( const std::vector& row_pivots, const std::vector& aggregates) : m_aggregates(aggregates) - , m_totals(TOTALS_BEFORE) , m_combiner(FILTER_OP_AND) - , m_fmode(FMODE_SIMPLE_CLAUSES) { + , m_is_trivial_config(false) + , m_totals(TOTALS_BEFORE) + , m_fmode(FMODE_SIMPLE_CLAUSES){ for (const auto& p : row_pivots) { m_row_pivots.push_back(t_pivot(p)); } @@ 
-139,8 +135,9 @@ t_config::t_config( t_config::t_config(const std::vector& row_pivots, const t_aggspec& agg) : m_aggregates(std::vector{agg}) - , m_totals(TOTALS_BEFORE) , m_combiner(FILTER_OP_AND) + , m_is_trivial_config(false) + , m_totals(TOTALS_BEFORE) , m_fmode(FMODE_SIMPLE_CLAUSES) { for (const auto& p : row_pivots) { m_row_pivots.push_back(t_pivot(p)); @@ -215,6 +212,11 @@ t_config::setup(const std::vector& detail_columns, populate_sortby(m_col_pivots); } +bool +t_config::is_trivial_config() { + return m_is_trivial_config; +} + void t_config::populate_sortby(const std::vector& pivots) { for (t_index idx = 0, loop_end = pivots.size(); idx < loop_end; ++idx) { diff --git a/cpp/perspective/src/cpp/context_unit.cpp b/cpp/perspective/src/cpp/context_unit.cpp index d03b04ec3b..1edf41d24f 100644 --- a/cpp/perspective/src/cpp/context_unit.cpp +++ b/cpp/perspective/src/cpp/context_unit.cpp @@ -32,14 +32,6 @@ t_ctxunit::init() { m_init = true; } - -std::string -t_ctxunit::repr() const { - std::stringstream ss; - ss << "t_ctxunit<" << this << ">"; - return ss.str(); -} - void t_ctxunit::step_begin() { if (!m_init) @@ -53,64 +45,82 @@ t_ctxunit::step_begin() { void t_ctxunit::step_end() {} -t_index -t_ctxunit::get_row_count() const { - return m_gstate->num_rows(); -} +/** + * @brief Notify the context with new data when the `t_gstate` master table is + * not empty, and being updated with new data. 
+ * + * @param flattened + * @param delta + * @param prev + * @param curr + * @param transitions + * @param existed + */ +void +t_ctxunit::notify(const t_data_table& flattened, const t_data_table& delta, + const t_data_table& prev, const t_data_table& curr, const t_data_table& transitions, + const t_data_table& existed) { + t_uindex nrecs = flattened.size(); -t_index -t_ctxunit::get_column_count() const { - return m_config.get_num_columns(); -} + std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); + std::shared_ptr op_sptr = flattened.get_const_column("psp_op"); + const t_column* pkey_col = pkey_sptr.get(); + const t_column* op_col = op_sptr.get(); -std::vector -t_ctxunit::unity_get_row_path(t_uindex idx) const { - return std::vector(mktscalar(idx)); -} + bool delete_encountered = false; -std::vector -t_ctxunit::unity_get_column_path(t_uindex idx) const { - return std::vector(); -} + for (t_uindex idx = 0; idx < nrecs; ++idx) { + // pkeys are always integers >= 0 - no need to use internal + // symtable to dereference. + t_tscalar pkey = pkey_col->get_scalar(idx); + std::uint8_t op_ = *(op_col->get_nth(idx)); + t_op op = static_cast(op_); -t_uindex -t_ctxunit::unity_get_row_depth(t_uindex ridx) const { - return 0; -} + switch (op) { + case OP_INSERT: {} + break; + case OP_DELETE: { + delete_encountered = true; + } break; + default: { PSP_COMPLAIN_AND_ABORT("Unexpected OP"); } break; + } -t_uindex -t_ctxunit::unity_get_column_depth(t_uindex cidx) const { - return 0; -} + // add the pkey for row delta + add_delta_pkey(pkey); + } -std::vector -t_ctxunit::unity_get_column_names() const { - return get_column_names(); + m_has_delta = m_delta_pkeys.size() > 0 || delete_encountered; } -t_uindex -t_ctxunit::unity_get_column_count() const { - return get_column_count(); -} +/** + * @brief Notify the context with new data after the `t_gstate`'s master table + * has been updated for the first time with data. 
+ * + * @param flattened + */ +void +t_ctxunit::notify(const t_data_table& flattened) { + t_uindex nrecs = flattened.size(); + std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); + const t_column* pkey_col = pkey_sptr.get(); -t_uindex -t_ctxunit::unity_get_row_count() const { - return get_row_count(); -} + m_has_delta = true; -bool -t_ctxunit::unity_get_row_expanded(t_uindex idx) const { - return false; -} + // TODO: pkey and idx are equal, except idx is not a t_tscalar. I don't + // think there is a difference between accessing the pkey column and + // creating a brand new scalar, as get_scalar always returns a copy. We + // could also simply store the row indices but there are some problems + // with correctness that I couldn't nail down. + for (t_uindex idx = 0; idx < nrecs; ++idx) { + t_tscalar pkey = pkey_col->get_scalar(idx); -bool -t_ctxunit::unity_get_column_expanded(t_uindex idx) const { - return false; + // Add primary key to track row delta + add_delta_pkey(pkey); + } } /** - * @brief Given a start/end row and column index, return the underlying data - * for the requested subset. + * @brief Given a start/end row and column, return the data for the subset. * * @param start_row * @param end_row @@ -119,7 +129,11 @@ t_ctxunit::unity_get_column_expanded(t_uindex idx) const { * @return std::vector */ std::vector -t_ctxunit::get_data(t_index start_row, t_index end_row, t_index start_col, t_index end_col) const { +t_ctxunit::get_data( + t_index start_row, + t_index end_row, + t_index start_col, + t_index end_col) const { t_uindex ctx_nrows = get_row_count(); t_uindex ctx_ncols = get_column_count(); @@ -154,11 +168,11 @@ t_ctxunit::get_data(t_index start_row, t_index end_row, t_index start_col, t_ind } /** - * @brief Given a vector of row indices, which may not be contiguous, return the underlying data - * for these rows. + * @brief Given a vector of row indices, which may not be contiguous, + * return the underlying data for these rows. 
* * @param rows a vector of row indices - * @return std::vector a vector of scalars containing the underlying data + * @return std::vector a vector of scalars containing data */ std::vector t_ctxunit::get_data(const std::vector& rows) const { @@ -208,16 +222,13 @@ t_ctxunit::get_data(const std::vector& pkeys) const { return values; } -t_tscalar -t_ctxunit::get_column_name(t_index idx) { - std::string empty(""); - - if (idx >= get_column_count()) - return m_symtable.get_interned_tscalar(empty.c_str()); - - return m_symtable.get_interned_tscalar(m_config.col_at(idx).c_str()); -} - +/** + * @brief Returns a vector of primary keys for the specified cells, + * reading from the gnode_state's master table instead of from a traversal. + * + * @param cells + * @return std::vector + */ std::vector t_ctxunit::get_pkeys(const std::vector>& cells) const { // Validate cells @@ -249,6 +260,22 @@ t_ctxunit::get_pkeys(const std::vector>& cells) co return rval; } +/** + * @brief Returns a string column name using the context's config. + * + * @param idx + * @return t_tscalar + */ +t_tscalar +t_ctxunit::get_column_name(t_index idx) { + std::string empty(""); + + if (idx >= get_column_count()) + return m_symtable.get_interned_tscalar(empty.c_str()); + + return m_symtable.get_interned_tscalar(m_config.col_at(idx).c_str()); +} + /** * @brief Returns a `t_rowdelta` struct containing data from updated rows * and the updated row indices. @@ -258,8 +285,7 @@ t_ctxunit::get_pkeys(const std::vector>& cells) co t_rowdelta t_ctxunit::get_row_delta() { bool rows_changed = m_rows_changed; - tsl::hopscotch_set pkeys = get_delta_pkeys(); - std::vector pkey_vector(pkeys.begin(), pkeys.end()); + std::vector pkey_vector(m_delta_pkeys.begin(), m_delta_pkeys.end()); // Sort pkeys - they will always be integers >= 0, as the table has // no index set. 
@@ -300,72 +326,59 @@ t_ctxunit::sidedness() const { return 0; } -/** - * @brief Notify the context with new data when the `t_gstate` master table is - * not empty, and being updated with new data. - * - * @param flattened - * @param delta - * @param prev - * @param curr - * @param transitions - * @param existed - */ -void -t_ctxunit::notify(const t_data_table& flattened, const t_data_table& delta, - const t_data_table& prev, const t_data_table& curr, const t_data_table& transitions, - const t_data_table& existed) { - t_uindex nrecs = flattened.size(); +t_index +t_ctxunit::get_row_count() const { + return m_gstate->num_rows(); +} - std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); - std::shared_ptr op_sptr = flattened.get_const_column("psp_op"); - const t_column* pkey_col = pkey_sptr.get(); - const t_column* op_col = op_sptr.get(); +t_index +t_ctxunit::get_column_count() const { + return m_config.get_num_columns(); +} - bool delete_encountered = false; +std::vector +t_ctxunit::unity_get_row_path(t_uindex idx) const { + return std::vector(mktscalar(idx)); +} - // Context does not have filters applied - for (t_uindex idx = 0; idx < nrecs; ++idx) { - t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); - std::uint8_t op_ = *(op_col->get_nth(idx)); - t_op op = static_cast(op_); +std::vector +t_ctxunit::unity_get_column_path(t_uindex idx) const { + return std::vector(); +} - switch (op) { - case OP_INSERT: {} - break; - case OP_DELETE: { - delete_encountered = true; - } break; - default: { PSP_COMPLAIN_AND_ABORT("Unexpected OP"); } break; - } +t_uindex +t_ctxunit::unity_get_row_depth(t_uindex ridx) const { + return 0; +} - // add the pkey for row delta - add_delta_pkey(pkey); - } +t_uindex +t_ctxunit::unity_get_column_depth(t_uindex cidx) const { + return 0; +} - m_has_delta = m_delta_pkeys.size() > 0 || delete_encountered; +std::vector +t_ctxunit::unity_get_column_names() const { + return get_column_names(); } -/** - * @brief 
Notify the context with new data after the `t_gstate`'s master table - * has been updated for the first time with data. - * - * @param flattened - */ -void -t_ctxunit::notify(const t_data_table& flattened) { - t_uindex nrecs = flattened.size(); - std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); - const t_column* pkey_col = pkey_sptr.get(); +t_uindex +t_ctxunit::unity_get_column_count() const { + return get_column_count(); +} - m_has_delta = true; +t_uindex +t_ctxunit::unity_get_row_count() const { + return get_row_count(); +} - for (t_uindex idx = 0; idx < nrecs; ++idx) { - t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); +bool +t_ctxunit::unity_get_row_expanded(t_uindex idx) const { + return false; +} - // Add primary key to track row delta - add_delta_pkey(pkey); - } +bool +t_ctxunit::unity_get_column_expanded(t_uindex idx) const { + return false; } /** @@ -383,9 +396,6 @@ t_ctxunit::has_deltas() const { return m_has_delta; } -void -t_ctxunit::pprint() const {} - t_dtype t_ctxunit::get_column_dtype(t_uindex idx) const { if (idx >= static_cast(get_column_count())) @@ -404,4 +414,14 @@ t_ctxunit::clear_deltas() { m_has_delta = false; } +std::string +t_ctxunit::repr() const { + std::stringstream ss; + ss << "t_ctxunit<" << this << ">"; + return ss.str(); +} + +void +t_ctxunit::pprint() const {} + } // end namespace perspective diff --git a/cpp/perspective/src/cpp/context_zero.cpp b/cpp/perspective/src/cpp/context_zero.cpp index 1d04268a55..c0181a9093 100644 --- a/cpp/perspective/src/cpp/context_zero.cpp +++ b/cpp/perspective/src/cpp/context_zero.cpp @@ -23,18 +23,23 @@ t_ctx0::t_ctx0() {} t_ctx0::t_ctx0(const t_schema& schema, const t_config& config) : t_ctxbase(schema, config) , m_has_delta(false) +{ -{} +} t_ctx0::~t_ctx0() { m_traversal.reset(); } -std::string -t_ctx0::repr() const { - std::stringstream ss; - ss << "t_ctx0<" << this << ">"; - return ss.str(); +void +t_ctx0::init() { + m_traversal = 
std::make_shared(); + m_deltas = std::make_shared(); + m_init = true; } +/** + * @brief When the gnode notifies the context with new data, clear deltas + * and prepare to reconcile new data with old. + */ void t_ctx0::step_begin() { if (!m_init) @@ -47,6 +52,10 @@ t_ctx0::step_begin() { m_traversal->step_begin(); } +/** + * @brief After all new rows have been processed, trigger the traversal's + * step_end() method which will reconcile traversal state. + */ void t_ctx0::step_end() { if (!has_deltas()) { @@ -56,305 +65,21 @@ t_ctx0::step_end() { m_traversal->step_end(); } -t_index -t_ctx0::get_row_count() const { - return m_traversal->size(); -} - -t_index -t_ctx0::get_column_count() const { - return m_config.get_num_columns(); -} - -/** - * @brief Given a start/end row and column index, return the underlying data for the requested - * subset. - * - * @param start_row - * @param end_row - * @param start_col - * @param end_col - * @return std::vector - */ -std::vector -t_ctx0::get_data(t_index start_row, t_index end_row, t_index start_col, t_index end_col) const { - t_uindex ctx_nrows = get_row_count(); - t_uindex ctx_ncols = get_column_count(); - auto ext = sanitize_get_data_extents( - ctx_nrows, ctx_ncols, start_row, end_row, start_col, end_col); - - t_index nrows = ext.m_erow - ext.m_srow; - t_index stride = ext.m_ecol - ext.m_scol; - std::vector values(nrows * stride); - - std::vector pkeys = m_traversal->get_pkeys(ext.m_srow, ext.m_erow); - auto none = mknone(); - - for (t_index cidx = ext.m_scol; cidx < ext.m_ecol; ++cidx) { - std::vector out_data(pkeys.size()); - m_gstate->read_column(m_config.col_at(cidx), pkeys, out_data); - - for (t_index ridx = ext.m_srow; ridx < ext.m_erow; ++ridx) { - auto v = out_data[ridx - ext.m_srow]; - - // todo: fix null handling - if (!v.is_valid()) - v.set(none); - - values[(ridx - ext.m_srow) * stride + (cidx - ext.m_scol)] = v; - } - } - - return values; -} - -/** - * @brief Given a vector of row indices, which may not be 
contiguous, return the underlying data - * for these rows. - * - * @param rows a vector of row indices - * @return std::vector a vector of scalars containing the underlying data - */ -std::vector -t_ctx0::get_data(const std::vector& rows) const { - t_uindex stride = get_column_count(); - std::vector values(rows.size() * stride); - std::vector pkeys = m_traversal->get_pkeys(rows); - - auto none = mknone(); - for (t_uindex cidx = 0; cidx < stride; ++cidx) { - std::vector out_data(rows.size()); - m_gstate->read_column(m_config.col_at(cidx), pkeys, out_data); - - for (t_uindex ridx = 0; ridx < rows.size(); ++ridx) { - auto v = out_data[ridx]; - - if (!v.is_valid()) - v.set(none); - - values[(ridx)*stride + (cidx)] = v; - } - } - - return values; -} - -void -t_ctx0::sort_by() { - reset_sortby(); -} - -void -t_ctx0::sort_by(const std::vector& sortby) { - if (sortby.empty()) - return; - m_traversal->sort_by(m_gstate, m_config, sortby); -} - -void -t_ctx0::reset_sortby() { - m_traversal->sort_by(m_gstate, m_config, std::vector()); -} - -t_tscalar -t_ctx0::get_column_name(t_index idx) { - std::string empty(""); - - if (idx >= get_column_count()) - return m_symtable.get_interned_tscalar(empty.c_str()); - - return m_symtable.get_interned_tscalar(m_config.col_at(idx).c_str()); -} - -void -t_ctx0::init() { - m_traversal = std::make_shared(); - m_deltas = std::make_shared(); - m_init = true; -} - -std::vector -t_ctx0::get_pkeys(const std::vector>& cells) const { - if (!m_traversal->validate_cells(cells)) { - std::vector rval; - return rval; - } - return m_traversal->get_pkeys(cells); -} - -std::vector -t_ctx0::get_all_pkeys(const std::vector>& cells) const { - if (!m_traversal->validate_cells(cells)) { - std::vector rval; - return rval; - } - return m_traversal->get_all_pkeys(cells); -} - -std::vector -t_ctx0::get_cell_data(const std::vector>& cells) const { - if (!m_traversal->validate_cells(cells)) { - std::vector rval; - return rval; - } - - t_uindex ncols = 
get_column_count(); - - for (const auto& c : cells) { - if (c.second >= ncols) { - std::vector rval; - return rval; - } - } - - // Order aligned with cells - std::vector pkeys = get_all_pkeys(cells); - std::vector out_data; - out_data.reserve(cells.size()); - - for (t_index idx = 0, loop_end = pkeys.size(); idx < loop_end; ++idx) { - std::string colname = m_config.col_at(cells[idx].second); - out_data.push_back(m_gstate->get(pkeys[idx], colname)); - } - - return out_data; -} - -/** - * @brief - * - * @param bidx - * @param eidx - * @return std::vector - */ -std::vector -t_ctx0::get_cell_delta(t_index bidx, t_index eidx) const { - tsl::hopscotch_set pkeys; - t_tscalar prev_pkey; - prev_pkey.set(t_none()); - - bidx = std::min(bidx, m_traversal->size()); - eidx = std::min(eidx, m_traversal->size()); - - std::vector rval; - - if (m_traversal->empty_sort_by()) { - std::vector pkey_vec = m_traversal->get_pkeys(bidx, eidx); - for (t_index idx = 0, loop_end = pkey_vec.size(); idx < loop_end; ++idx) { - const t_tscalar& pkey = pkey_vec[idx]; - t_index row = bidx + idx; - std::pair::type::iterator, - t_zcdeltas::index::type::iterator> - iters = m_deltas->get().equal_range(pkey); - for (t_zcdeltas::index::type::iterator iter = iters.first; - iter != iters.second; ++iter) { - t_cellupd cellupd; - cellupd.row = row; - cellupd.column = iter->m_colidx; - cellupd.old_value = iter->m_old_value; - cellupd.new_value = iter->m_new_value; - rval.push_back(cellupd); - } - } - } else { - for (t_zcdeltas::index::type::iterator iter - = m_deltas->get().begin(); - iter != m_deltas->get().end(); ++iter) { - if (prev_pkey != iter->m_pkey) { - pkeys.insert(iter->m_pkey); - prev_pkey = iter->m_pkey; - } - } - - tsl::hopscotch_map r_indices; - m_traversal->get_row_indices(pkeys, r_indices); - - for (t_zcdeltas::index::type::iterator iter - = m_deltas->get().begin(); - iter != m_deltas->get().end(); ++iter) { - t_index row = r_indices[iter->m_pkey]; - if (bidx <= row && row <= eidx) { - t_cellupd 
cellupd; - cellupd.row = row; - cellupd.column = iter->m_colidx; - cellupd.old_value = iter->m_old_value; - cellupd.new_value = iter->m_new_value; - rval.push_back(cellupd); - } - } - } - return rval; -} - -/** - * @brief Returns updated cells. - * - * @param bidx - * @param eidx - * @return t_stepdelta - */ -t_stepdelta -t_ctx0::get_step_delta(t_index bidx, t_index eidx) { - bidx = std::min(bidx, m_traversal->size()); - eidx = std::min(eidx, m_traversal->size()); - bool rows_changed = m_rows_changed || !m_traversal->empty_sort_by(); - t_stepdelta rval(rows_changed, m_columns_changed, get_cell_delta(bidx, eidx)); - m_deltas->clear(); - clear_deltas(); - return rval; -} - /** - * @brief Returns a `t_rowdelta` struct containing data from updated rows and the updated row - * indices. - * - * @return t_rowdelta + * @brief Given new data from the gnode, add/update/remove each row from the + * newly-processed data from the traversal. + * + * @param flattened + * @param delta + * @param prev + * @param curr + * @param transitions + * @param existed */ -t_rowdelta -t_ctx0::get_row_delta() { - bool rows_changed = m_rows_changed || !m_traversal->empty_sort_by(); - tsl::hopscotch_set pkeys = get_delta_pkeys(); - std::vector rows = m_traversal->get_row_indices(pkeys); - std::sort(rows.begin(), rows.end()); - std::vector data = get_data(rows); - t_rowdelta rval(rows_changed, rows.size(), data); - clear_deltas(); - return rval; -} - -const tsl::hopscotch_set& -t_ctx0::get_delta_pkeys() const { - return m_delta_pkeys; -} - -std::vector -t_ctx0::get_column_names() const { - return m_config.get_column_names(); -} - -std::vector -t_ctx0::get_sort_by() const { - return m_traversal->get_sort_by(); -} - -void -t_ctx0::reset() { - m_traversal->reset(); - m_deltas = std::make_shared(); - m_has_delta = false; -} - -t_index -t_ctx0::sidedness() const { - return 0; -} - void t_ctx0::notify(const t_data_table& flattened, const t_data_table& delta, const t_data_table& prev, const 
t_data_table& curr, const t_data_table& transitions, const t_data_table& existed) { - // Notify the context with new data when the `t_gstate` master table is - // not empty, and being updated with new data. psp_log_time(repr() + " notify.enter"); t_uindex nrecs = flattened.size(); std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); @@ -436,69 +161,263 @@ t_ctx0::notify(const t_data_table& flattened, const t_data_table& delta, default: { PSP_COMPLAIN_AND_ABORT("Unexpected OP"); } break; } - // add the pkey for row delta - add_delta_pkey(pkey); + // add the pkey for row delta + add_delta_pkey(pkey); + } + + psp_log_time(repr() + " notify.no_filter_path.updated_traversal"); + + m_has_delta = m_deltas->size() > 0 || m_delta_pkeys.size() > 0 || delete_encountered; + + psp_log_time(repr() + " notify.no_filter_path.exit"); +} + +/** + * @brief Given new data from the gnode after its first update (going from + * 0 rows to n > 0 rows), add each row to the traversal. + * + * @param flattened + */ +void +t_ctx0::notify(const t_data_table& flattened) { + t_uindex nrecs = flattened.size(); + std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); + std::shared_ptr op_sptr = flattened.get_const_column("psp_op"); + const t_column* pkey_col = pkey_sptr.get(); + const t_column* op_col = op_sptr.get(); + + m_has_delta = true; + + if (m_config.has_filters()) { + t_mask msk = filter_table_for_config(flattened, m_config); + + for (t_uindex idx = 0; idx < nrecs; ++idx) { + t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); + std::uint8_t op_ = *(op_col->get_nth(idx)); + t_op op = static_cast(op_); + + switch (op) { + case OP_INSERT: { + if (msk.get(idx)) { + m_traversal->add_row(m_gstate, m_config, pkey); + } + } break; + default: break; + } + + // Add primary key to track row delta + add_delta_pkey(pkey); + } + + return; + } + + for (t_uindex idx = 0; idx < nrecs; ++idx) { + t_tscalar pkey = 
m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); + std::uint8_t op_ = *(op_col->get_nth(idx)); + t_op op = static_cast(op_); + + switch (op) { + case OP_INSERT: { + m_traversal->add_row(m_gstate, m_config, pkey); + } break; + default: break; + } + + // Add primary key to track row delta + add_delta_pkey(pkey); + } +} + +/** + * @brief Given a start/end row and column index, return the underlying data + * for the requested subset. + * + * @param start_row + * @param end_row + * @param start_col + * @param end_col + * @return std::vector + */ +std::vector +t_ctx0::get_data(t_index start_row, t_index end_row, t_index start_col, t_index end_col) const { + t_uindex ctx_nrows = get_row_count(); + t_uindex ctx_ncols = get_column_count(); + auto ext = sanitize_get_data_extents( + ctx_nrows, ctx_ncols, start_row, end_row, start_col, end_col); + + t_index nrows = ext.m_erow - ext.m_srow; + t_index stride = ext.m_ecol - ext.m_scol; + std::vector values(nrows * stride); + + std::vector pkeys = m_traversal->get_pkeys(ext.m_srow, ext.m_erow); + auto none = mknone(); + + for (t_index cidx = ext.m_scol; cidx < ext.m_ecol; ++cidx) { + std::vector out_data(pkeys.size()); + m_gstate->read_column(m_config.col_at(cidx), pkeys, out_data); + + for (t_index ridx = ext.m_srow; ridx < ext.m_erow; ++ridx) { + auto v = out_data[ridx - ext.m_srow]; + + // todo: fix null handling + if (!v.is_valid()) + v.set(none); + + values[(ridx - ext.m_srow) * stride + (cidx - ext.m_scol)] = v; + } + } + + return values; +} + +/** + * @brief Given a vector of row indices, which may not be contiguous, + * return the underlying data for these rows. 
+ * + * @param rows a vector of row indices + * @return std::vector a vector of scalars containing the data + */ +std::vector +t_ctx0::get_data(const std::vector& rows) const { + t_uindex stride = get_column_count(); + std::vector values(rows.size() * stride); + std::vector pkeys = m_traversal->get_pkeys(rows); + + auto none = mknone(); + for (t_uindex cidx = 0; cidx < stride; ++cidx) { + std::vector out_data(rows.size()); + m_gstate->read_column(m_config.col_at(cidx), pkeys, out_data); + + for (t_uindex ridx = 0; ridx < rows.size(); ++ridx) { + auto v = out_data[ridx]; + + if (!v.is_valid()) + v.set(none); + + values[(ridx)*stride + (cidx)] = v; + } } - psp_log_time(repr() + " notify.no_filter_path.updated_traversal"); - + return values; +} - m_has_delta = m_deltas->size() > 0 || m_delta_pkeys.size() > 0 || delete_encountered; +void +t_ctx0::sort_by() { + reset_sortby(); +} - psp_log_time(repr() + " notify.no_filter_path.exit"); +void +t_ctx0::sort_by(const std::vector& sortby) { + if (sortby.empty()) + return; + m_traversal->sort_by(m_gstate, m_config, sortby); } void -t_ctx0::notify(const t_data_table& flattened) { - // Notify the context with new data after the `t_gstate`'s master table - // has been updated for the first time with data. 
- t_uindex nrecs = flattened.size(); - std::shared_ptr pkey_sptr = flattened.get_const_column("psp_pkey"); - std::shared_ptr op_sptr = flattened.get_const_column("psp_op"); - const t_column* pkey_col = pkey_sptr.get(); - const t_column* op_col = op_sptr.get(); +t_ctx0::reset_sortby() { + m_traversal->sort_by(m_gstate, m_config, std::vector()); +} - m_has_delta = true; +t_tscalar +t_ctx0::get_column_name(t_index idx) { + std::string empty(""); - if (m_config.has_filters()) { - t_mask msk = filter_table_for_config(flattened, m_config); + if (idx >= get_column_count()) + return m_symtable.get_interned_tscalar(empty.c_str()); - for (t_uindex idx = 0; idx < nrecs; ++idx) { - t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); - std::uint8_t op_ = *(op_col->get_nth(idx)); - t_op op = static_cast(op_); + return m_symtable.get_interned_tscalar(m_config.col_at(idx).c_str()); +} - switch (op) { - case OP_INSERT: { - if (msk.get(idx)) { - m_traversal->add_row(m_gstate, m_config, pkey); - } - } break; - default: break; - } +std::vector +t_ctx0::get_pkeys(const std::vector>& cells) const { + if (!m_traversal->validate_cells(cells)) { + std::vector rval; + return rval; + } + return m_traversal->get_pkeys(cells); +} - // Add primary key to track row delta - add_delta_pkey(pkey); - } +std::vector +t_ctx0::get_all_pkeys(const std::vector>& cells) const { + if (!m_traversal->validate_cells(cells)) { + std::vector rval; + return rval; + } + return m_traversal->get_all_pkeys(cells); +} - return; +std::vector +t_ctx0::get_cell_data(const std::vector>& cells) const { + if (!m_traversal->validate_cells(cells)) { + std::vector rval; + return rval; } - for (t_uindex idx = 0; idx < nrecs; ++idx) { - t_tscalar pkey = m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx)); - std::uint8_t op_ = *(op_col->get_nth(idx)); - t_op op = static_cast(op_); + t_uindex ncols = get_column_count(); - switch (op) { - case OP_INSERT: { - m_traversal->add_row(m_gstate, m_config, 
pkey); - } break; - default: break; + for (const auto& c : cells) { + if (c.second >= ncols) { + std::vector rval; + return rval; } + } - // Add primary key to track row delta - add_delta_pkey(pkey); + // Order aligned with cells + std::vector pkeys = get_all_pkeys(cells); + std::vector out_data; + out_data.reserve(cells.size()); + + for (t_index idx = 0, loop_end = pkeys.size(); idx < loop_end; ++idx) { + std::string colname = m_config.col_at(cells[idx].second); + out_data.push_back(m_gstate->get(pkeys[idx], colname)); } + + return out_data; +} + +/** + * @brief Returns a `t_rowdelta` struct containing data from updated rows and the updated row + * indices. + * + * @return t_rowdelta + */ +t_rowdelta +t_ctx0::get_row_delta() { + bool rows_changed = m_rows_changed || !m_traversal->empty_sort_by(); + std::vector rows = m_traversal->get_row_indices(m_delta_pkeys); + std::sort(rows.begin(), rows.end()); + std::vector data = get_data(rows); + t_rowdelta rval(rows_changed, rows.size(), data); + clear_deltas(); + return rval; +} + +const tsl::hopscotch_set& +t_ctx0::get_delta_pkeys() const { + return m_delta_pkeys; +} + +std::vector +t_ctx0::get_column_names() const { + return m_config.get_column_names(); +} + +std::vector +t_ctx0::get_sort_by() const { + return m_traversal->get_sort_by(); +} + +void +t_ctx0::reset() { + m_traversal->reset(); + m_deltas = std::make_shared(); + m_has_delta = false; +} + +t_index +t_ctx0::sidedness() const { + return 0; } void @@ -570,6 +489,100 @@ t_ctx0::calc_step_delta(const t_data_table& flattened, const t_data_table& prev, } } +/** + * @brief + * + * @param bidx + * @param eidx + * @return std::vector + */ +std::vector +t_ctx0::get_cell_delta(t_index bidx, t_index eidx) const { + tsl::hopscotch_set pkeys; + t_tscalar prev_pkey; + prev_pkey.set(t_none()); + + bidx = std::min(bidx, m_traversal->size()); + eidx = std::min(eidx, m_traversal->size()); + + std::vector rval; + + if (m_traversal->empty_sort_by()) { + std::vector pkey_vec = 
m_traversal->get_pkeys(bidx, eidx); + for (t_index idx = 0, loop_end = pkey_vec.size(); idx < loop_end; ++idx) { + const t_tscalar& pkey = pkey_vec[idx]; + t_index row = bidx + idx; + std::pair::type::iterator, + t_zcdeltas::index::type::iterator> + iters = m_deltas->get().equal_range(pkey); + for (t_zcdeltas::index::type::iterator iter = iters.first; + iter != iters.second; ++iter) { + t_cellupd cellupd; + cellupd.row = row; + cellupd.column = iter->m_colidx; + cellupd.old_value = iter->m_old_value; + cellupd.new_value = iter->m_new_value; + rval.push_back(cellupd); + } + } + } else { + for (t_zcdeltas::index::type::iterator iter + = m_deltas->get().begin(); + iter != m_deltas->get().end(); ++iter) { + if (prev_pkey != iter->m_pkey) { + pkeys.insert(iter->m_pkey); + prev_pkey = iter->m_pkey; + } + } + + tsl::hopscotch_map r_indices; + m_traversal->get_row_indices(pkeys, r_indices); + + for (t_zcdeltas::index::type::iterator iter + = m_deltas->get().begin(); + iter != m_deltas->get().end(); ++iter) { + t_index row = r_indices[iter->m_pkey]; + if (bidx <= row && row <= eidx) { + t_cellupd cellupd; + cellupd.row = row; + cellupd.column = iter->m_colidx; + cellupd.old_value = iter->m_old_value; + cellupd.new_value = iter->m_new_value; + rval.push_back(cellupd); + } + } + } + return rval; +} + +/** + * @brief Returns updated cells. 
+ * + * @param bidx + * @param eidx + * @return t_stepdelta + */ +t_stepdelta +t_ctx0::get_step_delta(t_index bidx, t_index eidx) { + bidx = std::min(bidx, m_traversal->size()); + eidx = std::min(eidx, m_traversal->size()); + bool rows_changed = m_rows_changed || !m_traversal->empty_sort_by(); + t_stepdelta rval(rows_changed, m_columns_changed, get_cell_delta(bidx, eidx)); + m_deltas->clear(); + clear_deltas(); + return rval; +} + +t_index +t_ctx0::get_row_count() const { + return m_traversal->size(); +} + +t_index +t_ctx0::get_column_count() const { + return m_config.get_num_columns(); +} + /** * @brief Mark a primary key as updated by adding it to the tracking set. @@ -616,9 +629,6 @@ t_ctx0::has_deltas() const { return m_has_delta; } -void -t_ctx0::pprint() const {} - t_dtype t_ctx0::get_column_dtype(t_uindex idx) const { if (idx >= static_cast(get_column_count())) @@ -711,4 +721,16 @@ t_ctx0::clear_deltas() { void t_ctx0::unity_init_load_step_end() {} + +std::string +t_ctx0::repr() const { + std::stringstream ss; + ss << "t_ctx0<" << this << ">"; + return ss.str(); +} + +void +t_ctx0::pprint() const {} + + } // end namespace perspective diff --git a/cpp/perspective/src/cpp/emscripten.cpp b/cpp/perspective/src/cpp/emscripten.cpp index 18a350c530..3a3e9952c1 100644 --- a/cpp/perspective/src/cpp/emscripten.cpp +++ b/cpp/perspective/src/cpp/emscripten.cpp @@ -1612,7 +1612,7 @@ namespace binding { t_totals total = sortspec.size() > 0 ? 
TOTALS_BEFORE : TOTALS_HIDDEN; auto cfg = t_config( - row_pivots, column_pivots, aggspecs, total, fterm, filter_op, computed_columns,column_only); + row_pivots, column_pivots, aggspecs, total, fterm, filter_op, computed_columns, column_only); auto ctx2 = std::make_shared(*(schema.get()), cfg); ctx2->init(); diff --git a/cpp/perspective/src/cpp/flat_traversal.cpp b/cpp/perspective/src/cpp/flat_traversal.cpp index 9525359f29..4f10b4202d 100644 --- a/cpp/perspective/src/cpp/flat_traversal.cpp +++ b/cpp/perspective/src/cpp/flat_traversal.cpp @@ -243,6 +243,7 @@ t_ftrav::step_begin() { void t_ftrav::step_end() { + // The new number of rows in this traversal t_index new_size = m_index->size() + m_step_inserts - m_step_deletes; auto new_index = std::make_shared>(); @@ -254,78 +255,56 @@ t_ftrav::step_end() { std::vector new_rows; new_rows.reserve(m_new_elems.size()); - std::cout << "pkeys in new_rows" << std::endl; - for ( tsl::hopscotch_map::const_iterator pkelem_iter = m_new_elems.begin(); pkelem_iter != m_new_elems.end(); ++pkelem_iter) { - std::cout << pkelem_iter->first << ", "; new_rows.push_back(pkelem_iter->second); } - std::cout << std::endl; - - for (const auto& sort : m_sortby) { - std::cout << "sort: " << sort.m_colname << std::endl; - } - + // TODO: int/float/date/datetime pkeys are already sorted here, so if + // there was a way to assert that `psp_pkey` is a string typed column, + // we can conditional the sort on whether m_sortby.size() > 0 or if + // psp_pkey is a string column. 
std::sort(new_rows.begin(), new_rows.end(), sorter); - std::cout << "pkeys in new_rows (sorted)" << std::endl; - for (const auto& r : new_rows) { - std::cout << r.m_pkey << ", "; - } - - std::cout << std::endl << "new_index" << std::endl; for (auto it = new_rows.begin(); it != new_rows.end(); ++it) { const t_mselem& new_elem = *it; + while (i < m_index->size()) { const t_mselem& old_elem = (*m_index)[i]; - std::cout << old_elem.m_pkey; + if (old_elem.m_deleted) { - std::cout << " deleted, "; i++; m_pkeyidx.erase(old_elem.m_pkey); } else if (old_elem.m_updated) { - std::cout << " updated, "; i++; } else if (sorter(old_elem, new_elem)) { - std::cout << " sorted, "; m_pkeyidx[old_elem.m_pkey] = new_index->size(); new_index->push_back(old_elem); i++; } else { - std::cout << " noop, "; break; } } m_pkeyidx[new_elem.m_pkey] = new_index->size(); - std::cout << new_elem.m_pkey << ": " << new_index->size() << ", "; new_index->push_back(new_elem); } - std::cout << std::endl; - std::cout << "i: " << i << std::endl; - std::cout << "reconciling new_index" << std::endl; - // reconcile old rows that are marked as removed or updated. while (i < m_index->size()) { const t_mselem& old_elem = (*m_index)[i++]; - std::cout << old_elem.m_pkey; + if (old_elem.m_deleted) { - std::cout << " deleted, "; m_pkeyidx.erase(old_elem.m_pkey); } else if (!old_elem.m_updated) { + // Add back cells that have not changed during this step. 
m_pkeyidx[old_elem.m_pkey] = new_index->size(); - std::cout << " not updated - new size: " << new_index->size() << ", "; new_index->push_back(old_elem); - } else { - std::cout << ", "; } } - std::cout << std::endl; + std::swap(new_index, m_index); m_new_elems.clear(); } diff --git a/cpp/perspective/src/cpp/view_config.cpp b/cpp/perspective/src/cpp/view_config.cpp index d17290f7bb..f9750146f1 100644 --- a/cpp/perspective/src/cpp/view_config.cpp +++ b/cpp/perspective/src/cpp/view_config.cpp @@ -281,19 +281,4 @@ t_view_config::get_aggregate_index(const std::string& column) const { return t_index(); } -bool -t_view_config::is_unit_config( - const std::vector& table_columns) const { - if (m_row_pivots.size() == 0 && - m_column_pivots.size() == 0 && - m_filter.size() == 0 && - m_sort.size() == 0 && - m_computed_columns.size() == 0 && - m_columns == table_columns) { - return true; - } else { - return false; - } -} - } // end namespace perspective \ No newline at end of file diff --git a/cpp/perspective/src/include/perspective/config.h b/cpp/perspective/src/include/perspective/config.h index f0a438494e..f57287e9d3 100644 --- a/cpp/perspective/src/include/perspective/config.h +++ b/cpp/perspective/src/include/perspective/config.h @@ -27,26 +27,6 @@ namespace perspective { */ class PERSPECTIVE_EXPORT t_config { public: - /** - * @brief Construct a config for a `View` object. Pivots are passed in as vectors of - * strings, which are converted to `t_pivot` objects. 
- * - * @param row_pivots - * @param column_pivots - * @param aggregates - * @param sortspecs - * @param col_sortspecs - * @param combiner - * @param fterms - * @param col_names - * @param column_only - */ - t_config(const std::vector& row_pivots, - const std::vector& column_pivots, const std::vector& aggregates, - const std::vector& sortspecs, const std::vector& col_sortspecs, - t_filter_op combiner, const std::vector& fterms, - const std::vector& col_names, bool column_only); - /** * @brief Construct a new config for a `t_ctx0` object. * @@ -129,6 +109,17 @@ class PERSPECTIVE_EXPORT t_config { const std::vector& sort_pivot, const std::vector& sort_pivot_by); + /** + * @brief A t_config is trivial if it does not have any pivots, sorts, + * filter terms, or computed columns. This allows a context_zero to + * skip creating a traversal and simply read from its gnode state for + * a performance boost. + * + * @return true + * @return false + */ + bool is_trivial_config(); + t_index get_colidx(const std::string& colname) const; std::string repr() const; @@ -192,28 +183,32 @@ class PERSPECTIVE_EXPORT t_config { void populate_sortby(const std::vector& pivots); private: + // Set at initialization and accessible through a public API. + std::vector m_detail_columns; std::vector m_row_pivots; std::vector m_col_pivots; - bool m_column_only; + std::vector m_aggregates; std::map m_sortby; std::vector m_sortspecs; std::vector m_col_sortspecs; - std::vector m_aggregates; - std::vector m_detail_columns; - t_totals m_totals; - std::map m_detail_colmap; - bool m_has_pkey_agg; - // t_uindex m_row_expand_depth; - // t_uindex m_col_expand_depth; std::vector m_fterms; - t_filter_op m_combiner; std::vector m_computed_columns; + t_filter_op m_combiner; + bool m_column_only; + + // A trivial config exists if there are no pivots, sorts, filters, or + // computed columns. 
+ bool m_is_trivial_config; + + // Internal + t_totals m_totals; + std::map m_detail_colmap; std::string m_parent_pkey_column; std::string m_child_pkey_column; std::string m_grouping_label_column; - t_fmode m_fmode; - std::vector m_filter_exprs; std::string m_grand_agg_str; + t_fmode m_fmode; + bool m_has_pkey_agg; }; } // end namespace perspective diff --git a/cpp/perspective/src/include/perspective/context_unit.h b/cpp/perspective/src/include/perspective/context_unit.h index 6d338c6f91..53042188ab 100644 --- a/cpp/perspective/src/include/perspective/context_unit.h +++ b/cpp/perspective/src/include/perspective/context_unit.h @@ -41,10 +41,6 @@ class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { std::vector get_data(const std::vector& pkeys) const; - void sort_by(const std::vector& sortby); - - void reset_sortby(); - // will only work on empty contexts void notify(const t_data_table& flattened); @@ -68,17 +64,12 @@ class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { std::vector get_pkeys(const std::vector>& cells) const; - std::vector get_cell_data( - const std::vector>& cells) const; - t_stepdelta get_step_delta(t_index bidx, t_index eidx); t_rowdelta get_row_delta(); std::vector get_rows_changed(); - std::vector get_cell_delta(t_index bidx, t_index eidx) const; - void clear_deltas(); void reset_step_state(); @@ -87,8 +78,6 @@ class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { void enable(); - std::vector get_trees(); - bool has_deltas() const; void pprint() const; @@ -114,10 +103,8 @@ class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { std::string unity_get_column_display_name(t_uindex idx) const; t_uindex unity_get_column_count() const; t_uindex unity_get_row_count() const; - t_data_table unity_get_table() const; bool unity_get_row_expanded(t_uindex idx) const; bool unity_get_column_expanded(t_uindex idx) const; - void unity_init_load_step_end(); protected: void add_delta_pkey(t_tscalar pkey); @@ -138,9 +125,6 @@ class PERSPECTIVE_EXPORT 
t_ctxunit : public t_ctxbase { */ tsl::hopscotch_set m_delta_pkeys; - // A mapping of integer row indices to scalar primary keys. - tsl::hopscotch_map m_pkey_index; - t_symtable m_symtable; bool m_has_delta; }; diff --git a/cpp/perspective/src/include/perspective/view_config.h b/cpp/perspective/src/include/perspective/view_config.h index 8bc4f76419..d866a0d548 100644 --- a/cpp/perspective/src/include/perspective/view_config.h +++ b/cpp/perspective/src/include/perspective/view_config.h @@ -92,19 +92,6 @@ class PERSPECTIVE_EXPORT t_view_config { bool is_column_only() const; - /** - * @brief Whether this config can be used for a `t_ctxunit` - the config - * must have no pivots, sorts, filters, or computed columns, and the - * ordering of columns must be equal to `table_columns`. - * - * @param table_columns a vector of column names from the `Table` for - * comparison. - * - * @return true - * @return false - */ - bool is_unit_config(const std::vector& table_columns) const; - std::int32_t get_row_pivot_depth() const; std::int32_t get_column_pivot_depth() const; diff --git a/packages/perspective-bench/bench/perspective.benchmark.js b/packages/perspective-bench/bench/perspective.benchmark.js index d9b4d1f6b1..bf0eb44945 100644 --- a/packages/perspective-bench/bench/perspective.benchmark.js +++ b/packages/perspective-bench/bench/perspective.benchmark.js @@ -322,42 +322,6 @@ describe("View", async () => { await table.delete(); }); - describe("mixed", async () => { - describe("ctx_unit", async () => { - let view; - - afterEach(async () => { - await view.delete(); - }); - - benchmark(`view`, async () => { - view = table.view(); - await view.schema(); - }); - }); - }); - - describe("mixed", async () => { - describe("ctx_unit", async () => { - let view; - - beforeAll(async () => { - view = table.view(); - await view.schema(); - }); - - afterAll(async () => { - await view.delete(); - }); - - for (const format of ["json", "columns", "arrow"]) { - benchmark(format, async () => { 
- await view[`to_${format}`](); - }); - } - }); - }); - for (const aggregate of AGG_OPTIONS) { for (const row_pivot of ROW_PIVOT_OPTIONS) { for (const column_pivot of COLUMN_PIVOT_OPTIONS) { diff --git a/packages/perspective/src/js/perspective.js b/packages/perspective/src/js/perspective.js index 2de2d92f07..c1f4774ce8 100644 --- a/packages/perspective/src/js/perspective.js +++ b/packages/perspective/src/js/perspective.js @@ -147,14 +147,9 @@ export default function(Module) { this.config = config || {}; this.view_config = view_config || new view_config(); - const num_columns = this.table.columns().length; - - // TODO: verify that a different order of the same num_cols as the - // table is valid as a unit context. this.is_unit_config = this.table.index === "" && sides === 0 && - this.view_config.columns.length === num_columns && this.view_config.row_pivots.length === 0 && this.view_config.column_pivots.length === 0 && this.view_config.filter.length === 0 && diff --git a/packages/perspective/test/js/delta.js b/packages/perspective/test/js/delta.js index 5816f61997..e8c630c29c 100644 --- a/packages/perspective/test/js/delta.js +++ b/packages/perspective/test/js/delta.js @@ -398,6 +398,133 @@ module.exports = perspective => { }); }); + describe("0-sided row delta, column order subset", function() { + it("returns changed rows", async function(done) { + let table = perspective.table(data, {index: "x"}); + let view = table.view({ + columns: ["y"] + }); + view.on_update( + async function(updated) { + const expected = [{y: "string1"}, {y: "string2"}]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(partial_change_y); + }); + + it("returns changed rows from schema", async function(done) { + let table = perspective.table( + { + x: "integer", + y: "string", + z: "boolean" + }, + {index: "x"} + ); + let view = table.view({ + columns: ["z"] + }); + view.on_update( + async 
function(updated) { + const expected = [{z: false}, {z: false}, {z: true}]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update([ + {x: 1, y: "a", z: true}, + {x: 2, y: "b", z: false}, + {x: 3, y: "c", z: true}, + {x: 1, y: "d", z: false} + ]); + }); + + it("returns added rows", async function(done) { + let table = perspective.table(data); + let view = table.view({ + columns: ["y"] + }); + view.on_update( + async function(updated) { + const expected = [{y: "string1"}, {y: "string2"}]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(partial_change_y); + }); + + it("returns added rows from schema", async function(done) { + let table = perspective.table({ + x: "integer", + y: "string", + z: "boolean" + }); + let view = table.view({ + columns: ["z"] + }); + view.on_update( + async function(updated) { + await match_delta(perspective, updated.delta, [{z: true}, {z: false}, {z: true}, {z: false}]); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update(data); + }); + + it("returns deleted rows", async function(done) { + let table = perspective.table(data, {index: "x"}); + let view = table.view({ + columns: ["y"] + }); + view.on_update( + async function(updated) { + const expected = [{y: null}, {y: null}]; + await match_delta(perspective, updated.delta, expected); + view.delete(); + table.delete(); + done(); + }, + {mode: "row"} + ); + table.update([ + {x: 1, y: null}, + {x: 4, y: null} + ]); + }); + + it("returns changed rows in non-sequential update", async function(done) { + let table = perspective.table(data, {index: "x"}); + let view = table.view({ + columns: ["y"] + }); + view.on_update( + async function(updated) { + await match_delta(perspective, updated.delta, [{y: "string1"}, {y: "string2"}]); + view.delete(); + table.delete(); + done(); + }, + 
{mode: "row"} + ); + table.update(partial_change_nonseq); + }); + }); + describe("1-sided row delta", function() { it("returns changed rows", async function(done) { let table = perspective.table(data, {index: "x"}); diff --git a/packages/perspective/test/js/to_format.js b/packages/perspective/test/js/to_format.js index 81361488cb..2a4f946a4e 100644 --- a/packages/perspective/test/js/to_format.js +++ b/packages/perspective/test/js/to_format.js @@ -822,6 +822,262 @@ module.exports = perspective => { }); }); + describe("0-sided column subset", function() { + it("should return correct pkey for unindexed table", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + columns: ["int", "datetime"] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{datetime: int_float_string_data[0]["datetime"].getTime(), __INDEX__: [0]}]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for float indexed table", async function() { + let table = perspective.table(int_float_string_data, {index: "float"}); + let view = table.view({ + columns: ["float", "int"] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{int: 1, __INDEX__: [2.25]}]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for string indexed table", async function() { + let table = perspective.table(int_float_string_data, {index: "string"}); + let view = table.view({ + columns: ["string", "datetime"] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{datetime: int_float_string_data[0]["datetime"].getTime(), __INDEX__: ["a"]}]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for date indexed table", async function() { + // 
default data generates the same datetime for each row, thus pkeys get collapsed + const data = [ + {int: 1, datetime: new Date()}, + {int: 2, datetime: new Date()} + ]; + data[1].datetime.setDate(data[1].datetime.getDate() + 1); + let table = perspective.table(data, {index: "datetime"}); + let view = table.view({ + columns: ["int"] + }); + let json = await view.to_json({ + start_row: 1, + end_row: 2, + index: true + }); + expect(json).toEqual([{int: 2, __INDEX__: [data[1].datetime.getTime()]}]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for all rows + columns on an unindexed table", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + columns: ["int"] + }); + let json = await view.to_json({ + index: true + }); + + for (let i = 0; i < json.length; i++) { + expect(json[i].__INDEX__).toEqual([i]); + } + view.delete(); + table.delete(); + }); + + it("should return correct pkey for all rows + columns on an indexed table", async function() { + let table = perspective.table(int_float_string_data, {index: "string"}); + let view = table.view(); + let json = await view.to_json({ + index: true + }); + + let pkeys = ["a", "b", "c", "d"]; + for (let i = 0; i < json.length; i++) { + expect(json[i].__INDEX__).toEqual([pkeys[i]]); + } + view.delete(); + table.delete(); + }); + }); + + describe("0-sided column subset invalid bounds", function() { + it("should return correct pkey for unindexed table, invalid column", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ + columns: ["int"] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{__INDEX__: [0]}]); + view.delete(); + table.delete(); + }); + + it("should not return pkey for unindexed table, invalid row", async function() { + let table = perspective.table(int_float_string_data); + let view = table.view({ 
+ columns: ["int"] + }); + let json = await view.to_json({ + start_row: 10, + end_row: 15, + index: true + }); + expect(json).toEqual([]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for float indexed table, invalid column", async function() { + let table = perspective.table(int_float_string_data, {index: "float"}); + let view = table.view({ + columns: ["float"] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{__INDEX__: [2.25]}]); + view.delete(); + table.delete(); + }); + + it("should not return pkey for float indexed table, invalid row", async function() { + let table = perspective.table(int_float_string_data, {index: "float"}); + let view = table.view({ + columns: ["float"] + }); + let json = await view.to_json({ + start_row: 10, + end_row: 15, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for string indexed table, invalid column", async function() { + let table = perspective.table(int_float_string_data, {index: "string"}); + let view = table.view({ + columns: ["string"] + }); + let json = await view.to_json({ + start_row: 0, + end_row: 1, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([{__INDEX__: ["a"]}]); + view.delete(); + table.delete(); + }); + + it("should not return pkey for string indexed table, invalid row", async function() { + let table = perspective.table(int_float_string_data, {index: "string"}); + let view = table.view({ + columns: ["string"] + }); + let json = await view.to_json({ + start_row: 10, + end_row: 11, + start_col: 1, + end_col: 2, + index: true + }); + expect(json).toEqual([]); + view.delete(); + table.delete(); + }); + + it("should return correct pkey for date indexed table, invalid column", async function() { + // default data generates the same datetime for each row, thus 
pkeys get collapsed + const data = [ + {int: 1, datetime: new Date()}, + {int: 2, datetime: new Date()} + ]; + data[1].datetime.setDate(data[1].datetime.getDate() + 1); + let table = perspective.table(data, {index: "datetime"}); + let view = table.view({ + columns: ["int"] + }); + let json = await view.to_json({ + start_col: 1, + start_col: 2, + index: true + }); + expect(json).toEqual([ + { + __INDEX__: [data[0].datetime.getTime()] + }, + { + __INDEX__: [data[1].datetime.getTime()] + } + ]); + view.delete(); + table.delete(); + }); + + it("should not return pkey for date indexed table, invalid row", async function() { + // default data generates the same datetime for each row, thus pkeys get collapsed + const data = [ + {int: 1, datetime: new Date()}, + {int: 2, datetime: new Date()} + ]; + data[1].datetime.setDate(data[1].datetime.getDate() + 1); + let table = perspective.table(data, {index: "datetime"}); + let view = table.view({ + columns: ["int"] + }); + let json = await view.to_json({ + start_row: 11, + start_row: 12, + index: true + }); + expect(json).toEqual([]); + view.delete(); + table.delete(); + }); + }); + describe("0-sided sorted", function() { it("should return correct pkey for unindexed table", async function() { let table = perspective.table(int_float_string_data); diff --git a/packages/perspective/test/js/updates.js b/packages/perspective/test/js/updates.js index 49743d4b2c..961041f75f 100644 --- a/packages/perspective/test/js/updates.js +++ b/packages/perspective/test/js/updates.js @@ -445,6 +445,37 @@ module.exports = perspective => { table.delete(); }); + it("arrow dict contructor then arrow dict `update()` subset of columns", async function() { + var table = perspective.table(arrows.dict_arrow.slice()); + table.update(arrows.dict_update_arrow.slice()); + var view = table.view({ + columns: ["a"] + }); + let result = await view.to_columns(); + expect(result).toEqual({ + a: ["abc", "def", "def", null, "abc", null, "update1", "update2"] + }); + 
view.delete(); + table.delete(); + }); + + it("non-arrow constructor then arrow dict `update()`, subset of columns", async function() { + let table = perspective.table({ + a: ["a", "b", "c"], + b: ["d", "e", "f"] + }); + let view = table.view({ + columns: ["a"] + }); + table.update(arrows.dict_update_arrow.slice()); + let result = await view.to_columns(); + expect(result).toEqual({ + a: ["a", "b", "c", null, "update1", "update2"] + }); + view.delete(); + table.delete(); + }); + it("arrow partial `update()` a single column", async function() { let table = perspective.table(arrows.test_arrow.slice(), {index: "i64"}); table.update(arrows.partial_arrow.slice()); From d7854f89bbc291c81910353830cd2143bfe30f3e Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Fri, 16 Oct 2020 15:01:28 -0400 Subject: [PATCH 4/5] Implement unit context in Python, test and fix Windows build Implement unit context in python add more python tests, make get_row_expanded return bool fix windows build --- cpp/perspective/src/cpp/context_unit.cpp | 2 + cpp/perspective/src/cpp/emscripten.cpp | 1 - cpp/perspective/src/cpp/view.cpp | 8 +- .../src/include/perspective/first.h | 4 +- .../src/include/perspective/view.h | 2 +- packages/perspective/src/js/perspective.js | 12 +- .../perspective/include/perspective/python.h | 40 + .../include/perspective/python/context.h | 4 + .../perspective/python/serialization.h | 5 + .../include/perspective/python/view.h | 21 + .../perspective/perspective/src/context.cpp | 26 + .../perspective/src/serialization.cpp | 17 + python/perspective/perspective/src/view.cpp | 46 +- .../perspective/table/_data_formatter.py | 29 +- python/perspective/perspective/table/view.py | 37 +- .../perspective/tests/table/test_update.py | 57 +- .../perspective/tests/table/test_view.py | 75 ++ .../tests/table/test_view_computed.py | 991 ++++++++++-------- 18 files changed, 903 insertions(+), 474 deletions(-) diff --git a/cpp/perspective/src/cpp/context_unit.cpp 
b/cpp/perspective/src/cpp/context_unit.cpp index 1edf41d24f..c8894ef7d6 100644 --- a/cpp/perspective/src/cpp/context_unit.cpp +++ b/cpp/perspective/src/cpp/context_unit.cpp @@ -151,6 +151,8 @@ t_ctxunit::get_data( std::vector out_data(num_rows); + // Read directly from the row indices on the table - they will + // always correspond exactly. m_gstate->read_column(colname, start_row, end_row, out_data); for (t_index ridx = ext.m_srow; ridx < ext.m_erow; ++ridx) { diff --git a/cpp/perspective/src/cpp/emscripten.cpp b/cpp/perspective/src/cpp/emscripten.cpp index 3a3e9952c1..8a6f6773f3 100644 --- a/cpp/perspective/src/cpp/emscripten.cpp +++ b/cpp/perspective/src/cpp/emscripten.cpp @@ -1518,7 +1518,6 @@ namespace binding { auto columns = view_config->get_columns(); auto filter_op = view_config->get_filter_op(); auto fterm = view_config->get_fterm(); - auto sortspec = view_config->get_sortspec(); auto computed_columns = view_config->get_computed_columns(); auto cfg = t_config(columns, fterm, filter_op, computed_columns); diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index db2f6d31a8..743ad0a1ab 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -687,7 +687,7 @@ View::_set_deltas_enabled(bool enabled_state) { // Pivot table operations template -std::int32_t +bool View::get_row_expanded(std::int32_t ridx) const { return m_ctx->unity_get_row_expanded(ridx); } @@ -720,6 +720,12 @@ View::expand(std::int32_t ridx, std::int32_t row_pivot_length) { } } +template <> +t_index +View::collapse(std::int32_t ridx) { + return ridx; +} + template <> t_index View::collapse(std::int32_t ridx) { diff --git a/cpp/perspective/src/include/perspective/first.h b/cpp/perspective/src/include/perspective/first.h index d3a566b499..1711f5bf37 100644 --- a/cpp/perspective/src/include/perspective/first.h +++ b/cpp/perspective/src/include/perspective/first.h @@ -23,8 +23,8 @@ #ifdef WIN32 #ifndef NOMINMAX #define NOMINMAX -#endif // 
ifndex nominmax -#endif // win32 +#endif // NOMINMAX +#endif // WIN32 #ifdef PSP_VERIFY #define PSP_STORAGE_VERIFY diff --git a/cpp/perspective/src/include/perspective/view.h b/cpp/perspective/src/include/perspective/view.h index 4b2abf3923..efc83be32f 100644 --- a/cpp/perspective/src/include/perspective/view.h +++ b/cpp/perspective/src/include/perspective/view.h @@ -181,7 +181,7 @@ class PERSPECTIVE_EXPORT View { * @param ridx * @return std::int32_t */ - std::int32_t get_row_expanded(std::int32_t ridx) const; + bool get_row_expanded(std::int32_t ridx) const; /** * @brief Expands the row at "ridx". diff --git a/packages/perspective/src/js/perspective.js b/packages/perspective/src/js/perspective.js index c1f4774ce8..749b1bed60 100644 --- a/packages/perspective/src/js/perspective.js +++ b/packages/perspective/src/js/perspective.js @@ -147,7 +147,7 @@ export default function(Module) { this.config = config || {}; this.view_config = view_config || new view_config(); - this.is_unit_config = + this.is_unit_context = this.table.index === "" && sides === 0 && this.view_config.row_pivots.length === 0 && @@ -156,7 +156,7 @@ export default function(Module) { this.view_config.sort.length === 0 && this.view_config.computed_columns.length === 0; - if (this.is_unit_config) { + if (this.is_unit_context) { this._View = __MODULE__.make_view_unit(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); } else if (sides === 0) { this._View = __MODULE__.make_view_zero(table._Table, name, defaults.COLUMN_SEPARATOR_STRING, this.view_config, null); @@ -357,7 +357,7 @@ export default function(Module) { }; view.prototype.get_data_slice = function(start_row, end_row, start_col, end_col) { - if (this.is_unit_config) { + if (this.is_unit_context) { return __MODULE__.get_data_slice_unit(this._View, start_row, end_row, start_col, end_col); } else { const num_sides = this.sides(); @@ -424,7 +424,7 @@ export default function(Module) { let get_from_data_slice; - if 
(this.is_unit_config) { + if (this.is_unit_context) { get_from_data_slice = __MODULE__.get_from_data_slice_unit; } else { get_from_data_slice = __MODULE__[`get_from_data_slice_${nidx}`]; @@ -701,7 +701,7 @@ export default function(Module) { const end_col = options.end_col; const sides = this.sides(); - if (this.is_unit_config) { + if (this.is_unit_context) { return __MODULE__.to_arrow_unit(this._View, start_row, end_row, start_col, end_col); } else if (sides === 0) { return __MODULE__.to_arrow_zero(this._View, start_row, end_row, start_col, end_col); @@ -819,7 +819,7 @@ export default function(Module) { * @private */ view.prototype._get_row_delta = async function() { - if (this.is_unit_config) { + if (this.is_unit_context) { return __MODULE__.get_row_delta_unit(this._View); } else { const sides = this.sides(); diff --git a/python/perspective/perspective/include/perspective/python.h b/python/perspective/perspective/include/perspective/python.h index beeb61ce90..01db1e21e9 100644 --- a/python/perspective/perspective/include/perspective/python.h +++ b/python/perspective/perspective/include/perspective/python.h @@ -87,6 +87,28 @@ PYBIND11_MODULE(libbinding, m) * View */ // Bind a View for each context type + py::class_, std::shared_ptr>>(m, "View_ctxunit") + .def(py::init, std::shared_ptr, std::string, std::string, + std::shared_ptr>()) + .def("sides", &View::sides) + .def("num_rows", &View::num_rows) + .def("num_columns", &View::num_columns) + .def("get_row_expanded", &View::get_row_expanded) + .def("schema", &View::schema) + .def("computed_schema", &View::computed_schema) + .def("column_names", &View::column_names) + .def("column_paths", &View::column_paths) + .def("_get_deltas_enabled", &View::_get_deltas_enabled) + .def("_set_deltas_enabled", &View::_set_deltas_enabled) + .def("get_context", &View::get_context) + .def("get_row_pivots", &View::get_row_pivots) + .def("get_column_pivots", &View::get_column_pivots) + .def("get_aggregates", &View::get_aggregates) + 
.def("get_filter", &View::get_filter) + .def("get_sort", &View::get_sort) + .def("get_step_delta", &View::get_step_delta) + .def("get_column_dtype", &View::get_column_dtype) + .def("is_column_only", &View::is_column_only); py::class_, std::shared_ptr>>(m, "View_ctx0") .def(py::init, std::shared_ptr, std::string, std::string, @@ -209,6 +231,12 @@ PYBIND11_MODULE(libbinding, m) * * t_data_slice */ + py::class_, std::shared_ptr>>(m, "t_data_slice_ctxunit") + .def("get_column_slice", &t_data_slice::get_column_slice) + .def("get_slice", &t_data_slice::get_slice) + .def("get_column_names", &t_data_slice::get_column_names) + .def("get_pkeys", &t_data_slice::get_pkeys); + py::class_, std::shared_ptr>>(m, "t_data_slice_ctx0") .def("get_column_slice", &t_data_slice::get_column_slice) .def("get_slice", &t_data_slice::get_slice) @@ -229,6 +257,12 @@ PYBIND11_MODULE(libbinding, m) .def("get_row_path", &t_data_slice::get_row_path) .def("get_pkeys", &t_data_slice::get_pkeys); + /****************************************************************************** + * + * t_ctxunit + */ + py::class_(m, "t_ctxunit"); + /****************************************************************************** * * t_ctx0 @@ -362,9 +396,13 @@ PYBIND11_MODULE(libbinding, m) */ m.def("str_to_filter_op", &str_to_filter_op); m.def("make_table", &make_table_py); + m.def("make_view_unit", &make_view_unit); m.def("make_view_zero", &make_view_ctx0); m.def("make_view_one", &make_view_ctx1); m.def("make_view_two", &make_view_ctx2); + m.def("get_data_slice_unit", &get_data_slice_unit); + m.def("get_from_data_slice_unit", &get_from_data_slice_unit); + m.def("get_pkeys_from_data_slice_unit", &get_pkeys_from_data_slice_unit); m.def("get_data_slice_zero", &get_data_slice_ctx0); m.def("get_from_data_slice_zero", &get_from_data_slice_ctx0); m.def("get_pkeys_from_data_slice_zero", &get_pkeys_from_data_slice_ctx0); @@ -374,9 +412,11 @@ PYBIND11_MODULE(libbinding, m) m.def("get_data_slice_two", &get_data_slice_ctx2); 
m.def("get_from_data_slice_two", &get_from_data_slice_ctx2); m.def("get_pkeys_from_data_slice_two", &get_pkeys_from_data_slice_ctx2); + m.def("to_arrow_unit", &to_arrow_unit); m.def("to_arrow_zero", &to_arrow_zero); m.def("to_arrow_one", &to_arrow_one); m.def("to_arrow_two", &to_arrow_two); + m.def("get_row_delta_unit", &get_row_delta_unit); m.def("get_row_delta_zero", &get_row_delta_zero); m.def("get_row_delta_one", &get_row_delta_one); m.def("get_row_delta_two", &get_row_delta_two); diff --git a/python/perspective/perspective/include/perspective/python/context.h b/python/perspective/perspective/include/perspective/python/context.h index 7a3a5160a7..b8574eb3f4 100644 --- a/python/perspective/perspective/include/perspective/python/context.h +++ b/python/perspective/perspective/include/perspective/python/context.h @@ -20,6 +20,10 @@ namespace binding { * * Context API */ +template <> +std::shared_ptr +make_context(std::shared_ptr
table, std::shared_ptr schema, std::shared_ptr view_config, const std::string& name); + template <> std::shared_ptr make_context(std::shared_ptr
table, std::shared_ptr schema, std::shared_ptr view_config, const std::string& name); diff --git a/python/perspective/perspective/include/perspective/python/serialization.h b/python/perspective/perspective/include/perspective/python/serialization.h index baf54466dd..d71854c63e 100644 --- a/python/perspective/perspective/include/perspective/python/serialization.h +++ b/python/perspective/perspective/include/perspective/python/serialization.h @@ -28,6 +28,9 @@ t_val get_column_data(std::shared_ptr table, const std::string& co template std::shared_ptr> get_data_slice(std::shared_ptr> view, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col); +std::shared_ptr> +get_data_slice_unit(std::shared_ptr> view, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col); + std::shared_ptr> get_data_slice_ctx0(std::shared_ptr> view, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col); @@ -39,6 +42,7 @@ get_data_slice_ctx2(std::shared_ptr> view, std::uint32_t start_row, template t_val get_from_data_slice(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); +t_val get_from_data_slice_unit(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); t_val get_from_data_slice_ctx0(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); t_val get_from_data_slice_ctx1(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); t_val get_from_data_slice_ctx2(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); @@ -46,6 +50,7 @@ t_val get_from_data_slice_ctx2(std::shared_ptr> data_slice, // wrap `get_pkeys` in order to convert t_scalar to t_val entirely within c++ template std::vector get_pkeys_from_data_slice(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); +std::vector get_pkeys_from_data_slice_unit(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); std::vector get_pkeys_from_data_slice_ctx0(std::shared_ptr> data_slice, 
t_uindex ridx, t_uindex cidx); std::vector get_pkeys_from_data_slice_ctx1(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); std::vector get_pkeys_from_data_slice_ctx2(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx); diff --git a/python/perspective/perspective/include/perspective/python/view.h b/python/perspective/perspective/include/perspective/python/view.h index e06dfb2eac..72269caccd 100644 --- a/python/perspective/perspective/include/perspective/python/view.h +++ b/python/perspective/perspective/include/perspective/python/view.h @@ -40,10 +40,30 @@ std::shared_ptr make_view_config(std::shared_ptr schema template std::shared_ptr> make_view(std::shared_ptr
table, const std::string& name, const std::string& separator, t_val view_config, t_val date_parser); +/** + * Unlike Emscripten, where we can define templated headers in the Embind + * declaration, we need to explicitly specify all templated functions + * before they are used by Pybind. + */ +std::shared_ptr> make_view_unit(std::shared_ptr
table, std::string name, std::string separator, t_val view_config, t_val date_parser); std::shared_ptr> make_view_ctx0(std::shared_ptr
table, std::string name, std::string separator, t_val view_config, t_val date_parser); std::shared_ptr> make_view_ctx1(std::shared_ptr
table, std::string name, std::string separator, t_val view_config, t_val date_parser); std::shared_ptr> make_view_ctx2(std::shared_ptr
table, std::string name, std::string separator, t_val view_config, t_val date_parser); +py::bytes to_arrow_unit( + std::shared_ptr> view, + std::int32_t start_row, + std::int32_t end_row, + std::int32_t start_col, + std::int32_t end_col); + +py::bytes to_arrow_zero( + std::shared_ptr> view, + std::int32_t start_row, + std::int32_t end_row, + std::int32_t start_col, + std::int32_t end_col); + py::bytes to_arrow_zero( std::shared_ptr> view, std::int32_t start_row, @@ -65,6 +85,7 @@ py::bytes to_arrow_two( std::int32_t start_col, std::int32_t end_col); +py::bytes get_row_delta_unit(std::shared_ptr> view); py::bytes get_row_delta_zero(std::shared_ptr> view); py::bytes get_row_delta_one(std::shared_ptr> view); py::bytes get_row_delta_two(std::shared_ptr> view); diff --git a/python/perspective/perspective/src/context.cpp b/python/perspective/perspective/src/context.cpp index 2ff010902a..12a4d4ad2c 100644 --- a/python/perspective/perspective/src/context.cpp +++ b/python/perspective/perspective/src/context.cpp @@ -21,6 +21,32 @@ namespace binding { * Context API */ +template <> +std::shared_ptr +make_context(std::shared_ptr
table, std::shared_ptr schema, + std::shared_ptr view_config, const std::string& name) { + auto columns = view_config->get_columns(); + auto filter_op = view_config->get_filter_op(); + auto fterm = view_config->get_fterm(); + auto computed_columns = view_config->get_computed_columns(); + + auto cfg = t_config(columns, fterm, filter_op, computed_columns); + auto ctx_unit = std::make_shared(*(schema.get()), cfg); + ctx_unit->init(); + + auto pool = table->get_pool(); + auto gnode = table->get_gnode(); + + pool->register_context( + gnode->get_id(), + name, + UNIT_CONTEXT, + reinterpret_cast(ctx_unit.get())); + + return ctx_unit; +} + + template <> std::shared_ptr make_context(std::shared_ptr
table, std::shared_ptr schema, diff --git a/python/perspective/perspective/src/serialization.cpp b/python/perspective/perspective/src/serialization.cpp index 8cd8a6c462..92fb36481b 100644 --- a/python/perspective/perspective/src/serialization.cpp +++ b/python/perspective/perspective/src/serialization.cpp @@ -30,6 +30,12 @@ get_data_slice(std::shared_ptr> view, std::uint32_t start_row, return data_slice; } +std::shared_ptr> +get_data_slice_unit(std::shared_ptr> view, std::uint32_t start_row, + std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col) { + return get_data_slice(view, start_row, end_row, start_col, end_col); +} + std::shared_ptr> get_data_slice_ctx0(std::shared_ptr> view, std::uint32_t start_row, std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col) { @@ -56,6 +62,12 @@ get_from_data_slice( return scalar_to_py(d); } +t_val +get_from_data_slice_unit( + std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx) { + return get_from_data_slice(data_slice, ridx, cidx); +} + t_val get_from_data_slice_ctx0( std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx) { @@ -87,6 +99,11 @@ get_pkeys_from_data_slice(std::shared_ptr> data_slice, t_uin return rval; } +std::vector +get_pkeys_from_data_slice_unit(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx) { + return get_pkeys_from_data_slice(data_slice, ridx, cidx); +} + std::vector get_pkeys_from_data_slice_ctx0(std::shared_ptr> data_slice, t_uindex ridx, t_uindex cidx) { return get_pkeys_from_data_slice(data_slice, ridx, cidx); diff --git a/python/perspective/perspective/src/view.cpp b/python/perspective/perspective/src/view.cpp index 2c6feb8fd8..34730c4e79 100644 --- a/python/perspective/perspective/src/view.cpp +++ b/python/perspective/perspective/src/view.cpp @@ -239,6 +239,11 @@ make_view_config(std::shared_ptr schema, t_val date_parser, t_val conf return view_config; } +/****************************************************************************** + * + * make_view 
+ */ + template std::shared_ptr> make_view(std::shared_ptr
table, const std::string& name, const std::string& separator, @@ -253,6 +258,12 @@ make_view(std::shared_ptr
table, const std::string& name, const std::stri } } +std::shared_ptr> +make_view_unit(std::shared_ptr
table, std::string name, std::string separator, + t_val view_config, t_val date_parser) { + return make_view(table, name, separator, view_config, date_parser); +} + std::shared_ptr> make_view_ctx0(std::shared_ptr
table, std::string name, std::string separator, t_val view_config, t_val date_parser) { @@ -271,6 +282,25 @@ make_view_ctx2(std::shared_ptr
table, std::string name, std::string separ return make_view(table, name, separator, view_config, date_parser); } +/****************************************************************************** + * + * to_arrow + */ + +py::bytes +to_arrow_unit( + std::shared_ptr> view, + std::int32_t start_row, + std::int32_t end_row, + std::int32_t start_col, + std::int32_t end_col +) { + PerspectiveScopedGILRelease acquire(view->get_event_loop_thread_id()); + std::shared_ptr str = + view->to_arrow(start_row, end_row, start_col, end_col); + return py::bytes(*str); +} + py::bytes to_arrow_zero( std::shared_ptr> view, @@ -313,6 +343,19 @@ to_arrow_two( return py::bytes(*str); } +/****************************************************************************** + * + * get_row_delta + */ + +py::bytes +get_row_delta_unit(std::shared_ptr> view) { + PerspectiveScopedGILRelease acquire(view->get_event_loop_thread_id()); + std::shared_ptr> slice = view->get_row_delta(); + std::shared_ptr arrow = view->data_slice_to_arrow(slice); + return py::bytes(*arrow); +} + py::bytes get_row_delta_zero(std::shared_ptr> view) { PerspectiveScopedGILRelease acquire(view->get_event_loop_thread_id()); @@ -338,9 +381,6 @@ get_row_delta_two( return py::bytes(*arrow); } - - - } //namespace binding } //namespace perspective diff --git a/python/perspective/perspective/table/_data_formatter.py b/python/perspective/perspective/table/_data_formatter.py index 4bcbbfc870..6c5e2af610 100644 --- a/python/perspective/perspective/table/_data_formatter.py +++ b/python/perspective/perspective/table/_data_formatter.py @@ -10,12 +10,15 @@ from math import floor, ceil, trunc from ._constants import COLUMN_SEPARATOR_STRING from .libbinding import ( + get_data_slice_unit, get_data_slice_zero, get_data_slice_one, get_data_slice_two, + get_from_data_slice_unit, get_from_data_slice_zero, get_from_data_slice_one, get_from_data_slice_two, + get_pkeys_from_data_slice_unit, get_pkeys_from_data_slice_zero, get_pkeys_from_data_slice_one, 
get_pkeys_from_data_slice_two, @@ -87,7 +90,9 @@ def to_format(options, view, output_format): if output_format in ("dict", "numpy") and (name not in data): # TODO: push into C++ for numpy data[name] = [] - if view._sides == 0: + if view._is_unit_context: + value = get_from_data_slice_unit(data_slice, ridx, cidx) + elif view._sides == 0: value = get_from_data_slice_zero(data_slice, ridx, cidx) elif view._sides == 1: value = get_from_data_slice_one(data_slice, ridx, cidx) @@ -100,7 +105,9 @@ def to_format(options, view, output_format): data[name].append(value) if options["index"]: - if view._sides == 0: + if view._is_unit_context: + pkeys = get_pkeys_from_data_slice_unit(data_slice, ridx, 0) + elif view._sides == 0: pkeys = get_pkeys_from_data_slice_zero(data_slice, ridx, 0) elif view._sides == 1: pkeys = get_pkeys_from_data_slice_one(data_slice, ridx, 0) @@ -119,8 +126,12 @@ def to_format(options, view, output_format): for pkey in pkeys: data["__INDEX__"].append([pkey]) - if options["id"] and view._sides == 0: - pkeys = get_pkeys_from_data_slice_zero(data_slice, ridx, 0) + if options["id"] and (view._is_unit_context or view._sides == 0): + if view._is_unit_context: + pkeys = get_pkeys_from_data_slice_unit(data_slice, ridx, 0) + else: + pkeys = get_pkeys_from_data_slice_zero(data_slice, ridx, 0) + if output_format == "records": data[-1]["__ID__"] = [] for pkey in pkeys: @@ -151,7 +162,15 @@ def _to_format_helper(view, options=None): options = options or {} opts = _parse_format_options(view, options) - if view._sides == 0: + if view._is_unit_context: + data_slice = get_data_slice_unit( + view._view, + opts["start_row"], + opts["end_row"], + opts["start_col"], + opts["end_col"], + ) + elif view._sides == 0: data_slice = get_data_slice_zero( view._view, opts["start_row"], diff --git a/python/perspective/perspective/table/view.py b/python/perspective/perspective/table/view.py index dadad9b2c3..c4bdb5e3b0 100644 --- a/python/perspective/perspective/table/view.py +++ 
b/python/perspective/perspective/table/view.py @@ -18,12 +18,15 @@ from ._callback_cache import _PerspectiveCallBackCache from ._date_validator import _PerspectiveDateValidator from .libbinding import ( + make_view_unit, make_view_zero, make_view_one, make_view_two, + to_arrow_unit, to_arrow_zero, to_arrow_one, to_arrow_two, + get_row_delta_unit, get_row_delta_zero, get_row_delta_one, get_row_delta_two, @@ -53,7 +56,25 @@ def __init__(self, Table, **kwargs): date_validator = _PerspectiveDateValidator() - if self._sides == 0: + self._is_unit_context = ( + self._table._index == "" + and self._sides == 0 + and len(self._config.get_row_pivots()) == 0 + and len(self._config.get_column_pivots()) == 0 + and len(self._config.get_filter()) == 0 + and len(self._config.get_sort()) == 0 + and len(self._config.get_computed_columns()) == 0 + ) + + if self._is_unit_context: + self._view = make_view_unit( + self._table._table, + self._name, + COLUMN_SEPARATOR_STRING, + self._config, + date_validator, + ) + elif self._sides == 0: self._view = make_view_zero( self._table._table, self._name, @@ -371,7 +392,15 @@ def remove_delete(self, callback): def to_arrow(self, **kwargs): options = _parse_format_options(self, kwargs) - if self._sides == 0: + if self._is_unit_context: + return to_arrow_unit( + self._view, + options["start_row"], + options["end_row"], + options["start_col"], + options["end_col"], + ) + elif self._sides == 0: return to_arrow_zero( self._view, options["start_row"], @@ -543,7 +572,9 @@ def to_columns(self, **options): return self.to_dict(**options) def _get_row_delta(self): - if self._sides == 0: + if self._is_unit_context: + return get_row_delta_unit(self._view) + elif self._sides == 0: return get_row_delta_zero(self._view) elif self._sides == 1: return get_row_delta_one(self._view) diff --git a/python/perspective/perspective/tests/table/test_update.py b/python/perspective/perspective/tests/table/test_update.py index 0634b16c8c..8635753ed4 100644 --- 
a/python/perspective/perspective/tests/table/test_update.py +++ b/python/perspective/perspective/tests/table/test_update.py @@ -11,7 +11,6 @@ class TestUpdate(object): - def test_update_from_schema(self): tbl = Table({ "a": str, @@ -53,6 +52,62 @@ def test_update_columnar_partial(self): tbl.update({"a": ["abc"], "b": [456]}) assert tbl.view().to_records() == [{"a": "abc", "b": 456}] + # make sure already created views are notified properly + def test_update_from_schema_notify(self): + tbl = Table({ + "a": str, + "b": int + }) + view = tbl.view() + assert view.num_rows() == 0 + tbl.update([{"a": "abc", "b": 123}]) + assert view.to_records() == [{"a": "abc", "b": 123}] + + def test_update_columnar_from_schema_notify(self): + tbl = Table({ + "a": str, + "b": int + }) + view = tbl.view() + assert view.num_rows() == 0 + tbl.update({"a": ["abc"], "b": [123]}) + assert view.to_records() == [{"a": "abc", "b": 123}] + + def test_update_append_notify(self): + tbl = Table([{"a": "abc", "b": 123}]) + view = tbl.view() + assert view.num_rows() == 1 + tbl.update([{"a": "def", "b": 456}]) + assert view.to_records() == [{"a": "abc", "b": 123}, {"a": "def", "b": 456}] + + def test_update_partial_notify(self): + tbl = Table([{"a": "abc", "b": 123}], index="a") + view = tbl.view() + assert view.num_rows() == 1 + tbl.update([{"a": "abc", "b": 456}]) + assert view.to_records() == [{"a": "abc", "b": 456}] + + def test_update_partial_cols_not_in_schema_notify(self): + tbl = Table([{"a": "abc", "b": 123}], index="a") + view = tbl.view() + assert view.num_rows() == 1 + tbl.update([{"a": "abc", "b": 456, "c": 789}]) + assert view.to_records() == [{"a": "abc", "b": 456}] + + def test_update_columnar_append_notify(self): + tbl = Table({"a": ["abc"], "b": [123]}) + view = tbl.view() + assert view.num_rows() == 1 + tbl.update({"a": ["def"], "b": [456]}) + assert view.to_records() == [{"a": "abc", "b": 123}, {"a": "def", "b": 456}] + + def test_update_columnar_partial_notify(self): + tbl = 
Table({"a": ["abc"], "b": [123]}, index="a") + view = tbl.view() + assert view.num_rows() == 1 + tbl.update({"a": ["abc"], "b": [456]}) + assert view.to_records() == [{"a": "abc", "b": 456}] + # bool def test_update_bool_from_schema(self): diff --git a/python/perspective/perspective/tests/table/test_view.py b/python/perspective/perspective/tests/table/test_view.py index df6b9908a4..a02baa7d23 100644 --- a/python/perspective/perspective/tests/table/test_view.py +++ b/python/perspective/perspective/tests/table/test_view.py @@ -842,6 +842,23 @@ def cb1(port_id, delta): view.on_update(cb1, mode="row") tbl.update(update_data) + def test_view_row_delta_zero_column_subset(self, util): + data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + update_data = { + "a": [5], + "b": [6] + } + + def cb1(port_id, delta): + compare_delta(delta, { + "b": [6] + }) + + tbl = Table(data) + view = tbl.view(columns=["b"]) + view.on_update(cb1, mode="row") + tbl.update(update_data) + def test_view_row_delta_zero_from_schema(self, util): update_data = { "a": [5], @@ -859,6 +876,26 @@ def cb1(port_id, delta): view.on_update(cb1, mode="row") tbl.update(update_data) + def test_view_row_delta_zero_from_schema_column_subset(self, util): + update_data = { + "a": [5], + "b": [6] + } + + def cb1(port_id, delta): + compare_delta(delta, { + "b": [6] + }) + + tbl = Table({ + "a": int, + "b": int + }) + + view = tbl.view(columns=["b"]) + view.on_update(cb1, mode="row") + tbl.update(update_data) + def test_view_row_delta_zero_from_schema_filtered(self, util): update_data = { "a": [8, 9, 10, 11], @@ -1322,3 +1359,41 @@ def test_view_context_two_update_clears_column_regression(self, util): ] assert tbl.size() == 9 + + # expand/collapse + + def test_view_collapse_one(self): + data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + tbl = Table(data) + view = tbl.view(row_pivots=["a"]) + assert view.collapse(0) == 2 + + def test_view_collapse_two(self): + data = [{"a": 1, "b": 2, "c": "a"}, {"a": 3, "b": 4, "c": "b"}] + tbl = 
Table(data) + view = tbl.view(row_pivots=["a"], column_pivots=["c"]) + assert view.collapse(0) == 0 + + def test_view_collapse_two_column_only(self): + data = [{"a": 1, "b": 2, "c": "a"}, {"a": 3, "b": 4, "c": "b"}] + tbl = Table(data) + view = tbl.view(column_pivots=["c"]) + assert view.collapse(0) == 0 + + def test_view_expand_one(self): + data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + tbl = Table(data) + view = tbl.view(row_pivots=["a"]) + assert view.expand(0) == 0 + + def test_view_expand_two(self): + data = [{"a": 1, "b": 2, "c": "a"}, {"a": 3, "b": 4, "c": "b"}] + tbl = Table(data) + view = tbl.view(row_pivots=["a"], column_pivots=["c"]) + assert view.expand(1) == 1 + + def test_view_expand_two_column_only(self): + data = [{"a": 1, "b": 2, "c": "a"}, {"a": 3, "b": 4, "c": "b"}] + tbl = Table(data) + view = tbl.view(column_pivots=["c"]) + assert view.expand(0) == 0 diff --git a/python/perspective/perspective/tests/table/test_view_computed.py b/python/perspective/perspective/tests/table/test_view_computed.py index b2d2b9b118..d9b3ca1c05 100644 --- a/python/perspective/perspective/tests/table/test_view_computed.py +++ b/python/perspective/perspective/tests/table/test_view_computed.py @@ -5,323 +5,332 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. 
# -import numpy as np from datetime import date, datetime from perspective.table import Table class TestViewComputed(object): + def test_view_computed_schema_empty(self): + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view() + assert view.to_columns() == {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + assert view.computed_schema() == {} def test_view_computed_create(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } + assert view.computed_schema() == {"computed": float} + + def test_view_computed_create_no_columns(self): + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + columns=[], + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], + ) + assert view.to_columns() == {} + assert view.schema() == {} + + # computed column should still exist + assert view.computed_schema() == {"computed": float} + + def test_view_computed_create_columns(self): + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + columns=["computed"], + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], + ) + assert view.to_columns() == {"computed": [6, 8, 10, 12]} + assert view.schema() == {"computed": float} + # computed column should still exist + assert view.computed_schema() == {"computed": float} def test_view_computed_multiple_dependents(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) - view = 
table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }, - { - "column": "final", - "computed_function_name": "pow2", - "inputs": ["computed"] - } - ]) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + }, + { + "column": "final", + "computed_function_name": "pow2", + "inputs": ["computed"], + }, + ] + ) assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "computed": [6, 8, 10, 12], - "final": [36, 64, 100, 144] + "final": [36, 64, 100, 144], } def test_view_computed_create_clear(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } table.clear() - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} assert view.to_columns() == {} def test_view_computed_multiple_dependents_clear(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }, - { - "column": "final", - "computed_function_name": "pow2", - "inputs": ["computed"] - } - ]) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + }, + { + "column": "final", + "computed_function_name": "pow2", + 
"inputs": ["computed"], + }, + ] + ) assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "computed": [6, 8, 10, 12], - "final": [36, 64, 100, 144] + "final": [36, 64, 100, 144], } table.clear() assert view.schema() == { "a": int, "b": int, "computed": float, - "final": float + "final": float, } assert view.to_columns() == {} def test_view_computed_create_replace(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] - } - table.replace({ - "a": [10, 20, 30, 40], - "b": [50, 60, 70, 80] - }) - assert view.schema() == { - "a": int, - "b": int, - "computed": float + "computed": [6, 8, 10, 12], } + table.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]}) + assert view.schema() == {"a": int, "b": int, "computed": float} assert view.to_columns() == { "a": [10, 20, 30, 40], "b": [50, 60, 70, 80], - "computed": [60, 80, 100, 120] + "computed": [60, 80, 100, 120], } def test_view_computed_multiple_dependents_replace(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }, - { - "column": "final", - "computed_function_name": "pow2", - "inputs": ["computed"] - } - ]) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + }, + { + "column": "final", + "computed_function_name": "pow2", + "inputs": ["computed"], + }, + ] + ) assert view.to_columns() == { "a": [1, 2, 3, 
4], "b": [5, 6, 7, 8], "computed": [6, 8, 10, 12], - "final": [36, 64, 100, 144] + "final": [36, 64, 100, 144], } - table.replace({ - "a": [10, 20, 30, 40], - "b": [50, 60, 70, 80] - }) + table.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]}) assert view.schema() == { "a": int, "b": int, "computed": float, - "final": float + "final": float, } assert view.to_columns() == { "a": [10, 20, 30, 40], "b": [50, 60, 70, 80], "computed": [60, 80, 100, 120], - "final": [3600, 6400, 10000, 14400] + "final": [3600, 6400, 10000, 14400], } def test_view_computed_multiple_views_should_not_conflate(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) - view2 = table.view(computed_columns=[{ - "column": "computed2", - "computed_function_name": "-", - "inputs": ["a", "b"] - }]) + view2 = table.view( + computed_columns=[ + { + "column": "computed2", + "computed_function_name": "-", + "inputs": ["a", "b"], + } + ] + ) - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} - assert view2.schema() == { - "a": int, - "b": int, - "computed2": float - } + assert view2.schema() == {"a": int, "b": int, "computed2": float} assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } assert view2.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed2": [-4, -4, -4, -4] + "computed2": [-4, -4, -4, -4], } def test_view_computed_multiple_views_should_all_clear(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - view 
= table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) - view2 = table.view(computed_columns=[{ - "column": "computed2", - "computed_function_name": "-", - "inputs": ["a", "b"] - }]) + view2 = table.view( + computed_columns=[ + { + "column": "computed2", + "computed_function_name": "-", + "inputs": ["a", "b"], + } + ] + ) - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} - assert view2.schema() == { - "a": int, - "b": int, - "computed2": float - } + assert view2.schema() == {"a": int, "b": int, "computed2": float} assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } assert view2.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed2": [-4, -4, -4, -4] + "computed2": [-4, -4, -4, -4], } table.clear() - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} - assert view2.schema() == { - "a": int, - "b": int, - "computed2": float - } + assert view2.schema() == {"a": int, "b": int, "computed2": float} assert view.to_columns() == {} assert view2.to_columns() == {} def test_view_computed_multiple_views_should_all_replace(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) - view2 = table.view(computed_columns=[{ - "column": "computed2", - "computed_function_name": "-", - 
"inputs": ["a", "b"] - }]) + view2 = table.view( + computed_columns=[ + { + "column": "computed2", + "computed_function_name": "-", + "inputs": ["a", "b"], + } + ] + ) - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} - assert view2.schema() == { - "a": int, - "b": int, - "computed2": float - } + assert view2.schema() == {"a": int, "b": int, "computed2": float} assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } assert view2.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed2": [-4, -4, -4, -4] + "computed2": [-4, -4, -4, -4], } - table.replace({ - "a": [10, 20, 30, 40], - "b": [50, 60, 70, 80] - }) + table.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]}) assert view.to_columns() == { "a": [10, 20, 30, 40], @@ -332,110 +341,95 @@ def test_view_computed_multiple_views_should_all_replace(self): assert view2.to_columns() == { "a": [10, 20, 30, 40], "b": [50, 60, 70, 80], - "computed2": [-40, -40, -40, -40] + "computed2": [-40, -40, -40, -40], } def test_view_computed_delete_and_create(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} assert view.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } view.delete() - view2 = table.view(computed_columns=[{ - "column": "computed2", - "computed_function_name": "-", - "inputs": ["a", "b"] - }]) + 
view2 = table.view( + computed_columns=[ + { + "column": "computed2", + "computed_function_name": "-", + "inputs": ["a", "b"], + } + ] + ) - assert view2.schema() == { - "a": int, - "b": int, - "computed2": float - } + assert view2.schema() == {"a": int, "b": int, "computed2": float} assert view2.to_columns() == { "a": [1, 2, 3, 4], "b": [5, 6, 7, 8], - "computed2": [-4, -4, -4, -4] + "computed2": [-4, -4, -4, -4], } def test_view_computed_delete_and_create_with_updates(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - view = table.view(computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }]) + view = table.view( + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ] + ) - assert view.schema() == { - "a": int, - "b": int, - "computed": float - } + assert view.schema() == {"a": int, "b": int, "computed": float} - table.update({ - "a": [5, 6], - "b": [9, 10] - }) + table.update({"a": [5, 6], "b": [9, 10]}) assert view.to_columns() == { "a": [1, 2, 3, 4, 5, 6], "b": [5, 6, 7, 8, 9, 10], - "computed": [6, 8, 10, 12, 14, 16] + "computed": [6, 8, 10, 12, 14, 16], } view.delete() - view2 = table.view(computed_columns=[{ - "column": "computed2", - "computed_function_name": "-", - "inputs": ["a", "b"] - }]) + view2 = table.view( + computed_columns=[ + { + "column": "computed2", + "computed_function_name": "-", + "inputs": ["a", "b"], + } + ] + ) - assert view2.schema() == { - "a": int, - "b": int, - "computed2": float - } + assert view2.schema() == {"a": int, "b": int, "computed2": float} - table.update({ - "a": [5, 6], - "b": [9, 10] - }) + table.update({"a": [5, 6], "b": [9, 10]}) - table.update({ - "a": [5, 6], - "b": [9, 10] - }) + table.update({"a": [5, 6], "b": [9, 10]}) assert view2.to_columns() == { "a": [1, 2, 3, 4, 5, 6, 5, 6, 5, 6], "b": [5, 6, 7, 8, 9, 10, 9, 10, 9, 10], - 
"computed2": [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4] + "computed2": [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4], } def test_view_delete_with_scope(self): @@ -467,63 +461,60 @@ def test_view_delete_with_scope(self): ) def test_view_computed_with_custom_columns(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) view = table.view( columns=["computed", "b"], - computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], ) assert view.to_columns() == { "b": [5, 6, 7, 8], - "computed": [6, 8, 10, 12] + "computed": [6, 8, 10, 12], } def test_view_computed_with_row_pivots(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) view = table.view( row_pivots=["computed"], - computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], ) assert view.to_columns() == { "__ROW_PATH__": [[], [6], [8], [10], [12]], "a": [10, 1, 2, 3, 4], "b": [26, 5, 6, 7, 8], - "computed": [36.0, 6.0, 8.0, 10.0, 12.0] + "computed": [36.0, 6.0, 8.0, 10.0, 12.0], } def test_view_computed_with_row_pivots_clear(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) view = table.view( row_pivots=["computed"], - computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], ) assert view.to_columns() == { "__ROW_PATH__": [[], [6], [8], [10], [12]], "a": [10, 1, 2, 3, 4], "b": [26, 5, 6, 7, 8], - "computed": [36.0, 6.0, 8.0, 
10.0, 12.0] + "computed": [36.0, 6.0, 8.0, 10.0, 12.0], } table.clear() @@ -532,55 +523,50 @@ def test_view_computed_with_row_pivots_clear(self): "__ROW_PATH__": [[]], "a": [None], "b": [None], - "computed": [None] + "computed": [None], } def test_view_computed_with_row_pivots_replace(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) view = table.view( row_pivots=["computed"], - computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], ) assert view.to_columns() == { "__ROW_PATH__": [[], [6], [8], [10], [12]], "a": [10, 1, 2, 3, 4], "b": [26, 5, 6, 7, 8], - "computed": [36.0, 6.0, 8.0, 10.0, 12.0] + "computed": [36.0, 6.0, 8.0, 10.0, 12.0], } - table.replace({ - "a": [10, 20, 30, 40], - "b": [50, 60, 70, 80] - }) + table.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]}) assert view.to_columns() == { "__ROW_PATH__": [[], [60], [80], [100], [120]], "a": [100, 10, 20, 30, 40], "b": [260, 50, 60, 70, 80], - "computed": [360.0, 60.0, 80.0, 100.0, 120.0] + "computed": [360.0, 60.0, 80.0, 100.0, 120.0], } def test_view_computed_with_column_pivots(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) view = table.view( column_pivots=["computed"], - computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], ) assert view.to_columns() == { "6|a": [1, None, None, None], @@ -594,21 +580,20 @@ def test_view_computed_with_column_pivots(self): "10|computed": [None, None, 10.0, None], "12|a": [None, None, None, 4], "12|b": [None, None, None, 8], - "12|computed": [None, None, None, 12.0] + "12|computed": [None, 
None, None, 12.0], } def test_view_computed_with_row_column_pivots(self): - table = Table({ - "a": [1, 2, 3, 4], - "b": [5, 6, 7, 8] - }) + table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) view = table.view( column_pivots=["computed"], - computed_columns=[{ - "column": "computed", - "computed_function_name": "+", - "inputs": ["a", "b"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "+", + "inputs": ["a", "b"], + } + ], ) assert view.to_columns() == { "6|a": [1, None, None, None], @@ -622,284 +607,388 @@ def test_view_computed_with_row_column_pivots(self): "10|computed": [None, None, 10.0, None], "12|a": [None, None, None, 4], "12|b": [None, None, None, 8], - "12|computed": [None, None, None, 12.0] + "12|computed": [None, None, None, 12.0], } - + def test_view_computed_with_sort(self): - table = Table({ - "a": ["a", "ab", "abc", "abcd"] - }) + table = Table({"a": ["a", "ab", "abc", "abcd"]}) view = table.view( sort=[["computed", "desc"]], - computed_columns=[{ - "column": "computed", - "computed_function_name": "length", - "inputs": ["a"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "length", + "inputs": ["a"], + } + ], ) assert view.to_columns() == { "a": ["abcd", "abc", "ab", "a"], - "computed": [4, 3, 2, 1] + "computed": [4, 3, 2, 1], } def test_view_computed_with_filter(self): - table = Table({ - "a": ["a", "ab", "abc", "abcd"] - }) + table = Table({"a": ["a", "ab", "abc", "abcd"]}) view = table.view( filter=[["computed", ">=", 3]], - computed_columns=[{ - "column": "computed", - "computed_function_name": "length", - "inputs": ["a"] - }] + computed_columns=[ + { + "column": "computed", + "computed_function_name": "length", + "inputs": ["a"], + } + ], ) - assert view.to_columns() == { - "a": ["abc", "abcd"], - "computed": [3, 4] - } + assert view.to_columns() == {"a": ["abc", "abcd"], "computed": [3, 4]} def test_view_day_of_week_date(self): - table = Table({ - "a": [date(2020, 3, i) for 
i in range(9, 14)] - }) + table = Table({"a": [date(2020, 3, i) for i in range(9, 14)]}) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "day_of_week", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": date, - "bucket": str - } + assert view.schema() == {"a": date, "bucket": str} assert view.to_columns() == { "a": [datetime(2020, 3, i) for i in range(9, 14)], - "bucket": ["2 Monday", "3 Tuesday", "4 Wednesday", "5 Thursday", "6 Friday"] + "bucket": [ + "2 Monday", + "3 Tuesday", + "4 Wednesday", + "5 Thursday", + "6 Friday", + ], } def test_view_day_of_week_datetime(self): - table = Table({ - "a": [datetime(2020, 3, i, 12, 30) for i in range(9, 14)] - }) + table = Table( + {"a": [datetime(2020, 3, i, 12, 30) for i in range(9, 14)]} + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "day_of_week", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": datetime, - "bucket": str - } + assert view.schema() == {"a": datetime, "bucket": str} assert view.to_columns() == { "a": [datetime(2020, 3, i, 12, 30) for i in range(9, 14)], - "bucket": ["2 Monday", "3 Tuesday", "4 Wednesday", "5 Thursday", "6 Friday"] + "bucket": [ + "2 Monday", + "3 Tuesday", + "4 Wednesday", + "5 Thursday", + "6 Friday", + ], } def test_view_month_of_year_date(self): - table = Table({ - "a": [date(2020, i, 15) for i in range(1, 13)] - }) + table = Table({"a": [date(2020, i, 15) for i in range(1, 13)]}) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "month_of_year", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": date, - "bucket": str - } + assert view.schema() == {"a": date, "bucket": str} assert view.to_columns() == { "a": [datetime(2020, i, 15) for i in range(1, 13)], - "bucket": ["01 January", "02 February", "03 March", "04 April", "05 May", "06 June", "07 July", "08 August", "09 September", "10 
October", "11 November", "12 December"] + "bucket": [ + "01 January", + "02 February", + "03 March", + "04 April", + "05 May", + "06 June", + "07 July", + "08 August", + "09 September", + "10 October", + "11 November", + "12 December", + ], } def test_view_month_of_year_datetime(self): - table = Table({ - "a": [datetime(2020, i, 15) for i in range(1, 13)], - }) + table = Table( + { + "a": [datetime(2020, i, 15) for i in range(1, 13)], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "month_of_year", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": datetime, - "bucket": str - } + assert view.schema() == {"a": datetime, "bucket": str} assert view.to_columns() == { "a": [datetime(2020, i, 15) for i in range(1, 13)], - "bucket": ["01 January", "02 February", "03 March", "04 April", "05 May", "06 June", "07 July", "08 August", "09 September", "10 October", "11 November", "12 December"] + "bucket": [ + "01 January", + "02 February", + "03 March", + "04 April", + "05 May", + "06 June", + "07 July", + "08 August", + "09 September", + "10 October", + "11 November", + "12 December", + ], } # bucketing def test_view_day_bucket_date(self): - table = Table({ - "a": [date(2020, 1, 1), date(2020, 1, 1), date(2020, 2, 29), date(2020, 3, 1)], - }) + table = Table( + { + "a": [ + date(2020, 1, 1), + date(2020, 1, 1), + date(2020, 2, 29), + date(2020, 3, 1), + ], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "day_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": date, - "bucket": date - } + assert view.schema() == {"a": date, "bucket": date} assert view.to_columns() == { - "a": [datetime(2020, 1, 1), datetime(2020, 1, 1), datetime(2020, 2, 29), datetime(2020, 3, 1)], - "bucket": [datetime(2020, 1, 1), datetime(2020, 1, 1), datetime(2020, 2, 29), datetime(2020, 3, 1)] + "a": [ + datetime(2020, 1, 1), + datetime(2020, 1, 1), + 
datetime(2020, 2, 29), + datetime(2020, 3, 1), + ], + "bucket": [ + datetime(2020, 1, 1), + datetime(2020, 1, 1), + datetime(2020, 2, 29), + datetime(2020, 3, 1), + ], } def test_view_day_bucket_date_with_null(self): - table = Table({ - "a": [date(2020, 1, 1), None, date(2020, 2, 29), date(2020, 3, 15)], - }) + table = Table( + { + "a": [ + date(2020, 1, 1), + None, + date(2020, 2, 29), + date(2020, 3, 15), + ], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "day_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": date, - "bucket": date - } + assert view.schema() == {"a": date, "bucket": date} assert view.to_columns() == { - "a": [datetime(2020, 1, 1), None, datetime(2020, 2, 29), datetime(2020, 3, 15)], - "bucket": [datetime(2020, 1, 1), None, datetime(2020, 2, 29), datetime(2020, 3, 15)] + "a": [ + datetime(2020, 1, 1), + None, + datetime(2020, 2, 29), + datetime(2020, 3, 15), + ], + "bucket": [ + datetime(2020, 1, 1), + None, + datetime(2020, 2, 29), + datetime(2020, 3, 15), + ], } def test_view_day_bucket_datetime(self): - table = Table({ - "a": [datetime(2020, 1, 1, 5), datetime(2020, 1, 1, 23), datetime(2020, 2, 29, 1), datetime(2020, 3, 1, 0)], - }) + table = Table( + { + "a": [ + datetime(2020, 1, 1, 5), + datetime(2020, 1, 1, 23), + datetime(2020, 2, 29, 1), + datetime(2020, 3, 1, 0), + ], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "day_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": datetime, - "bucket": date - } + assert view.schema() == {"a": datetime, "bucket": date} assert view.to_columns() == { - "a": [datetime(2020, 1, 1, 5), datetime(2020, 1, 1, 23), datetime(2020, 2, 29, 1), datetime(2020, 3, 1, 0)], - "bucket": [datetime(2020, 1, 1), datetime(2020, 1, 1), datetime(2020, 2, 29), datetime(2020, 3, 1)] + "a": [ + datetime(2020, 1, 1, 5), + datetime(2020, 1, 1, 23), + 
datetime(2020, 2, 29, 1), + datetime(2020, 3, 1, 0), + ], + "bucket": [ + datetime(2020, 1, 1), + datetime(2020, 1, 1), + datetime(2020, 2, 29), + datetime(2020, 3, 1), + ], } def test_view_month_bucket_date(self): - table = Table({ - "a": [date(2020, 1, 1), date(2020, 1, 28), date(2020, 2, 29), date(2020, 3, 15)], - }) + table = Table( + { + "a": [ + date(2020, 1, 1), + date(2020, 1, 28), + date(2020, 2, 29), + date(2020, 3, 15), + ], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "month_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": date, - "bucket": date - } + assert view.schema() == {"a": date, "bucket": date} assert view.to_columns() == { - "a": [datetime(2020, 1, 1), datetime(2020, 1, 28), datetime(2020, 2, 29), datetime(2020, 3, 15)], - "bucket": [datetime(2020, 1, 1), datetime(2020, 1, 1), datetime(2020, 2, 1), datetime(2020, 3, 1)] + "a": [ + datetime(2020, 1, 1), + datetime(2020, 1, 28), + datetime(2020, 2, 29), + datetime(2020, 3, 15), + ], + "bucket": [ + datetime(2020, 1, 1), + datetime(2020, 1, 1), + datetime(2020, 2, 1), + datetime(2020, 3, 1), + ], } def test_view_month_bucket_date_with_null(self): - table = Table({ - "a": [date(2020, 1, 1), None, date(2020, 2, 29), date(2020, 3, 15)], - }) + table = Table( + { + "a": [ + date(2020, 1, 1), + None, + date(2020, 2, 29), + date(2020, 3, 15), + ], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "month_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": date, - "bucket": date - } + assert view.schema() == {"a": date, "bucket": date} assert view.to_columns() == { - "a": [datetime(2020, 1, 1), None, datetime(2020, 2, 29), datetime(2020, 3, 15)], - "bucket": [datetime(2020, 1, 1), None, datetime(2020, 2, 1), datetime(2020, 3, 1)] + "a": [ + datetime(2020, 1, 1), + None, + datetime(2020, 2, 29), + datetime(2020, 3, 15), + ], + "bucket": [ + 
datetime(2020, 1, 1), + None, + datetime(2020, 2, 1), + datetime(2020, 3, 1), + ], } def test_view_month_bucket_datetime(self): - table = Table({ - "a": [datetime(2020, 1, 1), datetime(2020, 1, 28), datetime(2020, 2, 29), datetime(2020, 3, 15)], - }) + table = Table( + { + "a": [ + datetime(2020, 1, 1), + datetime(2020, 1, 28), + datetime(2020, 2, 29), + datetime(2020, 3, 15), + ], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "month_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": datetime, - "bucket": date - } + assert view.schema() == {"a": datetime, "bucket": date} assert view.to_columns() == { - "a": [datetime(2020, 1, 1), datetime(2020, 1, 28), datetime(2020, 2, 29), datetime(2020, 3, 15)], - "bucket": [datetime(2020, 1, 1), datetime(2020, 1, 1), datetime(2020, 2, 1), datetime(2020, 3, 1)] + "a": [ + datetime(2020, 1, 1), + datetime(2020, 1, 28), + datetime(2020, 2, 29), + datetime(2020, 3, 15), + ], + "bucket": [ + datetime(2020, 1, 1), + datetime(2020, 1, 1), + datetime(2020, 2, 1), + datetime(2020, 3, 1), + ], } def test_view_month_bucket_datetime_with_null(self): - table = Table({ - "a": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 15)], - }) + table = Table( + { + "a": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 15)], + } + ) view = table.view( computed_columns=[ { "column": "bucket", "computed_function_name": "month_bucket", - "inputs": ["a"] + "inputs": ["a"], } ] ) - assert view.schema() == { - "a": datetime, - "bucket": date - } + assert view.schema() == {"a": datetime, "bucket": date} assert view.to_columns() == { "a": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 15)], - "bucket": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 1)] + "bucket": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 1)], } From 63ea58bb87e1b69086a19f4995b66eed9c6ef1f3 Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Tue, 20 Oct 2020 15:16:06 -0400 Subject: 
[PATCH 5/5] fix windows build again --- cpp/perspective/src/cpp/context_unit.cpp | 5 +++++ .../src/include/perspective/context_unit.h | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/cpp/perspective/src/cpp/context_unit.cpp b/cpp/perspective/src/cpp/context_unit.cpp index c8894ef7d6..c4fe1f61e5 100644 --- a/cpp/perspective/src/cpp/context_unit.cpp +++ b/cpp/perspective/src/cpp/context_unit.cpp @@ -338,6 +338,11 @@ t_ctxunit::get_column_count() const { return m_config.get_num_columns(); } +std::vector +t_ctxunit::unity_get_row_data(t_uindex idx) const { + return get_data(idx, idx + 1, 0, get_column_count()); +} + std::vector t_ctxunit::unity_get_row_path(t_uindex idx) const { return std::vector(mktscalar(idx)); diff --git a/cpp/perspective/src/include/perspective/context_unit.h b/cpp/perspective/src/include/perspective/context_unit.h index 53042188ab..9c2ec0fa79 100644 --- a/cpp/perspective/src/include/perspective/context_unit.h +++ b/cpp/perspective/src/include/perspective/context_unit.h @@ -20,6 +20,25 @@ namespace perspective { +/** + * @brief A context that does not maintain its own traversal, instead + * reading directly from the underlying master table of the context's + * gnode state. + * + * This context can be created when the table does not have an explicit index + * set, as the order of rows in the master table will be exactly the same as + * the primary key order (using PSP_PKEY). If the table has an explicit index, + * a context's traversal contains the sorted order of primary keys and thus + * the order in which the table is meant to be read. + * + * Additionally, to create a unit context, the context must have no pivots, + * sorts, filters, or computed columns applied. It can have any number of + * columns in any order. See implementations in the binding language to see + * how a unit context is created. + * + * Benchmarking shows a 5-10x improvement in View construction time compared + * to a regular ctx_0. 
+ */ class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { public: t_ctxunit(); @@ -93,6 +112,7 @@ class PERSPECTIVE_EXPORT t_ctxunit : public t_ctxbase { const tsl::hopscotch_set& get_delta_pkeys() const; // Unity api + std::vector unity_get_row_data(t_uindex idx) const; std::vector unity_get_row_path(t_uindex idx) const; std::vector unity_get_column_path(t_uindex idx) const; t_uindex unity_get_row_depth(t_uindex ridx) const;