-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ARROW-1558: [C++] Implement boolean filter (selection) kernel, rename…
… comparison kernel-related functions Materializes an array masked by a selection array (for example one produced by the filter kernel) Author: Benjamin Kietzman <[email protected]> Closes #4366 from bkietz/1558-Implement-boolean-selection-kernels and squashes the following commits: 032d341 <Benjamin Kietzman> fix doc error 3d92b6e <Benjamin Kietzman> Make FilterKernel public e8465e5 <Benjamin Kietzman> iwyu: vector 030ac57 <Benjamin Kietzman> filter benchmarks += MinTime(1.0) nanoseconds 7702055 <Benjamin Kietzman> use expanded bitmap for FixedSizeList and List 060313c <Benjamin Kietzman> refactor FilterImpl<StructType> to own child kernels 24f2e85 <Benjamin Kietzman> add larger benchmarks to test for O(N^2) perf e4d9d85 <Benjamin Kietzman> refactor FilterKernel::Make to use a switch f833e02 <Benjamin Kietzman> add benchmark for fixed_size_list(int64(), 1) f424f34 <Benjamin Kietzman> fix nits and typos 3387f21 <Benjamin Kietzman> use new path for concatenate.h 495e521 <Benjamin Kietzman> Add support for filtering MapArray a8cb993 <Benjamin Kietzman> fix lint error e3b4022 <Benjamin Kietzman> add filter impls for nested types a216388 <Benjamin Kietzman> add explicit qualification for MSVC ccd32a5 <Benjamin Kietzman> add a basic filter benchmark 8a9f379 <Benjamin Kietzman> add a test integrating with arrow::compute::Compare (array-array) 7c50027 <Benjamin Kietzman> add a test integrating with arrow::compute::Compare 6efc4f5 <Benjamin Kietzman> add filter tests with large, random arrays 0f29ab2 <Benjamin Kietzman> rename Mask -> Filter edf2eb1 <Benjamin Kietzman> rename FilterFunction -> CompareFunction 4b24ca3 <Benjamin Kietzman> revert removal of TakeOptions 4c8ce6d <Benjamin Kietzman> revert submodule a54741e <Benjamin Kietzman> add some tests with empty masks/take indices d5c9c14 <Benjamin Kietzman> use checked_cast 223a860 <Benjamin Kietzman> fix typo c953dca <Benjamin Kietzman> remove empty TakeOptions db44424 <Benjamin Kietzman> remove empty MaskOptions 
13a1969 <Benjamin Kietzman> initial mask kernel impl
- Loading branch information
Showing
18 changed files
with
1,644 additions
and
470 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include "benchmark/benchmark.h" | ||
|
||
#include <vector> | ||
|
||
#include "arrow/compute/benchmark-util.h" | ||
#include "arrow/compute/kernel.h" | ||
#include "arrow/compute/kernels/compare.h" | ||
#include "arrow/compute/test-util.h" | ||
#include "arrow/testing/gtest_util.h" | ||
#include "arrow/testing/random.h" | ||
|
||
namespace arrow { | ||
namespace compute { | ||
|
||
// Compile-time constant seed handed to random::RandomArrayGenerator by the | ||
// benchmarks in this file, so each of them builds its input from the same seed. | ||
constexpr auto kSeed = 0x94378165; | ||
|
||
// Benchmarks Compare() with GREATER_EQUAL between a randomly generated Int64 | ||
// array and the scalar int64_t(0). | ||
// | ||
// Benchmark arguments read from `state`: | ||
//   range(0): input size in bytes; divided by sizeof(int64_t) to get the | ||
//             number of array elements. | ||
//   range(1): divided by 100.0 and passed as the last argument of | ||
//             rand.Int64() (presumably the fraction of null slots; confirm | ||
//             against RandomArrayGenerator). | ||
static void CompareArrayScalarKernel(benchmark::State& state) { | ||
const int64_t memory_size = state.range(0); | ||
const int64_t array_size = memory_size / sizeof(int64_t); | ||
const double null_percent = static_cast<double>(state.range(1)) / 100.0; | ||
auto rand = random::RandomArrayGenerator(kSeed); | ||
// Input is built once, outside the timed loop. The -100 / 100 arguments are | ||
// presumably the value bounds of the generated data (TODO confirm). | ||
auto array = std::static_pointer_cast<NumericArray<Int64Type>>( | ||
rand.Int64(array_size, -100, 100, null_percent)); | ||
|
||
CompareOptions ge{GREATER_EQUAL}; | ||
|
||
FunctionContext ctx; | ||
// Timed region: one Compare() call per benchmark iteration, writing into a | ||
// fresh Datum each time. | ||
for (auto _ : state) { | ||
Datum out; | ||
ABORT_NOT_OK(Compare(&ctx, Datum(array), Datum(int64_t(0)), ge, &out)); | ||
// Keep the result observable so the call is not eliminated as dead code. | ||
benchmark::DoNotOptimize(out); | ||
} | ||
|
||
// Report the raw benchmark arguments as user counters. | ||
state.counters["size"] = static_cast<double>(memory_size); | ||
state.counters["null_percent"] = static_cast<double>(state.range(1)); | ||
// Bytes processed counts the int64 values of the single input array, once | ||
// per iteration. | ||
state.SetBytesProcessed(state.iterations() * array_size * sizeof(int64_t)); | ||
} | ||
|
||
// Benchmarks Compare() with GREATER_EQUAL between two randomly generated | ||
// Int64 arrays of equal length. | ||
// | ||
// Benchmark arguments read from `state`: | ||
//   range(0): size in bytes of each input; divided by sizeof(int64_t) to get | ||
//             the number of elements per array. | ||
//   range(1): divided by 100.0 and passed as the last argument of | ||
//             rand.Int64() (presumably the fraction of null slots; confirm | ||
//             against RandomArrayGenerator). | ||
static void CompareArrayArrayKernel(benchmark::State& state) { | ||
const int64_t memory_size = state.range(0); | ||
const int64_t array_size = memory_size / sizeof(int64_t); | ||
const double null_percent = static_cast<double>(state.range(1)) / 100.0; | ||
auto rand = random::RandomArrayGenerator(kSeed); | ||
// Both operands come from the same generator instance, lhs first and then | ||
// rhs, and are built once outside the timed loop. | ||
auto lhs = std::static_pointer_cast<NumericArray<Int64Type>>( | ||
rand.Int64(array_size, -100, 100, null_percent)); | ||
auto rhs = std::static_pointer_cast<NumericArray<Int64Type>>( | ||
rand.Int64(array_size, -100, 100, null_percent)); | ||
|
||
CompareOptions ge(GREATER_EQUAL); | ||
|
||
FunctionContext ctx; | ||
// Timed region: one Compare() call per benchmark iteration, writing into a | ||
// fresh Datum each time. | ||
for (auto _ : state) { | ||
Datum out; | ||
ABORT_NOT_OK(Compare(&ctx, Datum(lhs), Datum(rhs), ge, &out)); | ||
// Keep the result observable so the call is not eliminated as dead code. | ||
benchmark::DoNotOptimize(out); | ||
} | ||
|
||
// Report the raw benchmark arguments as user counters. | ||
state.counters["size"] = static_cast<double>(memory_size); | ||
state.counters["null_percent"] = static_cast<double>(state.range(1)); | ||
// The factor of 2 accounts for the two input arrays consumed per iteration. | ||
state.SetBytesProcessed(state.iterations() * array_size * sizeof(int64_t) * 2); | ||
} | ||
|
||
// Register both benchmarks. RegressionSetArgs is not defined in this file | ||
// (presumably it comes from arrow/compute/benchmark-util.h; verify); it must | ||
// supply the two arguments the benchmark bodies read via state.range(0) and | ||
// state.range(1). | ||
BENCHMARK(CompareArrayScalarKernel)->Apply(RegressionSetArgs); | ||
BENCHMARK(CompareArrayArrayKernel)->Apply(RegressionSetArgs); | ||
|
||
} // namespace compute | ||
} // namespace arrow |
Oops, something went wrong.