Skip to content

Commit

Permalink
Storages: Use std::vector<UInt8> in BitmapFilter (#9552)
Browse files Browse the repository at this point in the history
ref #6834

Signed-off-by: Lloyd-Pottiger <[email protected]>

Co-authored-by: Lloyd-Pottiger <[email protected]>
  • Loading branch information
JinheLin and Lloyd-Pottiger authored Oct 28, 2024
1 parent a1a3b9b commit b36199e
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 9 deletions.
14 changes: 7 additions & 7 deletions dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
namespace DB::DM
{
BitmapFilter::BitmapFilter(UInt32 size_, bool default_value)
: filter(size_, default_value)
: filter(size_, static_cast<UInt8>(default_value))
, all_match(default_value)
{}

Expand Down Expand Up @@ -58,7 +58,7 @@ void BitmapFilter::set(std::span<const UInt32> row_ids, const FilterPtr & f)
else
{
RUNTIME_CHECK(row_ids.size() == f->size(), row_ids.size(), f->size());
for (UInt32 i = 0; i < row_ids.size(); i++)
for (UInt32 i = 0; i < row_ids.size(); ++i)
{
filter[row_ids[i]] = (*f)[i];
}
Expand All @@ -68,15 +68,15 @@ void BitmapFilter::set(std::span<const UInt32> row_ids, const FilterPtr & f)
void BitmapFilter::set(UInt32 start, UInt32 limit, bool value)
{
RUNTIME_CHECK(start + limit <= filter.size(), start, limit, filter.size());
std::fill_n(filter.begin() + start, limit, value);
std::fill_n(filter.begin() + start, limit, static_cast<UInt8>(value));
}

bool BitmapFilter::get(IColumn::Filter & f, UInt32 start, UInt32 limit) const
{
RUNTIME_CHECK(start + limit <= filter.size(), start, limit, filter.size());
auto begin = filter.cbegin() + start;
auto end = filter.cbegin() + start + limit;
if (all_match || std::find(begin, end, false) == end)
if (all_match || std::find(begin, end, static_cast<UInt8>(false)) == end)
{
return true;
}
Expand All @@ -93,13 +93,13 @@ void BitmapFilter::rangeAnd(IColumn::Filter & f, UInt32 start, UInt32 limit) con
auto begin = filter.cbegin() + start;
if (!all_match)
{
std::transform(f.begin(), f.end(), begin, f.begin(), [](const UInt8 a, const bool b) { return a != 0 && b; });
std::transform(f.begin(), f.end(), begin, f.begin(), [](const auto a, const auto b) { return a && b; });
}
}

void BitmapFilter::runOptimize()
{
all_match = std::find(filter.begin(), filter.end(), false) == filter.end();
all_match = std::find(filter.begin(), filter.end(), static_cast<UInt8>(false)) == filter.end();
}

String BitmapFilter::toDebugString() const
Expand All @@ -117,6 +117,6 @@ String BitmapFilter::toDebugString() const

size_t BitmapFilter::count() const
{
return std::count(filter.cbegin(), filter.cend(), true);
return std::count(filter.cbegin(), filter.cend(), static_cast<UInt8>(true));
}
} // namespace DB::DM
2 changes: 1 addition & 1 deletion dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class BitmapFilter
private:
void set(std::span<const UInt32> row_ids, const FilterPtr & f);

std::vector<bool> filter;
std::vector<UInt8> filter;
bool all_match;
};

Expand Down
166 changes: 166 additions & 0 deletions dbms/src/Storages/DeltaMerge/BitmapFilter/tests/bench_bitmap.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Core/Defines.h>
#include <benchmark/benchmark.h>

#include <algorithm>
#include <array>
#include <random>
#include <utility>
#include <vector>

namespace DB::bench
{
constexpr size_t TEST_BITMAP_SIZE = 65536;

/// Benchmarks an element-wise logical AND of two bitmaps held in std::vector<T>.
///
/// Both operands contain TEST_BITMAP_SIZE value-initialized elements. On every
/// benchmark iteration the result is written into a freshly declared
/// std::array<T, TEST_BITMAP_SIZE>, which is then handed to DoNotOptimize so the
/// compiler cannot discard the work.
template <typename T>
void bitmapAndStd(benchmark::State & state)
{
    std::vector<T> lhs(TEST_BITMAP_SIZE);
    std::vector<T> rhs(TEST_BITMAP_SIZE);
    const auto logical_and = [](const auto x, const auto y) {
        return x && y;
    };
    for (auto _ : state)
    {
        // Declared inside the loop on purpose: the buffer is part of what is measured.
        std::array<T, TEST_BITMAP_SIZE> result;
        std::transform(lhs.begin(), lhs.end(), rhs.begin(), result.begin(), logical_and);
        benchmark::DoNotOptimize(result);
    }
}

/// AND benchmark instantiated for std::vector<bool>.
static void bitmapAndBool(benchmark::State & state)
{
    bitmapAndStd<bool>(state);
}

/// AND benchmark instantiated for std::vector<UInt8>.
static void bitmapAndUInt8(benchmark::State & state)
{
    bitmapAndStd<UInt8>(state);
}

std::vector<UInt32> genRandomRowIDs(size_t rowid_count, size_t bitmap_size)
{
std::vector<UInt32> row_ids(rowid_count);
std::random_device rd;
std::mt19937 gen(rd());
for (auto & id : row_ids)
{
id = gen() % bitmap_size;
}
return row_ids;
}

/// Benchmarks setting individual bits addressed by row id in a std::vector<T>.
///
/// A fixed list of 45678 random row ids (all below TEST_BITMAP_SIZE) is
/// generated once, outside the timed loop. Each iteration builds a cleared
/// bitmap of TEST_BITMAP_SIZE elements and sets the element at every row id.
template <typename T>
void bitmapSetRowID(benchmark::State & state)
{
    constexpr size_t rowid_count = 45678;
    auto ids = genRandomRowIDs(rowid_count, TEST_BITMAP_SIZE);
    for (auto _ : state)
    {
        std::vector<T> bitmap(TEST_BITMAP_SIZE, static_cast<T>(0));
        for (auto row_id : ids)
            bitmap[row_id] = static_cast<T>(1);
        benchmark::DoNotOptimize(bitmap);
    }
}

/// Set-by-row-id benchmark instantiated for std::vector<bool>.
static void bitmapSetRowIDBool(benchmark::State & state)
{
    bitmapSetRowID<bool>(state);
}

/// Set-by-row-id benchmark instantiated for std::vector<UInt8>.
static void bitmapSetRowIDUInt8(benchmark::State & state)
{
    bitmapSetRowID<UInt8>(state);
}

/// Builds a bitmap of `bitmap_size` elements of type T, initially all cleared,
/// and then sets every element covered by one of `ranges`.
///
/// @param ranges       (start, limit) pairs; `limit` elements are set beginning
///                     at index `start`.
/// @param bitmap_size  number of elements in the returned bitmap.
/// @return the populated bitmap.
///
/// No bounds check is performed: callers must keep start + limit within
/// `bitmap_size` for every range.
template <typename T>
std::vector<T> buildBitmapByRanges(const std::vector<std::pair<size_t, size_t>> & ranges, size_t bitmap_size)
{
    std::vector<T> v(bitmap_size, static_cast<T>(0));
    for (const auto & [start, limit] : ranges)
    {
        // Fill with a value of type T rather than the int literal 1. This matches
        // bitmapSetRowID and keeps std::fill_n instantiated on the element type, so
        // a std::vector<bool>-specific overload can be picked where the standard
        // library provides one instead of a generic per-element loop.
        std::fill_n(v.begin() + start, limit, static_cast<T>(1));
    }
    return v;
}

/// Benchmarks building a bitmap by filling contiguous ranges.
///
/// Each iteration calls buildBitmapByRanges<T> to create a bitmap of
/// TEST_BITMAP_SIZE elements with three ranges of 8192 elements set, starting
/// at offsets 0, 8192 * 2 and 8192 * 5.
template <typename T>
void bitmapSetRange(benchmark::State & state)
{
    constexpr size_t range_len = 8192;
    const std::vector<std::pair<size_t, size_t>> ranges_to_set = {
        {0, range_len},
        {range_len * 2, range_len},
        {range_len * 5, range_len},
    };
    for (auto _ : state)
    {
        auto bitmap = buildBitmapByRanges<T>(ranges_to_set, TEST_BITMAP_SIZE);
        benchmark::DoNotOptimize(bitmap);
    }
}

/// Set-by-range benchmark instantiated for std::vector<bool>.
static void bitmapSetRangeBool(benchmark::State & state)
{
    bitmapSetRange<bool>(state);
}

/// Set-by-range benchmark instantiated for std::vector<UInt8>.
static void bitmapSetRangeUInt8(benchmark::State & state)
{
    bitmapSetRange<UInt8>(state);
}

/// Benchmarks copying contiguous ranges out of a bitmap.
///
/// The source bitmap is built once, outside the timed loop, with three ranges
/// of 8192 elements set. Each iteration copies three 8192-element windows
/// (each shifted by 1234 from the corresponding set range) into the same
/// std::array<T, 8192>, one after another, overwriting the previous copy.
template <typename T>
void bitmapGetRange(benchmark::State & state)
{
    constexpr size_t range_len = 8192;
    constexpr size_t read_shift = 1234;

    const std::vector<std::pair<size_t, size_t>> ranges_to_set = {
        {0, range_len},
        {range_len * 2, range_len},
        {range_len * 5, range_len},
    };
    auto bitmap = buildBitmapByRanges<T>(ranges_to_set, TEST_BITMAP_SIZE);

    const std::vector<std::pair<size_t, size_t>> ranges_to_get = {
        {read_shift, range_len},
        {range_len * 2 + read_shift, range_len},
        {range_len * 5 + read_shift, range_len},
    };
    for (auto _ : state)
    {
        // Sized to hold exactly one window; every window has length range_len.
        std::array<T, range_len> out;
        for (auto [start, limit] : ranges_to_get)
        {
            auto first = bitmap.begin() + start;
            std::copy(first, first + limit, out.begin());
        }
        benchmark::DoNotOptimize(out);
    }
}

/// Get-range benchmark instantiated for std::vector<bool>.
static void bitmapGetRangeBool(benchmark::State & state)
{
    bitmapGetRange<bool>(state);
}

/// Get-range benchmark instantiated for std::vector<UInt8>.
static void bitmapGetRangeUInt8(benchmark::State & state)
{
    bitmapGetRange<UInt8>(state);
}

// Register every benchmark with Google Benchmark. Each operation is registered
// twice, once per element type (bool, then UInt8), so the two representations
// can be compared side by side in the report.
BENCHMARK(bitmapAndBool);
BENCHMARK(bitmapAndUInt8);
BENCHMARK(bitmapSetRowIDBool);
BENCHMARK(bitmapSetRowIDUInt8);
BENCHMARK(bitmapSetRangeBool);
BENCHMARK(bitmapSetRangeUInt8);
BENCHMARK(bitmapGetRangeBool);
BENCHMARK(bitmapGetRangeUInt8);
} // namespace DB::bench
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class VectorIndexBenchUtils
auto index_def = dataset.createIndexDef(Builder::kind());
auto builder = std::make_unique<Builder>(0, index_def);
builder->addBlock(*train_data, nullptr, []() { return true; });
builder->save(index_path);
builder->saveToFile(index_path);
}

template <typename Viewer>
Expand Down

0 comments on commit b36199e

Please sign in to comment.