Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storages: Use std::vector<UInt8> in BitmapFilter #9552

Merged
merged 6 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
namespace DB::DM
{
BitmapFilter::BitmapFilter(UInt32 size_, bool default_value)
: filter(size_, default_value)
: filter(size_, static_cast<UInt8>(default_value))
, all_match(default_value)
{}

Expand Down Expand Up @@ -58,7 +58,7 @@ void BitmapFilter::set(std::span<const UInt32> row_ids, const FilterPtr & f)
else
{
RUNTIME_CHECK(row_ids.size() == f->size(), row_ids.size(), f->size());
for (UInt32 i = 0; i < row_ids.size(); i++)
for (UInt32 i = 0; i < row_ids.size(); ++i)
{
filter[row_ids[i]] = (*f)[i];
}
Expand All @@ -68,15 +68,15 @@ void BitmapFilter::set(std::span<const UInt32> row_ids, const FilterPtr & f)
void BitmapFilter::set(UInt32 start, UInt32 limit, bool value)
{
RUNTIME_CHECK(start + limit <= filter.size(), start, limit, filter.size());
std::fill_n(filter.begin() + start, limit, value);
std::fill_n(filter.begin() + start, limit, static_cast<UInt8>(value));
}

bool BitmapFilter::get(IColumn::Filter & f, UInt32 start, UInt32 limit) const
{
RUNTIME_CHECK(start + limit <= filter.size(), start, limit, filter.size());
auto begin = filter.cbegin() + start;
auto end = filter.cbegin() + start + limit;
if (all_match || std::find(begin, end, false) == end)
if (all_match || std::find(begin, end, static_cast<UInt8>(false)) == end)
{
return true;
}
Expand All @@ -93,13 +93,13 @@ void BitmapFilter::rangeAnd(IColumn::Filter & f, UInt32 start, UInt32 limit) con
auto begin = filter.cbegin() + start;
if (!all_match)
{
std::transform(f.begin(), f.end(), begin, f.begin(), [](const UInt8 a, const bool b) { return a != 0 && b; });
std::transform(f.begin(), f.end(), begin, f.begin(), [](const auto a, const auto b) { return a && b; });
}
}

void BitmapFilter::runOptimize()
{
all_match = std::find(filter.begin(), filter.end(), false) == filter.end();
all_match = std::find(filter.begin(), filter.end(), static_cast<UInt8>(false)) == filter.end();
}

String BitmapFilter::toDebugString() const
Expand All @@ -117,6 +117,6 @@ String BitmapFilter::toDebugString() const

size_t BitmapFilter::count() const
{
return std::count(filter.cbegin(), filter.cend(), true);
return std::count(filter.cbegin(), filter.cend(), static_cast<UInt8>(true));
}
} // namespace DB::DM
2 changes: 1 addition & 1 deletion dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class BitmapFilter
private:
void set(std::span<const UInt32> row_ids, const FilterPtr & f);

std::vector<bool> filter;
std::vector<UInt8> filter;
bool all_match;
};

Expand Down
166 changes: 166 additions & 0 deletions dbms/src/Storages/DeltaMerge/BitmapFilter/tests/bench_bitmap.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Core/Defines.h>
#include <benchmark/benchmark.h>

#include <algorithm>
#include <array>
#include <random>
#include <stdexcept>
#include <utility>
#include <vector>

namespace DB::bench
{
// Number of elements in every bitmap that the benchmarks in this file build or combine.
constexpr size_t TEST_BITMAP_SIZE = 65536;

// Benchmarks an element-wise logical AND of two std::vector<T> operands.
//
// Both operands hold TEST_BITMAP_SIZE value-initialised elements. Each
// benchmark iteration writes the AND of every element pair into a
// std::array<T, TEST_BITMAP_SIZE> via std::transform.
template <typename T>
void bitmapAndStd(benchmark::State & state)
{
    std::vector<T> lhs(TEST_BITMAP_SIZE);
    std::vector<T> rhs(TEST_BITMAP_SIZE);
    const auto logical_and = [](const auto x, const auto y) {
        return x && y;
    };

    for (auto _ : state)
    {
        // The destination is left uninitialised on purpose: std::transform overwrites every slot.
        std::array<T, TEST_BITMAP_SIZE> result;
        std::transform(lhs.begin(), lhs.end(), rhs.begin(), result.begin(), logical_and);
        // Keep the compiler from discarding the computed result.
        benchmark::DoNotOptimize(result);
    }
}

// Benchmark entry point: runs bitmapAndStd with T = bool, i.e. on std::vector<bool> operands.
static void bitmapAndBool(benchmark::State & state)
{
bitmapAndStd<bool>(state);
}

// Benchmark entry point: runs bitmapAndStd with T = UInt8, i.e. on std::vector<UInt8> operands.
static void bitmapAndUInt8(benchmark::State & state)
{
bitmapAndStd<UInt8>(state);
}

std::vector<UInt32> genRandomRowIDs(size_t rowid_count, size_t bitmap_size)
{
std::vector<UInt32> row_ids(rowid_count);
std::random_device rd;
std::mt19937 gen(rd());
for (auto & id : row_ids)
{
id = gen() % bitmap_size;
}
return row_ids;
}

// Benchmarks setting individual bitmap elements addressed by row id.
//
// A list of 45678 random row ids is generated once, outside the timed loop.
// Each benchmark iteration builds a zero-filled std::vector<T> of
// TEST_BITMAP_SIZE elements and stores 1 at every listed row id.
template <typename T>
void bitmapSetRowID(benchmark::State & state)
{
    constexpr size_t id_count = 45678;
    const auto ids = genRandomRowIDs(id_count, TEST_BITMAP_SIZE);

    for (auto _ : state)
    {
        std::vector<T> bitmap(TEST_BITMAP_SIZE, static_cast<T>(0));
        for (const auto row_id : ids)
            bitmap[row_id] = static_cast<T>(1);
        // Keep the compiler from discarding the populated bitmap.
        benchmark::DoNotOptimize(bitmap);
    }
}

// Benchmark entry point: runs bitmapSetRowID with T = bool, i.e. on a std::vector<bool> bitmap.
static void bitmapSetRowIDBool(benchmark::State & state)
{
bitmapSetRowID<bool>(state);
}

// Benchmark entry point: runs bitmapSetRowID with T = UInt8, i.e. on a std::vector<UInt8> bitmap.
static void bitmapSetRowIDUInt8(benchmark::State & state)
{
bitmapSetRowID<UInt8>(state);
}

// Builds a bitmap of `bitmap_size` elements in which every element covered by
// one of `ranges` is 1 and every other element is 0.
//
// Each entry of `ranges` is a (start, limit) pair: `start` is the index of the
// first element to set and `limit` is the number of consecutive elements to
// set. Ranges may overlap; a range with `limit == 0` sets nothing.
//
// Throws std::out_of_range if any range extends past `bitmap_size`, instead of
// writing beyond the end of the bitmap.
template <typename T>
std::vector<T> buildBitmapByRanges(const std::vector<std::pair<size_t, size_t>> & ranges, size_t bitmap_size)
{
    std::vector<T> v(bitmap_size, static_cast<T>(0));
    for (const auto & [start, limit] : ranges)
    {
        // Written as two comparisons so that `start + limit` cannot wrap around.
        if (start > bitmap_size || limit > bitmap_size - start)
            throw std::out_of_range("buildBitmapByRanges: range exceeds bitmap size");
        // Pass the fill value with the element type rather than as an `int` literal.
        std::fill_n(v.begin() + start, limit, static_cast<T>(1));
    }
    return v;
}

// Benchmarks building a bitmap from contiguous ranges.
//
// Each benchmark iteration calls buildBitmapByRanges<T> to build a bitmap of
// TEST_BITMAP_SIZE elements with three ranges of 8192 elements set, starting
// at offsets 0, 8192 * 2 and 8192 * 5.
template <typename T>
void bitmapSetRange(benchmark::State & state)
{
    constexpr size_t range_len = 8192;
    const std::vector<std::pair<size_t, size_t>> ranges_to_set = {
        {0, range_len},
        {range_len * 2, range_len},
        {range_len * 5, range_len},
    };

    for (auto _ : state)
    {
        auto bitmap = buildBitmapByRanges<T>(ranges_to_set, TEST_BITMAP_SIZE);
        // Keep the compiler from discarding the freshly built bitmap.
        benchmark::DoNotOptimize(bitmap);
    }
}

// Benchmark entry point: runs bitmapSetRange with T = bool, i.e. on a std::vector<bool> bitmap.
static void bitmapSetRangeBool(benchmark::State & state)
{
bitmapSetRange<bool>(state);
}

// Benchmark entry point: runs bitmapSetRange with T = UInt8, i.e. on a std::vector<UInt8> bitmap.
static void bitmapSetRangeUInt8(benchmark::State & state)
{
bitmapSetRange<UInt8>(state);
}

// Benchmarks copying contiguous ranges out of a bitmap.
//
// The bitmap is built once, outside the timed loop, with three ranges of 8192
// elements set. Each benchmark iteration copies three ranges of 8192 elements,
// each starting 1234 elements after the start of a set range, into the same
// std::array<T, 8192>. Every copy starts at the beginning of that array, so
// later copies overwrite earlier ones.
template <typename T>
void bitmapGetRange(benchmark::State & state)
{
    constexpr size_t range_len = 8192;
    constexpr size_t read_shift = 1234;

    const std::vector<std::pair<size_t, size_t>> ranges_to_set = {
        {0, range_len},
        {range_len * 2, range_len},
        {range_len * 5, range_len},
    };
    auto bitmap = buildBitmapByRanges<T>(ranges_to_set, TEST_BITMAP_SIZE);

    const std::vector<std::pair<size_t, size_t>> ranges_to_read = {
        {read_shift, range_len},
        {range_len * 2 + read_shift, range_len},
        {range_len * 5 + read_shift, range_len},
    };

    for (auto _ : state)
    {
        std::array<T, range_len> out;
        for (const auto & [offset, length] : ranges_to_read)
        {
            const auto first = bitmap.begin() + offset;
            std::copy(first, first + length, out.begin());
        }
        // Keep the compiler from discarding the copied data.
        benchmark::DoNotOptimize(out);
    }
}

// Benchmark entry point: runs bitmapGetRange with T = bool, i.e. on a std::vector<bool> bitmap.
static void bitmapGetRangeBool(benchmark::State & state)
{
bitmapGetRange<bool>(state);
}

// Benchmark entry point: runs bitmapGetRange with T = UInt8, i.e. on a std::vector<UInt8> bitmap.
static void bitmapGetRangeUInt8(benchmark::State & state)
{
bitmapGetRange<UInt8>(state);
}

// Register every benchmark entry point with the benchmark framework.
// Each operation is registered as a bool / UInt8 pair so the two element types can be compared.
BENCHMARK(bitmapAndBool);
BENCHMARK(bitmapAndUInt8);
BENCHMARK(bitmapSetRowIDBool);
BENCHMARK(bitmapSetRowIDUInt8);
BENCHMARK(bitmapSetRangeBool);
BENCHMARK(bitmapSetRangeUInt8);
BENCHMARK(bitmapGetRangeBool);
BENCHMARK(bitmapGetRangeUInt8);
} // namespace DB::bench
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class VectorIndexBenchUtils
auto index_def = dataset.createIndexDef(Builder::kind());
auto builder = std::make_unique<Builder>(0, index_def);
builder->addBlock(*train_data, nullptr, []() { return true; });
builder->save(index_path);
builder->saveToFile(index_path);
}

template <typename Viewer>
Expand Down