Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storage: row ID generation for MVCC bitmap filter #6458

Merged
merged 66 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
0ff2ac8
bitmap filter
JinheLin Jul 29, 2022
0e9d3cf
test
JinheLin Nov 21, 2022
aab511e
Fix delta row id.
JinheLin Nov 21, 2022
032f18c
test
JinheLin Nov 22, 2022
3d64091
for test
JinheLin Nov 23, 2022
cac5cd8
format
JinheLin Nov 25, 2022
6115761
format
JinheLin Nov 29, 2022
69e90bc
ci
JinheLin Nov 29, 2022
3496ef4
snapshot
JinheLin Nov 30, 2022
d59c7a8
Clean read.
JinheLin Nov 30, 2022
7e39641
fix
JinheLin Dec 2, 2022
bda0352
fix test
JinheLin Dec 2, 2022
ad315e4
ci
JinheLin Dec 5, 2022
aeff8ae
ci
JinheLin Dec 5, 2022
241c467
ci
JinheLin Dec 5, 2022
47bf40f
test
JinheLin Dec 6, 2022
f323376
test
JinheLin Dec 6, 2022
4c52024
test
JinheLin Dec 7, 2022
517b941
test
JinheLin Dec 7, 2022
d6d5613
ci
JinheLin Dec 7, 2022
9dfc9c1
test
JinheLin Dec 7, 2022
41ac4ca
test
JinheLin Dec 7, 2022
6131946
test
JinheLin Dec 7, 2022
0a47978
ci
JinheLin Dec 14, 2022
e1e4ab6
fix
JinheLin Dec 15, 2022
b911d05
ci
JinheLin Dec 15, 2022
605ff44
optimization
JinheLin Dec 15, 2022
687077b
remove useless code
JinheLin Dec 16, 2022
6184f2d
ci
JinheLin Dec 16, 2022
079ac77
ci
JinheLin Dec 16, 2022
29dd419
ci
JinheLin Dec 16, 2022
920b404
fix
JinheLin Dec 16, 2022
0acfa16
format
JinheLin Dec 16, 2022
97f9e26
filter
JinheLin Dec 19, 2022
4ce09e7
Fix scheduling.
JinheLin Dec 16, 2022
71623ea
ci
JinheLin Dec 20, 2022
e087c64
stable only
JinheLin Dec 21, 2022
9d5d698
ci
JinheLin Dec 30, 2022
7f5e415
ci
JinheLin Dec 30, 2022
4859f34
format
JinheLin Dec 30, 2022
84f44bc
fix
JinheLin Jan 4, 2023
dbf95ba
fix
JinheLin Jan 4, 2023
7d4be45
fix
JinheLin Jan 4, 2023
7c69fd9
test
Jan 9, 2023
a606558
test
JinheLin Jan 11, 2023
5d894bc
ci
JinheLin Jan 11, 2023
cc7a730
Fix test.
JinheLin Jan 12, 2023
91279c6
test
JinheLin Jan 12, 2023
7f7fbf1
ci
JinheLin Jan 12, 2023
090d8a1
fix lint.
JinheLin Jan 12, 2023
dbc1c1a
Resolve conflict.
JinheLin Jan 12, 2023
3b59088
remove useless file.
JinheLin Jan 13, 2023
71375b6
test
JinheLin Jan 13, 2023
322d83e
Remove useless.
JinheLin Jan 17, 2023
d60293c
set res_filter to null when all rows match.
JinheLin Jan 17, 2023
c0124bc
fix
JinheLin Jan 17, 2023
c04fb30
Update dbms/src/Storages/DeltaMerge/RowKeyFilter.h
JinheLin Jan 17, 2023
8483878
fix
JinheLin Jan 17, 2023
7cc7101
remove __func__
JinheLin Jan 17, 2023
bd11cc1
remove useless
JinheLin Jan 29, 2023
525650f
remove snapshot from BitmapFilter.
JinheLin Jan 29, 2023
6546115
add comment.
JinheLin Jan 29, 2023
332bcfe
fix comment.
JinheLin Jan 29, 2023
097d928
add comment.
JinheLin Jan 30, 2023
0ce7c8d
Merge branch 'master' into bitmap-filter-seg-row-id
ti-chi-bot Jan 31, 2023
3dd1d7f
Merge branch 'master' into bitmap-filter-seg-row-id
ti-chi-bot Jan 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ add_headers_and_sources(dbms src/Interpreters/ClusterProxy)
add_headers_and_sources(dbms src/Columns)
add_headers_and_sources(dbms src/Storages)
add_headers_and_sources(dbms src/Storages/DeltaMerge)
add_headers_and_sources(dbms src/Storages/DeltaMerge/BitmapFilter)
add_headers_and_sources(dbms src/Storages/DeltaMerge/Index)
add_headers_and_sources(dbms src/Storages/DeltaMerge/Filter)
add_headers_and_sources(dbms src/Storages/DeltaMerge/FilterParser)
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Common/CurrentMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
M(DT_SnapshotOfDeltaMerge) \
M(DT_SnapshotOfDeltaCompact) \
M(DT_SnapshotOfPlaceIndex) \
M(DT_SnapshotOfBitmapFilter) \
M(IOLimiterPendingBgWriteReq) \
M(IOLimiterPendingFgWriteReq) \
M(IOLimiterPendingBgReadReq) \
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Core/Block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ size_t Block::rows() const
if (elem.column)
return elem.column->size();

return 0;
return segment_row_id_col != nullptr ? segment_row_id_col->size() : 0;
}


Expand Down
15 changes: 13 additions & 2 deletions dbms/src/Core/Block.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ class Block
Container data;
IndexByName index_by_name;

// `start_offset` is the offset of first row in this Block.
// It is used for calculating `segment_row_id`.
UInt64 start_offset = 0;
// `segment_row_id_col` is a virtual column that represents the records' row id in the corresponding segment.
// Only used for calculating MVCC-bitmap-filter.
ColumnPtr segment_row_id_col;

public:
BlockInfo info;

Expand Down Expand Up @@ -110,8 +117,8 @@ class Block
/// Approximate number of allocated bytes in memory - for profiling and limits.
size_t allocatedBytes() const;

explicit operator bool() const { return !data.empty(); }
bool operator!() const { return data.empty(); }
explicit operator bool() const { return !data.empty() || segment_row_id_col != nullptr; }
bool operator!() const { return data.empty() && segment_row_id_col == nullptr; }

/** Get a list of column names separated by commas. */
std::string dumpNames() const;
Expand Down Expand Up @@ -149,6 +156,10 @@ class Block
*/
void updateHash(SipHash & hash) const;

/// Record the offset of this Block's first row; used when calculating `segment_row_id`.
void setStartOffset(UInt64 offset) { start_offset = offset; }
/// Offset of this Block's first row, as last set by `setStartOffset` (0 by default).
UInt64 startOffset() const { return start_offset; }
/// Take ownership of the virtual segment-row-id column.
/// The argument is an rvalue reference, so move it instead of making a ref-counted copy.
void setSegmentRowIdCol(ColumnPtr && col) { segment_row_id_col = std::move(col); }
/// The virtual segment-row-id column, or a null pointer when none has been set.
ColumnPtr segmentRowIdCol() const { return segment_row_id_col; }

private:
void eraseImpl(size_t position);
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Interpreters/Settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ struct Settings
\
M(SettingDouble, dt_page_gc_threshold, 0.5, "Max valid rate of deciding to do a GC in PageStorage") \
M(SettingBool, dt_enable_read_thread, true, "Enable storage read thread or not") \
M(SettingBool, dt_enable_bitmap_filter, true, "Use bitmap filter to read data or not") \
\
M(SettingChecksumAlgorithm, dt_checksum_algorithm, ChecksumAlgo::XXH3, "Checksum algorithm for delta tree stable storage") \
M(SettingCompressionMethod, dt_compression_method, CompressionMethod::LZ4, "The method of data compression when writing.") \
Expand Down
130 changes: 130 additions & 0 deletions dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Storages/DeltaMerge/BitmapFilter/BitmapFilter.h>
#include <Storages/DeltaMerge/DeltaMergeHelpers.h>
#include <Storages/DeltaMerge/Segment.h>

namespace DB::DM
{
// Builds a bitmap of `size_` entries, each initialised to `default_value`, and keeps
// a reference to the given segment snapshot.
// `all_match` starts equal to `default_value`: a bitmap that is entirely true matches
// every row, a bitmap that is entirely false does not.
BitmapFilter::BitmapFilter(UInt32 size_, const SegmentSnapshotPtr & snapshot_, bool default_value)
: filter(size_, default_value)
, snap(snapshot_)
, all_match(default_value)
{}

// Drains `stream` and records every block it yields in the bitmap.
// Each block is read together with its row filter (the second argument of `read`
// is true) and forwarded to the (row-id column, filter) overload of `set`.
void BitmapFilter::set(BlockInputStreamPtr & stream)
{
    stream->readPrefix();
    while (true)
    {
        // Start from a null filter for every block.
        FilterPtr block_filter = nullptr;
        auto block = stream->read(block_filter, /*return_filter*/ true);
        if (!likely(block))
        {
            // A false-y block stops the loop.
            break;
        }
        set(block.segmentRowIdCol(), block_filter);
    }
    stream->readSuffix();
}
Lloyd-Pottiger marked this conversation as resolved.
Show resolved Hide resolved

// Interprets `col` as a column of UInt32 row ids and forwards its contents,
// together with the optional filter `f`, to the raw-pointer overload of `set`.
void BitmapFilter::set(const ColumnPtr & col, const FilterPtr & f)
{
    const auto & row_ids = *toColumnVectorDataPtr<UInt32>(col);
    set(row_ids.data(), row_ids.size(), f);
}

void BitmapFilter::set(const UInt32 * data, UInt32 size, const FilterPtr & f)
{
if (size == 0)
{
return;
}
//size_t max_row_id = *std::max_element(data, data + size);
//RUNTIME_CHECK(max_row_id < filter.size(), max_row_id, filter.size());
JinheLin marked this conversation as resolved.
Show resolved Hide resolved
if (!f)
{
for (UInt32 i = 0; i < size; i++)
{
UInt32 row_id = *(data + i);
filter[row_id] = true;
}
}
else
{
RUNTIME_CHECK(size == f->size(), size, f->size());
for (UInt32 i = 0; i < size; i++)
{
UInt32 row_id = *(data + i);
filter[row_id] = (*f)[i];
}
}
}

// Marks the `limit` consecutive rows starting at row `start` as matching.
// Fails the runtime check when the range does not fit inside the bitmap.
void BitmapFilter::set(UInt32 start, UInt32 limit)
{
    // Widen before adding: a 32-bit `start + limit` can wrap around and slip past the check.
    const UInt64 range_end = static_cast<UInt64>(start) + limit;
    RUNTIME_CHECK(range_end <= filter.size(), start, limit, filter.size());
    const auto first = filter.begin() + start;
    std::fill(first, first + limit, true);
}

// Reports the bitmap values of the `limit` rows starting at row `start`.
// Returns true when every row in the range matches; `f` is left untouched in that case.
// Returns false otherwise, after copying one entry per row into f[0 .. limit).
// Fails the runtime check when the range does not fit inside the bitmap, or when
// `f` is too small to receive `limit` entries.
bool BitmapFilter::get(IColumn::Filter & f, UInt32 start, UInt32 limit) const
{
    // Widen before adding: a 32-bit `start + limit` can wrap around and slip past the check.
    const UInt64 range_end = static_cast<UInt64>(start) + limit;
    RUNTIME_CHECK(range_end <= filter.size(), start, limit, filter.size());

    const auto first = filter.cbegin() + start;
    const auto last = first + limit;
    if (all_match || std::find(first, last, false) == last)
    {
        return true;
    }

    // The copy below writes `limit` entries into `f`; make sure they fit.
    RUNTIME_CHECK(f.size() >= limit, f.size(), limit);
    std::copy(first, last, f.begin());
    return false;
}

// Returns a mutable reference to the segment snapshot pointer stored at construction.
SegmentSnapshotPtr & BitmapFilter::snapshot()
{
return snap;
}

// Recomputes `all_match` by scanning the whole bitmap: it becomes true only
// when no entry is false. `get` uses this flag to skip its per-range scan.
void BitmapFilter::runOptimize()
{
    const auto first_unset = std::find(filter.begin(), filter.end(), false);
    all_match = (first_unset == filter.end());
}
Lloyd-Pottiger marked this conversation as resolved.
Show resolved Hide resolved

// Renders the bitmap as text, one character per row: '1' for a set entry,
// '0' for a cleared one.
String BitmapFilter::toDebugString() const
{
    String s(filter.size(), '1');
    for (size_t i = 0; i < filter.size(); ++i)
    {
        if (!filter[i])
        {
            s[i] = '0';
        }
    }
    // Return the buffer directly; formatting it through "{}" only made an extra copy.
    return s;
}
Lloyd-Pottiger marked this conversation as resolved.
Show resolved Hide resolved

// Number of entries in the bitmap that are currently set to true.
size_t BitmapFilter::count() const
{
    const auto matched_rows = std::count(filter.cbegin(), filter.cend(), true);
    return matched_rows;
}
} // namespace DB::DM
51 changes: 51 additions & 0 deletions dbms/src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <Columns/IColumn.h>
#include <DataStreams/IBlockInputStream.h>

namespace DB::DM
{
struct SegmentSnapshot;
using SegmentSnapshotPtr = std::shared_ptr<SegmentSnapshot>;

// A per-row boolean filter indexed by row id, with a cached "all rows match" flag.
// NOTE(review): none of the `set` overloads update the cached flag; only the
// constructor and `runOptimize()` write it. Callers presumably need to call
// `runOptimize()` after the last `set` and before `get` - confirm against callers.
class BitmapFilter
{
public:
    // Creates `size_` entries, all initialised to `default_value`.
    BitmapFilter(UInt32 size_, const SegmentSnapshotPtr & snapshot_, bool default_value);

    // Reads every block from `stream` and applies its row ids and filter.
    void set(BlockInputStreamPtr & stream);
    // Applies the UInt32 row ids stored in `col`; see the raw-pointer overload for `f`.
    void set(const ColumnPtr & col, const FilterPtr & f);
    // Sets each listed row id to true when `f` is null, otherwise to the matching entry of `f`.
    void set(const UInt32 * data, UInt32 size, const FilterPtr & f);
    // Sets the `limit` rows starting at `start` to true.
    void set(UInt32 start, UInt32 limit);
    // Returns true if every row in the range matches; in that case `f` is not filled.
    // Otherwise copies the `limit` entries starting at `start` into `f` and returns false.
    bool get(IColumn::Filter & f, UInt32 start, UInt32 limit) const;

    SegmentSnapshotPtr & snapshot();

    // Rescans the bitmap and refreshes the cached "all rows match" flag.
    void runOptimize();

    // One character per row: '1' for true, '0' for false.
    String toDebugString() const;
    // Number of entries set to true.
    size_t count() const;

private:
    std::vector<bool> filter;
    SegmentSnapshotPtr snap;
    // Cached result of "every entry of `filter` is true".
    bool all_match;
};

using BitmapFilterPtr = std::shared_ptr<BitmapFilter>;
} // namespace DB::DM
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Storages/DeltaMerge/BitmapFilter/BitmapFilter.h>
#include <Storages/DeltaMerge/BitmapFilter/BitmapFilterBlockInputStream.h>
#include <Storages/DeltaMerge/DeltaMergeHelpers.h>

namespace DB::DM
{
// Stores the stable and delta input streams, their row counts and the bitmap filter
// that `readImpl` applies to the blocks they produce.
// The header is an empty block built from `columns_to_read`; the logger is tagged
// with `req_id_`.
BitmapFilterBlockInputStream::BitmapFilterBlockInputStream(
const ColumnDefines & columns_to_read,
BlockInputStreamPtr stable_,
BlockInputStreamPtr delta_,
size_t stable_rows_,
size_t delta_rows_,
const BitmapFilterPtr & bitmap_filter_,
const String & req_id_)
: header(toEmptyBlock(columns_to_read))
, stable(stable_)
, delta(delta_)
, stable_rows(stable_rows_)
, delta_rows(delta_rows_)
, bitmap_filter(bitmap_filter_)
, log(Logger::get(NAME, req_id_))
{}

// Reads the next block (stable first, then delta) and applies the bitmap filter to it.
//  - Blocks that come from delta have their start offset shifted by `stable_rows`
//    before the bitmap is consulted.
//  - If every row of the block matches, the block is returned unfiltered and
//    `res_filter` is reset to null, so the caller never sees a pointer left over
//    from an earlier call.
//  - Otherwise, with `return_filter` set, `res_filter` points at this stream's
//    internal filter and the block is returned unfiltered; without it, every
//    column of the block is filtered in place.
// A false-y block is returned as is, without touching `res_filter`.
Block BitmapFilterBlockInputStream::readImpl(FilterPtr & res_filter, bool return_filter)
{
    auto [block, from_delta] = readBlock();
    if (block)
    {
        if (from_delta)
        {
            block.setStartOffset(block.startOffset() + stable_rows);
        }

        filter.resize(block.rows());
        bool all_match = bitmap_filter->get(filter, block.startOffset(), block.rows());
        if (!all_match)
        {
            if (return_filter)
            {
                res_filter = &filter;
            }
            else
            {
                for (auto & col : block)
                {
                    col.column = col.column->filter(filter, block.rows());
                }
            }
        }
        else
        {
            // `get` did not fill `filter`; tell the caller that there is nothing to apply.
            res_filter = nullptr;
        }
    }
    return block;
}

// Returns <Block, from_delta>.
// Blocks of the stable stream are handed out first, with from_delta == false.
// Once the stable stream yields a false-y block it is dropped, and this call and
// all later ones read from the delta stream, with from_delta == true.
std::pair<Block, bool> BitmapFilterBlockInputStream::readBlock()
{
    const bool has_delta = (delta != nullptr);

    if (stable == nullptr)
    {
        if (!has_delta)
        {
            return {{}, false};
        }
        return {delta->read(), true};
    }

    auto result = stable->read();
    if (result)
    {
        return {result, false};
    }

    // The stable stream is exhausted: forget it and fall through to delta.
    stable = nullptr;
    if (has_delta)
    {
        result = delta->read();
    }
    return {result, true};
}

} // namespace DB::DM
Loading