diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 3b72b127081..fee5c7578b1 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -143,7 +144,11 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_ auto & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filter); + res_data.reserve(result_size_hint); + } for (size_t i = 0; i < size; ++i) if (filter[i]) diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 72327995403..10122aed4a1 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -87,6 +87,7 @@ class ColumnArray final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; + /// TODO: If result_size_hint < 0, makes reserve() using size of filtered column, not source column to avoid some OOM issues. ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index b2775b46086..34d0336d3f3 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -306,7 +306,11 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ Container & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 1d3070f128b..2cc6781df34 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -205,7 +206,11 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result auto res = ColumnFixedString::create(n); if (result_size_hint) - res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size()); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res->chars.reserve(result_size_hint * n); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + col_size; diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 7b7785940d3..e615847a308 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -213,7 +214,11 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s Container & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); + res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index e2d2a130313..e969dc99842 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -148,9 +148,9 @@ struct ResultOffsetsBuilder : res_offsets(*res_offsets_) {} - void reserve(ssize_t result_size_hint, size_t src_size) + void reserve(size_t result_size_hint) { - res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); + res_offsets.reserve(result_size_hint); } void insertOne(size_t array_size) @@ -191,7 +191,7 @@ struct ResultOffsetsBuilder struct NoResultOffsetsBuilder { explicit NoResultOffsetsBuilder(IColumn::Offsets *) {} - void reserve(ssize_t, size_t) {} + void reserve(size_t) {} void insertOne(size_t) {} template @@ -221,11 +221,12 @@ void filterArraysImplGeneric( if (result_size_hint) { - result_offsets_builder.reserve(result_size_hint, size); - if (result_size_hint < 0) - res_elems.reserve(src_elems.size()); - else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. + result_size_hint = countBytesInFilter(filt); + + result_offsets_builder.reserve(result_size_hint); + + if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size); } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 9a5c2c9dc63..a84a913613d 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -216,7 +216,7 @@ class IColumn : public COWPtr * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; * if 0, then don't makes reserve(), - * otherwise (i.e. < 0), makes reserve() using size of source column. + * otherwise (i.e. < 0), makes reserve() using size of filtered column. */ using Filter = PaddedPODArray; virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0; diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index af93d4c9cd3..9a79fd334d5 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -58,7 +58,6 @@ std::unordered_map> FailPointHelper::f M(exception_during_mpp_root_task_run) \ M(exception_during_write_to_storage) \ M(force_set_sst_to_dtfile_block_size) \ - M(force_set_sst_decode_rand) \ M(exception_before_page_file_write_sync) \ M(force_set_segment_ingest_packs_fail) \ M(segment_merge_after_ingest_packs) \ diff --git a/dbms/src/DataStreams/LimitBlockInputStream.cpp b/dbms/src/DataStreams/LimitBlockInputStream.cpp index 13ed3d25929..c631d457ab3 100644 --- a/dbms/src/DataStreams/LimitBlockInputStream.cpp +++ b/dbms/src/DataStreams/LimitBlockInputStream.cpp @@ -22,9 +22,11 @@ namespace DB LimitBlockInputStream::LimitBlockInputStream( const BlockInputStreamPtr & input, size_t limit_, + size_t offset_, const String & req_id) : log(Logger::get(req_id)) - , action(input->getHeader(), limit_) + , limit(limit_) + , offset(offset_) { children.push_back(input); } @@ -32,20 +34,46 @@ LimitBlockInputStream::LimitBlockInputStream( Block LimitBlockInputStream::readImpl() { - Block res = children.back()->read(); + Block res; + size_t rows = 0; - if (action.transform(res)) + if (pos >= offset + limit) { return res; } - else + + do { - return {}; - } + res = children.back()->read(); + if (!res) + return res; + rows = res.rows(); + pos += rows; + } while (pos <= offset); + + /// give away the whole block + if (pos >= offset + rows && pos <= offset + limit) + return res; + + /// give away a piece of the block + UInt64 start = std::max( + static_cast(0), + static_cast(offset) - static_cast(pos) + static_cast(rows)); + + UInt64 length = std::min( + static_cast(limit), + std::min( + static_cast(pos) - static_cast(offset), + static_cast(limit) + static_cast(offset) - static_cast(pos) + static_cast(rows))); + + for (size_t i = 0; i < res.columns(); ++i) + res.safeGetByPosition(i).column = res.safeGetByPosition(i).column->cut(start, length); + + return res; } void LimitBlockInputStream::appendInfo(FmtBuffer & buffer) const { - buffer.fmtAppend(", limit = {}", action.getLimit()); + buffer.fmtAppend(", limit = {}", limit); } } // namespace DB diff --git a/dbms/src/DataStreams/LimitBlockInputStream.h b/dbms/src/DataStreams/LimitBlockInputStream.h index c749ef30800..61d08ff2223 100644 --- a/dbms/src/DataStreams/LimitBlockInputStream.h +++ b/dbms/src/DataStreams/LimitBlockInputStream.h @@ -34,6 +34,7 @@ class LimitBlockInputStream : public IProfilingBlockInputStream LimitBlockInputStream( const BlockInputStreamPtr & input, size_t limit_, + size_t offset_, const String & req_id); String getName() const override { return NAME; } @@ -46,7 +47,10 @@ class LimitBlockInputStream : public IProfilingBlockInputStream private: LoggerPtr log; - LocalLimitTransformAction action; + size_t limit; + size_t offset; + /// how many lines were read, including the last read block + size_t pos = 0; }; } // namespace DB diff --git a/dbms/src/DataStreams/LimitTransformAction.cpp b/dbms/src/DataStreams/LimitTransformAction.cpp index 1fe4d06e520..c59f22d72df 100644 --- a/dbms/src/DataStreams/LimitTransformAction.cpp +++ b/dbms/src/DataStreams/LimitTransformAction.cpp @@ -33,23 +33,6 @@ void cut(Block & block, size_t rows [[maybe_unused]], size_t limit, size_t pos) } } // namespace -bool LocalLimitTransformAction::transform(Block & block) -{ - if (unlikely(!block)) - return true; - - /// pos - how many lines were read, including the last read block - if (pos >= limit) - return false; - - auto rows = block.rows(); - pos += rows; - if (pos > limit) - cut(block, rows, limit, pos); - // for pos <= limit, give away the whole block - return true; -} - bool GlobalLimitTransformAction::transform(Block & block) { if (unlikely(!block)) diff --git a/dbms/src/DataStreams/LimitTransformAction.h b/dbms/src/DataStreams/LimitTransformAction.h index e158f826c4a..51efe9c416d 100644 --- a/dbms/src/DataStreams/LimitTransformAction.h +++ b/dbms/src/DataStreams/LimitTransformAction.h @@ -20,28 +20,6 @@ namespace DB { -struct LocalLimitTransformAction -{ -public: - LocalLimitTransformAction( - const Block & header_, - size_t limit_) - : header(header_) - , limit(limit_) - { - } - - bool transform(Block & block); - - Block getHeader() const { return header; } - size_t getLimit() const { return limit; } - -private: - const Block header; - const size_t limit; - size_t pos = 0; -}; - struct GlobalLimitTransformAction { public: diff --git a/dbms/src/DataStreams/MarkInCompressedFile.h b/dbms/src/DataStreams/MarkInCompressedFile.h index ee7917dc509..0d1ae8314df 100644 --- a/dbms/src/DataStreams/MarkInCompressedFile.h +++ b/dbms/src/DataStreams/MarkInCompressedFile.h @@ -14,11 +14,11 @@ #pragma once -#include - +#include #include #include -#include + +#include namespace DB @@ -50,28 +50,4 @@ struct MarkInCompressedFile using MarksInCompressedFile = PODArray; using MarksInCompressedFilePtr = std::shared_ptr; - -struct MarkWithSizeInCompressedFile -{ - MarkInCompressedFile mark; - size_t mark_size; - - bool operator==(const MarkWithSizeInCompressedFile & rhs) const - { - return std::tie(mark, mark_size) == std::tie(rhs.mark, rhs.mark_size); - } - bool operator!=(const MarkWithSizeInCompressedFile & rhs) const - { - return !(*this == rhs); - } - - String toString() const - { - return "(" + mark.toString() + "," + DB::toString(mark_size) + ")"; - } -}; - -using MarkWithSizesInCompressedFile = PODArray; -using MarkWithSizesInCompressedFilePtr = std::shared_ptr; - -} +} // namespace DB diff --git a/dbms/src/Debug/MockRaftStoreProxy.cpp b/dbms/src/Debug/MockRaftStoreProxy.cpp index 727b2a2d4c7..17c52d9029e 100644 --- a/dbms/src/Debug/MockRaftStoreProxy.cpp +++ b/dbms/src/Debug/MockRaftStoreProxy.cpp @@ -623,11 +623,6 @@ void MockRaftStoreProxy::snapshot( // The new entry is committed on Proxy's side. region->updateCommitIndex(index); - auto ori_snapshot_apply_method = kvs.snapshot_apply_method; - kvs.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; - SCOPE_EXIT({ - kvs.snapshot_apply_method = ori_snapshot_apply_method; - }); std::vector ssts; for (auto & cf : cfs) { diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 2100f31bb60..c0ab09c73ac 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -46,7 +46,6 @@ namespace DB namespace FailPoints { extern const char force_set_sst_to_dtfile_block_size[]; -extern const char force_set_sst_decode_rand[]; extern const char force_set_safepoint_when_decode_block[]; } // namespace FailPoints @@ -422,7 +421,6 @@ void MockRaftCommand::dbgFuncIngestSST(Context & context, const ASTs & args, DBG auto & kvstore = tmt.getKVStore(); auto region = kvstore->getRegion(region_id); - FailPointHelper::enableFailPoint(FailPoints::force_set_sst_decode_rand); // Register some mock SST reading methods so that we can decode data in `MockSSTReader::MockSSTData` RegionMockTest mock_test(kvstore.get(), region); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 2e0a54a3e4f..9fcc310d1bc 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -712,11 +712,11 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) limit = query_block.limit_or_topn->limit().limit(); else limit = query_block.limit_or_topn->topn().limit(); - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, log->identifier()); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, /*offset*/ 0, log->identifier()); }); if (pipeline.hasMoreThanOneStream()) { executeUnion(pipeline, max_streams, log, false, "for partial limit"); - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, log->identifier()); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, /*offset*/ 0, log->identifier()); }); } } diff --git a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp index 9fc78cfeb1c..feb50a17ada 100644 --- a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp @@ -96,21 +96,20 @@ void StreamingDAGResponseWriter::encodeThenWriteBlocks() if (dag_context.encode_type == tipb::EncodeType::TypeCHBlock) { /// passthrough data to a non-TiFlash node, like sending data to TiSpark - while (!blocks.empty()) + for (auto & block : blocks) { - const auto & block = blocks.back(); chunk_codec_stream->encode(block, 0, block.rows()); - blocks.pop_back(); + block.clear(); response.addChunk(chunk_codec_stream->getString()); chunk_codec_stream->clear(); } + blocks.clear(); } else /// passthrough data to a TiDB node { Int64 current_records_num = 0; - while (!blocks.empty()) + for (auto & block : blocks) { - const auto & block = blocks.back(); size_t rows = block.rows(); for (size_t row_index = 0; row_index < rows;) { @@ -125,8 +124,9 @@ void StreamingDAGResponseWriter::encodeThenWriteBlocks() current_records_num += (upper - row_index); row_index = upper; } - blocks.pop_back(); + block.clear(); } + blocks.clear(); if (current_records_num > 0) { diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp index 5fe4a7d8c80..b911ca5855e 100644 --- a/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp +++ b/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp @@ -26,11 +26,13 @@ #include #include #include +#include #include #include #include #include +#include namespace DB @@ -247,7 +249,15 @@ struct MockReceiverContext grpc::CompletionQueue *, UnaryCallback *) const {} - void establishMPPConnectionLocal(const MockReceiverContext::Request &, size_t, LocalRequestHandler &, bool) {} + static ExchangePacketReaderPtr makeReader(const Request &) { return nullptr; } + + static std::tuple establishMPPConnectionLocalV1(const ::mpp::EstablishMPPConnectionRequest *, const std::shared_ptr &) + { + // Useless, just for compilation + return std::make_pair(MPPTunnelPtr(), grpc::Status::CANCELLED); + } + + void establishMPPConnectionLocalV2(const Request &, size_t, LocalRequestHandler &, bool) {} PacketQueuePtr queue; std::vector field_types{}; @@ -440,7 +450,8 @@ class TestTiRemoteBlockInputStream : public testing::Test 1, "mock_req_id", "mock_exchange_receiver_id", - 0); + 0, + 2); auto receiver_stream = std::make_shared( receiver, "mock_req_id", diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index 7d1cfd7c620..6e254bd62dc 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -507,7 +507,7 @@ void ExchangeReceiverBase::setUpConnection() } template -void ExchangeReceiverBase::setUpConnectionWithReadLoop(ExchangeRecvRequest && req) +void ExchangeReceiverBase::setUpConnectionWithReadLoop(Request && req) { thread_manager->schedule(true, "Receiver", [this, req = std::move(req)] { if (enable_fine_grained_shuffle_flag) diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.h b/dbms/src/Flash/Mpp/ExchangeReceiver.h index 7e8f243d29d..8cd72e1c4cb 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.h +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.h @@ -165,7 +165,7 @@ class ExchangeReceiverBase void connectionLocalDone(); void handleConnectionAfterException(); - void setUpConnectionWithReadLoop(ExchangeRecvRequest && req); + void setUpConnectionWithReadLoop(Request && req); bool isReceiverForTiFlashStorage() { diff --git a/dbms/src/Flash/Mpp/FineGrainedShuffleWriter.cpp b/dbms/src/Flash/Mpp/FineGrainedShuffleWriter.cpp index b04851f8e30..f24d59942fd 100644 --- a/dbms/src/Flash/Mpp/FineGrainedShuffleWriter.cpp +++ b/dbms/src/Flash/Mpp/FineGrainedShuffleWriter.cpp @@ -143,17 +143,17 @@ void FineGrainedShuffleWriter::batchWriteFineGrainedShuffleIm assert(fine_grained_shuffle_stream_count <= 1024); HashBaseWriterHelper::materializeBlocks(blocks); - while (!blocks.empty()) + for (auto & block : blocks) { - const auto & block = blocks.back(); if constexpr (version != MPPDataPacketV0) { // check schema assertBlockSchema(expected_types, block, FineGrainedShuffleWriterLabels[MPPDataPacketV1]); } HashBaseWriterHelper::scatterColumnsForFineGrainedShuffle(block, partition_col_ids, collators, partition_key_containers_for_reuse, partition_num, fine_grained_shuffle_stream_count, hash, selector, scattered); - blocks.pop_back(); + block.clear(); } + blocks.clear(); // serialize each partitioned block and write it to its destination size_t part_id = 0; diff --git a/dbms/src/Flash/Mpp/GRPCReceiverContext.cpp b/dbms/src/Flash/Mpp/GRPCReceiverContext.cpp index 6fc95e7b164..985180a9ae1 100644 --- a/dbms/src/Flash/Mpp/GRPCReceiverContext.cpp +++ b/dbms/src/Flash/Mpp/GRPCReceiverContext.cpp @@ -285,13 +285,13 @@ void GRPCReceiverContext::establishMPPConnectionLocalV2( auto [tunnel, err_msg] = task_manager->findTunnelWithTimeout(request.req.get(), std::chrono::seconds(10)); checkLocalTunnel(tunnel, err_msg); - tunnel->connectLocal(source_index, local_request_handler, is_fine_grained); + tunnel->connectLocalV2(source_index, local_request_handler, is_fine_grained); } // TODO remove it in the future std::tuple GRPCReceiverContext::establishMPPConnectionLocalV1( const ::mpp::EstablishMPPConnectionRequest * request, - const std::shared_ptr & task_manager) const + const std::shared_ptr & task_manager) { std::chrono::seconds timeout(10); auto [tunnel, err_msg] = task_manager->findTunnelWithTimeout(request, timeout); @@ -303,7 +303,7 @@ std::tuple GRPCReceiverContext::establishMPPConnecti { return std::make_tuple(nullptr, grpc::Status(grpc::StatusCode::INTERNAL, "EstablishMPPConnectionLocal into a remote channel!")); } - tunnel->connectUnrefinedLocal(nullptr); + tunnel->connectLocalV1(nullptr); return std::make_tuple(tunnel, grpc::Status::OK); } diff --git a/dbms/src/Flash/Mpp/GRPCReceiverContext.h b/dbms/src/Flash/Mpp/GRPCReceiverContext.h index 6cb45f7c118..28a187b614d 100644 --- a/dbms/src/Flash/Mpp/GRPCReceiverContext.h +++ b/dbms/src/Flash/Mpp/GRPCReceiverContext.h @@ -105,7 +105,7 @@ class GRPCReceiverContext void establishMPPConnectionLocalV2(const ExchangeRecvRequest & request, size_t source_index, LocalRequestHandler & local_request_handler, bool is_fine_grained); - std::tuple establishMPPConnectionLocalV1(const ::mpp::EstablishMPPConnectionRequest * request, const std::shared_ptr & task_manager) const; + static std::tuple establishMPPConnectionLocalV1(const ::mpp::EstablishMPPConnectionRequest * request, const std::shared_ptr & task_manager); // Only for tiflash_compute mode, make sure disaggregated_dispatch_reqs is not empty. void sendMPPTaskToTiFlashStorageNode( diff --git a/dbms/src/Flash/Mpp/HashPartitionWriter.cpp b/dbms/src/Flash/Mpp/HashPartitionWriter.cpp index 18e87bdc1c2..a2f5c9c1c2d 100644 --- a/dbms/src/Flash/Mpp/HashPartitionWriter.cpp +++ b/dbms/src/Flash/Mpp/HashPartitionWriter.cpp @@ -154,16 +154,15 @@ void HashPartitionWriter::partitionAndWriteBlocksV1() std::vector> dest_columns(partition_num); size_t total_rows = 0; - while (!blocks.empty()) + for (auto & block : blocks) { - const auto & block = blocks.back(); { // check schema assertBlockSchema(expected_types, block, HashPartitionWriterLabels[MPPDataPacketV1]); } auto && dest_tbl_cols = HashBaseWriterHelper::createDestColumns(block, partition_num); HashBaseWriterHelper::scatterColumns(block, partition_col_ids, collators, partition_key_containers, partition_num, dest_tbl_cols); - blocks.pop_back(); + block.clear(); for (size_t part_id = 0; part_id < partition_num; ++part_id) { @@ -175,6 +174,7 @@ void HashPartitionWriter::partitionAndWriteBlocksV1() dest_columns[part_id].emplace_back(std::move(columns)); } } + blocks.clear(); RUNTIME_CHECK(rows_in_blocks, total_rows); for (size_t part_id = 0; part_id < partition_num; ++part_id) diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 1db0b031907..5a56f47368e 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -204,14 +204,14 @@ void MPPTunnel::connectSync(PacketWriter * writer) LOG_DEBUG(log, "Sync tunnel connected"); } -void MPPTunnel::connectLocal(size_t source_index, LocalRequestHandler & local_request_handler, bool is_fine_grained) +void MPPTunnel::connectLocalV2(size_t source_index, LocalRequestHandler & local_request_handler, bool is_fine_grained) { { std::unique_lock lk(mu); RUNTIME_CHECK_MSG(status == TunnelStatus::Unconnected, fmt::format("MPPTunnel has connected or finished: {}", statusToString())); RUNTIME_CHECK_MSG(mode == TunnelSenderMode::LOCAL, "This should be a local tunnel"); - LOG_TRACE(log, "ready to connect local"); + LOG_TRACE(log, "ready to connect local tunnel version 2"); if (is_fine_grained) { local_tunnel_fine_grained_sender_v2 = std::make_shared>(source_index, local_request_handler, log, mem_tracker, tunnel_id); @@ -226,7 +226,7 @@ void MPPTunnel::connectLocal(size_t source_index, LocalRequestHandler & local_re status = TunnelStatus::Connected; cv_for_status_changed.notify_all(); } - LOG_DEBUG(log, "Local tunnel connected"); + LOG_DEBUG(log, "Local tunnel version 2 is connected"); } void MPPTunnel::connectAsync(IAsyncCallData * call_data) @@ -394,14 +394,14 @@ void SyncTunnelSender::startSendThread(PacketWriter * writer) } // TODO remove it in the future -void MPPTunnel::connectUnrefinedLocal(PacketWriter * writer) +void MPPTunnel::connectLocalV1(PacketWriter * writer) { { std::unique_lock lk(mu); if (status != TunnelStatus::Unconnected) throw Exception(fmt::format("MPPTunnel has connected or finished: {}", statusToString())); - LOG_TRACE(log, "ready to connect"); + LOG_TRACE(log, "ready to connect local tunnel version 1"); RUNTIME_ASSERT(writer == nullptr, log); local_tunnel_sender_v1 = std::make_shared(queue_size, mem_tracker, log, tunnel_id, &data_size_in_queue); @@ -410,7 +410,7 @@ void MPPTunnel::connectUnrefinedLocal(PacketWriter * writer) status = TunnelStatus::Connected; cv_for_status_changed.notify_all(); } - LOG_DEBUG(log, "connected"); + LOG_DEBUG(log, "Local tunnel version 1 is connected"); } std::shared_ptr LocalTunnelSenderV1::readForLocal() diff --git a/dbms/src/Flash/Mpp/MPPTunnel.h b/dbms/src/Flash/Mpp/MPPTunnel.h index cfa2c8aae5b..9fb63620e45 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.h +++ b/dbms/src/Flash/Mpp/MPPTunnel.h @@ -439,12 +439,12 @@ class MPPTunnel : private boost::noncopyable // a MPPConn request has arrived. it will build connection by this tunnel; void connectSync(PacketWriter * writer); - void connectLocal(size_t source_index, LocalRequestHandler & local_request_handler, bool is_fine_grained); + void connectLocalV2(size_t source_index, LocalRequestHandler & local_request_handler, bool is_fine_grained); // like `connect` but it's intended to connect async grpc. void connectAsync(IAsyncCallData * data); - void connectUnrefinedLocal(PacketWriter * writer); + void connectLocalV1(PacketWriter * writer); // wait until all the data has been transferred. void waitForFinish(); diff --git a/dbms/src/Flash/Mpp/MPPTunnelSetHelper.cpp b/dbms/src/Flash/Mpp/MPPTunnelSetHelper.cpp index eb65327a7f8..f1bd57a8d4b 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSetHelper.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSetHelper.cpp @@ -50,14 +50,14 @@ TrackedMppDataPacketPtr ToPacketV0(Blocks & blocks, const std::vector(MPPDataPacketV0); - while (!blocks.empty()) + for (auto & block : blocks) { - const auto & block = blocks.back(); codec_stream->encode(block, 0, block.rows()); - blocks.pop_back(); + block.clear(); tracked_packet->addChunk(codec_stream->getString()); codec_stream->clear(); } + blocks.clear(); return tracked_packet; } diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp index 1bde409bc9f..5fa98e93a26 100644 --- a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp +++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp @@ -199,7 +199,7 @@ class MockExchangeReceiver }, []() {}, ReceiverChannelWriter(&msg_channels, "", log, &data_size_in_queue, ReceiverMode::Local)); - tunnel->connectLocal(0, local_request_handler, false); + tunnel->connectLocalV2(0, local_request_handler, false); } } @@ -655,7 +655,7 @@ try []() {}, []() {}, ReceiverChannelWriter(nullptr, "", Logger::get(), nullptr, ReceiverMode::Local)); - tunnels[0]->connectLocal(0, local_req_handler, false); + tunnels[0]->connectLocalV2(0, local_req_handler, false); GTEST_FAIL(); } catch (Exception & e) @@ -674,7 +674,7 @@ try []() {}, []() {}, ReceiverChannelWriter(nullptr, "", Logger::get(), nullptr, ReceiverMode::Local)); - tunnels[0]->connectLocal(0, local_req_handler, false); + tunnels[0]->connectLocalV2(0, local_req_handler, false); GTEST_FAIL(); } catch (Exception & e) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalLimit.cpp b/dbms/src/Flash/Planner/Plans/PhysicalLimit.cpp index 0314d79ea3f..0c0edd0e975 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalLimit.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalLimit.cpp @@ -45,11 +45,11 @@ void PhysicalLimit::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & { child->buildBlockInputStream(pipeline, context, max_streams); - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, log->identifier()); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, /*offset*/ 0, log->identifier()); }); if (pipeline.hasMoreThanOneStream()) { executeUnion(pipeline, max_streams, log, false, "for partial limit"); - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, log->identifier()); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, /*offset*/ 0, log->identifier()); }); } } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index d3a5ccbfd4d..1503fc9c088 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,8 @@ struct ContextShared Context::ConfigReloadCallback config_reload_callback; + std::shared_ptr shared_block_schemas; + explicit ContextShared(std::shared_ptr runtime_components_factory_) : runtime_components_factory(std::move(runtime_components_factory_)) , storage_run_mode(PageStorageRunMode::ONLY_V3) @@ -1843,6 +1846,16 @@ SharedQueriesPtr Context::getSharedQueries() return shared->shared_queries; } +const std::shared_ptr & Context::getSharedBlockSchemas() const +{ + return shared->shared_block_schemas; +} + +void Context::initializeSharedBlockSchemas() +{ + shared->shared_block_schemas = std::make_shared(*this); +} + size_t Context::getMaxStreams() const { size_t max_streams = settings.max_threads; diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 63c722dbc12..08a678de2e4 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -109,6 +109,7 @@ namespace DM class MinMaxIndexCache; class DeltaIndexManager; class GlobalStoragePool; +class SharedBlockSchemas; using GlobalStoragePoolPtr = std::shared_ptr; } // namespace DM @@ -178,7 +179,6 @@ class Context DAGContext * dag_context = nullptr; using DatabasePtr = std::shared_ptr; using Databases = std::map>; - /// Use copy constructor or createGlobal() instead Context(); @@ -511,6 +511,9 @@ class Context return disaggregated_mode == DisaggregatedMode::Storage; } + const std::shared_ptr & getSharedBlockSchemas() const; + void initializeSharedBlockSchemas(); + // todo: remove after AutoScaler is stable. void setUseAutoScaler(bool use) { diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 35d71695914..474e979ac4f 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1196,10 +1196,10 @@ void InterpreterSelectQuery::executePreLimit(Pipeline & pipeline) getLimitLengthAndOffset(query, limit_length, limit_offset); /// If there is LIMIT - if (query.limit_length) + if (limit_length) { pipeline.transform([&](auto & stream) { - stream = std::make_shared(stream, limit_length + limit_offset, /*req_id=*/""); + stream = std::make_shared(stream, limit_length + limit_offset, /* offset */ 0, /*req_id=*/""); }); } } @@ -1237,10 +1237,11 @@ void InterpreterSelectQuery::executeLimit(Pipeline & pipeline) getLimitLengthAndOffset(query, limit_length, limit_offset); /// If there is LIMIT - if (query.limit_length) + if (limit_length) { + RUNTIME_CHECK_MSG(pipeline.streams.size() == 1, "Cannot executeLimit with multiple streams"); pipeline.transform([&](auto & stream) { - stream = std::make_shared(stream, limit_length, /*req_id=*/""); + stream = std::make_shared(stream, limit_length, limit_offset, /*req_id=*/""); }); } } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 13e71e81a20..6df5a5b578b 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -219,7 +219,6 @@ struct Settings M(SettingBool, dt_enable_relevant_place, false, "Enable relevant place or not in DeltaTree Engine.") \ M(SettingBool, dt_enable_skippable_place, true, "Enable skippable place or not in DeltaTree Engine.") \ M(SettingBool, dt_enable_stable_column_cache, true, "Enable column cache for StorageDeltaMerge.") \ - M(SettingBool, dt_enable_single_file_mode_dmfile, false, "Enable write DMFile in single file mode.") \ M(SettingUInt64, dt_open_file_max_idle_seconds, 15, "Max idle time of opening files, 0 means infinite.") \ M(SettingUInt64, dt_page_num_max_expect_legacy_files, 100, "Max number of legacy file expected") \ M(SettingFloat, dt_page_num_max_gc_valid_rate, 1.0, "Max valid rate of deciding a page file can be compact when exising legacy files are more over than " \ diff --git a/dbms/src/Server/DTTool/DTToolBench.cpp b/dbms/src/Server/DTTool/DTToolBench.cpp index 1bec02786ae..130e231010f 100644 --- a/dbms/src/Server/DTTool/DTToolBench.cpp +++ b/dbms/src/Server/DTTool/DTToolBench.cpp @@ -356,7 +356,7 @@ int benchEntry(const std::vector & opts) for (size_t i = 0; i < repeat; ++i) { using namespace std::chrono; - dmfile = DB::DM::DMFile::create(1, workdir, false, opt); + dmfile = DB::DM::DMFile::create(1, workdir, opt); auto start = high_resolution_clock::now(); { auto stream = DB::DM::DMFileBlockOutputStream(*db_context, dmfile, *defines); diff --git a/dbms/src/Server/DTTool/DTToolInspect.cpp b/dbms/src/Server/DTTool/DTToolInspect.cpp index c74c0c2994d..d8447a61ede 100644 --- a/dbms/src/Server/DTTool/DTToolInspect.cpp +++ b/dbms/src/Server/DTTool/DTToolInspect.cpp @@ -47,7 +47,6 @@ int inspectServiceMain(DB::Context & context, const InspectArgs & args) auto dmfile = DB::DM::DMFile::restore(fp, args.file_id, 0, args.workdir, DB::DM::DMFile::ReadMetaMode::all()); LOG_INFO(logger, "bytes on disk: {}", dmfile->getBytesOnDisk()); - LOG_INFO(logger, "single file: {}", dmfile->isSingleFileMode()); // if the DMFile has a config file, there may be additional debugging information // we also log the content of dmfile checksum config @@ -81,43 +80,38 @@ int inspectServiceMain(DB::Context & context, const InspectArgs & args) if (args.check) { // for directory mode file, we can consume each file to check its integrity. - if (!dmfile->isSingleFileMode()) + auto prefix = fmt::format("{}/dmf_{}", args.workdir, args.file_id); + auto file = Poco::File{prefix}; + std::vector sub; + file.list(sub); + for (auto & i : sub) { - auto prefix = args.workdir + "/dmf_" + DB::toString(args.file_id); - auto file = Poco::File{prefix}; - std::vector sub; - file.list(sub); - for (auto & i : sub) + if (endsWith(i, ".mrk") || endsWith(i, ".dat") || endsWith(i, ".idx") || i == "pack") { - if (endsWith(i, ".mrk") || endsWith(i, ".dat") || endsWith(i, ".idx") || i == "pack") + auto full_path = fmt::format("{}/{}", prefix, i); + LOG_INFO(logger, "checking full_path is {}: ", full_path); + if (dmfile->getConfiguration()) { - auto full_path = prefix; - full_path += "/"; - full_path += i; - LOG_INFO(logger, "checking {}: ", i); - if (dmfile->getConfiguration()) - { - consume(*DB::createReadBufferFromFileBaseByFileProvider( - fp, - full_path, - DB::EncryptionPath(full_path, i), - dmfile->getConfiguration()->getChecksumFrameLength(), - nullptr, - dmfile->getConfiguration()->getChecksumAlgorithm(), - dmfile->getConfiguration()->getChecksumFrameLength())); - } - else - { - consume(*DB::createReadBufferFromFileBaseByFileProvider( - fp, - full_path, - DB::EncryptionPath(full_path, i), - DBMS_DEFAULT_BUFFER_SIZE, - 0, - nullptr)); - } - LOG_INFO(logger, "[success]"); + consume(*DB::createReadBufferFromFileBaseByFileProvider( + fp, + full_path, + DB::EncryptionPath(full_path, i), + dmfile->getConfiguration()->getChecksumFrameLength(), + nullptr, + dmfile->getConfiguration()->getChecksumAlgorithm(), + dmfile->getConfiguration()->getChecksumFrameLength())); } + else + { + consume(*DB::createReadBufferFromFileBaseByFileProvider( + fp, + full_path, + DB::EncryptionPath(full_path, i), + DBMS_DEFAULT_BUFFER_SIZE, + 0, + nullptr)); + } + LOG_INFO(logger, "[success]"); } } // for both directory file and single mode file, we can read out all blocks from the file. diff --git a/dbms/src/Server/DTTool/DTToolMigrate.cpp b/dbms/src/Server/DTTool/DTToolMigrate.cpp index 77a628275ef..fabcabaca1f 100644 --- a/dbms/src/Server/DTTool/DTToolMigrate.cpp +++ b/dbms/src/Server/DTTool/DTToolMigrate.cpp @@ -209,7 +209,7 @@ int migrateServiceMain(DB::Context & context, const MigrateArgs & args) } LOG_INFO(logger, "creating new dtfile"); - auto new_file = DB::DM::DMFile::create(args.file_id, keeper.migration_temp_dir.path(), false, std::move(option)); + auto new_file = DB::DM::DMFile::create(args.file_id, keeper.migration_temp_dir.path(), std::move(option)); LOG_INFO(logger, "creating input stream"); auto input_stream = DB::DM::createSimpleBlockInputStream(context, src_file); diff --git a/dbms/src/Server/RaftConfigParser.cpp b/dbms/src/Server/RaftConfigParser.cpp index b8cfe1ab3bf..4ebade882fc 100644 --- a/dbms/src/Server/RaftConfigParser.cpp +++ b/dbms/src/Server/RaftConfigParser.cpp @@ -88,40 +88,7 @@ TiFlashRaftConfig TiFlashRaftConfig::parseSettings(Poco::Util::AbstractConfigura res.enable_compatible_mode = config.getBool("raft.enable_compatible_mode"); } - if (config.has("raft.snapshot.method")) - { - String snapshot_method = config.getString("raft.snapshot.method"); - std::transform(snapshot_method.begin(), snapshot_method.end(), snapshot_method.begin(), [](char ch) { return std::tolower(ch); }); - if (snapshot_method == "file1") - { - res.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; - } -#if 0 - // Not generally available for this file format - else if (snapshot_method == "file2") - { - res.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; - } -#endif - } - switch (res.snapshot_apply_method) - { - case TiDB::SnapshotApplyMethod::DTFile_Directory: - case TiDB::SnapshotApplyMethod::DTFile_Single: - if (res.engine != TiDB::StorageEngine::DT) - { - throw Exception( - fmt::format("Illegal arguments: can not use DTFile to store snapshot data when the storage engine is not DeltaTree, [engine={}] [snapshot method={}]", - static_cast(res.engine), - applyMethodToString(res.snapshot_apply_method)), - ErrorCodes::INVALID_CONFIG_PARAMETER); - } - break; - default: - break; - } - - LOG_INFO(log, "Default storage engine [type={}] [snapshot.method={}]", static_cast(res.engine), applyMethodToString(res.snapshot_apply_method)); + LOG_INFO(log, "Default storage engine [type={}]", static_cast(res.engine)); return res; } diff --git a/dbms/src/Server/RaftConfigParser.h b/dbms/src/Server/RaftConfigParser.h index 604a2476c44..34900af7e66 100644 --- a/dbms/src/Server/RaftConfigParser.h +++ b/dbms/src/Server/RaftConfigParser.h @@ -47,7 +47,6 @@ struct TiFlashRaftConfig static constexpr TiDB::StorageEngine DEFAULT_ENGINE = TiDB::StorageEngine::DT; TiDB::StorageEngine engine = DEFAULT_ENGINE; - TiDB::SnapshotApplyMethod snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; public: TiFlashRaftConfig() = default; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 4dac558477b..30c3e522eba 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -1200,6 +1201,8 @@ int Server::main(const std::vector & /*args*/) DM::SegmentReaderPoolManager::instance().init(server_info); DM::SegmentReadTaskScheduler::instance(); + global_context->initializeSharedBlockSchemas(); + { // Note that this must do before initialize schema sync service. do diff --git a/dbms/src/Server/tests/gtest_dttool.cpp b/dbms/src/Server/tests/gtest_dttool.cpp index cfe6a0de071..5ce2cd98686 100644 --- a/dbms/src/Server/tests/gtest_dttool.cpp +++ b/dbms/src/Server/tests/gtest_dttool.cpp @@ -85,7 +85,7 @@ struct DTToolTest : public DB::base::TiFlashStorageTestBasic db_context->getSettingsRef()); // Write { - dmfile = DB::DM::DMFile::create(1, getTemporaryPath(), false, std::nullopt); + dmfile = DB::DM::DMFile::create(1, getTemporaryPath(), std::nullopt); { auto stream = DB::DM::DMFileBlockOutputStream(*db_context, dmfile, *defines); stream.writePrefix(); diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h index cb8f007756e..47f7a52d841 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h @@ -133,7 +133,7 @@ class ColumnFile /// been persisted in the disk and their data will be immutable. virtual bool isAppendable() const { return false; } virtual void disableAppend() {} - virtual bool append(DMContext & /*dm_context*/, const Block & /*data*/, size_t /*offset*/, size_t /*limit*/, size_t /*data_bytes*/) + virtual bool append(const DMContext & /*dm_context*/, const Block & /*data*/, size_t /*offset*/, size_t /*limit*/, size_t /*data_bytes*/) { throw Exception("Unsupported operation", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 9cfbafb1ad4..7295e57b038 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -62,7 +62,7 @@ void ColumnFileBig::serializeMetadata(WriteBuffer & buf, bool /*save_schema*/) c writeIntBinary(valid_bytes, buf); } -ColumnFilePersistedPtr ColumnFileBig::deserializeMetadata(DMContext & context, // +ColumnFilePersistedPtr ColumnFileBig::deserializeMetadata(const DMContext & context, // const RowKeyRange & segment_range, ReadBuffer & buf) { diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h index 2ac97ee8b55..c2187f96f39 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.h @@ -85,7 +85,7 @@ class ColumnFileBig : public ColumnFilePersisted void serializeMetadata(WriteBuffer & buf, bool save_schema) const override; - static ColumnFilePersistedPtr deserializeMetadata(DMContext & context, // + static ColumnFilePersistedPtr deserializeMetadata(const DMContext & context, // const RowKeyRange & segment_range, ReadBuffer & buf); diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.cpp index fff0d964f42..aecce09c9af 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.cpp @@ -32,6 +32,7 @@ void ColumnFileInMemory::fillColumns(const ColumnDefines & col_defs, size_t col_ Columns read_cols; std::scoped_lock lock(cache->mutex); + const auto & colid_to_offset = schema->getColIdToOffset(); for (size_t i = col_start; i < col_end; ++i) { const auto & cd = col_defs[i]; @@ -61,7 +62,7 @@ ColumnFileInMemory::getReader(const DMContext & /*context*/, const StorageSnapsh return std::make_shared(*this, col_defs); } -bool ColumnFileInMemory::append(DMContext & context, const Block & data, size_t offset, size_t limit, size_t data_bytes) +bool ColumnFileInMemory::append(const DMContext & context, const Block & data, size_t offset, size_t limit, size_t data_bytes) { if (disable_append) return false; diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.h index 74c408efdb2..9440d2c9d73 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileInMemory.h @@ -15,6 +15,7 @@ #pragma once #include +#include namespace DB { @@ -29,7 +30,7 @@ class ColumnFileInMemory : public ColumnFile friend class ColumnFileInMemoryReader; private: - BlockPtr schema; + ColumnFileSchemaPtr schema; UInt64 rows = 0; UInt64 bytes = 0; @@ -39,28 +40,20 @@ class ColumnFileInMemory : public ColumnFile // The cache data in memory. CachePtr cache; - // Used to map column id to column instance in a Block. - ColIdToOffset colid_to_offset; private: void fillColumns(const ColumnDefines & col_defs, size_t col_count, Columns & result) const; const DataTypePtr & getDataType(ColId column_id) const { - // Note that column_id must exist - auto index = colid_to_offset.at(column_id); - return schema->getByPosition(index).type; + return schema->getDataType(column_id); } public: - explicit ColumnFileInMemory(const BlockPtr & schema_, const CachePtr & cache_ = nullptr) + explicit ColumnFileInMemory(const ColumnFileSchemaPtr & schema_, const CachePtr & cache_ = nullptr) : schema(schema_) - , cache(cache_ ? cache_ : std::make_shared(*schema_)) - { - colid_to_offset.clear(); - for (size_t i = 0; i < schema->columns(); ++i) - colid_to_offset.emplace(schema->getByPosition(i).column_id, i); - } + , cache(cache_ ? cache_ : std::make_shared(schema_->getSchema())) + {} Type getType() const override { return Type::INMEMORY_FILE; } @@ -70,9 +63,7 @@ class ColumnFileInMemory : public ColumnFile CachePtr getCache() { return cache; } /// The schema of this pack. - BlockPtr getSchema() const { return schema; } - /// Replace the schema with a new schema, and the new schema instance should be exactly the same as the previous one. - void resetIdenticalSchema(BlockPtr schema_) { schema = schema_; } + ColumnFileSchemaPtr getSchema() const { return schema; } ColumnInMemoryFilePtr clone() { @@ -90,7 +81,7 @@ class ColumnFileInMemory : public ColumnFile { disable_append = true; } - bool append(DMContext & dm_context, const Block & data, size_t offset, size_t limit, size_t data_bytes) override; + bool append(const DMContext & dm_context, const Block & data, size_t offset, size_t limit, size_t data_bytes) override; Block readDataForFlush() const; @@ -101,7 +92,7 @@ class ColumnFileInMemory : public ColumnFile String s = "{in_memory_file,rows:" + DB::toString(rows) // + ",bytes:" + DB::toString(bytes) // + ",disable_append:" + DB::toString(disable_append) // - + ",schema:" + (schema ? schema->dumpStructure() : "none") // + + ",schema:" + (schema ? schema->toString() : "none") // + ",cache_block:" + (cache ? cache->block.dumpStructure() : "none") + "}"; return s; } diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.cpp index 31388c909e8..54bdd1e17c1 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.cpp @@ -25,12 +25,12 @@ namespace DB { namespace DM { -void serializeSchema(WriteBuffer & buf, const BlockPtr & schema) +void serializeSchema(WriteBuffer & buf, const Block & schema) { if (schema) { - writeIntBinary(static_cast(schema->columns()), buf); - for (auto & col : *schema) + writeIntBinary(static_cast(schema.columns()), buf); + for (const auto & col : schema) { writeIntBinary(col.column_id, buf); writeStringBinary(col.name, buf); @@ -105,7 +105,7 @@ void serializeSavedColumnFiles(WriteBuffer & buf, const ColumnFilePersisteds & c } } -ColumnFilePersisteds deserializeSavedColumnFiles(DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf) +ColumnFilePersisteds deserializeSavedColumnFiles(const DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf) { // Check binary version DeltaFormat::Version version; @@ -117,7 +117,7 @@ ColumnFilePersisteds deserializeSavedColumnFiles(DMContext & context, const RowK // V1 and V2 share the same deserializer. case DeltaFormat::V1: case DeltaFormat::V2: - column_files = deserializeSavedColumnFilesInV2Format(buf, version); + column_files = deserializeSavedColumnFilesInV2Format(context, buf, version); break; case DeltaFormat::V3: column_files = deserializeSavedColumnFilesInV3Format(context, segment_range, buf); diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.h index 887ba75ca10..bade97e346e 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFilePersisted.h @@ -35,7 +35,7 @@ class ColumnFilePersisted : public ColumnFile virtual void serializeMetadata(WriteBuffer & buf, bool save_schema) const = 0; }; -void serializeSchema(WriteBuffer & buf, const BlockPtr & schema); +void serializeSchema(WriteBuffer & buf, const Block & schema); BlockPtr deserializeSchema(ReadBuffer & buf); void serializeColumn(MemoryWriteBuffer & buf, const IColumn & column, const DataTypePtr & type, size_t offset, size_t limit, CompressionMethod compression_method, Int64 compression_level); @@ -44,13 +44,13 @@ void deserializeColumn(IColumn & column, const DataTypePtr & type, const ByteBuf /// Serialize those column files' metadata into buf. void serializeSavedColumnFiles(WriteBuffer & buf, const ColumnFilePersisteds & column_files); /// Recreate column file instances from buf. -ColumnFilePersisteds deserializeSavedColumnFiles(DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf); +ColumnFilePersisteds deserializeSavedColumnFiles(const DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf); void serializeSavedColumnFilesInV2Format(WriteBuffer & buf, const ColumnFilePersisteds & column_files); -ColumnFilePersisteds deserializeSavedColumnFilesInV2Format(ReadBuffer & buf, UInt64 version); +ColumnFilePersisteds deserializeSavedColumnFilesInV2Format(const DMContext & context, ReadBuffer & buf, UInt64 version); void serializeSavedColumnFilesInV3Format(WriteBuffer & buf, const ColumnFilePersisteds & column_files); -ColumnFilePersisteds deserializeSavedColumnFilesInV3Format(DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf); +ColumnFilePersisteds deserializeSavedColumnFilesInV3Format(const DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf); } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSchema.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSchema.cpp new file mode 100644 index 00000000000..60378550add --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSchema.cpp @@ -0,0 +1,101 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace DB +{ +namespace DM +{ + +ColumnFileSchema::ColumnFileSchema(const Block & block) + : schema(block.cloneEmpty()) +{ + for (size_t i = 0; i < schema.columns(); ++i) + colid_to_offset.emplace(schema.getByPosition(i).column_id, i); +} + +const DataTypePtr & ColumnFileSchema::getDataType(ColId column_id) const +{ + /// Returns the data type of a column. + /// The specified column id must exist, otherwise something unexpected will happen. + auto index = colid_to_offset.at(column_id); + return schema.getByPosition(index).type; +} + +String ColumnFileSchema::toString() const +{ + return "{schema:" + (schema ? schema.dumpJsonStructure() : "none") + "}"; +} + +SharedBlockSchemas::SharedBlockSchemas(DB::Context & context) + : background_pool(context.getBackgroundPool()) +{ + handle = background_pool.addTask([&, this] { + std::lock_guard lock(mutex); + for (auto iter = column_file_schemas.begin(); iter != column_file_schemas.end();) + { + if (iter->second.expired()) + { + iter = column_file_schemas.erase(iter); + } + else + { + ++iter; + } + } + return true; + }, + /*multi*/ false, + /*interval_ms*/ 60000); +} + +SharedBlockSchemas::~SharedBlockSchemas() +{ + if (handle) + { + background_pool.removeTask(handle); + } +} + +ColumnFileSchemaPtr SharedBlockSchemas::find(const Digest & digest) +{ + std::lock_guard lock(mutex); + auto it = column_file_schemas.find(digest); + if (it == column_file_schemas.end()) + return nullptr; + return it->second.lock(); +} + +ColumnFileSchemaPtr SharedBlockSchemas::getOrCreate(const Block & block) +{ + Digest digest = hashSchema(block); + std::lock_guard lock(mutex); + auto it = column_file_schemas.find(digest); + if (it == column_file_schemas.end() || it->second.expired()) + { + auto schema = std::make_shared(block); + column_file_schemas.emplace(digest, schema); + return schema; + } + else + return it->second.lock(); +} + +std::shared_ptr getSharedBlockSchemas(const DMContext & context) +{ + return context.db_context.getSharedBlockSchemas(); +} +} // namespace DM +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSchema.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSchema.h new file mode 100644 index 00000000000..9cfffc8ec83 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSchema.h @@ -0,0 +1,93 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace std +{ +using Digest = UInt256; +template <> +struct hash +{ + size_t operator()(const Digest & digest) const + { + size_t seed = 0; + boost::hash_combine(seed, boost::hash_value(digest.a)); + boost::hash_combine(seed, boost::hash_value(digest.b)); + boost::hash_combine(seed, boost::hash_value(digest.c)); + boost::hash_combine(seed, boost::hash_value(digest.d)); + return seed; + } +}; +} // namespace std + +namespace DB +{ +namespace DM +{ +using Digest = UInt256; +class ColumnFileSchema +{ +private: + Block schema; + + using ColIdToOffset = std::unordered_map; + ColIdToOffset colid_to_offset; + +public: + explicit ColumnFileSchema(const Block & block); + + const DataTypePtr & getDataType(ColId column_id) const; + + String toString() const; + + const Block & getSchema() const { return schema; } + const ColIdToOffset & getColIdToOffset() const { return colid_to_offset; } +}; + +using ColumnFileSchemaPtr = std::shared_ptr; + +class SharedBlockSchemas +{ +private: + // we use sha256 to generate Digest for each ColumnFileSchema as the key of column_file_schemas, + // to minimize the possibility of two different schemas having the same key in column_file_schemas. + // Besides, we use weak_ptr to ensure we can remove the ColumnFileSchema, + // when no one use it, to avoid too much memory usage. + std::unordered_map> column_file_schemas; + std::mutex mutex; + BackgroundProcessingPool::TaskHandle handle; + BackgroundProcessingPool & background_pool; + +public: + explicit SharedBlockSchemas(DB::Context & context); + ~SharedBlockSchemas(); + + ColumnFileSchemaPtr find(const Digest & digest); + + ColumnFileSchemaPtr getOrCreate(const Block & block); +}; + +std::shared_ptr getSharedBlockSchemas(const DMContext & context); +} // namespace DM +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetSnapshot.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetSnapshot.h index 7832bcba4f7..fac1f97cf44 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetSnapshot.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetSnapshot.h @@ -39,12 +39,13 @@ class BlockOrDelete , block_offset(offset_) {} explicit BlockOrDelete(const RowKeyRange & delete_range_) - : delete_range(delete_range_) + : block_offset(0) + , delete_range(delete_range_) {} - bool isBlock() { return (bool)block; } + bool isBlock() { return static_cast(block); } auto & getBlock() { return block; }; - auto getBlockOffset() { return block_offset; } + auto getBlockOffset() const { return block_offset; } auto & getDeleteRange() { return delete_range; } }; @@ -60,12 +61,12 @@ class ColumnFileSetSnapshot : public std::enable_shared_from_this +#include +#include #include #include #include +#include namespace DB { @@ -27,6 +31,7 @@ Columns ColumnFileTiny::readFromCache(const ColumnDefines & column_defines, size return {}; Columns columns; + const auto & colid_to_offset = schema->getColIdToOffset(); for (size_t i = col_start; i < col_end; ++i) { const auto & cd = column_defines[i]; @@ -61,6 +66,7 @@ Columns ColumnFileTiny::readFromDisk(const PageReader & page_reader, // PageStorage::PageReadFields fields; fields.first = data_page_id; + const auto & colid_to_offset = schema->getColIdToOffset(); for (size_t index = col_start; index < col_end; ++index) { const auto & cd = column_defines[index]; @@ -71,11 +77,17 @@ Columns ColumnFileTiny::readFromDisk(const PageReader & page_reader, // } else { - // New column after ddl is not exist in this pack, fill with default value + // New column after ddl is not exist in this CFTiny, fill with default value columns[index - col_start] = createColumnWithDefaultValue(cd, rows); } } + // All columns to be read are not exist in this CFTiny and filled with default value, + // we can skip reading from disk + if (fields.second.empty()) + return columns; + + // Read the columns from disk and apply DDL cast if need auto page_map = page_reader.read({fields}); Page page = page_map[data_page_id]; for (size_t index = col_start; index < col_end; ++index) @@ -125,18 +137,26 @@ ColumnFileTiny::getReader(const DMContext & /*context*/, const StorageSnapshotPt void ColumnFileTiny::serializeMetadata(WriteBuffer & buf, bool save_schema) const { - serializeSchema(buf, save_schema ? schema : BlockPtr{}); + serializeSchema(buf, save_schema ? schema->getSchema() : Block{}); writeIntBinary(data_page_id, buf); writeIntBinary(rows, buf); writeIntBinary(bytes, buf); } -std::tuple ColumnFileTiny::deserializeMetadata(ReadBuffer & buf, const BlockPtr & last_schema) +ColumnFilePersistedPtr ColumnFileTiny::deserializeMetadata(const DMContext & context, ReadBuffer & buf, ColumnFileSchemaPtr & last_schema) { - auto schema = deserializeSchema(buf); - if (!schema) + auto schema_block = deserializeSchema(buf); + std::shared_ptr schema; + + if (!schema_block) schema = last_schema; + else + { + schema = getSharedBlockSchemas(context)->getOrCreate(*schema_block); + last_schema = schema; + } + if (unlikely(!schema)) throw Exception("Cannot deserialize DeltaPackBlock's schema", ErrorCodes::LOGICAL_ERROR); @@ -147,7 +167,7 @@ std::tuple ColumnFileTiny::deserializeMetadata readIntBinary(rows, buf); readIntBinary(bytes, buf); - return {std::make_shared(schema, rows, bytes, data_page_id), std::move(schema)}; + return std::make_shared(schema, rows, bytes, data_page_id); } Block ColumnFileTiny::readBlockForMinorCompaction(const PageReader & page_reader) const @@ -164,7 +184,7 @@ Block ColumnFileTiny::readBlockForMinorCompaction(const PageReader & page_reader } else { - const auto & schema_ref = *schema; + const auto & schema_ref = schema->getSchema(); auto page = page_reader.read(data_page_id); auto columns = schema_ref.cloneEmptyColumns(); @@ -183,15 +203,17 @@ Block ColumnFileTiny::readBlockForMinorCompaction(const PageReader & page_reader } } -ColumnTinyFilePtr ColumnFileTiny::writeColumnFile(DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs, const BlockPtr & schema, const CachePtr & cache) +ColumnFileTinyPtr ColumnFileTiny::writeColumnFile(const DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs, const CachePtr & cache) { auto page_id = writeColumnFileData(context, block, offset, limit, wbs); - auto new_column_file_schema = schema ? schema : std::make_shared(block.cloneEmpty()); + + auto schema = getSharedBlockSchemas(context)->getOrCreate(block); + auto bytes = block.bytes(offset, limit); - return std::make_shared(new_column_file_schema, limit, bytes, page_id, cache); + return std::make_shared(schema, limit, bytes, page_id, cache); } -PageId ColumnFileTiny::writeColumnFileData(DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs) +PageId ColumnFileTiny::writeColumnFileData(const DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs) { auto page_id = context.storage_pool.newLogPageId(); @@ -201,7 +223,9 @@ PageId ColumnFileTiny::writeColumnFileData(DMContext & context, const Block & bl { auto last_buf_size = write_buf.count(); serializeColumn(write_buf, *col.column, col.type, offset, limit, context.db_context.getSettingsRef().dt_compression_method, context.db_context.getSettingsRef().dt_compression_level); - col_data_sizes.push_back(write_buf.count() - last_buf_size); + size_t serialized_size = write_buf.count() - last_buf_size; + RUNTIME_CHECK_MSG(serialized_size != 0, "try to persist a block with empty column, colname={} colid={} block={}", col.name, col.column_id, block.dumpJsonStructure()); + col_data_sizes.push_back(serialized_size); } auto data_size = write_buf.count(); @@ -252,4 +276,4 @@ ColumnFileReaderPtr ColumnFileTinyReader::createNewReader(const ColumnDefinesPtr } } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileTiny.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileTiny.h index 4a2f494b712..7680139e4b3 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileTiny.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileTiny.h @@ -14,14 +14,16 @@ #pragma once +#include #include +#include namespace DB { namespace DM { class ColumnFileTiny; -using ColumnTinyFilePtr = std::shared_ptr; +using ColumnFileTinyPtr = std::shared_ptr; /// A column file which data is stored in PageStorage. /// It may be created in two ways: @@ -32,7 +34,7 @@ class ColumnFileTiny : public ColumnFilePersisted friend class ColumnFileTinyReader; private: - BlockPtr schema; + ColumnFileSchemaPtr schema; UInt64 rows = 0; UInt64 bytes = 0; @@ -45,8 +47,6 @@ class ColumnFileTiny : public ColumnFilePersisted /// The cache data in memory. /// Currently this field is unused. CachePtr cache; - /// Used to map column id to column instance in a Block. - ColIdToOffset colid_to_offset; private: /// Read a block of columns in `column_defines` from cache / disk, @@ -58,22 +58,17 @@ class ColumnFileTiny : public ColumnFilePersisted const DataTypePtr & getDataType(ColId column_id) const { - // Note that column_id must exist - auto index = colid_to_offset.at(column_id); - return schema->getByPosition(index).type; + return schema->getDataType(column_id); } public: - ColumnFileTiny(const BlockPtr & schema_, UInt64 rows_, UInt64 bytes_, PageId data_page_id_, const CachePtr & cache_ = nullptr) + ColumnFileTiny(const ColumnFileSchemaPtr & schema_, UInt64 rows_, UInt64 bytes_, PageId data_page_id_, const CachePtr & cache_ = nullptr) : schema(schema_) , rows(rows_) , bytes(bytes_) , data_page_id(data_page_id_) , cache(cache_) - { - for (size_t i = 0; i < schema->columns(); ++i) - colid_to_offset.emplace(schema->getByPosition(i).column_id, i); - } + {} Type getType() const override { return Type::TINY_FILE; } @@ -84,11 +79,9 @@ class ColumnFileTiny : public ColumnFilePersisted void clearCache() { cache = {}; } /// The schema of this pack. Could be empty, i.e. a DeleteRange does not have a schema. - BlockPtr getSchema() const { return schema; } - /// Replace the schema with a new schema, and the new schema instance should be exactly the same as the previous one. - void resetIdenticalSchema(BlockPtr schema_) { schema = schema_; } + ColumnFileSchemaPtr getSchema() const { return schema; } - ColumnTinyFilePtr cloneWith(PageId new_data_page_id) + ColumnFileTinyPtr cloneWith(PageId new_data_page_id) { auto new_tiny_file = std::make_shared(*this); new_tiny_file->data_page_id = new_data_page_id; @@ -109,11 +102,11 @@ class ColumnFileTiny : public ColumnFilePersisted Block readBlockForMinorCompaction(const PageReader & page_reader) const; - static ColumnTinyFilePtr writeColumnFile(DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs, const BlockPtr & schema = nullptr, const CachePtr & cache = nullptr); + static ColumnFileTinyPtr writeColumnFile(const DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs, const CachePtr & cache = nullptr); - static PageId writeColumnFileData(DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs); + static PageId writeColumnFileData(const DMContext & context, const Block & block, size_t offset, size_t limit, WriteBatches & wbs); - static std::tuple deserializeMetadata(ReadBuffer & buf, const BlockPtr & last_schema); + static ColumnFilePersistedPtr deserializeMetadata(const DMContext & context, ReadBuffer & buf, ColumnFileSchemaPtr & last_schema); bool mayBeFlushedFrom(ColumnFile * from_file) const override { @@ -135,7 +128,7 @@ class ColumnFileTiny : public ColumnFilePersisted String s = "{tiny_file,rows:" + DB::toString(rows) // + ",bytes:" + DB::toString(bytes) // + ",data_page_id:" + DB::toString(data_page_id) // - + ",schema:" + (schema ? schema->dumpStructure() : "none") // + + ",schema:" + (schema ? schema->toString() : "none") // + ",cache_block:" + (cache ? cache->block.dumpStructure() : "none") + "}"; return s; } diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V2.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V2.cpp index 09a705bb22d..51240795c3c 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V2.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V2.cpp @@ -14,6 +14,7 @@ #include #include +#include #include namespace DB @@ -33,7 +34,7 @@ struct ColumnFileV2 using ColumnFileV2Ptr = std::shared_ptr; using ColumnFileV2s = std::vector; -inline ColumnFilePersisteds transform_V2_to_V3(const ColumnFileV2s & column_files_v2) +inline ColumnFilePersisteds transform_V2_to_V3(const DMContext & context, const ColumnFileV2s & column_files_v2) { ColumnFilePersisteds column_files_v3; for (const auto & f : column_files_v2) @@ -42,7 +43,10 @@ inline ColumnFilePersisteds transform_V2_to_V3(const ColumnFileV2s & column_file if (f->isDeleteRange()) f_v3 = std::make_shared(std::move(f->delete_range)); else - f_v3 = std::make_shared(f->schema, f->rows, f->bytes, f->data_page_id); + { + auto schema = getSharedBlockSchemas(context)->getOrCreate(*(f->schema)); + f_v3 = std::make_shared(schema, f->rows, f->bytes, f->data_page_id); + } column_files_v3.push_back(f_v3); } @@ -64,7 +68,7 @@ inline ColumnFileV2s transformSaved_V3_to_V2(const ColumnFilePersisteds & column { f_v2->rows = f_tiny_file->getRows(); f_v2->bytes = f_tiny_file->getBytes(); - f_v2->schema = f_tiny_file->getSchema(); + f_v2->schema = std::make_shared(f_tiny_file->getSchema()->getSchema()); f_v2->data_page_id = f_tiny_file->getDataPageId(); } else @@ -152,12 +156,11 @@ inline ColumnFileV2Ptr deserializeColumnFile_V2(ReadBuffer & buf, UInt64 version } readIntBinary(column_file->data_page_id, buf); - column_file->schema = deserializeSchema(buf); return column_file; } -ColumnFilePersisteds deserializeSavedColumnFilesInV2Format(ReadBuffer & buf, UInt64 version) +ColumnFilePersisteds deserializeSavedColumnFilesInV2Format(const DMContext & context, ReadBuffer & buf, UInt64 version) { size_t size; readIntBinary(size, buf); @@ -175,7 +178,7 @@ ColumnFilePersisteds deserializeSavedColumnFilesInV2Format(ReadBuffer & buf, UIn } column_files.push_back(column_file); } - return transform_V2_to_V3(column_files); + return transform_V2_to_V3(context, column_files); } } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V3.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V3.cpp index dcf063b2fe1..cb7378c5d24 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V3.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile_V3.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include namespace DB @@ -24,7 +25,7 @@ namespace DM void serializeSavedColumnFilesInV3Format(WriteBuffer & buf, const ColumnFilePersisteds & column_files) { writeIntBinary(column_files.size(), buf); - BlockPtr last_schema; + ColumnFileSchemaPtr last_schema; for (const auto & column_file : column_files) { @@ -61,13 +62,13 @@ void serializeSavedColumnFilesInV3Format(WriteBuffer & buf, const ColumnFilePers } } -ColumnFilePersisteds deserializeSavedColumnFilesInV3Format(DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf) +ColumnFilePersisteds deserializeSavedColumnFilesInV3Format(const DMContext & context, const RowKeyRange & segment_range, ReadBuffer & buf) { size_t column_file_count; readIntBinary(column_file_count, buf); ColumnFilePersisteds column_files; column_files.reserve(column_file_count); - BlockPtr last_schema; + ColumnFileSchemaPtr last_schema; for (size_t i = 0; i < column_file_count; ++i) { std::underlying_type::type column_file_type; @@ -80,7 +81,7 @@ ColumnFilePersisteds deserializeSavedColumnFilesInV3Format(DMContext & context, break; case ColumnFile::Type::TINY_FILE: { - std::tie(column_file, last_schema) = ColumnFileTiny::deserializeMetadata(buf, last_schema); + column_file = ColumnFileTiny::deserializeMetadata(context, buf, last_schema); break; } case ColumnFile::Type::BIG_FILE: diff --git a/dbms/src/Storages/DeltaMerge/DMChecksumConfig.cpp b/dbms/src/Storages/DeltaMerge/DMChecksumConfig.cpp index fdb969c4aa2..68c29493114 100644 --- a/dbms/src/Storages/DeltaMerge/DMChecksumConfig.cpp +++ b/dbms/src/Storages/DeltaMerge/DMChecksumConfig.cpp @@ -25,8 +25,6 @@ namespace DB::DM { DMChecksumConfig::DMChecksumConfig(std::istream & input) - : embedded_checksum() - , debug_info() { dtpb::ChecksumConfig configuration; if (unlikely(!configuration.ParseFromIstream(&input))) @@ -102,7 +100,7 @@ std::ostream & operator<<(std::ostream & output, const DMChecksumConfig & config { digest->update(name.data(), name.length()); digest->update(checksum.data(), checksum.length()); - auto embedded_checksum = configuration.add_embedded_checksum(); + auto * embedded_checksum = configuration.add_embedded_checksum(); embedded_checksum->set_name(name); embedded_checksum->set_checksum(checksum); } @@ -113,7 +111,7 @@ std::ostream & operator<<(std::ostream & output, const DMChecksumConfig & config { for (const auto & [name, content] : config.debug_info) { - auto tmp = configuration.add_debug_info(); + auto * tmp = configuration.add_debug_info(); tmp->set_name(name); tmp->set_content(content); } @@ -127,9 +125,9 @@ std::ostream & operator<<(std::ostream & output, const DMChecksumConfig & config return output; } -std::optional DMChecksumConfig::fromDBContext(const Context & context, bool is_single_file) +std::optional DMChecksumConfig::fromDBContext(const Context & context) { - return !is_single_file && STORAGE_FORMAT_CURRENT.dm_file >= DMFileFormat::V2 + return STORAGE_FORMAT_CURRENT.dm_file >= DMFileFormat::V2 ? std::make_optional(DMChecksumConfig{context}) : std::nullopt; }; diff --git a/dbms/src/Storages/DeltaMerge/DMChecksumConfig.h b/dbms/src/Storages/DeltaMerge/DMChecksumConfig.h index b151c15e3fa..6a968ca9d12 100644 --- a/dbms/src/Storages/DeltaMerge/DMChecksumConfig.h +++ b/dbms/src/Storages/DeltaMerge/DMChecksumConfig.h @@ -88,7 +88,7 @@ class DMChecksumConfig } } - [[maybe_unused]] static std::optional fromDBContext(const DB::Context & context, bool is_single_file); + [[maybe_unused]] static std::optional fromDBContext(const DB::Context & context); private: size_t checksum_frame_length; ///< the length of checksum frame diff --git a/dbms/src/Storages/DeltaMerge/DMContext.h b/dbms/src/Storages/DeltaMerge/DMContext.h index 371fc9ab5d0..cad1e5adc98 100644 --- a/dbms/src/Storages/DeltaMerge/DMContext.h +++ b/dbms/src/Storages/DeltaMerge/DMContext.h @@ -129,9 +129,9 @@ struct DMContext : private boost::noncopyable WriteLimiterPtr getWriteLimiter() const { return db_context.getWriteLimiter(); } ReadLimiterPtr getReadLimiter() const { return db_context.getReadLimiter(); } - DM::DMConfigurationOpt createChecksumConfig(bool is_single_file) const + DM::DMConfigurationOpt createChecksumConfig() const { - return DMChecksumConfig::fromDBContext(db_context, is_single_file); + return DMChecksumConfig::fromDBContext(db_context); } }; diff --git a/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.cpp b/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.cpp index d3a6845f80d..b6b34df3542 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -106,17 +107,6 @@ void ColumnFilePersistedSet::recordRemoveColumnFilesPages(WriteBatches & wbs) co file->removeData(wbs); } -BlockPtr ColumnFilePersistedSet::getLastSchema() -{ - for (auto it = persisted_files.rbegin(); it != persisted_files.rend(); ++it) - { - if (auto * t_file = (*it)->tryToTinyFile(); t_file) - return t_file->getSchema(); - } - return {}; -} - - ColumnFilePersisteds ColumnFilePersistedSet::diffColumnFiles(const ColumnFiles & previous_column_files) const { // It should not be not possible that files in the snapshots are removed when calling this diff --git a/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.h b/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.h index e7796ac55dc..7f191bfa0ca 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.h +++ b/dbms/src/Storages/DeltaMerge/Delta/ColumnFilePersistedSet.h @@ -36,7 +36,6 @@ #include #include - namespace DB { namespace DM @@ -108,8 +107,6 @@ class ColumnFilePersistedSet : public std::enable_shared_from_this(id_, persisted_files)) - , mem_table_set(std::make_shared(persisted_file_set->getLastSchema(), in_memory_files)) + , mem_table_set(std::make_shared(in_memory_files)) , delta_index(std::make_shared()) , log(Logger::get()) {} DeltaValueSpace::DeltaValueSpace(ColumnFilePersistedSetPtr && persisted_file_set_) : persisted_file_set(std::move(persisted_file_set_)) - , mem_table_set(std::make_shared(persisted_file_set->getLastSchema())) + , mem_table_set(std::make_shared()) , delta_index(std::make_shared()) , log(Logger::get()) {} diff --git a/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp b/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp index 47d2f67f1e0..476d59f550d 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp @@ -28,25 +28,6 @@ namespace DM { void MemTableSet::appendColumnFileInner(const ColumnFilePtr & column_file) { - // If this column file's schema is identical to last_schema, then use the last_schema instance (instead of the one in `column_file`), - // so that we don't have to serialize my_schema instance. - if (auto * m_file = column_file->tryToInMemoryFile(); m_file) - { - auto my_schema = m_file->getSchema(); - if (last_schema && my_schema && last_schema != my_schema && isSameSchema(*my_schema, *last_schema)) - m_file->resetIdenticalSchema(last_schema); - else - last_schema = my_schema; - } - else if (auto * t_file = column_file->tryToTinyFile(); t_file) - { - auto my_schema = t_file->getSchema(); - if (last_schema && my_schema && last_schema != my_schema && isSameSchema(*my_schema, *last_schema)) - t_file->resetIdenticalSchema(last_schema); - else - last_schema = my_schema; - } - if (!column_files.empty()) { // As we are now appending a new column file (which can be used for new appends), @@ -212,9 +193,10 @@ void MemTableSet::appendToCache(DMContext & context, const Block & block, size_t if (!success) { + auto schema = getSharedBlockSchemas(context)->getOrCreate(block); + // Create a new column file. - auto my_schema = (last_schema && isSameSchema(block, *last_schema)) ? last_schema : std::make_shared(block.cloneEmpty()); - auto new_column_file = std::make_shared(my_schema); + auto new_column_file = std::make_shared(schema); // Must append the empty `new_column_file` to `column_files` before appending data to it, // because `appendColumnFileInner` will update stats related to `column_files` but we will update stats relate to `new_column_file` here. appendColumnFileInner(new_column_file); diff --git a/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.h b/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.h index 89f1e620559..a6a308fde1b 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.h +++ b/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.h @@ -34,9 +34,6 @@ class MemTableSet : public std::enable_shared_from_this , private boost::noncopyable { private: - /// To avoid serialize the same schema between continuous ColumnFileInMemory and ColumnFileTiny instance. - BlockPtr last_schema; - // Note that we must update `column_files_count` for outer thread-safe after `column_files` changed ColumnFiles column_files; // TODO: check the proper memory_order when use this atomic variable @@ -52,9 +49,8 @@ class MemTableSet : public std::enable_shared_from_this void appendColumnFileInner(const ColumnFilePtr & column_file); public: - explicit MemTableSet(const BlockPtr & last_schema_, const ColumnFiles & in_memory_files = {}) - : last_schema(last_schema_) - , column_files(in_memory_files) + explicit MemTableSet(const ColumnFiles & in_memory_files = {}) + : column_files(in_memory_files) , log(Logger::get()) { column_files_count = column_files.size(); @@ -63,14 +59,6 @@ class MemTableSet : public std::enable_shared_from_this rows += file->getRows(); bytes += file->getBytes(); deletes += file->getDeletes(); - if (auto * m_file = file->tryToInMemoryFile(); m_file) - { - last_schema = m_file->getSchema(); - } - else if (auto * t_file = file->tryToTinyFile(); t_file) - { - last_schema = t_file->getSchema(); - } } } diff --git a/dbms/src/Storages/DeltaMerge/Delta/MinorCompaction.cpp b/dbms/src/Storages/DeltaMerge/Delta/MinorCompaction.cpp index 6d6164279fa..90959527d30 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/MinorCompaction.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/MinorCompaction.cpp @@ -35,7 +35,7 @@ void MinorCompaction::prepare(DMContext & context, WriteBatches & wbs, const Pag if (task.is_trivial_move) continue; - auto & schema = *(task.to_compact[0]->tryToTinyFile()->getSchema()); + const auto & schema = task.to_compact[0]->tryToTinyFile()->getSchema()->getSchema(); auto compact_columns = schema.cloneEmptyColumns(); for (auto & file : task.to_compact) { @@ -55,7 +55,7 @@ void MinorCompaction::prepare(DMContext & context, WriteBatches & wbs, const Pag } Block compact_block = schema.cloneWithColumns(std::move(compact_columns)); auto compact_rows = compact_block.rows(); - auto compact_column_file = ColumnFileTiny::writeColumnFile(context, compact_block, 0, compact_rows, wbs, task.to_compact.front()->tryToTinyFile()->getSchema()); + auto compact_column_file = ColumnFileTiny::writeColumnFile(context, compact_block, 0, compact_rows, wbs); wbs.writeLogAndData(); task.result = compact_column_file; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp index 1ec0785f29b..db6d9c9f814 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp @@ -14,11 +14,42 @@ #include #include +#include namespace DB { namespace DM { +using Digest = UInt256; +Digest hashSchema(const Block & schema) +{ + SHA256_CTX ctx; + SHA256_Init(&ctx); + unsigned char digest_bytes[32]; + + const auto & data = schema.getColumnsWithTypeAndName(); + for (const auto & column_with_type_and_name : data) + { + // for type infos, we should use getName() instead of getTypeId(), + // because for all nullable types, getTypeId() will always return TypeIndex::Nullable in getTypeId() + // but getName() will return the real type name, e.g. Nullable(UInt64), Nullable(datetime(6)) + const auto & type = column_with_type_and_name.type->getName(); + SHA256_Update(&ctx, reinterpret_cast(type.c_str()), type.size()); + + const auto & name = column_with_type_and_name.name; + SHA256_Update(&ctx, reinterpret_cast(name.c_str()), name.size()); + + const auto & column_id = column_with_type_and_name.column_id; + SHA256_Update(&ctx, reinterpret_cast(&column_id), sizeof(column_id)); + + const auto & default_value = column_with_type_and_name.default_value.toString(); + SHA256_Update(&ctx, reinterpret_cast(default_value.c_str()), default_value.size()); + } + + SHA256_Final(digest_bytes, &ctx); + return *(reinterpret_cast(&digest_bytes)); +} + void convertColumn(Block & block, size_t pos, const DataTypePtr & to_type, const Context & context) { const IDataType * to_type_ptr = to_type.get(); diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h index ad585d684c7..24b6f5efd06 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include #pragma once @@ -245,6 +247,9 @@ inline bool isSameSchema(const Block & a, const Block & b) return true; } +using Digest = UInt256; +Digest hashSchema(const Block & schema); + /// This method guarantees that the returned valid block is not empty. inline Block readNextBlock(const BlockInputStreamPtr & in) { diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index e54c63bf283..753184af622 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -561,8 +561,15 @@ void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_ // The [offset, rows - offset] can be exceeding the Segment's rowkey_range. Cut the range // to fit the segment. auto [cur_offset, cur_limit] = rowkey_range.getPosRange(handle_column, offset, rows - offset); - if (unlikely(cur_offset != offset)) - throw Exception(fmt::format("cur_offset does not equal to offset. is_common_handle {} start_key {} cur_offset {} cur_limit {} rows {} offset {} rowkey_range {}", is_common_handle, start_key.toRowKeyValue().toString(), cur_offset, cur_limit, rows, offset, rowkey_range.toDebugString()), ErrorCodes::LOGICAL_ERROR); + RUNTIME_CHECK_MSG(cur_offset == offset && cur_limit != 0, + "invalid cur_offset or cur_limit. is_common_handle={} start_key={} cur_offset={} cur_limit={} rows={} offset={} rowkey_range={}", + is_common_handle, + start_key.toRowKeyValue().toString(), + cur_offset, + cur_limit, + rows, + offset, + rowkey_range.toDebugString()); limit = cur_limit; auto alloc_bytes = block.bytes(offset, limit); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp index 674f66c05fb..c3dee70e21a 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp @@ -55,9 +55,9 @@ inline constexpr static const char * DATA_FILE_SUFFIX = ".dat"; inline constexpr static const char * INDEX_FILE_SUFFIX = ".idx"; inline constexpr static const char * MARK_FILE_SUFFIX = ".mrk"; -inline String getNGCPath(const String & prefix, bool is_single_mode) +inline String getNGCPath(const String & prefix) { - return prefix + (is_single_mode ? "." : "/") + NGC_FILE_NAME; + return prefix + "/" + NGC_FILE_NAME; } } // namespace details @@ -83,9 +83,9 @@ String DMFile::getPathByStatus(const String & parent_path, UInt64 file_id, DMFil return s; } -String DMFile::getNGCPath(const String & parent_path, UInt64 file_id, DMFile::Status status, bool is_single_mode) +String DMFile::getNGCPath(const String & parent_path, UInt64 file_id, DMFile::Status status) { - return details::getNGCPath(getPathByStatus(parent_path, file_id, status), is_single_mode); + return details::getNGCPath(getPathByStatus(parent_path, file_id, status)); } // @@ -97,17 +97,16 @@ String DMFile::path() const String DMFile::ngcPath() const { - return getNGCPath(parent_path, file_id, status, isSingleFileMode()); + return getNGCPath(parent_path, file_id, status); } -DMFilePtr DMFile::create(UInt64 file_id, const String & parent_path, bool single_file_mode, DMConfigurationOpt configuration) +DMFilePtr DMFile::create(UInt64 file_id, const String & parent_path, DMConfigurationOpt configuration) { Poco::Logger * log = &Poco::Logger::get("DMFile"); // On create, ref_id is the same as file_id. DMFilePtr new_dmfile(new DMFile(file_id, file_id, parent_path, - single_file_mode ? Mode::SINGLE_FILE : Mode::FOLDER, Status::WRITABLE, log, std::move(configuration))); @@ -119,26 +118,13 @@ DMFilePtr DMFile::create(UInt64 file_id, const String & parent_path, bool single file.remove(true); LOG_WARNING(log, "Existing dmfile, removed: {}", path); } - if (single_file_mode) - { - Poco::File parent(parent_path); - parent.createDirectories(); - // Create a mark file to stop this dmfile from being removed by GC. - // We should create NGC file before creating the file under single file mode, - // or the file may be removed. - // FIXME : this should not use PageUtils. - PageUtil::touchFile(new_dmfile->ngcPath()); - PageUtil::touchFile(path); - } - else - { - file.createDirectories(); - // Create a mark file to stop this dmfile from being removed by GC. - // We should create NGC file after creating the directory under folder mode - // since the NGC file is a file under the folder. - // FIXME : this should not use PageUtils. - PageUtil::touchFile(new_dmfile->ngcPath()); - } + + file.createDirectories(); + // Create a mark file to stop this dmfile from being removed by GC. + // We should create NGC file after creating the directory under folder mode + // since the NGC file is a file under the folder. + // FIXME : this should not use PageUtils. + PageUtil::touchFile(new_dmfile->ngcPath()); return new_dmfile; } @@ -156,20 +142,15 @@ DMFilePtr DMFile::restore( if (!poco_file.exists()) return nullptr; - bool single_file_mode = poco_file.isFile(); DMFilePtr dmfile(new DMFile( file_id, page_id, parent_path, - single_file_mode ? Mode::SINGLE_FILE : Mode::FOLDER, Status::READABLE, &Poco::Logger::get("DMFile"))); if (!read_meta_mode.isNone()) { - if (!single_file_mode) - { - dmfile->readConfiguration(file_provider); - } + dmfile->readConfiguration(file_provider); dmfile->readMetadata(file_provider, read_meta_mode); } return dmfile; @@ -177,39 +158,17 @@ DMFilePtr DMFile::restore( String DMFile::colIndexCacheKey(const FileNameBase & file_name_base) const { - if (isSingleFileMode()) - { - return path() + "/" + DMFile::colIndexFileName(file_name_base); - } - else - { - return colIndexPath(file_name_base); - } + return colIndexPath(file_name_base); } String DMFile::colMarkCacheKey(const FileNameBase & file_name_base) const { - if (isSingleFileMode()) - { - return path() + "/" + DMFile::colMarkFileName(file_name_base); - } - else - { - return colMarkPath(file_name_base); - } + return colMarkPath(file_name_base); } bool DMFile::isColIndexExist(const ColId & col_id) const { - if (isSingleFileMode()) - { - const auto index_identifier = DMFile::colIndexFileName(DMFile::getFileNameBase(col_id)); - return isSubFileExists(index_identifier); - } - else - { - return column_indices.count(col_id) != 0; - } + return column_indices.count(col_id) != 0; } String DMFile::encryptionBasePath() const @@ -220,37 +179,37 @@ String DMFile::encryptionBasePath() const EncryptionPath DMFile::encryptionDataPath(const FileNameBase & file_name_base) const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : file_name_base + details::DATA_FILE_SUFFIX); + return EncryptionPath(encryptionBasePath(), file_name_base + details::DATA_FILE_SUFFIX); } EncryptionPath DMFile::encryptionIndexPath(const FileNameBase & file_name_base) const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : file_name_base + details::INDEX_FILE_SUFFIX); + return EncryptionPath(encryptionBasePath(), file_name_base + details::INDEX_FILE_SUFFIX); } EncryptionPath DMFile::encryptionMarkPath(const FileNameBase & file_name_base) const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : file_name_base + details::MARK_FILE_SUFFIX); + return EncryptionPath(encryptionBasePath(), file_name_base + details::MARK_FILE_SUFFIX); } EncryptionPath DMFile::encryptionMetaPath() const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : metaFileName()); + return EncryptionPath(encryptionBasePath(), metaFileName()); } EncryptionPath DMFile::encryptionPackStatPath() const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : packStatFileName()); + return EncryptionPath(encryptionBasePath(), packStatFileName()); } EncryptionPath DMFile::encryptionPackPropertyPath() const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : packPropertyFileName()); + return EncryptionPath(encryptionBasePath(), packPropertyFileName()); } EncryptionPath DMFile::encryptionConfigurationPath() const { - return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : configurationFileName()); + return EncryptionPath(encryptionBasePath(), configurationFileName()); } String DMFile::colDataFileName(const FileNameBase & file_name_base) @@ -383,10 +342,6 @@ void DMFile::writeMetadata(const FileProviderPtr & file_provider, const WriteLim void DMFile::upgradeMetaIfNeed(const FileProviderPtr & file_provider, DMFileFormat::Version ver) { - if (unlikely(mode != Mode::FOLDER)) - { - throw DB::TiFlashException("upgradeMetaIfNeed is only expected to be called when mode is FOLDER.", Errors::DeltaTree::Internal); - } if (unlikely(ver == DMFileFormat::V0)) { // Update ColumnStat.serialized_bytes @@ -450,17 +405,13 @@ void DMFile::readColumnStat(const FileProviderPtr & file_provider, const MetaPac assertString("\n", *buf); readText(column_stats, ver, *buf); - // No need to upgrade meta when mode is Mode::SINGLE_FILE - if (mode == Mode::FOLDER) + // for V2, we do not apply in-place upgrade for now + // but it should not affect the normal read procedure + if (unlikely(ver >= DMFileFormat::V2 && !configuration)) { - // for V2, we do not apply in-place upgrade for now - // but it should not affect the normal read procedure - if (unlikely(ver >= DMFileFormat::V2 && !configuration)) - { - throw TiFlashException("configuration expected but not loaded", Errors::Checksum::Missing); - } - upgradeMetaIfNeed(file_provider, ver); + throw TiFlashException("configuration expected but not loaded", Errors::Checksum::Missing); } + upgradeMetaIfNeed(file_provider, ver); } void DMFile::readPackStat(const FileProviderPtr & file_provider, const MetaPackInfo & meta_pack_info) @@ -543,61 +494,30 @@ void DMFile::readPackProperty(const FileProviderPtr & file_provider, const MetaP void DMFile::readMetadata(const FileProviderPtr & file_provider, const ReadMetaMode & read_meta_mode) { Footer footer; - if (isSingleFileMode()) + + if (read_meta_mode.isAll()) { - // Read the `Footer` part from disk and init `sub_file_stat` - /// TODO: Redesign the file format for single file mode (https://github.com/pingcap/tics/issues/1798) - Poco::File file(path()); - ReadBufferFromFileProvider buf(file_provider, path(), EncryptionPath(encryptionBasePath(), "")); - - buf.seek(file.getSize() - sizeof(Footer), SEEK_SET); - DB::readIntBinary(footer.meta_pack_info.pack_property_offset, buf); - DB::readIntBinary(footer.meta_pack_info.pack_property_size, buf); - DB::readIntBinary(footer.meta_pack_info.column_stat_offset, buf); - DB::readIntBinary(footer.meta_pack_info.column_stat_size, buf); - DB::readIntBinary(footer.meta_pack_info.pack_stat_offset, buf); - DB::readIntBinary(footer.meta_pack_info.pack_stat_size, buf); - DB::readIntBinary(footer.sub_file_stat_offset, buf); - DB::readIntBinary(footer.sub_file_num, buf); - // initialize sub file state - buf.seek(footer.sub_file_stat_offset, SEEK_SET); - SubFileStat sub_file_stat{}; - for (UInt32 i = 0; i < footer.sub_file_num; i++) - { - String name; - DB::readStringBinary(name, buf); - DB::readIntBinary(sub_file_stat.offset, buf); - DB::readIntBinary(sub_file_stat.size, buf); - sub_file_stats.emplace(name, sub_file_stat); - } + initializeIndices(); } - else - { - if (read_meta_mode.isAll()) + if (auto file = Poco::File(packPropertyPath()); file.exists()) + footer.meta_pack_info.pack_property_size = file.getSize(); + + auto recheck = [&](size_t size) { + if (this->configuration) { - initializeSubFileStatsForFolderMode(); - initializeIndices(); + auto total_size = this->configuration->getChecksumFrameLength() + this->configuration->getChecksumHeaderLength(); + auto frame_count = size / total_size + + (0 != size % total_size); + size -= frame_count * this->configuration->getChecksumHeaderLength(); } - if (auto file = Poco::File(packPropertyPath()); file.exists()) - footer.meta_pack_info.pack_property_size = file.getSize(); - - auto recheck = [&](size_t size) { - if (this->configuration) - { - auto total_size = this->configuration->getChecksumFrameLength() + this->configuration->getChecksumHeaderLength(); - auto frame_count = size / total_size - + (0 != size % total_size); - size -= frame_count * this->configuration->getChecksumHeaderLength(); - } - return size; - }; + return size; + }; - if (auto file = Poco::File(packPropertyPath()); file.exists()) - footer.meta_pack_info.pack_property_size = file.getSize(); + if (auto file = Poco::File(packPropertyPath()); file.exists()) + footer.meta_pack_info.pack_property_size = file.getSize(); - footer.meta_pack_info.column_stat_size = Poco::File(metaPath()).getSize(); - footer.meta_pack_info.pack_stat_size = recheck(Poco::File(packStatPath()).getSize()); - } + footer.meta_pack_info.column_stat_size = Poco::File(metaPath()).getSize(); + footer.meta_pack_info.pack_stat_size = recheck(Poco::File(packStatPath()).getSize()); if (read_meta_mode.needPackProperty() && footer.meta_pack_info.pack_property_size != 0) readPackProperty(file_provider, footer.meta_pack_info); @@ -635,52 +555,9 @@ void DMFile::finalizeForFolderMode(const FileProviderPtr & file_provider, const LOG_WARNING(log, "Existing dmfile, removed: {}", deleted_path); } old_file.renameTo(new_path); - initializeSubFileStatsForFolderMode(); initializeIndices(); } -void DMFile::finalizeForSingleFileMode(WriteBuffer & buffer) -{ - Footer footer; - std::tie(footer.meta_pack_info.pack_property_offset, footer.meta_pack_info.pack_property_size) = writePackPropertyToBuffer(buffer); - std::tie(footer.meta_pack_info.column_stat_offset, footer.meta_pack_info.column_stat_size) = writeMetaToBuffer(buffer); - std::tie(footer.meta_pack_info.pack_stat_offset, footer.meta_pack_info.pack_stat_size) = writePackStatToBuffer(buffer); - - footer.sub_file_stat_offset = buffer.count(); - footer.sub_file_num = sub_file_stats.size(); - for (auto & iter : sub_file_stats) - { - writeStringBinary(iter.first, buffer); - writeIntBinary(iter.second.offset, buffer); - writeIntBinary(iter.second.size, buffer); - } - writeIntBinary(footer.meta_pack_info.pack_property_offset, buffer); - writeIntBinary(footer.meta_pack_info.pack_property_size, buffer); - writeIntBinary(footer.meta_pack_info.column_stat_offset, buffer); - writeIntBinary(footer.meta_pack_info.column_stat_size, buffer); - writeIntBinary(footer.meta_pack_info.pack_stat_offset, buffer); - writeIntBinary(footer.meta_pack_info.pack_stat_size, buffer); - writeIntBinary(footer.sub_file_stat_offset, buffer); - writeIntBinary(footer.sub_file_num, buffer); - writeIntBinary(static_cast>(footer.file_format_version), buffer); - buffer.next(); - if (status != Status::WRITING) - throw Exception(fmt::format("Expected WRITING status, now {}", statusString(status))); - Poco::File old_file(path()); - Poco::File old_ngc_file(ngcPath()); - - setStatus(Status::READABLE); - - auto new_path = path(); - Poco::File file(new_path); - if (file.exists()) - file.remove(); - Poco::File new_ngc_file(ngcPath()); - new_ngc_file.createFile(); - old_file.renameTo(new_path); - old_ngc_file.remove(); -} - std::set DMFile::listAllInPath( const FileProviderPtr & file_provider, const String & parent_path, @@ -752,7 +629,7 @@ std::set DMFile::listAllInPath( // Only return the ID if the file is able to be GC-ed. const auto file_path = parent_path + "/" + name; Poco::File file(file_path); - String ngc_path = details::getNGCPath(file_path, file.isFile()); + String ngc_path = details::getNGCPath(file_path); Poco::File ngc_file(ngc_path); if (!ngc_file.exists()) file_ids.insert(file_id); @@ -779,47 +656,21 @@ void DMFile::enableGC() void DMFile::remove(const FileProviderPtr & file_provider) { - if (isSingleFileMode()) + // If we use `FileProvider::deleteDirectory`, it may left a broken DMFile on disk. + // By renaming DMFile with a prefix first, even if there are broken DMFiles left, + // we can safely clean them when `DMFile::listAllInPath` is called. + const String dir_path = path(); + if (Poco::File dir_file(dir_path); dir_file.exists()) { - file_provider->deleteRegularFile(path(), EncryptionPath(encryptionBasePath(), "")); - } - else - { - // If we use `FileProvider::deleteDirectory`, it may left a broken DMFile on disk. - // By renaming DMFile with a prefix first, even if there are broken DMFiles left, - // we can safely clean them when `DMFile::listAllInPath` is called. - const String dir_path = path(); - if (Poco::File dir_file(dir_path); dir_file.exists()) - { - setStatus(Status::DROPPED); - const String deleted_path = path(); - // Rename the directory first (note that we should do it before deleting encryption info) - dir_file.renameTo(deleted_path); - FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_before_dmfile_remove_encryption); - file_provider->deleteEncryptionInfo(EncryptionPath(encryptionBasePath(), "")); - FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_before_dmfile_remove_from_disk); - // Then clean the files on disk - dir_file.remove(true); - } - } -} - -void DMFile::initializeSubFileStatsForFolderMode() -{ - if (isSingleFileMode()) - return; - - Poco::File directory{path()}; - std::vector sub_files{}; - directory.list(sub_files); - for (const auto & name : sub_files) - { - if (endsWith(name, details::DATA_FILE_SUFFIX) || endsWith(name, details::INDEX_FILE_SUFFIX) - || endsWith(name, details::MARK_FILE_SUFFIX)) - { - auto size = Poco::File(path() + "/" + name).getSize(); - sub_file_stats.emplace(name, SubFileStat{0, size}); - } + setStatus(Status::DROPPED); + const String deleted_path = path(); + // Rename the directory first (note that we should do it before deleting encryption info) + dir_file.renameTo(deleted_path); + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_before_dmfile_remove_encryption); + file_provider->deleteEncryptionInfo(EncryptionPath(encryptionBasePath(), "")); + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_before_dmfile_remove_from_disk); + // Then clean the files on disk + dir_file.remove(true); } } @@ -840,8 +691,6 @@ void DMFile::initializeIndices() throw DB::Exception(fmt::format("invalid ColId: {} from file: {}", err.what(), data)); } }; - if (isSingleFileMode()) - return; Poco::File directory{path()}; std::vector sub_files{}; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index 22a7bd4ab55..06cf1f9502f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -51,12 +51,6 @@ using DMFiles = std::vector; class DMFile : private boost::noncopyable { public: - enum Mode : int - { - SINGLE_FILE, - FOLDER, - }; - enum Status : int { WRITABLE, @@ -65,11 +59,6 @@ class DMFile : private boost::noncopyable DROPPED, }; - enum DMSingleFileFormatVersion : int - { - SINGLE_FILE_VERSION_BASE = 0, - }; - static String statusString(Status status) { switch (status) @@ -178,12 +167,9 @@ class DMFile : private boost::noncopyable UInt64 sub_file_stat_offset; UInt32 sub_file_num; - DMSingleFileFormatVersion file_format_version; - Footer() : sub_file_stat_offset(0) , sub_file_num(0) - , file_format_version(DMSingleFileFormatVersion::SINGLE_FILE_VERSION_BASE) {} }; @@ -192,7 +178,7 @@ class DMFile : private boost::noncopyable using PackProperties = dtpb::PackProperties; static DMFilePtr - create(UInt64 file_id, const String & parent_path, bool single_file_mode = false, DMConfigurationOpt configuration = std::nullopt); + create(UInt64 file_id, const String & parent_path, DMConfigurationOpt configuration = std::nullopt); static DMFilePtr restore( const FileProviderPtr & file_provider, @@ -212,7 +198,7 @@ class DMFile : private boost::noncopyable // static helper function for getting path static String getPathByStatus(const String & parent_path, UInt64 file_id, DMFile::Status status); - static String getNGCPath(const String & parent_path, UInt64 file_id, DMFile::Status status, bool is_single_mode); + static String getNGCPath(const String & parent_path, UInt64 file_id, DMFile::Status status); bool canGC(); void enableGC(); @@ -266,7 +252,6 @@ class DMFile : private boost::noncopyable throw Exception("Column [" + DB::toString(col_id) + "] not found in dm file [" + path() + "]"); } bool isColumnExist(ColId col_id) const { return column_stats.find(col_id) != column_stats.end(); } - bool isSingleFileMode() const { return mode == Mode::SINGLE_FILE; } /* * TODO: This function is currently unused. We could use it when: @@ -301,22 +286,18 @@ class DMFile : private boost::noncopyable DMFile(UInt64 file_id_, UInt64 page_id_, String parent_path_, - Mode mode_, Status status_, Poco::Logger * log_, DMConfigurationOpt configuration_ = std::nullopt) : file_id(file_id_) , page_id(page_id_) , parent_path(std::move(parent_path_)) - , mode(mode_) , status(status_) , configuration(std::move(configuration_)) , log(log_) { } - bool isFolderMode() const { return mode == Mode::FOLDER; } - // Do not gc me. String ngcPath() const; String metaPath() const { return subFilePath(metaFileName()); } @@ -325,6 +306,10 @@ class DMFile : private boost::noncopyable String configurationPath() const { return subFilePath(configurationFileName()); } using FileNameBase = String; + size_t colIndexSize(const FileNameBase & file_name_base) { return Poco::File(colIndexPath(file_name_base)).getSize(); } + size_t colMarkSize(const FileNameBase & file_name_base) { return Poco::File(colMarkPath(file_name_base)).getSize(); } + size_t colDataSize(const FileNameBase & file_name_base) { return Poco::File(colDataPath(file_name_base)).getSize(); } + String colDataPath(const FileNameBase & file_name_base) const { return subFilePath(colDataFileName(file_name_base)); } String colIndexPath(const FileNameBase & file_name_base) const { return subFilePath(colIndexFileName(file_name_base)); } String colMarkPath(const FileNameBase & file_name_base) const { return subFilePath(colMarkFileName(file_name_base)); } @@ -332,12 +317,6 @@ class DMFile : private boost::noncopyable String colIndexCacheKey(const FileNameBase & file_name_base) const; String colMarkCacheKey(const FileNameBase & file_name_base) const; - size_t colIndexOffset(const FileNameBase & file_name_base) const { return subFileOffset(colIndexFileName(file_name_base)); } - size_t colMarkOffset(const FileNameBase & file_name_base) const { return subFileOffset(colMarkFileName(file_name_base)); } - size_t colIndexSize(const FileNameBase & file_name_base) const { return subFileSize(colIndexFileName(file_name_base)); } - size_t colMarkSize(const FileNameBase & file_name_base) const { return subFileSize(colMarkFileName(file_name_base)); } - size_t colDataSize(const FileNameBase & file_name_base) const { return subFileSize(colDataFileName(file_name_base)); } - bool isColIndexExist(const ColId & col_id) const; String encryptionBasePath() const; @@ -388,19 +367,8 @@ class DMFile : private boost::noncopyable void setStatus(Status status_) { status = status_; } void finalizeForFolderMode(const FileProviderPtr & file_provider, const WriteLimiterPtr & write_limiter); - void finalizeForSingleFileMode(WriteBuffer & buffer); - - void addSubFileStat(const String & name, UInt64 offset, UInt64 size) { sub_file_stats.emplace(name, SubFileStat{offset, size}); } - - bool isSubFileExists(const String & name) const { return sub_file_stats.find(name) != sub_file_stats.end(); } - - String subFilePath(const String & file_name) const { return isSingleFileMode() ? path() : path() + "/" + file_name; } - - size_t subFileOffset(const String & file_name) const { return isSingleFileMode() ? sub_file_stats.at(file_name).offset : 0; } - - size_t subFileSize(const String & file_name) const { return sub_file_stats.at(file_name).size; } - void initializeSubFileStatsForFolderMode(); + String subFilePath(const String & file_name) const { return path() + "/" + file_name; } void initializeIndices(); @@ -416,7 +384,6 @@ class DMFile : private boost::noncopyable ColumnStats column_stats; std::unordered_set column_indices; - Mode mode; Status status; DMConfigurationOpt configuration; // configuration diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h index 3329dda14d4..89b6fe2627b 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h @@ -32,12 +32,9 @@ namespace DM class DMFileBlockOutputStream { public: - using Flags = DMFileWriter::Flags; - DMFileBlockOutputStream(const Context & context, const DMFilePtr & dmfile, - const ColumnDefines & write_columns, - const Flags flags = Flags()) + const ColumnDefines & write_columns) : writer( dmfile, write_columns, @@ -46,8 +43,7 @@ class DMFileBlockOutputStream DMFileWriter::Options{ CompressionSettings(context.getSettingsRef().dt_compression_method, context.getSettingsRef().dt_compression_level), context.getSettingsRef().min_compress_block_size, - context.getSettingsRef().max_compress_block_size, - flags}) + context.getSettingsRef().max_compress_block_size}) { } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 25e781cf4d5..6d631a0e6e5 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -246,7 +246,6 @@ class DMFilePackFilter dmfile->encryptionIndexPath(file_name_base), std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), index_file_size), read_limiter); - index_buf.seek(dmfile->colIndexOffset(file_name_base)); return MinMaxIndex::read(*type, index_buf, dmfile->colIndexSize(file_name_base)); } else @@ -258,7 +257,6 @@ class DMFilePackFilter read_limiter, dmfile->configuration->getChecksumAlgorithm(), dmfile->configuration->getChecksumFrameLength()); - index_buf->seek(dmfile->colIndexOffset(file_name_base)); auto header_size = dmfile->configuration->getChecksumHeaderLength(); auto frame_total_size = dmfile->configuration->getChecksumFrameLength() + header_size; auto frame_count = index_file_size / frame_total_size + (index_file_size % frame_total_size != 0); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index b07b8ce199d..5534f260a21 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -51,67 +51,39 @@ DMFileReader::Stream::Stream( size_t max_read_buffer_size, const LoggerPtr & log, const ReadLimiterPtr & read_limiter) - : single_file_mode(reader.single_file_mode) - , avg_size_hint(reader.dmfile->getColumnStat(col_id).avg_size) + : avg_size_hint(reader.dmfile->getColumnStat(col_id).avg_size) { // load mark data - if (reader.single_file_mode) - { - auto mark_with_size_load = [&]() -> MarkWithSizesInCompressedFilePtr { - auto res = std::make_shared(reader.dmfile->getPacks()); - if (res->empty()) // 0 rows. - return res; - size_t size = sizeof(MarkWithSizeInCompressedFile) * reader.dmfile->getPacks(); + auto mark_load = [&]() -> MarksInCompressedFilePtr { + auto res = std::make_shared(reader.dmfile->getPacks()); + if (res->empty()) // 0 rows. + return res; + size_t size = sizeof(MarkInCompressedFile) * reader.dmfile->getPacks(); + if (reader.dmfile->configuration) + { + auto buffer = createReadBufferFromFileBaseByFileProvider( + reader.file_provider, + reader.dmfile->colMarkPath(file_name_base), + reader.dmfile->encryptionMarkPath(file_name_base), + reader.dmfile->getConfiguration()->getChecksumFrameLength(), + read_limiter, + reader.dmfile->getConfiguration()->getChecksumAlgorithm(), + reader.dmfile->getConfiguration()->getChecksumFrameLength()); + buffer->readBig(reinterpret_cast(res->data()), size); + } + else + { auto file = reader.file_provider->newRandomAccessFile(reader.dmfile->colMarkPath(file_name_base), - reader.dmfile->encryptionMarkPath(file_name_base), - nullptr, - -1); - auto mark_size = reader.dmfile->colMarkSize(file_name_base); - auto mark_offset = reader.dmfile->colMarkOffset(file_name_base); - if (unlikely(mark_size != size)) - { - throw DB::TiFlashException("Bad DMFile format, expected mark file content size: " + std::to_string(size) - + " vs. actual: " + std::to_string(mark_size), - Errors::DeltaTree::Internal); - } - PageUtil::readFile(file, mark_offset, reinterpret_cast(res->data()), size, read_limiter); + reader.dmfile->encryptionMarkPath(file_name_base)); + PageUtil::readFile(file, 0, reinterpret_cast(res->data()), size, read_limiter); + } + return res; + }; - return res; - }; - mark_with_sizes = mark_with_size_load(); - } + if (reader.mark_cache) + marks = reader.mark_cache->getOrSet(reader.dmfile->colMarkCacheKey(file_name_base), mark_load); else - { - auto mark_load = [&]() -> MarksInCompressedFilePtr { - auto res = std::make_shared(reader.dmfile->getPacks()); - if (res->empty()) // 0 rows. - return res; - size_t size = sizeof(MarkInCompressedFile) * reader.dmfile->getPacks(); - if (reader.dmfile->configuration) - { - auto buffer = createReadBufferFromFileBaseByFileProvider( - reader.file_provider, - reader.dmfile->colMarkPath(file_name_base), - reader.dmfile->encryptionMarkPath(file_name_base), - reader.dmfile->getConfiguration()->getChecksumFrameLength(), - read_limiter, - reader.dmfile->getConfiguration()->getChecksumAlgorithm(), - reader.dmfile->getConfiguration()->getChecksumFrameLength()); - buffer->readBig(reinterpret_cast(res->data()), size); - } - else - { - auto file = reader.file_provider->newRandomAccessFile(reader.dmfile->colMarkPath(file_name_base), - reader.dmfile->encryptionMarkPath(file_name_base)); - PageUtil::readFile(file, 0, reinterpret_cast(res->data()), size, read_limiter); - } - return res; - }; - if (reader.mark_cache) - marks = reader.mark_cache->getOrSet(reader.dmfile->colMarkCacheKey(file_name_base), mark_load); - else - marks = mark_load(); - } + marks = mark_load(); const String data_path = reader.dmfile->colDataPath(file_name_base); size_t data_file_size = reader.dmfile->colDataSize(file_name_base); @@ -120,19 +92,7 @@ DMFileReader::Stream::Stream( size_t estimated_size = 0; const auto & use_packs = reader.pack_filter.getUsePacks(); - if (reader.single_file_mode) - { - for (size_t i = 0; i < packs; i++) - { - if (!use_packs[i]) - { - continue; - } - buffer_size = std::max(buffer_size, (*mark_with_sizes)[i].mark_size); - estimated_size += (*mark_with_sizes)[i].mark_size; - } - } - else if (!reader.dmfile->configuration) + if (!reader.dmfile->configuration) { for (size_t i = 0; i < packs;) { @@ -170,10 +130,7 @@ DMFileReader::Stream::Stream( else { auto filename = reader.dmfile->colDataFileName(file_name_base); - auto iterator = reader.dmfile->sub_file_stats.find(filename); - estimated_size = iterator != reader.dmfile->sub_file_stats.end() - ? iterator->second.size - : reader.dmfile->configuration->getChecksumFrameLength(); + estimated_size = Poco::File(reader.dmfile->subFilePath(filename)).getSize(); } buffer_size = std::min(buffer_size, max_read_buffer_size); @@ -237,7 +194,6 @@ DMFileReader::DMFileReader( , read_columns(read_columns_) , is_common_handle(is_common_handle_) , read_one_pack_every_time(read_one_pack_every_time_) - , single_file_mode(dmfile_->isSingleFileMode()) , enable_handle_clean_read(enable_handle_clean_read_) , enable_del_clean_read(enable_del_clean_read_) , is_fast_scan(is_fast_scan_) @@ -332,9 +288,9 @@ Block DMFileReader::read() // Find max continuing rows we can read. size_t start_pack_id = next_pack_id; size_t start_row_offset = next_row_offset; - // When single_file_mode is true, or read_one_pack_every_time is true, we can just read one pack every time. + // When read_one_pack_every_time is true, we can just read one pack every time. // 0 means no limit - size_t read_pack_limit = (single_file_mode || read_one_pack_every_time) ? 1 : 0; + size_t read_pack_limit = read_one_pack_every_time ? 1 : 0; const auto & pack_stats = dmfile->getPackStats(); @@ -377,11 +333,6 @@ Block DMFileReader::read() size_t read_packs = next_pack_id - start_pack_id; - if (single_file_mode && read_packs != 1) - { - throw DB::TiFlashException("read_packs must be one when single_file_mode is true.", Errors::DeltaTree::Internal); - } - scan_context->total_dmfile_scanned_packs += read_packs; scan_context->total_dmfile_scanned_rows += read_rows; @@ -496,7 +447,7 @@ Block DMFileReader::read() rows_count += pack_stats[cursor].rows; } ColumnPtr col; - readColumn(cd, col, range.first, range.second - range.first, rows_count, skip_packs_by_column[i], single_file_mode); + readColumn(cd, col, range.first, range.second - range.first, rows_count, skip_packs_by_column[i]); column->insertRangeFrom(*col, 0, col->size()); skip_packs_by_column[i] = 0; } @@ -520,7 +471,7 @@ Block DMFileReader::read() { auto data_type = dmfile->getColumnStat(cd.id).type; ColumnPtr column; - readColumn(cd, column, start_pack_id, read_packs, read_rows, skip_packs_by_column[i], single_file_mode); + readColumn(cd, column, start_pack_id, read_packs, read_rows, skip_packs_by_column[i]); auto converted_column = convertColumnByColumnDefineIfNeed(data_type, std::move(column), cd); res.insert(ColumnWithTypeAndName{std::move(converted_column), cd.type, cd.name, cd.id}); @@ -595,14 +546,13 @@ void DMFileReader::readColumn(ColumnDefine & column_define, size_t start_pack_id, size_t pack_count, size_t read_rows, - size_t skip_packs, - bool force_seek) + size_t skip_packs) { if (!getCachedPacks(column_define.id, start_pack_id, pack_count, read_rows, column)) { auto data_type = dmfile->getColumnStat(column_define.id).type; auto col = data_type->createColumn(); - readFromDisk(column_define, col, start_pack_id, read_rows, skip_packs, force_seek || last_read_from_cache[column_define.id]); + readFromDisk(column_define, col, start_pack_id, read_rows, skip_packs, last_read_from_cache[column_define.id]); column = std::move(col); last_read_from_cache[column_define.id] = false; } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index 5ebde116815..e82749682fa 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -50,19 +50,17 @@ class DMFileReader const LoggerPtr & log, const ReadLimiterPtr & read_limiter); - const bool single_file_mode; double avg_size_hint; MarksInCompressedFilePtr marks; - MarkWithSizesInCompressedFilePtr mark_with_sizes; size_t getOffsetInFile(size_t i) const { - return single_file_mode ? (*mark_with_sizes)[i].mark.offset_in_compressed_file : (*marks)[i].offset_in_compressed_file; + return (*marks)[i].offset_in_compressed_file; } size_t getOffsetInDecompressedBlock(size_t i) const { - return single_file_mode ? (*mark_with_sizes)[i].mark.offset_in_decompressed_block : (*marks)[i].offset_in_decompressed_block; + return (*marks)[i].offset_in_decompressed_block; } std::unique_ptr buf; @@ -127,8 +125,7 @@ class DMFileReader size_t start_pack_id, size_t pack_count, size_t read_rows, - size_t skip_packs, - bool force_seek); + size_t skip_packs); bool getCachedPacks(ColId col_id, size_t start_pack_id, size_t pack_count, size_t read_rows, ColumnPtr & col); private: @@ -141,8 +138,6 @@ class DMFileReader // read_one_pack_every_time is used to create info for every pack const bool read_one_pack_every_time; - const bool single_file_mode{}; - /// Clean read optimize // In normal mode, if there is no delta for some packs in stable, we can try to do clean read (enable_handle_clean_read is true). // In fast mode, if we don't need handle column, we will try to do clean read on handle_column(enable_handle_clean_read is true). diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index 637181c1e91..e5b30a4949c 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -34,37 +34,26 @@ DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, const DMFileWriter::Options & options_) : dmfile(dmfile_) , write_columns(write_columns_) - , options(options_, dmfile) + , options(options_) , // assume pack_stat_file is the first file created inside DMFile // it will create encryption info for the whole DMFile - pack_stat_file( - (options.flags.isSingleFile()) // - ? nullptr - : (dmfile->configuration ? createWriteBufferFromFileBaseByFileProvider( - file_provider_, - dmfile->packStatPath(), - dmfile->encryptionPackStatPath(), - true, - write_limiter_, - dmfile->configuration->getChecksumAlgorithm(), - dmfile->configuration->getChecksumFrameLength()) - : createWriteBufferFromFileBaseByFileProvider(file_provider_, - dmfile->packStatPath(), - dmfile->encryptionPackStatPath(), - true, - write_limiter_, - 0, - 0, - options.max_compress_block_size))) - , single_file_stream((!options.flags.isSingleFile()) - ? nullptr - : new SingleFileStream( - dmfile_, - options.compression_settings, - options.max_compress_block_size, - file_provider_, - write_limiter_)) + pack_stat_file(dmfile->configuration ? createWriteBufferFromFileBaseByFileProvider( + file_provider_, + dmfile->packStatPath(), + dmfile->encryptionPackStatPath(), + true, + write_limiter_, + dmfile->configuration->getChecksumAlgorithm(), + dmfile->configuration->getChecksumFrameLength()) + : createWriteBufferFromFileBaseByFileProvider(file_provider_, + dmfile->packStatPath(), + dmfile->encryptionPackStatPath(), + true, + write_limiter_, + 0, + 0, + options.max_compress_block_size)) , file_provider(file_provider_) , write_limiter(write_limiter_) { @@ -75,25 +64,7 @@ DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, /// for handle column always generate index auto type = removeNullable(cd.type); bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || type->isInteger() || type->isDateOrDateTime(); - if (options.flags.isSingleFile()) - { - if (do_index) - { - const auto column_name = DMFile::getFileNameBase(cd.id, {}); - single_file_stream->minmax_indexs.emplace(column_name, std::make_shared(*cd.type)); - } - - auto callback = [&](const IDataType::SubstreamPath & substream_path) { - const auto stream_name = DMFile::getFileNameBase(cd.id, substream_path); - single_file_stream->column_data_sizes.emplace(stream_name, 0); - single_file_stream->column_mark_with_sizes.emplace(stream_name, SingleFileStream::MarkWithSizes{}); - }; - cd.type->enumerateStreams(callback, {}); - } - else - { - addStreams(cd.id, cd.type, do_index); - } + addStreams(cd.id, cd.type, do_index); dmfile->column_stats.emplace(cd.id, ColumnStat{cd.id, cd.type, /*avg_size=*/0}); } } @@ -141,10 +112,7 @@ void DMFileWriter::write(const Block & block, const BlockProperty & block_proper stat.first_tag = static_cast(col->get64(0)); } - if (!options.flags.isSingleFile()) - { - writePODBinary(stat, *pack_stat_file); - } + writePODBinary(stat, *pack_stat_file); dmfile->addPack(stat); @@ -157,145 +125,62 @@ void DMFileWriter::write(const Block & block, const BlockProperty & block_proper void DMFileWriter::finalize() { - if (!options.flags.isSingleFile()) - { - pack_stat_file->sync(); - } + pack_stat_file->sync(); for (auto & cd : write_columns) { finalizeColumn(cd.id, cd.type); } - if (options.flags.isSingleFile()) - { - dmfile->finalizeForSingleFileMode(single_file_stream->plain_layer); - single_file_stream->flush(); - } - else - { - dmfile->finalizeForFolderMode(file_provider, write_limiter); - } + dmfile->finalizeForFolderMode(file_provider, write_limiter); } void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColumn & column, const ColumnVector * del_mark) { size_t rows = column.size(); - if (options.flags.isSingleFile()) - { - auto callback = [&](const IDataType::SubstreamPath & substream) { - size_t offset_in_compressed_file = single_file_stream->plain_layer.count(); - const auto stream_name = DMFile::getFileNameBase(col_id, substream); - if (unlikely(substream.size() > 1)) - throw DB::TiFlashException("Substream_path shouldn't be more than one.", Errors::DeltaTree::Internal); - - auto & minmax_indexs = single_file_stream->minmax_indexs; - if (auto iter = minmax_indexs.find(stream_name); iter != minmax_indexs.end()) + type.enumerateStreams( + [&](const IDataType::SubstreamPath & substream) { + const auto name = DMFile::getFileNameBase(col_id, substream); + auto & stream = column_streams.at(name); + if (stream->minmaxes) { // For EXTRA_HANDLE_COLUMN_ID, we ignore del_mark when add minmax index. // Because we need all rows which satisfy a certain range when place delta index no matter whether the row is a delete row. // For TAG Column, we also ignore del_mark when add minmax index. - iter->second->addPack(column, (col_id == EXTRA_HANDLE_COLUMN_ID || col_id == TAG_COLUMN_ID) ? nullptr : del_mark); - } - - auto offset_in_compressed_block = single_file_stream->original_layer.offset(); - if (unlikely(offset_in_compressed_block != 0)) - throw DB::TiFlashException("Offset in compressed block is always expected to be 0 when single_file_mode is true, now " - + DB::toString(offset_in_compressed_block), - Errors::DeltaTree::Internal); - - // write column data - if (substream.empty()) - { - if (unlikely(type.isNullable())) - throw DB::TiFlashException("Type shouldn't be nullable when substream_path is empty.", Errors::DeltaTree::Internal); - - type.serializeBinaryBulk(column, single_file_stream->original_layer, 0, rows); - } - else if (substream[0].type == IDataType::Substream::NullMap) - { - if (unlikely(!type.isNullable())) - throw DB::TiFlashException( - "Type shouldn be nullable when substream_path's type is NullMap.", - Errors::DeltaTree::Internal); - - const auto & col = static_cast(column); - col.checkConsistency(); - DataTypeUInt8().serializeBinaryBulk(col.getNullMapColumn(), single_file_stream->original_layer, 0, rows); - } - else if (substream[0].type == IDataType::Substream::NullableElements) - { - if (unlikely(!type.isNullable())) - throw DB::TiFlashException( - "Type shouldn be nullable when substream_path's type is NullableElements.", - Errors::DeltaTree::Internal); - - const auto & nullable_type = static_cast(type); - const auto & col = static_cast(column); - nullable_type.getNestedType()->serializeBinaryBulk(col.getNestedColumn(), single_file_stream->original_layer, 0, rows); + stream->minmaxes->addPack(column, (col_id == EXTRA_HANDLE_COLUMN_ID || col_id == TAG_COLUMN_ID) ? nullptr : del_mark); } - else - { - throw DB::TiFlashException( - "Unknown type of substream_path: " + std::to_string(substream[0].type), - Errors::DeltaTree::Internal); - } - single_file_stream->flushCompressedData(); - size_t mark_size_in_file = single_file_stream->plain_layer.count() - offset_in_compressed_file; - single_file_stream->column_mark_with_sizes.at(stream_name) - .push_back(MarkWithSizeInCompressedFile{MarkInCompressedFile{.offset_in_compressed_file = offset_in_compressed_file, - .offset_in_decompressed_block = offset_in_compressed_block}, - mark_size_in_file}); - single_file_stream->column_data_sizes[stream_name] += mark_size_in_file; - }; - type.enumerateStreams(callback, {}); - } - else - { - type.enumerateStreams( - [&](const IDataType::SubstreamPath & substream) { - const auto name = DMFile::getFileNameBase(col_id, substream); - auto & stream = column_streams.at(name); - if (stream->minmaxes) - { - // For EXTRA_HANDLE_COLUMN_ID, we ignore del_mark when add minmax index. - // Because we need all rows which satisfy a certain range when place delta index no matter whether the row is a delete row. - // For TAG Column, we also ignore del_mark when add minmax index. - stream->minmaxes->addPack(column, (col_id == EXTRA_HANDLE_COLUMN_ID || col_id == TAG_COLUMN_ID) ? nullptr : del_mark); - } - - /// There could already be enough data to compress into the new block. - if (stream->compressed_buf->offset() >= options.min_compress_block_size) - stream->compressed_buf->next(); - auto offset_in_compressed_block = stream->compressed_buf->offset(); + /// There could already be enough data to compress into the new block. + if (stream->compressed_buf->offset() >= options.min_compress_block_size) + stream->compressed_buf->next(); - writeIntBinary(stream->plain_file->count(), *stream->mark_file); - writeIntBinary(offset_in_compressed_block, *stream->mark_file); - }, - {}); + auto offset_in_compressed_block = stream->compressed_buf->offset(); - type.serializeBinaryBulkWithMultipleStreams( - column, - [&](const IDataType::SubstreamPath & substream) { - const auto stream_name = DMFile::getFileNameBase(col_id, substream); - auto & stream = column_streams.at(stream_name); - return &(*stream->compressed_buf); - }, - 0, - rows, - true, - {}); + writeIntBinary(stream->plain_file->count(), *stream->mark_file); + writeIntBinary(offset_in_compressed_block, *stream->mark_file); + }, + {}); - type.enumerateStreams( - [&](const IDataType::SubstreamPath & substream) { - const auto name = DMFile::getFileNameBase(col_id, substream); - auto & stream = column_streams.at(name); - stream->compressed_buf->nextIfAtEnd(); - }, - {}); - } + type.serializeBinaryBulkWithMultipleStreams( + column, + [&](const IDataType::SubstreamPath & substream) { + const auto stream_name = DMFile::getFileNameBase(col_id, substream); + auto & stream = column_streams.at(stream_name); + return &(*stream->compressed_buf); + }, + 0, + rows, + true, + {}); + + type.enumerateStreams( + [&](const IDataType::SubstreamPath & substream) { + const auto name = DMFile::getFileNameBase(col_id, substream); + auto & stream = column_streams.at(name); + stream->compressed_buf->nextIfAtEnd(); + }, + {}); auto & avg_size = dmfile->column_stats.at(col_id).avg_size; IDataType::updateAvgValueSizeHint(column, avg_size); @@ -315,91 +200,58 @@ void DMFileWriter::finalizeColumn(ColId col_id, DataTypePtr type) } }; #endif - if (options.flags.isSingleFile()) - { - auto callback = [&](const IDataType::SubstreamPath & substream) { - const auto stream_name = DMFile::getFileNameBase(col_id, substream); - - dmfile->addSubFileStat(DMFile::colDataFileName(stream_name), 0, single_file_stream->column_data_sizes.at(stream_name)); - - // write mark - size_t mark_offset_in_file = single_file_stream->plain_layer.count(); - for (const auto & mark_with_size : single_file_stream->column_mark_with_sizes.at(stream_name)) - { - writeIntBinary(mark_with_size.mark.offset_in_compressed_file, single_file_stream->plain_layer); - writeIntBinary(mark_with_size.mark.offset_in_decompressed_block, single_file_stream->plain_layer); - writeIntBinary(mark_with_size.mark_size, single_file_stream->plain_layer); - } - size_t mark_size_in_file = single_file_stream->plain_layer.count() - mark_offset_in_file; - dmfile->addSubFileStat(DMFile::colMarkFileName(stream_name), mark_offset_in_file, mark_size_in_file); - // write minmax - auto & minmax_indexs = single_file_stream->minmax_indexs; - if (auto iter = minmax_indexs.find(stream_name); iter != minmax_indexs.end()) - { - size_t minmax_offset_in_file = single_file_stream->plain_layer.count(); - iter->second->write(*type, single_file_stream->plain_layer); - size_t minmax_size_in_file = single_file_stream->plain_layer.count() - minmax_offset_in_file; - bytes_written += minmax_size_in_file; - dmfile->addSubFileStat(DMFile::colIndexFileName(stream_name), minmax_offset_in_file, minmax_size_in_file); - } - }; - type->enumerateStreams(callback, {}); - } - else - { - auto callback = [&](const IDataType::SubstreamPath & substream) { - const auto stream_name = DMFile::getFileNameBase(col_id, substream); - auto & stream = column_streams.at(stream_name); - stream->flush(); + auto callback = [&](const IDataType::SubstreamPath & substream) { + const auto stream_name = DMFile::getFileNameBase(col_id, substream); + auto & stream = column_streams.at(stream_name); + stream->flush(); #ifndef NDEBUG - examine_buffer_size(*stream->mark_file, *this->file_provider); - examine_buffer_size(*stream->plain_file, *this->file_provider); + examine_buffer_size(*stream->mark_file, *this->file_provider); + examine_buffer_size(*stream->plain_file, *this->file_provider); #endif - bytes_written += stream->getWrittenBytes(); + bytes_written += stream->getWrittenBytes(); - if (stream->minmaxes) + if (stream->minmaxes) + { + if (!dmfile->configuration) { - if (!dmfile->configuration) - { - WriteBufferFromFileProvider buf( - file_provider, - dmfile->colIndexPath(stream_name), - dmfile->encryptionIndexPath(stream_name), - false, - write_limiter); - stream->minmaxes->write(*type, buf); - buf.sync(); - // Ignore data written in index file when the dmfile is empty. - // This is ok because the index file in this case is tiny, and we already ignore other small files like meta and pack stat file. - // The motivation to do this is to show a zero `stable_size_on_disk` for empty segments, - // and we cannot change the index file format for empty dmfile because of backward compatibility. - bytes_written += is_empty_file ? 0 : buf.getMaterializedBytes(); - } - else - { - auto buf = createWriteBufferFromFileBaseByFileProvider(file_provider, - dmfile->colIndexPath(stream_name), - dmfile->encryptionIndexPath(stream_name), - false, - write_limiter, - dmfile->configuration->getChecksumAlgorithm(), - dmfile->configuration->getChecksumFrameLength()); - stream->minmaxes->write(*type, *buf); - buf->sync(); - // Ignore data written in index file when the dmfile is empty. - // This is ok because the index file in this case is tiny, and we already ignore other small files like meta and pack stat file. - // The motivation to do this is to show a zero `stable_size_on_disk` for empty segments, - // and we cannot change the index file format for empty dmfile because of backward compatibility. - bytes_written += is_empty_file ? 0 : buf->getMaterializedBytes(); + WriteBufferFromFileProvider buf( + file_provider, + dmfile->colIndexPath(stream_name), + dmfile->encryptionIndexPath(stream_name), + false, + write_limiter); + stream->minmaxes->write(*type, buf); + buf.sync(); + // Ignore data written in index file when the dmfile is empty. + // This is ok because the index file in this case is tiny, and we already ignore other small files like meta and pack stat file. + // The motivation to do this is to show a zero `stable_size_on_disk` for empty segments, + // and we cannot change the index file format for empty dmfile because of backward compatibility. + bytes_written += is_empty_file ? 0 : buf.getMaterializedBytes(); + } + else + { + auto buf = createWriteBufferFromFileBaseByFileProvider(file_provider, + dmfile->colIndexPath(stream_name), + dmfile->encryptionIndexPath(stream_name), + false, + write_limiter, + dmfile->configuration->getChecksumAlgorithm(), + dmfile->configuration->getChecksumFrameLength()); + stream->minmaxes->write(*type, *buf); + buf->sync(); + // Ignore data written in index file when the dmfile is empty. + // This is ok because the index file in this case is tiny, and we already ignore other small files like meta and pack stat file. + // The motivation to do this is to show a zero `stable_size_on_disk` for empty segments, + // and we cannot change the index file format for empty dmfile because of backward compatibility. + bytes_written += is_empty_file ? 0 : buf->getMaterializedBytes(); #ifndef NDEBUG - examine_buffer_size(*buf, *this->file_provider); + examine_buffer_size(*buf, *this->file_provider); #endif - } } - }; - type->enumerateStreams(callback, {}); - } + } + }; + type->enumerateStreams(callback, {}); // Update column's bytes in disk dmfile->column_stats.at(col_id).serialized_bytes = bytes_written; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h index 95cedad28e3..efcbe58dd7c 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h @@ -108,59 +108,6 @@ class DMFileWriter using StreamPtr = std::unique_ptr; using ColumnStreams = std::map; - struct SingleFileStream - { - SingleFileStream(const DMFilePtr & dmfile, - CompressionSettings compression_settings, - size_t max_compress_block_size, - const FileProviderPtr & file_provider, - const WriteLimiterPtr & write_limiter_) - : plain_file(createWriteBufferFromFileBaseByFileProvider(file_provider, - dmfile->path(), - EncryptionPath(dmfile->encryptionBasePath(), ""), - true, - write_limiter_, - 0, - 0, - max_compress_block_size)) - , plain_layer(*plain_file) - , compressed_buf(plain_layer, compression_settings) - , original_layer(compressed_buf) - { - } - - void flushCompressedData() - { - original_layer.next(); - compressed_buf.next(); - } - - void flush() - { - plain_layer.next(); - plain_file->next(); - - plain_file->sync(); - } - - using ColumnMinMaxIndexs = std::unordered_map; - ColumnMinMaxIndexs minmax_indexs; - - using ColumnDataSizes = std::unordered_map; - ColumnDataSizes column_data_sizes; - - using MarkWithSizes = std::vector; - using ColumnMarkWithSizes = std::unordered_map; - ColumnMarkWithSizes column_mark_with_sizes; - - /// original_layer -> compressed_buf -> plain_layer -> plain_file - WriteBufferFromFileBasePtr plain_file; - HashingWriteBuffer plain_layer; - CompressedWriteBuffer<> compressed_buf; - HashingWriteBuffer original_layer; - }; - using SingleFileStreamPtr = std::shared_ptr; - struct BlockProperty { size_t not_clean_rows; @@ -169,47 +116,22 @@ class DMFileWriter size_t gc_hint_version; }; - struct Flags - { - private: - static constexpr size_t IS_SINGLE_FILE = 0x01; - - size_t value; - - public: - Flags() - : value(0x0) - {} - - inline void setSingleFile(bool v) { value = (v ? (value | IS_SINGLE_FILE) : (value & ~IS_SINGLE_FILE)); } - inline bool isSingleFile() const { return (value & IS_SINGLE_FILE); } - }; - struct Options { CompressionSettings compression_settings; - size_t min_compress_block_size; - size_t max_compress_block_size; - Flags flags; + size_t min_compress_block_size{}; + size_t max_compress_block_size{}; Options() = default; - Options(CompressionSettings compression_settings_, size_t min_compress_block_size_, size_t max_compress_block_size_, Flags flags_) + Options(CompressionSettings compression_settings_, size_t min_compress_block_size_, size_t max_compress_block_size_) : compression_settings(compression_settings_) , min_compress_block_size(min_compress_block_size_) , max_compress_block_size(max_compress_block_size_) - , flags(flags_) { } - Options(const Options & from, const DMFilePtr & file) - : compression_settings(from.compression_settings) - , min_compress_block_size(from.min_compress_block_size) - , max_compress_block_size(from.max_compress_block_size) - , flags(from.flags) - { - flags.setSingleFile(file->isSingleFileMode()); - } + Options(const Options & from) = default; }; @@ -246,8 +168,6 @@ class DMFileWriter WriteBufferFromFileBasePtr pack_stat_file; - SingleFileStreamPtr single_file_stream; - FileProviderPtr file_provider; WriteLimiterPtr write_limiter; diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index acbcda2ec73..96491471875 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -130,7 +130,7 @@ struct RowKeyValue return std::make_shared(prefix + *value); } - bool operator==(const RowKeyValue & v) + bool operator==(const RowKeyValue & v) const { return is_common_handle == v.is_common_handle && (*value) == (*v.value) && int_value == v.int_value; } @@ -173,14 +173,15 @@ struct RowKeyValue */ RowKeyValue toNext() const { + // We want to always ensure that the IntHandle.stringValue == IntHandle.intValue. + if (!is_common_handle) + return toPrefixNext(); + HandleValuePtr next_value = std::make_shared(value->begin(), value->end()); next_value->push_back(0x0); - Int64 next_int_value = int_value; - if (!is_common_handle && next_int_value != std::numeric_limits::max()) - next_int_value++; - - return RowKeyValue(is_common_handle, next_value, next_int_value); + // For common handle, int_value will not be used in compare. Let's keep it unchanged. + return RowKeyValue(/* is_common_handle */ true, next_value, int_value); } void serialize(WriteBuffer & buf) const diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp index bb4ccfdf049..e7b909b0fd8 100644 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp @@ -39,7 +39,6 @@ SSTFilesToDTFilesOutputStream::SSTFilesToDTFilesOutputStream( // ChildStream child_, StorageDeltaMergePtr storage_, DecodingStorageSchemaSnapshotConstPtr schema_snap_, - TiDB::SnapshotApplyMethod method_, FileConvertJobType job_type_, UInt64 split_after_rows_, UInt64 split_after_size_, @@ -47,7 +46,6 @@ SSTFilesToDTFilesOutputStream::SSTFilesToDTFilesOutputStream( // : child(std::move(child_)) , storage(std::move(storage_)) , schema_snap(std::move(schema_snap_)) - , method(method_) , job_type(job_type_) , split_after_rows(split_after_rows_) , split_after_size(split_after_size_) @@ -124,21 +122,8 @@ bool SSTFilesToDTFilesOutputStream::newDTFileStream() return false; } - DMFileBlockOutputStream::Flags flags{}; - switch (method) - { - case TiDB::SnapshotApplyMethod::DTFile_Directory: - flags.setSingleFile(false); - break; - case TiDB::SnapshotApplyMethod::DTFile_Single: - flags.setSingleFile(true); - break; - default: - break; - } - - auto dt_file = DMFile::create(file_id, parent_path, flags.isSingleFile(), storage->createChecksumConfig(flags.isSingleFile())); - dt_stream = std::make_unique(context, dt_file, *(schema_snap->column_defines), flags); + auto dt_file = DMFile::create(file_id, parent_path, storage->createChecksumConfig()); + dt_stream = std::make_unique(context, dt_file, *(schema_snap->column_defines)); dt_stream->writePrefix(); ingest_files.emplace_back(dt_file); ingest_files_range.emplace_back(std::nullopt); diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h index 59c80b1ade6..18a9bab7eec 100644 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h +++ b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h @@ -74,7 +74,6 @@ class SSTFilesToDTFilesOutputStream : private boost::noncopyable ChildStream child_, StorageDeltaMergePtr storage_, DecodingStorageSchemaSnapshotConstPtr schema_snap_, - TiDB::SnapshotApplyMethod method_, FileConvertJobType job_type_, UInt64 split_after_rows_, UInt64 split_after_size_, @@ -112,7 +111,6 @@ class SSTFilesToDTFilesOutputStream : private boost::noncopyable ChildStream child; StorageDeltaMergePtr storage; DecodingStorageSchemaSnapshotConstPtr schema_snap; - const TiDB::SnapshotApplyMethod method; const FileConvertJobType job_type; const UInt64 split_after_rows; const UInt64 split_after_size; diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index e1d2e87013a..bbeed541f47 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -110,11 +110,10 @@ DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags) + const String & parent_path) { - auto dmfile = DMFile::create(file_id, parent_path, flags.isSingleFile(), dm_context.createChecksumConfig(flags.isSingleFile())); - auto output_stream = std::make_shared(dm_context.db_context, dmfile, *schema_snap, flags); + auto dmfile = DMFile::create(file_id, parent_path, dm_context.createChecksumConfig()); + auto output_stream = std::make_shared(dm_context.db_context, dmfile, *schema_snap); const auto * mvcc_stream = typeid_cast *>(input_stream.get()); input_stream->readPrefix(); @@ -175,11 +174,8 @@ StableValueSpacePtr createNewStable( // auto delegator = context.path_pool.getStableDiskDelegator(); auto store_path = delegator.choosePath(); - DMFileBlockOutputStream::Flags flags; - flags.setSingleFile(context.db_context.getSettingsRef().dt_enable_single_file_mode_dmfile); - PageId dtfile_id = context.storage_pool.newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); - auto dtfile = writeIntoNewDMFile(context, schema_snap, input_stream, dtfile_id, store_path, flags); + auto dtfile = writeIntoNewDMFile(context, schema_snap, input_stream, dtfile_id, store_path); auto stable = std::make_shared(stable_id); stable->setFiles({dtfile}, RowKeyRange::newAll(context.is_common_handle, context.rowkey_column_size)); diff --git a/dbms/src/Storages/DeltaMerge/WriteBatches.h b/dbms/src/Storages/DeltaMerge/WriteBatches.h index 83420f278d5..b5d2b00d728 100644 --- a/dbms/src/Storages/DeltaMerge/WriteBatches.h +++ b/dbms/src/Storages/DeltaMerge/WriteBatches.h @@ -41,7 +41,7 @@ struct WriteBatches : private boost::noncopyable WriteLimiterPtr write_limiter; - WriteBatches(StoragePool & storage_pool_, const WriteLimiterPtr & write_limiter_ = nullptr) + explicit WriteBatches(StoragePool & storage_pool_, const WriteLimiterPtr & write_limiter_ = nullptr) : ns_id(storage_pool_.getNamespaceId()) , log(ns_id) , data(ns_id) @@ -58,12 +58,11 @@ struct WriteBatches : private boost::noncopyable { if constexpr (DM_RUN_CHECK) { - Poco::Logger * logger = &Poco::Logger::get("WriteBatches"); auto check_empty = [&](const WriteBatch & wb, const String & name) { if (!wb.empty()) { StackTrace trace; - LOG_ERROR(logger, + LOG_ERROR(Logger::get(), "!!!=========================Modifications in {} haven't persisted=========================!!! Stack trace: {}", name, trace.toString()); @@ -91,8 +90,7 @@ struct WriteBatches : private boost::noncopyable if constexpr (DM_RUN_CHECK) { - Poco::Logger * logger = &Poco::Logger::get("WriteBatches"); - auto check = [](const WriteBatch & wb, const String & what, Poco::Logger * logger) { + auto check = [](const WriteBatch & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -100,11 +98,11 @@ struct WriteBatches : private boost::noncopyable if (unlikely(w.type == WriteBatchWriteType::DEL)) throw Exception("Unexpected deletes in " + what); } - LOG_TRACE(logger, "Write into {} : {}", what, wb.toString()); + LOG_TRACE(Logger::get(), "Write into {} : {}", what, wb.toString()); }; - check(log, "log", logger); - check(data, "data", logger); + check(log, "log"); + check(data, "data"); } for (auto & w : log.getWrites()) @@ -135,9 +133,7 @@ struct WriteBatches : private boost::noncopyable if constexpr (DM_RUN_CHECK) { - Poco::Logger * logger = &Poco::Logger::get("WriteBatches"); - - auto check = [](const WriteBatch & wb, const String & what, Poco::Logger * logger) { + auto check = [](const WriteBatch & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -145,11 +141,11 @@ struct WriteBatches : private boost::noncopyable if (unlikely(w.type != WriteBatchWriteType::DEL)) throw Exception("Expected deletes in " + what); } - LOG_TRACE(logger, "Rollback remove from {} : {}", what, wb.toString()); + LOG_TRACE(Logger::get(), "Rollback remove from {} : {}", what, wb.toString()); }; - check(log_wb, "log_wb", logger); - check(data_wb, "data_wb", logger); + check(log_wb, "log_wb"); + check(data_wb, "data_wb"); } storage_pool.logWriter()->write(std::move(log_wb), write_limiter); @@ -163,9 +159,7 @@ struct WriteBatches : private boost::noncopyable { if constexpr (DM_RUN_CHECK) { - Poco::Logger * logger = &Poco::Logger::get("WriteBatches"); - - auto check = [](const WriteBatch & wb, const String & what, Poco::Logger * logger) { + auto check = [](const WriteBatch & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -173,10 +167,10 @@ struct WriteBatches : private boost::noncopyable if (unlikely(w.type != WriteBatchWriteType::PUT)) throw Exception("Expected puts in " + what); } - LOG_TRACE(logger, "Write into {} : {}", what, wb.toString()); + LOG_TRACE(Logger::get(), "Write into {} : {}", what, wb.toString()); }; - check(meta, "meta", logger); + check(meta, "meta"); } storage_pool.metaWriter()->write(std::move(meta), write_limiter); @@ -187,9 +181,7 @@ struct WriteBatches : private boost::noncopyable { if constexpr (DM_RUN_CHECK) { - Poco::Logger * logger = &Poco::Logger::get("WriteBatches"); - - auto check = [](const WriteBatch & wb, const String & what, Poco::Logger * logger) { + auto check = [](const WriteBatch & wb, const String & what) { if (wb.empty()) return; for (const auto & w : wb.getWrites()) @@ -197,12 +189,12 @@ struct WriteBatches : private boost::noncopyable if (unlikely(w.type != WriteBatchWriteType::DEL)) throw Exception("Expected deletes in " + what); } - LOG_TRACE(logger, "Write into {} : {}", what, wb.toString()); + LOG_TRACE(Logger::get(), "Write into {} : {}", what, wb.toString()); }; - check(removed_log, "removed_log", logger); - check(removed_data, "removed_data", logger); - check(removed_meta, "removed_meta", logger); + check(removed_log, "removed_log"); + check(removed_data, "removed_data"); + check(removed_meta, "removed_meta"); } storage_pool.logWriter()->write(std::move(removed_log), write_limiter); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp index ffdf019d504..be3766c9b8d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_column_file.cpp @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include #include #include @@ -23,9 +25,12 @@ #include #include #include +#include #include #include +#include +#include #include namespace DB @@ -81,7 +86,7 @@ TEST_F(ColumnFileTest, ColumnFileBigRead) try { auto table_columns = DMTestEnv::getDefaultColumns(); - auto dm_file = DMFile::create(1, parent_path, false, std::make_optional()); + auto dm_file = DMFile::create(1, parent_path, std::make_optional()); const size_t num_rows_write_per_batch = 8192; const size_t batch_num = 3; const UInt64 tso_value = 100; @@ -172,12 +177,11 @@ try ColumnFilePersisteds column_file_persisteds; size_t rows = 100; // arbitrary value auto block = DMTestEnv::prepareSimpleWriteBlock(0, rows, false); - auto schema = std::make_shared(block.cloneEmpty()); - column_file_persisteds.push_back(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, rows, wbs, schema)); + column_file_persisteds.push_back(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, rows, wbs)); column_file_persisteds.emplace_back(std::make_shared(RowKeyRange::newAll(false, 1))); - column_file_persisteds.push_back(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, rows, wbs, schema)); + column_file_persisteds.push_back(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, rows, wbs)); column_file_persisteds.emplace_back(std::make_shared(RowKeyRange::newAll(false, 1))); - column_file_persisteds.push_back(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, rows, wbs, schema)); + column_file_persisteds.push_back(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, rows, wbs)); serializeSavedColumnFilesInV3Format(buff, column_file_persisteds); } @@ -191,6 +195,53 @@ try } CATCH +TEST_F(ColumnFileTest, SerializeEmptyBlock) +try +{ + size_t num_rows_write = 0; + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + WriteBatches wbs(dmContext().storage_pool); + EXPECT_THROW(ColumnFileTiny::writeColumnFile(dmContext(), block, 0, num_rows_write, wbs), DB::Exception); +} +CATCH + +TEST_F(ColumnFileTest, ReadColumns) +try +{ + size_t num_rows_write = 10; + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + ColumnFileTinyPtr cf; + { + WriteBatches wbs(dmContext().storage_pool); + cf = ColumnFileTiny::writeColumnFile(dmContext(), block, 0, num_rows_write, wbs); + wbs.writeAll(); + } + auto storage_snap = std::make_shared(dmContext().storage_pool, nullptr, "", true); + + { + // Read columns exactly the same as we have written + auto columns_to_read = std::make_shared(getColumnDefinesFromBlock(block)); + auto reader = cf->getReader(dmContext(), storage_snap, columns_to_read); + auto block_read = reader->readNextBlock(); + ASSERT_BLOCK_EQ(block_read, block); + } + + { + // Only read with a column that is not exist in ColumnFileTiny + ColumnID added_colid = 100; + String added_colname = "added_col"; + auto columns_to_read = std::make_shared(ColumnDefines{ColumnDefine(added_colid, added_colname, typeFromString("Int64"))}); + auto reader = cf->getReader(dmContext(), storage_snap, columns_to_read); + auto block_read = reader->readNextBlock(); + ASSERT_COLUMNS_EQ_R( + ColumnsWithTypeAndName({ + createColumn(std::vector(num_rows_write, 0)), + }), + block_read.getColumnsWithTypeAndName()); + } +} +CATCH + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_test_basic.h index 68c95d07fa6..77a132e92d7 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_test_basic.h +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_test_basic.h @@ -37,8 +37,7 @@ extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); + const String & parent_path); namespace tests { // Simple test suit for DeltaMergeStore. @@ -157,16 +156,12 @@ class DeltaMergeStoreRWTest auto input_stream = std::make_shared(block); auto [store_path, file_id] = store->preAllocateIngestFile(); - DMFileBlockOutputStream::Flags flags; - flags.setSingleFile(DMTestEnv::getPseudoRandomNumber() % 2); - auto dmfile = writeIntoNewDMFile( context, std::make_shared(store->getTableColumns()), input_stream, file_id, - store_path, - flags); + store_path); store->preIngestFile(store_path, file_id, dmfile->getBytesOnDisk()); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_value_space.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_value_space.cpp index 17b5d79febf..ddaa1b9c103 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_value_space.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_value_space.cpp @@ -41,8 +41,7 @@ extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); + const String & parent_path); namespace tests { void assertBlocksEqual(const Blocks & blocks1, const Blocks & blocks2) @@ -153,7 +152,7 @@ Block appendColumnFileBigToDeltaValueSpace(DMContext & context, ColumnDefinesPtr auto input_stream = std::make_shared(block); auto store_path = delegator.choosePath(); auto dmfile - = writeIntoNewDMFile(context, std::make_shared(*column_defines), input_stream, file_id, store_path, {}); + = writeIntoNewDMFile(context, std::make_shared(*column_defines), input_stream, file_id, store_path); delegator.addDTFile(file_id, dmfile->getBytesOnDisk(), store_path); auto & pk_column = block.getByPosition(0).column; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 517594d9a50..0de81b1b7d6 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -42,24 +42,8 @@ namespace DM { namespace tests { -TEST(DMFileWriterFlagsTest, SetClearFlags) -{ - using Flags = DMFileWriter::Flags; - - Flags flags; - - bool f = false; - flags.setSingleFile(f); - EXPECT_FALSE(flags.isSingleFile()); - - f = true; - flags.setSingleFile(f); - EXPECT_TRUE(flags.isSingleFile()); -} - enum class DMFileMode { - SingleFile, DirectoryLegacy, DirectoryChecksum }; @@ -71,9 +55,6 @@ String paramToString(const ::testing::TestParamInfo & info) String name; switch (mode) { - case DMFileMode::SingleFile: - name = "single_file"; - break; case DMFileMode::DirectoryLegacy: name = "folder"; break; @@ -103,13 +84,12 @@ class DMFileTest TiFlashStorageTestBasic::SetUp(); auto mode = GetParam(); - bool single_file_mode = (mode == DMFileMode::SingleFile); auto configuration = (mode == DMFileMode::DirectoryChecksum ? std::make_optional() : std::nullopt); parent_path = TiFlashStorageTestBasic::getTemporaryPath(); path_pool = std::make_unique(db_context->getPathPool().withTable("test", "DMFileTest", false)); storage_pool = std::make_unique(*db_context, /*ns_id*/ 100, *path_pool, "test.t1"); - dm_file = DMFile::create(1, parent_path, single_file_mode, std::move(configuration)); + dm_file = DMFile::create(1, parent_path, std::move(configuration)); table_columns = std::make_shared(); column_cache = std::make_shared(); @@ -252,10 +232,9 @@ try dm_file.reset(); auto mode = GetParam(); - bool single_file_mode = mode == DMFileMode::SingleFile; auto configuration = mode == DMFileMode::DirectoryChecksum ? std::make_optional() : std::nullopt; - dm_file = DMFile::create(id, parent_path, single_file_mode, std::move(configuration)); + dm_file = DMFile::create(id, parent_path, std::move(configuration)); // Right after created, the fil is not abled to GC and it is ignored by `listAllInPath` EXPECT_FALSE(dm_file->canGC()); DMFile::ListOptions options; @@ -903,7 +882,7 @@ CATCH INSTANTIATE_TEST_CASE_P(DTFileMode, // DMFileTest, - testing::Values(DMFileMode::SingleFile, DMFileMode::DirectoryLegacy, DMFileMode::DirectoryChecksum), + testing::Values(DMFileMode::DirectoryLegacy, DMFileMode::DirectoryChecksum), paramToString); @@ -923,12 +902,11 @@ class DMFileClusteredIndexTest path = TiFlashStorageTestBasic::getTemporaryPath(); auto mode = GetParam(); - bool single_file_mode = mode == DMFileMode::SingleFile; auto configuration = mode == DMFileMode::DirectoryChecksum ? std::make_optional() : std::nullopt; path_pool = std::make_unique(db_context->getPathPool().withTable("test", "t", false)); storage_pool = std::make_unique(*db_context, table_id, *path_pool, "test.t1"); - dm_file = DMFile::create(0, path, single_file_mode, std::move(configuration)); + dm_file = DMFile::create(0, path, std::move(configuration)); table_columns = std::make_shared(); column_cache = std::make_shared(); @@ -1127,12 +1105,6 @@ try } CATCH -INSTANTIATE_TEST_CASE_P(DTFileMode, // - DMFileClusteredIndexTest, - testing::Values(DMFile::Mode::FOLDER, DMFile::Mode::SINGLE_FILE), - paramToString); - - /// DDL test cases class DMFileDDLTest : public DMFileTest { @@ -1339,7 +1311,7 @@ CATCH INSTANTIATE_TEST_CASE_P(DTFileMode, // DMFileDDLTest, - testing::Values(DMFileMode::SingleFile, DMFileMode::DirectoryLegacy, DMFileMode::DirectoryChecksum), + testing::Values(DMFileMode::DirectoryLegacy, DMFileMode::DirectoryChecksum), paramToString); } // namespace tests diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index a2ec3da3cef..cf71819ce00 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -51,8 +52,7 @@ extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); + const String & parent_path); namespace tests { class SegmentTest : public DB::base::TiFlashStorageTestBasic @@ -1044,11 +1044,8 @@ class SegmentTest2 : public SegmentTest auto input_stream = std::make_shared(block); auto store_path = delegator.choosePath(); - DMFileBlockOutputStream::Flags flags; - flags.setSingleFile(DMTestEnv::getPseudoRandomNumber() % 2); - auto dmfile - = writeIntoNewDMFile(context, std::make_shared(*tableColumns()), input_stream, file_id, store_path, flags); + = writeIntoNewDMFile(context, std::make_shared(*tableColumns()), input_stream, file_id, store_path); delegator.addDTFile(file_id, dmfile->getBytesOnDisk(), store_path); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp index 9fd6351ca8d..bb7f325f40a 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp @@ -33,8 +33,7 @@ extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); + const String & parent_path); namespace tests { @@ -314,8 +313,7 @@ ExternalDTFileInfo genDMFile(DeltaMergeStorePtr store, DMContext & context, cons std::make_shared(store->getTableColumns()), input_stream, file_id, - store_path, - /* flags */ {}); + store_path); store->preIngestFile(store_path, file_id, dmfile->getBytesOnDisk()); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp index 40a0cf782e6..6ce6fe00064 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp @@ -148,6 +148,37 @@ TEST(RowKey, ToNextKeyCommonHandle) EXPECT_EQ(0, compare(my_next.toRowKeyValueRef(), next.toRowKeyValueRef())); } +TEST(RowKey, NextIntHandleCompare) +{ + auto int_max = RowKeyValue::INT_HANDLE_MAX_KEY; + auto int_max_i64 = RowKeyValue::fromHandle(Handle(std::numeric_limits::max())); + + EXPECT_EQ(1, compare(int_max.toRowKeyValueRef(), int_max_i64.toRowKeyValueRef())); + + auto int_max_i64_pnext = int_max_i64.toPrefixNext(); + EXPECT_EQ(int_max, int_max_i64_pnext); + EXPECT_EQ(0, compare(int_max.toRowKeyValueRef(), int_max_i64_pnext.toRowKeyValueRef())); + EXPECT_EQ(0, compare(int_max_i64_pnext.toRowKeyValueRef(), int_max.toRowKeyValueRef())); + + auto int_max_i64_next = int_max_i64.toNext(); + EXPECT_EQ(int_max, int_max_i64_next); + EXPECT_EQ(0, compare(int_max.toRowKeyValueRef(), int_max_i64_next.toRowKeyValueRef())); + EXPECT_EQ(0, compare(int_max_i64_next.toRowKeyValueRef(), int_max.toRowKeyValueRef())); +} + +TEST(RowKey, NextIntHandleMinMax) +{ + auto v0 = RowKeyValue::fromHandle(Handle(1178400)); + auto v0_next = v0.toNext(); + auto v1 = RowKeyValue::fromHandle(Handle(1178401)); + + EXPECT_EQ(v0, min(v0, v1)); + EXPECT_EQ(v0, min(v0, v0_next)); + + EXPECT_EQ(v1, max(v0, v1)); + EXPECT_EQ(v1, max(v0, v0_next)); +} + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp index a3d6517a2ff..fb80ec58dd5 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -392,7 +392,7 @@ try auto [memory_cf, persisted_cf] = delta->cloneAllColumnFiles(lock, *dm_context, segment->getRowKeyRange(), wbs); ASSERT_FALSE(memory_cf.empty()); ASSERT_TRUE(persisted_cf.empty()); - BlockPtr last_schema; + ColumnFileSchemaPtr last_schema; for (const auto & column_file : memory_cf) { if (auto * t_file = column_file->tryToTinyFile(); t_file) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp index 1c308c6a192..f54350d94a1 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp @@ -40,8 +40,7 @@ extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); + const String & parent_path); namespace tests { @@ -176,8 +175,7 @@ try table_columns, input_stream, file_id, - delegator.choosePath(), - DMFileBlockOutputStream::Flags{}); + delegator.choosePath()); ingest_wbs.data.putExternal(file_id, /* tag */ 0); ingest_wbs.writeLogAndData(); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index 7c2334d74f8..064ed0c4387 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -40,8 +40,7 @@ extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const BlockInputStreamPtr & input_stream, UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); + const String & parent_path); namespace tests { @@ -420,8 +419,8 @@ void SegmentTestBasic::ingestDTFileIntoDelta(PageId segment_id, UInt64 write_row auto parent_path = delegator.choosePath(); auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); auto input_stream = std::make_shared(block); - DMFileBlockOutputStream::Flags flags; - auto dm_file = writeIntoNewDMFile(*dm_context, table_columns, input_stream, file_id, parent_path, flags); + + auto dm_file = writeIntoNewDMFile(*dm_context, table_columns, input_stream, file_id, parent_path); ingest_wbs.data.putExternal(file_id, /* tag */ 0); ingest_wbs.writeLogAndData(); delegator.addDTFile(file_id, dm_file->getBytesOnDisk(), parent_path); @@ -461,8 +460,7 @@ void SegmentTestBasic::ingestDTFileByReplace(PageId segment_id, UInt64 write_row auto parent_path = delegator.choosePath(); auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); auto input_stream = std::make_shared(block); - DMFileBlockOutputStream::Flags flags; - auto dm_file = writeIntoNewDMFile(*dm_context, table_columns, input_stream, file_id, parent_path, flags); + auto dm_file = writeIntoNewDMFile(*dm_context, table_columns, input_stream, file_id, parent_path); ingest_wbs.data.putExternal(file_id, /* tag */ 0); ingest_wbs.writeLogAndData(); delegator.addDTFile(file_id, dm_file->getBytesOnDisk(), parent_path); @@ -543,7 +541,7 @@ void SegmentTestBasic::replaceSegmentData(PageId segment_id, const Block & block auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); auto input_stream = std::make_shared(block); - auto dm_file = writeIntoNewDMFile(*dm_context, table_columns, input_stream, file_id, parent_path, {}); + auto dm_file = writeIntoNewDMFile(*dm_context, table_columns, input_stream, file_id, parent_path); ingest_wbs.data.putExternal(file_id, /* tag */ 0); ingest_wbs.writeLogAndData(); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_sst_files_stream.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_sst_files_stream.cpp index 25c2a55dd59..95e0dcda952 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_sst_files_stream.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_sst_files_stream.cpp @@ -177,7 +177,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 0, /* split_after_size */ 0, @@ -205,7 +204,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 0, /* split_after_size */ 0, @@ -235,7 +233,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 1, /* split_after_size */ 1, @@ -266,7 +263,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 10, /* split_after_size */ 0, @@ -303,7 +299,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 10, /* split_after_size */ 0, @@ -336,7 +331,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 10000, /* split_after_size */ 0, @@ -370,7 +364,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 20, /* split_after_size */ 0, @@ -410,7 +403,6 @@ try mock_stream, storage, schema_snapshot, - TiDB::SnapshotApplyMethod::DTFile_Directory, FileConvertJobType::ApplySnapshot, /* split_after_rows */ 20, /* split_after_size */ 0, diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 799d0c2cfe8..85eb79457f1 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -272,7 +272,7 @@ PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & wr } case WriteBatchWriteType::PUT: case WriteBatchWriteType::UPSERT: - throw Exception(fmt::format("write batch have a invalid total size [write_type={}]", static_cast(write.type)), + throw Exception(fmt::format("write batch have a invalid total size == 0 while this kind of entry exist, write_type={}", static_cast(write.type)), ErrorCodes::LOGICAL_ERROR); } } @@ -579,20 +579,49 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_li } } - // Read with `FieldReadInfos`, buf_size must not be 0. - if (buf_size == 0) + PageMap page_map; + if (unlikely(buf_size == 0)) { - throw Exception("Reading with fields but entry size is 0.", ErrorCodes::LOGICAL_ERROR); + // We should never persist an empty column inside a block. If the buf size is 0 + // then this read with `FieldReadInfos` could be completely eliminated in the upper + // layer. Log a warning to check if it happens. + { + FmtBuffer buf; + buf.joinStr( + to_read.begin(), + to_read.end(), + [](const FieldReadInfo & info, FmtBuffer & fb) { + fb.fmtAppend("{{page_id: {}, fields: {}, entry: {}}}", info.page_id, info.fields, toDebugString(info.entry)); + }, + ","); +#ifndef NDEBUG + // throw an exception under debug mode so we should change the upper layer logic + throw Exception(fmt::format("Reading with fields but entry size is 0, read_info=[{}]", buf.toString()), ErrorCodes::LOGICAL_ERROR); +#endif + // Log a warning under production release + LOG_WARNING(log, "Reading with fields but entry size is 0, read_info=[{}]", buf.toString()); + } + + // Allocating buffer with size == 0 could lead to unexpected behavior, skip the allocating and return + for (const auto & [page_id, entry, fields] : to_read) + { + UNUSED(entry, fields); + Page page(page_id); + page.data = ByteBuffer(nullptr, nullptr); + page_map.emplace(page_id.low, std::move(page)); + } + return page_map; } - char * data_buf = static_cast(alloc(buf_size)); - MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) { + + // Allocate one for holding all pages data + char * shared_data_buf = static_cast(alloc(buf_size)); + MemHolder shared_mem_holder = createMemHolder(shared_data_buf, [&, buf_size](char * p) { free(p, buf_size); }); std::set fields_offset_in_page; - char * pos = data_buf; - PageMap page_map; + char * pos = shared_data_buf; for (const auto & [page_id_v3, entry, fields] : to_read) { size_t read_size_this_entry = 0; @@ -636,7 +665,7 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_li Page page(page_id_v3); page.data = ByteBuffer(pos, write_offset); - page.mem_holder = mem_holder; + page.mem_holder = shared_mem_holder; page.field_offsets.swap(fields_offset_in_page); fields_offset_in_page.clear(); page_map.emplace(page_id_v3.low, std::move(page)); @@ -644,11 +673,22 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_li pos = write_offset; } - if (unlikely(pos != data_buf + buf_size)) - throw Exception(fmt::format("[end_position={}] not match the [current_position={}]", - data_buf + buf_size, - pos), + if (unlikely(pos != shared_data_buf + buf_size)) + { + FmtBuffer buf; + buf.joinStr( + to_read.begin(), + to_read.end(), + [](const FieldReadInfo & info, FmtBuffer & fb) { + fb.fmtAppend("{{page_id: {}, fields: {}, entry: {}}}", info.page_id, info.fields, toDebugString(info.entry)); + }, + ","); + throw Exception(fmt::format("unexpected read size, end_pos={} current_pos={} read_info=[{}]", + shared_data_buf + buf_size, + pos, + buf.toString()), ErrorCodes::LOGICAL_ERROR); + } return page_map; } @@ -734,10 +774,21 @@ PageMap BlobStore::read(PageIDAndEntriesV3 & entries, const ReadLimiterPtr & rea } if (unlikely(pos != data_buf + buf_size)) - throw Exception(fmt::format("[end_position={}] not match the [current_position={}]", + { + FmtBuffer buf; + buf.joinStr( + entries.begin(), + entries.end(), + [](const PageIDAndEntryV3 & id_entry, FmtBuffer & fb) { + fb.fmtAppend("{{page_id: {}, entry: {}}}", id_entry.first, toDebugString(id_entry.second)); + }, + ","); + throw Exception(fmt::format("unexpected read size, end_pos={} current_pos={} read_info=[{}]", data_buf + buf_size, - pos), + pos, + buf.toString()), ErrorCodes::LOGICAL_ERROR); + } return page_map; } diff --git a/dbms/src/Storages/Page/V3/PageEntry.h b/dbms/src/Storages/Page/V3/PageEntry.h index 65a9b3955d3..3b441bd094a 100644 --- a/dbms/src/Storages/Page/V3/PageEntry.h +++ b/dbms/src/Storages/Page/V3/PageEntry.h @@ -82,13 +82,21 @@ using PageIDAndEntriesV3 = std::vector; inline String toDebugString(const PageEntryV3 & entry) { - return fmt::format("PageEntryV3{{file: {}, offset: 0x{:X}, size: {}, checksum: 0x{:X}, tag: {}, field_offsets_size: {}}}", + FmtBuffer fmt_buf; + fmt_buf.joinStr( + entry.field_offsets.begin(), + entry.field_offsets.end(), + [](const auto & offset_checksum, FmtBuffer & fb) { + fb.fmtAppend("{}", offset_checksum.first); + }, + ","); + return fmt::format("PageEntryV3{{file: {}, offset: 0x{:X}, size: {}, checksum: 0x{:X}, tag: {}, field_offsets: [{}]}}", entry.file_id, entry.offset, entry.size, entry.checksum, entry.tag, - entry.field_offsets.size()); + fmt_buf.toString()); } } // namespace PS::V3 diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 93b58916443..7c0e5cc49f4 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -960,7 +961,29 @@ std::pair StorageDeltaMerg { if (cache_blocks.empty()) { - return std::make_pair(decoding_schema_snapshot, std::make_unique(createBlockSortByColumnID(decoding_schema_snapshot))); + BlockUPtr block = std::make_unique(createBlockSortByColumnID(decoding_schema_snapshot)); + auto digest = hashSchema(*block); + auto schema = global_context.getSharedBlockSchemas()->find(digest); + if (schema) + { + // Because we use sha256 to calculate the hash of schema, so schemas has extremely low probability of collision + // while we can't guarantee that there will be no collision forever, + // so (when schema changes) we will check if this schema causes a hash collision, i.e. + // the two different schemas have the same digest. + // Considering there is extremely low probability for same digest but different schema, + // we choose just throw exception when this happens. + // If unfortunately it happens, + // we can rename some columns in this table and then restart tiflash to workaround. + RUNTIME_CHECK_MSG( + isSameSchema(*block, schema->getSchema()), + "new table's schema's digest is the same as one previous table schemas' digest, \ + but schema info is not the same .So please change the new tables' schema, \ + whose table_info is {}. The collisioned schema is {}", + tidb_table_info.serialize(), + schema->toString()); + } + + return std::make_pair(decoding_schema_snapshot, std::move(block)); } else { diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index e4db7207541..f98d7b703d5 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -159,9 +159,9 @@ class StorageDeltaMerge bool initStoreIfDataDirExist() override; - DM::DMConfigurationOpt createChecksumConfig(bool is_single_file) const + DM::DMConfigurationOpt createChecksumConfig() const { - return DM::DMChecksumConfig::fromDBContext(global_context, is_single_file); + return DM::DMChecksumConfig::fromDBContext(global_context); } #ifndef DBMS_PUBLIC_GTEST diff --git a/dbms/src/Storages/System/StorageSystemNumbers.cpp b/dbms/src/Storages/System/StorageSystemNumbers.cpp index 68130897963..016b90b5cc5 100644 --- a/dbms/src/Storages/System/StorageSystemNumbers.cpp +++ b/dbms/src/Storages/System/StorageSystemNumbers.cpp @@ -96,7 +96,7 @@ BlockInputStreams StorageSystemNumbers::read( res[i] = std::make_shared(max_block_size, i * max_block_size, num_streams * max_block_size); if (limit) /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. - res[i] = std::make_shared(res[i], limit * (i + 1) / num_streams - limit * i / num_streams, /*req_id=*/""); + res[i] = std::make_shared(res[i], limit * (i + 1) / num_streams - limit * i / num_streams, /*offset*/ 0, /*req_id=*/""); } return res; diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp index 28fb0d307ce..3591e49900c 100644 --- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp +++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp @@ -39,7 +39,6 @@ namespace DB namespace FailPoints { extern const char force_set_sst_to_dtfile_block_size[]; -extern const char force_set_sst_decode_rand[]; extern const char pause_until_apply_raft_snapshot[]; } // namespace FailPoints @@ -350,7 +349,6 @@ std::vector KVStore::preHandleSSTsToDTFiles( bounded_stream, storage, schema_snap, - snapshot_apply_method, job_type, /* split_after_rows */ global_settings.dt_segment_limit_rows, /* split_after_size */ global_settings.dt_segment_limit_size, @@ -491,22 +489,6 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec return EngineStoreApplyRes::NotFound; } - fiu_do_on(FailPoints::force_set_sst_decode_rand, { - static int num_call = 0; - switch (num_call++ % 2) - { - case 0: - snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; - break; - case 1: - snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; - break; - default: - break; - } - LOG_INFO(log, "{} ingest sst by method {}", region->toString(true), applyMethodToString(snapshot_apply_method)); - }); - const auto func_try_flush = [&]() { if (!region->writeCFCount()) return; diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index fb21be2d2b7..72ec8b70726 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -43,10 +43,9 @@ namespace FailPoints extern const char force_fail_in_flush_region_data[]; } // namespace FailPoints -KVStore::KVStore(Context & context, TiDB::SnapshotApplyMethod snapshot_apply_method_) +KVStore::KVStore(Context & context) : region_persister(std::make_unique(context, region_manager)) , raft_cmd_res(std::make_unique()) - , snapshot_apply_method(snapshot_apply_method_) , log(Logger::get()) , region_compact_log_period(120) , region_compact_log_min_rows(40 * 1024) diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h index ec3494701b4..ac8aa285a20 100644 --- a/dbms/src/Storages/Transaction/KVStore.h +++ b/dbms/src/Storages/Transaction/KVStore.h @@ -78,7 +78,7 @@ class RegionPersister; class KVStore final : private boost::noncopyable { public: - KVStore(Context & context, TiDB::SnapshotApplyMethod snapshot_apply_method_); + KVStore(Context & context); void restore(PathPool & path_pool, const TiFlashRaftProxyHelper *); RegionPtr getRegion(RegionID region_id) const; @@ -135,8 +135,6 @@ class KVStore final : private boost::noncopyable // Exported only for tests. TiFlashRaftProxyHelper * mutProxyHelperUnsafe() { return const_cast(proxy_helper); } - TiDB::SnapshotApplyMethod applyMethod() const { return snapshot_apply_method; } - void addReadIndexEvent(Int64 f) { read_index_event_flag += f; } Int64 getReadIndexEvent() const { return read_index_event_flag; } @@ -256,8 +254,6 @@ class KVStore final : private boost::noncopyable // raft_cmd_res stores the result of applying raft cmd. It must be protected by task_mutex. std::unique_ptr raft_cmd_res; - TiDB::SnapshotApplyMethod snapshot_apply_method; - LoggerPtr log; std::atomic region_compact_log_period; diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp index d1ec51a5cff..7cf4ec40ec8 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -379,19 +379,11 @@ RawCppPtr PreHandleSnapshot( } #endif - switch (kvstore->applyMethod()) - { - case TiDB::SnapshotApplyMethod::DTFile_Directory: - case TiDB::SnapshotApplyMethod::DTFile_Single: - { - // Pre-decode and save as DTFiles - auto ingest_ids = kvstore->preHandleSnapshotToFiles(new_region, snaps, index, term, tmt); - auto * res = new PreHandledSnapshotWithFiles{new_region, std::move(ingest_ids)}; - return GenRawCppPtr(res, RawCppPtrTypeImpl::PreHandledSnapshotWithFiles); - } - default: - throw Exception("Unknow Region apply method: " + applyMethodToString(kvstore->applyMethod())); - } + + // Pre-decode and save as DTFiles + auto ingest_ids = kvstore->preHandleSnapshotToFiles(new_region, snaps, index, term, tmt); + auto * res = new PreHandledSnapshotWithFiles{new_region, std::move(ingest_ids)}; + return GenRawCppPtr(res, RawCppPtrTypeImpl::PreHandledSnapshotWithFiles); } catch (...) { diff --git a/dbms/src/Storages/Transaction/StorageEngineType.h b/dbms/src/Storages/Transaction/StorageEngineType.h index 3d103ca60c1..3cbe4f6bff3 100644 --- a/dbms/src/Storages/Transaction/StorageEngineType.h +++ b/dbms/src/Storages/Transaction/StorageEngineType.h @@ -31,26 +31,4 @@ enum class StorageEngine UNSUPPORTED_ENGINES = 128, }; -enum class SnapshotApplyMethod : std::int32_t -{ - DEPRECATED_Block = 1, - // Invalid if the storage engine is not DeltaTree - DTFile_Directory, - DTFile_Single, -}; - -inline const std::string applyMethodToString(SnapshotApplyMethod method) -{ - switch (method) - { - case SnapshotApplyMethod::DTFile_Directory: - return "file1"; - case SnapshotApplyMethod::DTFile_Single: - return "file2"; - default: - return "unknown(" + std::to_string(static_cast(method)) + ")"; - } - return "unknown"; -} - } // namespace TiDB diff --git a/dbms/src/Storages/Transaction/TMTContext.cpp b/dbms/src/Storages/Transaction/TMTContext.cpp index 17e07180232..f22d2e83a42 100644 --- a/dbms/src/Storages/Transaction/TMTContext.cpp +++ b/dbms/src/Storages/Transaction/TMTContext.cpp @@ -67,7 +67,7 @@ static SchemaSyncerPtr createSchemaSyncer(bool exist_pd_addr, bool for_unit_test TMTContext::TMTContext(Context & context_, const TiFlashRaftConfig & raft_config, const pingcap::ClusterConfig & cluster_config) : context(context_) - , kvstore(context_.isDisaggregatedComputeMode() && context_.useAutoScaler() ? nullptr : std::make_shared(context, raft_config.snapshot_apply_method)) + , kvstore(context_.isDisaggregatedComputeMode() && context_.useAutoScaler() ? nullptr : std::make_shared(context)) , region_table(context) , background_service(nullptr) , gc_manager(context) diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 80b901751df..2e1f7bb18cf 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -969,13 +969,6 @@ TEST_F(RegionKVStoreTest, KVStore) testRaftChangePeer(kvs, ctx.getTMTContext()); } { - auto ori_snapshot_apply_method = kvs.snapshot_apply_method; - kvs.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; - SCOPE_EXIT({ - kvs.snapshot_apply_method = ori_snapshot_apply_method; - }); - - auto region_id = 19; auto region = makeRegion(region_id, RecordKVFormat::genKey(1, 50), RecordKVFormat::genKey(1, 60)); auto region_id_str = std::to_string(19); diff --git a/dbms/src/Storages/Transaction/tests/kvstore_helper.h b/dbms/src/Storages/Transaction/tests/kvstore_helper.h index 9eb9c9f90e5..cb179cb11c3 100644 --- a/dbms/src/Storages/Transaction/tests/kvstore_helper.h +++ b/dbms/src/Storages/Transaction/tests/kvstore_helper.h @@ -102,7 +102,7 @@ class RegionKVStoreTest : public ::testing::Test { kvstore.reset(); auto & global_ctx = TiFlashTestEnv::getGlobalContext(); - kvstore = std::make_unique(global_ctx, TiDB::SnapshotApplyMethod::DTFile_Directory); + kvstore = std::make_unique(global_ctx); // only recreate kvstore and restore data from disk, don't recreate proxy instance kvstore->restore(*path_pool, proxy_helper.get()); return *kvstore; diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index 37e0f9783ca..508b06ae102 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -835,7 +835,7 @@ class FunctionTest : public ::testing::Test }; #define ASSERT_COLUMN_EQ(expected, actual) ASSERT_TRUE(DB::tests::columnEqual((expected), (actual))) -#define ASSERT_BLOCK_EQ(expected, actual) DB::tests::blockEqual((expected), (actual)) +#define ASSERT_BLOCK_EQ(expected, actual) ASSERT_TRUE(DB::tests::blockEqual((expected), (actual))) /// restrictly checking columns equality, both data set and each row's offset should be the same #define ASSERT_COLUMNS_EQ_R(expected, actual) ASSERT_TRUE(DB::tests::columnsEqual((expected), (actual), true)) diff --git a/dbms/src/TestUtils/TiFlashTestEnv.cpp b/dbms/src/TestUtils/TiFlashTestEnv.cpp index 019b1d5a6a5..8ae2aa4c35a 100644 --- a/dbms/src/TestUtils/TiFlashTestEnv.cpp +++ b/dbms/src/TestUtils/TiFlashTestEnv.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -137,6 +138,8 @@ void TiFlashTestEnv::addGlobalContext(Strings testdata_path, PageStorageRunMode auto & path_pool = global_context->getPathPool(); global_context->getTMTContext().restore(path_pool); + + global_context->initializeSharedBlockSchemas(); } Context TiFlashTestEnv::getContext(const DB::Settings & settings, Strings testdata_path)