From 159daca49da1fa57926b1b38d4782aa8a8340184 Mon Sep 17 00:00:00 2001 From: JaySon Date: Fri, 16 Sep 2022 21:42:59 +0800 Subject: [PATCH] This is an automated cherry-pick of #5879 Signed-off-by: ti-chi-bot --- dbms/src/Columns/ColumnAggregateFunction.cpp | 2 +- dbms/src/Columns/ColumnArray.cpp | 9 +- dbms/src/Columns/ColumnDecimal.cpp | 9 +- dbms/src/Columns/ColumnFixedString.cpp | 13 +- dbms/src/Columns/ColumnString.cpp | 9 +- dbms/src/Columns/ColumnVector.cpp | 2 +- .../DeltaMerge/SSTFilesToBlockInputStream.h | 2 +- .../Storages/Transaction/ApplySnapshot.cpp | 35 ++- .../Storages/Transaction/PartitionStreams.cpp | 40 ++- .../Storages/Transaction/PartitionStreams.h | 4 + .../Transaction/RegionBlockReader.cpp | 37 +-- .../Storages/Transaction/RegionBlockReader.h | 22 +- dbms/src/Storages/Transaction/RowCodec.cpp | 92 ++++-- dbms/src/Storages/Transaction/RowCodec.h | 36 +-- .../Transaction/tests/RowCodecTestUtils.h | 8 +- .../Transaction/tests/gtest_kvstore.cpp | 10 +- .../tests/gtest_region_block_reader.cpp | 272 +++++++++++++++++- tests/fullstack-test2/ddl/alter_pk.test | 53 ++++ 18 files changed, 504 insertions(+), 151 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index b4495d83ccb..d24a17a1256 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -101,7 +101,7 @@ void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start if (start + length > from_concrete.getData().size()) throw Exception( fmt::format( - "Parameters start = {}, length = {} are out of bound in ColumnAggregateFunction::insertRangeFrom method (data.size() = {}).", + "Parameters are out of bound in ColumnAggregateFunction::insertRangeFrom method, start={}, length={}, from.size()={}", start, length, from_concrete.getData().size()), diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 852653d3bbc..e2ab256efed 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -428,8 +428,13 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng const ColumnArray & src_concrete = static_cast(src); if (start + length > src_concrete.getOffsets().size()) - throw Exception("Parameter out of bound in ColumnArray::insertRangeFrom method.", - ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception( + fmt::format( + "Parameters are out of bound in ColumnArray::insertRangeFrom method, start={}, length={}, src.size()={}", + start, + length, + src_concrete.getOffsets().size()), + ErrorCodes::PARAMETER_OUT_OF_BOUND); size_t nested_offset = src_concrete.offsetAt(start); size_t nested_length = src_concrete.getOffsets()[start + length - 1] - nested_offset; diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 96d54f320ca..fd99e5b0c32 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -272,8 +272,13 @@ void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t const ColumnDecimal & src_vec = static_cast(src); if (start + length > src_vec.data.size()) - throw Exception("Parameters start = " + toString(start) + ", length = " + toString(length) + " are out of bound in ColumnDecimal::insertRangeFrom method (data.size() = " + toString(src_vec.data.size()) + ").", - ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception( + fmt::format( + "Parameters are out of bound in ColumnDecimal::insertRangeFrom method, start={}, length={}, src.size()={}", + start, + length, + src_vec.data.size()), + ErrorCodes::PARAMETER_OUT_OF_BOUND); size_t old_size = data.size(); data.resize(old_size + length); diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index db908b10f9a..5d747b52ed5 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -183,12 +183,13 @@ void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_ const ColumnFixedString & src_concrete = static_cast(src); if (start + length > src_concrete.size()) - throw Exception("Parameters start = " - + toString(start) + ", length = " - + toString(length) + " are out of bound in ColumnFixedString::insertRangeFrom method" - " (size() = " - + toString(src_concrete.size()) + ").", - ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception( + fmt::format( + "Parameters are out of bound in ColumnFixedString::insertRangeFrom method, start={}, length={}, src.size()={}", + start, + length, + src_concrete.size()), + ErrorCodes::PARAMETER_OUT_OF_BOUND); size_t old_size = chars.size(); chars.resize(old_size + length * n); diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 424f7e72052..60c07e8fe93 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -86,8 +86,13 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len const auto & src_concrete = static_cast(src); if (start + length > src_concrete.offsets.size()) - throw Exception("Parameter out of bound in IColumnString::insertRangeFrom method.", - ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception( + fmt::format( + "Parameters are out of bound in ColumnString::insertRangeFrom method, start={}, length={}, src.size()={}", + start, + length, + src_concrete.size()), + ErrorCodes::PARAMETER_OUT_OF_BOUND); size_t nested_offset = src_concrete.offsetAt(start); size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset; diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 49f1290e692..e69727d4a9f 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -198,7 +198,7 @@ void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t if (start + length > src_vec.data.size()) throw Exception( fmt::format( - "Parameters start = {}, length = {} are out of bound in ColumnVector::insertRangeFrom method (data.size() = {}).", + "Parameters are out of bound in ColumnVector::insertRangeFrom method, start={}, length={}, src.size()={}", start, length, src_vec.data.size()), diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h b/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h index 1111f2bb1c3..7b23e6814af 100644 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h @@ -61,7 +61,7 @@ class SSTFilesToBlockInputStream final : public IBlockInputStream bool force_decode_, TMTContext & tmt_, size_t expected_size_ = DEFAULT_MERGE_BLOCK_SIZE); - ~SSTFilesToBlockInputStream(); + ~SSTFilesToBlockInputStream() override; String getName() const override { return "SSTFilesToBlockInputStream"; } diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp index 2df95fead93..0e5c9853b8a 100644 --- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp +++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -259,8 +260,6 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re } } -extern RegionPtrWithBlock::CachePtr GenRegionPreDecodeBlockData(const RegionPtr &, Context &); - std::vector KVStore::preHandleSnapshotToFiles( RegionPtr new_region, const SSTViewVec snaps, @@ -268,7 +267,17 @@ std::vector KVStore::preHandleSnapshotToFiles( uint64_t term, TMTContext & tmt) { - return preHandleSSTsToDTFiles(new_region, snaps, index, term, DM::FileConvertJobType::ApplySnapshot, tmt); + std::vector ingest_ids; + try + { + ingest_ids = preHandleSSTsToDTFiles(new_region, snaps, index, term, DM::FileConvertJobType::ApplySnapshot, tmt); + } + catch (DB::Exception & e) + { + e.addMessage(fmt::format("(while preHandleSnapshot region_id={}, index={}, term={})", new_region->id(), index, term)); + e.rethrow(); + } + return ingest_ids; } /// `preHandleSSTsToDTFiles` read data from SSTFiles and generate DTFile(s) for commited data @@ -291,7 +300,8 @@ std::vector KVStore::preHandleSSTsToDTFiles( Stopwatch watch; SCOPE_EXIT({ GET_METRIC(tiflash_raft_command_duration_seconds, type_apply_snapshot_predecode).Observe(watch.elapsedSeconds()); }); - PageIds ids; + PageIds generated_ingest_ids; + TableID physical_table_id = InvalidTableID; while (true) { // If any schema changes is detected during decoding SSTs to DTFiles, we need to cancel and recreate DTFiles with @@ -316,6 +326,7 @@ std::vector KVStore::preHandleSSTsToDTFiles( /* ignore_cache= */ false, context.getSettingsRef().safe_point_update_interval_seconds); } + physical_table_id = storage->getTableInfo().id; // Read from SSTs and refine the boundary of blocks output to DTFiles auto sst_stream = std::make_shared( @@ -339,7 +350,7 @@ std::vector KVStore::preHandleSSTsToDTFiles( stream->writePrefix(); stream->write(); stream->writeSuffix(); - ids = stream->ingestIds(); + generated_ingest_ids = stream->ingestIds(); (void)table_drop_lock; // the table should not be dropped during ingesting file break; @@ -381,12 +392,13 @@ std::vector KVStore::preHandleSSTsToDTFiles( else { // Other unrecoverable error, throw + e.addMessage(fmt::format("physical_table_id={}", physical_table_id)); throw; } } } - return ids; + return generated_ingest_ids; } template @@ -537,7 +549,16 @@ RegionPtr KVStore::handleIngestSSTByDTFile(const RegionPtr & region, const SSTVi } // Decode the KV pairs in ingesting SST into DTFiles - PageIds ingest_ids = preHandleSSTsToDTFiles(tmp_region, snaps, index, term, DM::FileConvertJobType::IngestSST, tmt); + PageIds ingest_ids; + try + { + ingest_ids = preHandleSSTsToDTFiles(tmp_region, snaps, index, term, DM::FileConvertJobType::IngestSST, tmt); + } + catch (DB::Exception & e) + { + e.addMessage(fmt::format("(while handleIngestSST region_id={}, index={}, term={})", tmp_region->id(), index, term)); + e.rethrow(); + } // If `ingest_ids` is empty, ingest SST won't write delete_range for ingest region, it is safe to // ignore the step of calling `ingestFiles` diff --git a/dbms/src/Storages/Transaction/PartitionStreams.cpp b/dbms/src/Storages/Transaction/PartitionStreams.cpp index cf151c4270d..7ee7930d958 100644 --- a/dbms/src/Storages/Transaction/PartitionStreams.cpp +++ b/dbms/src/Storages/Transaction/PartitionStreams.cpp @@ -269,31 +269,26 @@ std::variant ReadRegionCommitCache(const RegionPtr & region, bool lock_region = true) +std::optional ReadRegionCommitCache(const RegionPtr & region, bool lock_region) { auto scanner = region->createCommittedScanner(lock_region); /// Some sanity checks for region meta. - { - if (region->isPendingRemove()) - return std::nullopt; - } + if (region->isPendingRemove()) + return std::nullopt; /// Read raw KVs from region cache. - { - // Shortcut for empty region. - if (!scanner.hasNext()) - return std::nullopt; + // Shortcut for empty region. + if (!scanner.hasNext()) + return std::nullopt; - RegionDataReadInfoList data_list_read; - data_list_read.reserve(scanner.writeMapSize()); - - do - { - data_list_read.emplace_back(scanner.next()); - } while (scanner.hasNext()); - return data_list_read; - } + RegionDataReadInfoList data_list_read; + data_list_read.reserve(scanner.writeMapSize()); + do + { + data_list_read.emplace_back(scanner.next()); + } while (scanner.hasNext()); + return data_list_read; } void RemoveRegionCommitCache(const RegionPtr & region, const RegionDataReadInfoList & data_list_read, bool lock_region = true) @@ -421,7 +416,7 @@ RegionPtrWithBlock::CachePtr GenRegionPreDecodeBlockData(const RegionPtr & regio std::optional data_list_read = std::nullopt; try { - data_list_read = ReadRegionCommitCache(region); + data_list_read = ReadRegionCommitCache(region, true); if (!data_list_read) return nullptr; } @@ -579,7 +574,7 @@ Block GenRegionBlockDataWithSchema(const RegionPtr & region, // const DecodingStorageSchemaSnapshotConstPtr & schema_snap, Timestamp gc_safepoint, bool force_decode, - TMTContext & tmt) + TMTContext & /* */) { // In 5.0.1, feature `compaction filter` is enabled by default. Under such feature tikv will do gc in write & default cf individually. // If some rows were updated and add tiflash replica, tiflash store may receive region snapshot with unmatched data in write & default cf sst files. @@ -587,16 +582,13 @@ Block GenRegionBlockDataWithSchema(const RegionPtr & region, // { gc_safepoint = 10000000; }); // Mock a GC safepoint for testing compaction filter region->tryCompactionFilter(gc_safepoint); - std::optional data_list_read = std::nullopt; - data_list_read = ReadRegionCommitCache(region); + std::optional data_list_read = ReadRegionCommitCache(region, true); Block res_block; // No committed data, just return if (!data_list_read) return res_block; - auto context = tmt.getContext(); - { Stopwatch watch; { diff --git a/dbms/src/Storages/Transaction/PartitionStreams.h b/dbms/src/Storages/Transaction/PartitionStreams.h index aa78942803f..70dd3055bab 100644 --- a/dbms/src/Storages/Transaction/PartitionStreams.h +++ b/dbms/src/Storages/Transaction/PartitionStreams.h @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include namespace DB @@ -25,6 +27,8 @@ class Region; using RegionPtr = std::shared_ptr; class StorageDeltaMerge; +std::optional ReadRegionCommitCache(const RegionPtr & region, bool lock_region); + std::tuple, DecodingStorageSchemaSnapshotConstPtr> // AtomicGetStorageSchema(const RegionPtr & region, TMTContext & tmt); diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index 2ec690c467b..30383aecce6 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -13,14 +13,18 @@ // limitations under the License. #include +#include +#include #include #include #include #include +#include #include #include #include #include +#include namespace DB { @@ -109,6 +113,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d raw_column->reserve(expected_rows); } } + size_t index = 0; for (const auto & [pk, write_type, commit_ts, value_ptr] : data_list) { @@ -137,37 +142,33 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } else { - if (schema_snapshot->pk_is_handle) - { - if (!appendRowToBlock(*value_ptr, column_ids_iter, read_column_ids.end(), block, next_column_pos, schema_snapshot->column_infos, schema_snapshot->pk_column_ids[0], force_decode)) - return false; - } - else - { - if (!appendRowToBlock(*value_ptr, column_ids_iter, read_column_ids.end(), block, next_column_pos, schema_snapshot->column_infos, InvalidColumnID, force_decode)) - return false; - } + // Parse column value from encoded value + if (!appendRowToBlock(*value_ptr, column_ids_iter, read_column_ids.end(), block, next_column_pos, schema_snapshot, force_decode)) + return false; } } - /// set extra handle column and pk columns if need + /// set extra handle column and pk columns from encoded key if need if constexpr (pk_type != TMTPKType::STRING) { - // extra handle column's type is always Int64 + // For non-common handle, extra handle column's type is always Int64. + // We need to copy the handle value from encoded key. + const auto handle_value = static_cast(pk); auto * raw_extra_column = const_cast((block.getByPosition(extra_handle_column_pos)).column.get()); - static_cast(raw_extra_column)->getData().push_back(Int64(pk)); + static_cast(raw_extra_column)->getData().push_back(handle_value); + // For pk_is_handle == true, we need to decode the handle value from encoded key, and insert + // to the specify column if (!pk_column_ids.empty()) { auto * raw_pk_column = const_cast((block.getByPosition(pk_pos_map.at(pk_column_ids[0]))).column.get()); if constexpr (pk_type == TMTPKType::INT64) - static_cast(raw_pk_column)->getData().push_back(Int64(pk)); + static_cast(raw_pk_column)->getData().push_back(handle_value); else if constexpr (pk_type == TMTPKType::UINT64) - static_cast(raw_pk_column)->getData().push_back(UInt64(pk)); + static_cast(raw_pk_column)->getData().push_back(UInt64(handle_value)); else { - // The pk_type must be Int32/Uint32 or more narrow type + // The pk_type must be Int32/UInt32 or more narrow type // so cannot tell its' exact type here, just use `insert(Field)` - auto handle_value(static_cast(pk)); raw_pk_column->insert(Field(handle_value)); if (unlikely(raw_pk_column->getInt(index) != handle_value)) { @@ -177,7 +178,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } else { - throw Exception("Detected overflow value when decoding pk column of type " + raw_pk_column->getName(), + throw Exception(fmt::format("Detected overflow value when decoding pk column, type={} handle={}", raw_pk_column->getName(), handle_value), ErrorCodes::LOGICAL_ERROR); } } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.h b/dbms/src/Storages/Transaction/RegionBlockReader.h index 004d9f40447..98d1246e6d7 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.h +++ b/dbms/src/Storages/Transaction/RegionBlockReader.h @@ -14,27 +14,11 @@ #pragma once -#include -#include -#include -#include -#include #include -#include #include -#include - -namespace TiDB -{ -struct TableInfo; -}; namespace DB { -class IManageableStorage; -using ManageableStoragePtr = std::shared_ptr; - -struct ColumnsDescription; class Block; /// The Reader to read the region data in `data_list` and decode based on the given table_info and columns, as a block. @@ -45,12 +29,12 @@ class RegionBlockReader : private boost::noncopyable /// Read `data_list` as a block. /// - /// On decode error, i.e. column number/type mismatch, will do force apply schema, + /// On decode error, i.e. column number/type mismatch, caller should trigger a schema-sync and retry with `force_decode=True`, /// i.e. add/remove/cast unknown/missing/type-mismatch column if force_decode is true, otherwise return empty block and false. /// Moreover, exception will be thrown if we see fatal decode error meanwhile `force_decode` is true. /// - /// `RegionBlockReader::read` is the common routine used by both 'flush' and 'read' processes of TXN engine (Delta-Tree, TXN-MergeTree), - /// each of which will use carefully adjusted 'force_decode' with appropriate error handling/retry to get what they want. + /// `RegionBlockReader::read` is the common routine used by both 'flush' and 'read' processes of Delta-Tree engine, + /// which will use carefully adjusted 'force_decode' with appropriate error handling/retry to get what they want. bool read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode); private: diff --git a/dbms/src/Storages/Transaction/RowCodec.cpp b/dbms/src/Storages/Transaction/RowCodec.cpp index ea7f6b7c2da..4e00bfd27fb 100644 --- a/dbms/src/Storages/Transaction/RowCodec.cpp +++ b/dbms/src/Storages/Transaction/RowCodec.cpp @@ -285,7 +285,9 @@ void encodeRowV2(const TiDB::TableInfo & table_info, const std::vector & RowEncoderV2(table_info, fields).encode(ss); } -bool appendRowToBlock( +// pre-declar block +template +bool appendRowV2ToBlockImpl( const TiKVValue::Base & raw_value, SortedColumnIDWithPosConstIter column_ids_iter, SortedColumnIDWithPosConstIter column_ids_iter_end, @@ -293,18 +295,10 @@ bool appendRowToBlock( size_t block_column_pos, const ColumnInfos & column_infos, ColumnID pk_handle_id, - bool force_decode) -{ - switch (static_cast(raw_value[0])) - { - case static_cast(RowCodecVer::ROW_V2): - return appendRowV2ToBlock(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode); - default: - return appendRowV1ToBlock(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode); - } -} + bool ignore_pk_if_absent, + bool force_decode); -bool appendRowV2ToBlock( +bool appendRowV1ToBlock( const TiKVValue::Base & raw_value, SortedColumnIDWithPosConstIter column_ids_iter, SortedColumnIDWithPosConstIter column_ids_iter_end, @@ -312,26 +306,74 @@ bool appendRowV2ToBlock( size_t block_column_pos, const ColumnInfos & column_infos, ColumnID pk_handle_id, + bool ignore_pk_if_absent, + bool force_decode); +// pre-declar block end + +bool appendRowToBlock( + const TiKVValue::Base & raw_value, + SortedColumnIDWithPosConstIter column_ids_iter, + SortedColumnIDWithPosConstIter column_ids_iter_end, + Block & block, + size_t block_column_pos, + const DecodingStorageSchemaSnapshotConstPtr & schema_snapshot, bool force_decode) { - auto row_flag = readLittleEndian(&raw_value[1]); - bool is_big = row_flag & RowV2::BigRowMask; - return is_big ? appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode) - : appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode); + const ColumnInfos & column_infos = schema_snapshot->column_infos; + // when pk is handle, we need skip pk column when decoding value + ColumnID pk_handle_id = InvalidColumnID; + if (schema_snapshot->pk_is_handle) + { + pk_handle_id = schema_snapshot->pk_column_ids[0]; + } + + // For pk_is_handle table, the column with primary key flag is decoded from encoded key instead of encoded value. + // For common handle table, the column with primary key flag is (usually) decoded from encoded key. We skip + // filling the columns with primary key flags inside this method. + // For other table (non-clustered, use hidden _tidb_rowid as handle), the column with primary key flags could be + // changed, we need to fill missing column with default value. + const bool ignore_pk_if_absent = schema_snapshot->is_common_handle || schema_snapshot->pk_is_handle; + + switch (static_cast(raw_value[0])) + { + case static_cast(RowCodecVer::ROW_V2): + { + auto row_flag = readLittleEndian(&raw_value[1]); + bool is_big = row_flag & RowV2::BigRowMask; + return is_big ? appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, ignore_pk_if_absent, force_decode) + : appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, ignore_pk_if_absent, force_decode); + } + default: + return appendRowV1ToBlock(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, ignore_pk_if_absent, force_decode); + } } -inline bool addDefaultValueToColumnIfPossible(const ColumnInfo & column_info, Block & block, size_t block_column_pos, bool force_decode) +inline bool addDefaultValueToColumnIfPossible(const ColumnInfo & column_info, Block & block, size_t block_column_pos, bool ignore_pk_if_absent, bool force_decode) { // We consider a missing column could be safely filled with NULL, unless it has not default value and is NOT NULL. - // This could saves lots of unnecessary schema syncs for old data with a schema that has newly added columns. - // for clustered index, if the pk column does not exists, it can still be decoded from the key + // This could saves lots of unnecessary schema syncs for old data with a newer schema that has newly added columns. + if (column_info.hasPriKeyFlag()) - return true; + { + // For clustered index or pk_is_handle, if the pk column does not exists, it can still be decoded from the key + if (ignore_pk_if_absent) + return true; + + assert(!ignore_pk_if_absent); + if (!force_decode) + return false; + // Else non-clustered index, and not pk_is_handle, it could be a row encoded by older schema, + // we need to fill the column wich has primary key flag with default value. + // fallthrough to fill default value when force_decode + } if (column_info.hasNoDefaultValueFlag() && column_info.hasNotNullFlag()) { if (!force_decode) return false; + // Else the row does not contain this "not null" / "no default value" column, + // it could be a row encoded by older schema. + // fallthrough to fill default value when force_decode } // not null or has no default value, tidb will fill with specific value. auto * raw_column = const_cast((block.getByPosition(block_column_pos)).column.get()); @@ -348,6 +390,7 @@ bool appendRowV2ToBlockImpl( size_t block_column_pos, const ColumnInfos & column_infos, ColumnID pk_handle_id, + bool ignore_pk_if_absent, bool force_decode) { size_t cursor = 2; // Skip the initial codec ver and row flag. @@ -398,7 +441,7 @@ bool appendRowV2ToBlockImpl( // a column. // Fill with default value and continue to read data for next column id. const auto & column_info = column_infos[column_ids_iter->second]; - if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode)) + if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, ignore_pk_if_absent, force_decode)) return false; column_ids_iter++; block_column_pos++; @@ -460,7 +503,7 @@ bool appendRowV2ToBlockImpl( if (column_ids_iter->first != pk_handle_id) { const auto & column_info = column_infos[column_ids_iter->second]; - if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode)) + if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, ignore_pk_if_absent, force_decode)) return false; } column_ids_iter++; @@ -478,6 +521,7 @@ bool appendRowV1ToBlock( size_t block_column_pos, const ColumnInfos & column_infos, ColumnID pk_handle_id, + bool ignore_pk_if_absent, bool force_decode) { size_t cursor = 0; @@ -514,7 +558,7 @@ bool appendRowV1ToBlock( else if (column_ids_iter->first < next_field_column_id) { const auto & column_info = column_infos[column_ids_iter->second]; - if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode)) + if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, ignore_pk_if_absent, force_decode)) return false; column_ids_iter++; block_column_pos++; @@ -574,7 +618,7 @@ bool appendRowV1ToBlock( if (column_ids_iter->first != pk_handle_id) { const auto & column_info = column_infos[column_ids_iter->second]; - if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode)) + if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, ignore_pk_if_absent, force_decode)) return false; } column_ids_iter++; diff --git a/dbms/src/Storages/Transaction/RowCodec.h b/dbms/src/Storages/Transaction/RowCodec.h index dbb6dad785c..99f5612917d 100644 --- a/dbms/src/Storages/Transaction/RowCodec.h +++ b/dbms/src/Storages/Transaction/RowCodec.h @@ -20,9 +20,6 @@ namespace DB { -using TiDB::ColumnInfo; -using TiDB::TableInfo; - /// The following two encode functions are used for testing. void encodeRowV1(const TiDB::TableInfo & table_info, const std::vector & fields, WriteBuffer & ss); void encodeRowV2(const TiDB::TableInfo & table_info, const std::vector & fields, WriteBuffer & ss); @@ -33,39 +30,8 @@ bool appendRowToBlock( SortedColumnIDWithPosConstIter column_ids_iter_end, Block & block, size_t block_column_pos, - const ColumnInfos & column_infos, - ColumnID pk_handle_id, // when pk is handle, we need skip pk column when decoding value + const DecodingStorageSchemaSnapshotConstPtr & schema_snapshot, bool force_decode); -bool appendRowV2ToBlock( - const TiKVValue::Base & raw_value, - SortedColumnIDWithPosConstIter column_ids_iter, - SortedColumnIDWithPosConstIter column_ids_iter_end, - Block & block, - size_t block_column_pos, - const ColumnInfos & column_infos, - ColumnID pk_handle_id, - bool force_decode); - -template -bool appendRowV2ToBlockImpl( - const TiKVValue::Base & raw_value, - SortedColumnIDWithPosConstIter column_ids_iter, - SortedColumnIDWithPosConstIter column_ids_iter_end, - Block & block, - size_t block_column_pos, - const ColumnInfos & column_infos, - ColumnID pk_handle_id, - bool force_decode); - -bool appendRowV1ToBlock( - const TiKVValue::Base & raw_value, - SortedColumnIDWithPosConstIter column_ids_iter, - SortedColumnIDWithPosConstIter column_ids_iter_end, - Block & block, - size_t block_column_pos, - const ColumnInfos & column_infos, - ColumnID pk_handle_id, - bool force_decode); } // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h index 34e0d3d4104..4dd9964775c 100644 --- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h +++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h @@ -198,7 +198,8 @@ void getTableInfoFieldsInternal(OrderedColumnInfoFields & column_info_fields, Ty else { column_info_fields.emplace(column_id_value.id, - std::make_tuple(getColumnInfo(column_id_value.id), static_cast(std::move(column_id_value.value)))); + std::make_tuple(getColumnInfo(column_id_value.id), + static_cast(std::move(column_id_value.value)))); } } } @@ -309,10 +310,7 @@ inline Block decodeRowToBlock(const String & row_value, DecodingStorageSchemaSna iter++; Block block = createBlockSortByColumnID(decoding_schema); - if (decoding_schema->pk_is_handle) - appendRowToBlock(row_value, iter, sorted_column_id_with_pos.end(), block, value_column_num, decoding_schema->column_infos, decoding_schema->pk_column_ids[0], true); - else - appendRowToBlock(row_value, iter, sorted_column_id_with_pos.end(), block, value_column_num, decoding_schema->column_infos, InvalidColumnID, true); + appendRowToBlock(row_value, iter, sorted_column_id_with_pos.end(), block, value_column_num, decoding_schema, true); // remove first three column for (size_t i = 0; i < value_column_num; i++) diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 864de5b380e..24604ba8dcc 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -15,6 +15,11 @@ #include #include #include +<<<<<<< HEAD +======= +#include +#include +>>>>>>> 8e411ae86b (Fix decode error when "NULL" value in the column with "primary key" flag (#5879)) #include #include #include @@ -29,7 +34,6 @@ extern void setupPutRequest(raft_cmdpb::Request *, const std::string &, const Ti extern void setupDelRequest(raft_cmdpb::Request *, const std::string &, const TiKVKey &); } // namespace RegionBench -extern std::optional ReadRegionCommitCache(const RegionPtr & region, bool lock_region = true); extern void RemoveRegionCommitCache(const RegionPtr & region, const RegionDataReadInfoList & data_list_read, bool lock_region = true); extern void CheckRegionForMergeCmd(const raft_cmdpb::AdminResponse & response, const RegionState & region_state); extern void ChangeRegionStateRange(RegionState & region_state, bool source_at_left, const RegionState & source_region_state); @@ -762,7 +766,7 @@ void RegionKVStoreTest::testRegion() ASSERT_EQ(k->lock_version(), 3); } { - std::optional data_list_read = ReadRegionCommitCache(region); + std::optional data_list_read = ReadRegionCommitCache(region, true); ASSERT_TRUE(data_list_read); ASSERT_EQ(1, data_list_read->size()); RemoveRegionCommitCache(region, *data_list_read); @@ -802,7 +806,7 @@ void RegionKVStoreTest::testRegion() region->remove("write", RecordKVFormat::genKey(table_id, 4, 8)); ASSERT_EQ(1, region->writeCFCount()); { - std::optional data_list_read = ReadRegionCommitCache(region); + std::optional data_list_read = ReadRegionCommitCache(region, true); ASSERT_TRUE(data_list_read); ASSERT_EQ(1, data_list_read->size()); RemoveRegionCommitCache(region, *data_list_read); diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index d08b4dd3738..a6329ce4209 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -12,11 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. +<<<<<<< HEAD #include #include #include #include "RowCodecTestUtils.h" +======= +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +>>>>>>> 8e411ae86b (Fix decode error when "NULL" value in the column with "primary key" flag (#5879)) using TableInfo = TiDB::TableInfo; @@ -63,8 +83,18 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::vector pk_fields; for (size_t i = 0; i < table_info.columns.size(); i++) { +<<<<<<< HEAD if (!table_info.columns[i].hasPriKeyFlag()) value_fields.emplace_back(fields[i]); +======= + if (table_info.is_common_handle || table_info.pk_is_handle) + { + if (table_info.columns[i].hasPriKeyFlag()) + key_encode_fields.emplace_back(fields[i]); + else + value_encode_fields.emplace_back(fields[i]); + } +>>>>>>> 8e411ae86b (Fix decode error when "NULL" value in the column with "primary key" flag (#5879)) else pk_fields.emplace_back(fields[i]); } @@ -73,7 +103,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test WriteBufferFromOwnString pk_buf; if (table_info.is_common_handle) { - auto & primary_index_info = table_info.getPrimaryIndexInfo(); + const auto & primary_index_info = table_info.getPrimaryIndexInfo(); for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; @@ -137,7 +167,14 @@ class RegionBlockReaderTestFixture : public ::testing::Test } else { +<<<<<<< HEAD ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); +======= + if (fields_map.count(column_element.column_id) > 0) + ASSERT_FIELD_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)) << gen_error_log(); + else + LOG_INFO(logger, "ignore value check for new added column, id={}, name={}", column_element.column_id, column_element.name); +>>>>>>> 8e411ae86b (Fix decode error when "NULL" value in the column with "primary key" flag (#5879)) } } } @@ -241,7 +278,22 @@ class RegionBlockReaderTestFixture : public ::testing::Test } }; +<<<<<<< HEAD TEST_F(RegionBlockReaderTestFixture, PKIsNotHandle) +======= +String bytesFromHexString(std::string_view hex_str) +{ + assert(hex_str.size() % 2 == 0); + String bytes(hex_str.size() / 2, '\x00'); + for (size_t i = 0; i < bytes.size(); ++i) + { + bytes[i] = unhex2(hex_str.data() + i * 2); + } + return bytes; +} + +TEST_F(RegionBlockReaderTest, PKIsNotHandle) +>>>>>>> 8e411ae86b (Fix decode error when "NULL" value in the column with "primary key" flag (#5879)) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -349,4 +401,222 @@ TEST_F(RegionBlockReaderTestFixture, InvalidNULLRowV1) ASSERT_ANY_THROW(decodeAndCheckColumns(new_decoding_schema, true)); } + +TEST_F(RegionBlockReaderTest, MissingPrimaryKeyColumnRowV2) +try +{ + // Mock a table + // `t_case` { + // column3 varchar(32) NOT NULL, + // column4 varchar(20) DEFAULT NULL, + // primary key (`column3`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + auto [table_info, fields] = getTableInfoAndFields(/*pk_col_ids*/ {3}, false, ColumnIDValue(3, "hello"), ColumnIDValueNull(4)); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_TRUE(table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_FALSE(table_info.getColumnInfo(4).hasPriKeyFlag()); + + // FIXME: actually TiDB won't encode the "NULL" for column4 into value + // but now the `RowEncoderV2` does not support this, we use `RegionBlockReaderTest::ReadFromRegion` + // to test that. + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + + // Mock re-create the primary key index with "column4" that contains `NULL` value + // `t_case` { + // column3 varchar(32) NOT NULL, + // column4 varchar(20) NOT NULL, + // primary key (`column3`, `column4`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + TableInfo new_table_info; + std::tie(new_table_info, std::ignore) = getTableInfoAndFields(/*pk_col_ids*/ {3, 4}, false, ColumnIDValueNull(3), ColumnIDValueNull(4)); + ASSERT_EQ(new_table_info.is_common_handle, false); + ASSERT_EQ(new_table_info.pk_is_handle, false); + ASSERT_TRUE(new_table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_TRUE(new_table_info.getColumnInfo(4).hasPriKeyFlag()); + + auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); + // FIXME: actually we need to decode the block with force_decode=true, see the + // comments before `encodeColumns` + EXPECT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); + // // force_decode=false can not decode because there are + // // missing value for column with primary key flag. + // EXPECT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); + // // force_decode=true, decode ok. + // EXPECT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); +} +CATCH + +TEST_F(RegionBlockReaderTest, MissingPrimaryKeyColumnRowV1) +try +{ + // Mock a table + // `t_case` { + // column3 varchar(32) NOT NULL, + // column4 varchar(20) DEFAULT NULL, + // primary key (`column3`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + auto [table_info, fields] = getTableInfoAndFields(/*pk_col_ids*/ {3}, false, ColumnIDValue(3, "hello"), ColumnIDValueNull(4)); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_TRUE(table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_FALSE(table_info.getColumnInfo(4).hasPriKeyFlag()); + + encodeColumns(table_info, fields, RowEncodeVersion::RowV1); + + // Mock re-create the primary key index with "column4" that contains `NULL` value + // `t_case` { + // column3 varchar(32) NOT NULL, + // column4 varchar(20) NOT NULL, + // primary key (`column3`, `column4`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + TableInfo new_table_info; + std::tie(new_table_info, std::ignore) = getTableInfoAndFields(/*pk_col_ids*/ {3, 4}, false, ColumnIDValueNull(3), ColumnIDValueNull(4)); + ASSERT_EQ(new_table_info.is_common_handle, false); + ASSERT_EQ(new_table_info.pk_is_handle, false); + ASSERT_TRUE(new_table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_TRUE(new_table_info.getColumnInfo(4).hasPriKeyFlag()); + + auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); + EXPECT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); +} +CATCH + +TEST_F(RegionBlockReaderTest, NewMissingPrimaryKeyColumnRowV2) +try +{ + // Mock a table + // `t_case` { + // column3 varchar(32) NOT NULL, + // primary key (`column3`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + auto [table_info, fields] = getTableInfoAndFields(/*pk_col_ids*/ {3}, false, ColumnIDValue(3, "hello")); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_TRUE(table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_ANY_THROW(table_info.getColumnInfo(4)); // not exist + + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + + // Mock re-create the primary key index with new-added "column4" + // `t_case` { + // column3 varchar(32) NOT NULL, + // column4 varchar(20) NOT NULL, + // primary key (`column3`, `column4`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + TableInfo new_table_info; + std::tie(new_table_info, std::ignore) = getTableInfoAndFields(/*pk_col_ids*/ {3, 4}, false, ColumnIDValueNull(3), ColumnIDValueNull(4)); + ASSERT_EQ(new_table_info.is_common_handle, false); + ASSERT_EQ(new_table_info.pk_is_handle, false); + ASSERT_TRUE(new_table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_TRUE(new_table_info.getColumnInfo(4).hasPriKeyFlag()); + + auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); + // force_decode=false can not decode because there are + // missing value for column with primary key flag. + EXPECT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); + // force_decode=true, decode ok. + EXPECT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); +} +CATCH + +TEST_F(RegionBlockReaderTest, NewMissingPrimaryKeyColumnRowV1) +try +{ + // Mock a table + // `t_case` { + // column3 varchar(32) NOT NULL, + // primary key (`column3`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + auto [table_info, fields] = getTableInfoAndFields(/*pk_col_ids*/ {3}, false, ColumnIDValue(3, "hello")); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_TRUE(table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_ANY_THROW(table_info.getColumnInfo(4)); // not exist + + encodeColumns(table_info, fields, RowEncodeVersion::RowV1); + + // Mock re-create the primary key index with new-added "column4" + // `t_case` { + // column3 varchar(32) NOT NULL, + // column4 varchar(20) NOT NULL, + // primary key (`column3`, `column4`) /*T![clustered_index] NONCLUSTERED */ + // -- _tidb_rowid bigint, // hidden handle + // } + TableInfo new_table_info; + std::tie(new_table_info, std::ignore) = getTableInfoAndFields(/*pk_col_ids*/ {3, 4}, false, ColumnIDValueNull(3), ColumnIDValueNull(4)); + ASSERT_EQ(new_table_info.is_common_handle, false); + ASSERT_EQ(new_table_info.pk_is_handle, false); + ASSERT_TRUE(new_table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_TRUE(new_table_info.getColumnInfo(4).hasPriKeyFlag()); + + auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); + // force_decode=false can not decode because there are + // missing value for column with primary key flag. + EXPECT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); + // force_decode=true, decode ok. + EXPECT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); +} +CATCH + +TEST_F(RegionBlockReaderTest, ReadFromRegion) +try +{ + TableInfo table_info(R"({"cols":[ + {"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"case_no","O":"case_no"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8mb4","Collate":"utf8mb4_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":32,"Tp":15}}, + {"comment":"","default":null,"default_bit":null,"id":2,"name":{"L":"p","O":"p"},"offset":1,"origin_default":null,"state":5,"type":{"Charset":"utf8mb4","Collate":"utf8mb4_bin","Decimal":0,"Elems":null,"Flag":0,"Flen":12,"Tp":15}}, + {"comment":"","default":null,"default_bit":null,"id":3,"name":{"L":"source","O":"source"},"offset":2,"origin_default":"","state":5,"type":{"Charset":"utf8mb4","Collate":"utf8mb4_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":20,"Tp":15}} + ],"comment":"","id":77,"index_info":[],"is_common_handle":false,"name":{"L":"t_case","O":"t_case"},"partition":null,"pk_is_handle":false,"schema_version":62,"state":5,"tiflash_replica":{"Count":1},"update_timestamp":435984541435559947})"); + + RegionID region_id = 4; + String region_start_key(bytesFromHexString("7480000000000000FF445F720000000000FA")); + String region_end_key(bytesFromHexString("7480000000000000FF4500000000000000F8")); + auto region = makeRegion(region_id, region_start_key, region_end_key); + // the hex kv dump from SSTFile + std::vector> kvs = { + {"7480000000000000FF4D5F728000000000FF0000010000000000FAF9F3125EFCF3FFFE", "4C8280809290B4BB8606"}, + {"7480000000000000FF4D5F728000000000FF0000010000000000FAF9F3126548ABFFFC", "508180D0BAABB3BB8606760A80000100000001010031"}, + {"7480000000000000FF4D5F728000000000FF0000020000000000FAF9F3125EFCF3FFFE", "4C8280809290B4BB8606"}, + {"7480000000000000FF4D5F728000000000FF0000020000000000FAF9F3126548ABFFFC", "508180D0BAABB3BB8606760A80000100000001010032"}, + {"7480000000000000FF4D5F728000000000FF0000030000000000FAF9F3125EFCF3FFFE", "4C8280809290B4BB8606"}, + {"7480000000000000FF4D5F728000000000FF0000030000000000FAF9F3126548ABFFFC", "508180D0BAABB3BB8606760A80000100000001010033"}, + {"7480000000000000FF4D5F728000000000FF0000040000000000FAF9F3125EFCF3FFFE", "4C8280809290B4BB8606"}, + {"7480000000000000FF4D5F728000000000FF0000040000000000FAF9F3126548ABFFFC", "508180D0BAABB3BB8606760A80000100000001010034"}, + }; + for (const auto & [k, v] : kvs) + { + region->insert(ColumnFamilyType::Write, TiKVKey(bytesFromHexString(k)), TiKVValue(bytesFromHexString(v))); + } + + auto data_list_read = ReadRegionCommitCache(region, true); + ASSERT_TRUE(data_list_read.has_value()); + + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + { + // force_decode=false can not decode because there are + // missing value for column with primary key flag. + auto reader = RegionBlockReader(decoding_schema); + Block res_block = createBlockSortByColumnID(decoding_schema); + EXPECT_FALSE(reader.read(res_block, *data_list_read, false)); + } + { + // force_decode=true can decode the block + auto reader = RegionBlockReader(decoding_schema); + Block res_block = createBlockSortByColumnID(decoding_schema); + EXPECT_TRUE(reader.read(res_block, *data_list_read, true)); + res_block.checkNumberOfRows(); + EXPECT_EQ(res_block.rows(), 4); + ASSERT_COLUMN_EQ(res_block.getByName("case_no"), createColumn({"1", "2", "3", "4"})); + } +} +CATCH + + } // namespace DB::tests diff --git a/tests/fullstack-test2/ddl/alter_pk.test b/tests/fullstack-test2/ddl/alter_pk.test index 05e82917700..a2403013157 100644 --- a/tests/fullstack-test2/ddl/alter_pk.test +++ b/tests/fullstack-test2/ddl/alter_pk.test @@ -57,3 +57,56 @@ mysql> alter table test.t drop primary key; │ f │ Nullable(Int32) │ │ │ │ _tidb_rowid │ Int64 │ │ │ └─────────────┴─────────────────┴──────────────┴────────────────────┘ + +# issue 5859, case 1 +mysql> drop table if exists test.t_case; +## create table with `source` is nullable +## insert some data and left `source` to be empty +mysql> create table test.t_case (`case_no` varchar(32) not null,`source` varchar(20) default null,`p` varchar(12) DEFAULT NULL,primary key (`case_no`)); +mysql> insert into test.t_case(case_no) values ("1"), ("2"), ("3"), ("4"); + +## drop the primary key, fill the `source` to be non-empty +## add new primary key with case_no and source +mysql> alter table test.t_case drop primary key; +mysql> update test.t_case set `source` = '' where `source` is NULL; +mysql> alter table test.t_case add primary key (`case_no`, `source`); + +## send the snapshot data to tiflash +mysql> alter table test.t_case set tiflash replica 1; +func> wait_table test t_case +mysql> select case_no,p,source from test.t_case; ++---------+------+--------+ +| case_no | p | source | ++---------+------+--------+ +| 1 | NULL | | +| 2 | NULL | | +| 3 | NULL | | +| 4 | NULL | | ++---------+------+--------+ + + +# issue 5859, case 2 +mysql> drop table if exists test.t_case; +## create table with `case_no` +mysql> create table test.t_case (`case_no` varchar(32) not null,`p` varchar(12) DEFAULT NULL,primary key (`case_no`)); +mysql> insert into test.t_case(case_no) values ("1"), ("2"), ("3"), ("4"); + +mysql> alter table test.t_case add column `source` varchar(20) not null; +## drop the primary key, add new primary key with case_no and source +mysql> alter table test.t_case drop primary key; +mysql> alter table test.t_case add primary key (`case_no`, `source`); + +## send the snapshot data to tiflash +mysql> alter table test.t_case set tiflash replica 1; +func> wait_table test t_case +mysql> select case_no,p,source from test.t_case; ++---------+------+--------+ +| case_no | p | source | ++---------+------+--------+ +| 1 | NULL | | +| 2 | NULL | | +| 3 | NULL | | +| 4 | NULL | | ++---------+------+--------+ + +mysql> drop table if exists test.t_case;