Skip to content

Commit

Permalink
Schema: replace invalid default value to zero value under not strict …
Browse files Browse the repository at this point in the history
…sql mode (#8850) (#8861)

close #8803
  • Loading branch information
ti-chi-bot authored Mar 22, 2024
1 parent a6fd9f3 commit a5bdb67
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 42 deletions.
21 changes: 8 additions & 13 deletions dbms/src/IO/ReadHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,8 @@
#include <common/StringRef.h>
#include <double-conversion/double-conversion.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <iterator>
#include <limits>
#include <type_traits>

#define DEFAULT_MAX_STRING_SIZE 0x00FFFFFFULL
Expand Down Expand Up @@ -353,7 +349,6 @@ inline void readDecimalText(Decimal<T> & x, ReadBuffer & buf, PrecType precision
value = -value;
x.value = static_cast<T>(value);
checkDecimalOverflow(x, precision);
return;
}

template <typename T, typename ReturnType = void>
Expand Down Expand Up @@ -538,7 +533,7 @@ void readStringUntilEOF(String & s, ReadBuffer & buf);
* - if string is in quotes, then it will be read until closing quote,
* but sequences of two consecutive quotes are parsed as single quote inside string;
*/
void readCSVString(String & s, ReadBuffer & buf, const char delimiter = ',');
void readCSVString(String & s, ReadBuffer & buf, char delimiter = ',');


/// Read and append result to array of characters.
Expand All @@ -561,7 +556,7 @@ template <typename Vector>
void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readCSVStringInto(Vector & s, ReadBuffer & buf, const char delimiter = ',');
void readCSVStringInto(Vector & s, ReadBuffer & buf, char delimiter = ',');

/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
template <typename Vector, typename ReturnType = void>
Expand All @@ -577,14 +572,14 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
struct NullSink
{
void append(const char *, size_t){};
void push_back(char){};
void push_back(char){}; // NOLINT
};

void parseUUID(const UInt8 * src36, UInt8 * dst16);
void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);

template <typename IteratorSrc, typename IteratorDst>
void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes);
void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes);

template <typename ReturnType = void>
ReturnType readMyDateTextImpl(UInt64 & date, ReadBuffer & buf)
Expand Down Expand Up @@ -616,13 +611,13 @@ ReturnType readMyDateTextImpl(UInt64 & date, ReadBuffer & buf)
buf.position() += 1;

date = MyDate(year, month, day).toPackedUInt();
return ReturnType(true);
return static_cast<ReturnType>(true);
}

if constexpr (throw_exception)
throw Exception("wrong date format.", ErrorCodes::CANNOT_PARSE_DATE);
else
return ReturnType(false);
return static_cast<ReturnType>(false);
}

inline void readMyDateText(UInt64 & date, ReadBuffer & buf)
Expand Down Expand Up @@ -757,7 +752,7 @@ ReturnType readMyDateTimeTextImpl(UInt64 & packed, int fsp, ReadBuffer & buf)
micro_second *= 10;

packed = MyDateTime(year, month, day, hour, minute, second, micro_second).toPackedUInt();
return ReturnType(true);
return static_cast<ReturnType>(true);
}
}
else if (s + 10 <= buf.buffer().end())
Expand All @@ -769,7 +764,7 @@ ReturnType readMyDateTimeTextImpl(UInt64 & packed, int fsp, ReadBuffer & buf)
if constexpr (throw_exception)
throw Exception("wrong datetime format.", ErrorCodes::CANNOT_PARSE_DATETIME);
else
return ReturnType(false);
return static_cast<ReturnType>(false);
}

inline void readMyDateTimeText(UInt64 & packed, int fsp, ReadBuffer & buf)
Expand Down
77 changes: 48 additions & 29 deletions dbms/src/Storages/Transaction/TiDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ ColumnInfo::ColumnInfo(Poco::JSON::Object::Ptr json)
deserialize(json);
}

// Wraps `try_block` in a try/catch: if the block throws any exception, the
// enclosing function returns DB::GenDefaultField(*this) instead of propagating it.
// The expansion contains a `return` statement and references `*this`, so it can
// only be used inside a member function whose return type accepts that result.
// NOTE(review): presumably GenDefaultField produces the column type's zero value,
// matching TiDB's behavior for invalid defaults under non-strict sql_mode - confirm.
#define TRY_CATCH_DEFAULT_VALUE_TO_FIELD(try_block) \
try \
{ \
try_block \
} \
catch (...) \
{ \
return DB::GenDefaultField(*this); \
}


Field ColumnInfo::defaultValueToField() const
{
Expand All @@ -115,27 +125,27 @@ Field ColumnInfo::defaultValueToField() const
switch (tp)
{
// Integer Type.
// In c++, cast a unsigned integer to signed integer will not change the value.
// like 9223372036854775808 which is larger than the maximum value of Int64,
// static_cast<UInt64>(static_cast<Int64>(9223372036854775808)) == 9223372036854775808
// so we don't need consider unsigned here.
case TypeTiny:
case TypeShort:
case TypeLong:
case TypeLongLong:
case TypeInt24:
{
// In C++, casting an unsigned integer to a signed integer and back does not change the value.
// For example, 9223372036854775808 is larger than the maximum value of Int64, yet
// static_cast<UInt64>(static_cast<Int64>(9223372036854775808)) == 9223372036854775808,
// so we don't need to consider unsigned types separately here.
try
{
return value.convert<Int64>();
}
catch (...)
{
// due to https://github.com/pingcap/tidb/issues/34881
// we do this to avoid exception in older version of TiDB.
return static_cast<Int64>(std::llround(value.convert<double>()));
}
}
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({
try
{
return value.convert<Int64>();
}
catch (...)
{
// due to https://github.com/pingcap/tidb/issues/34881
// we do this to avoid exception in older version of TiDB.
return static_cast<Int64>(std::llround(value.convert<double>()));
}
});
case TypeBit:
{
// TODO: We shall use something like `orig_default_bit`, which will never change once created,
Expand All @@ -152,16 +162,16 @@ Field ColumnInfo::defaultValueToField() const
return DB::GenDefaultField(*this);
return Field();
}
return getBitValue(bit_value.convert<String>());
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({ return getBitValue(bit_value.convert<String>()); });
}
// Floating type.
case TypeFloat:
case TypeDouble:
return value.convert<double>();
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({ return value.convert<double>(); });
case TypeDate:
case TypeDatetime:
case TypeTimestamp:
return DB::parseMyDateTime(value.convert<String>());
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({ return DB::parseMyDateTime(value.convert<String>()); });
case TypeVarchar:
case TypeTinyBlob:
case TypeMediumBlob:
Expand All @@ -187,24 +197,32 @@ Field ColumnInfo::defaultValueToField() const
// JSON can't have a default value
return genJsonNull();
case TypeEnum:
return getEnumIndex(value.convert<String>());
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({ return getEnumIndex(value.convert<String>()); });
case TypeNull:
return Field();
case TypeDecimal:
case TypeNewDecimal:
return getDecimalValue(value.convert<String>());
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({
auto text = value.convert<String>();
if (text.empty())
return DB::GenDefaultField(*this);
return getDecimalValue(text);
});
case TypeTime:
return getTimeValue(value.convert<String>());
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({ return getTimeValue(value.convert<String>()); });
case TypeYear:
// Never throw exception here, do not use TRY_CATCH_DEFAULT_VALUE_TO_FIELD
return getYearValue(value.convert<String>());
case TypeSet:
return getSetValue(value.convert<String>());
TRY_CATCH_DEFAULT_VALUE_TO_FIELD({ return getSetValue(value.convert<String>()); });
default:
throw Exception("Have not processed type: " + std::to_string(tp));
}
return Field();
}

#undef TRY_CATCH_DEFAULT_VALUE_TO_FIELD

DB::Field ColumnInfo::getDecimalValue(const String & decimal_text) const
{
DB::ReadBufferFromString buffer(decimal_text);
Expand Down Expand Up @@ -243,7 +261,7 @@ Int64 ColumnInfo::getEnumIndex(const String & enum_id_or_text) const
{
const auto * collator = ITiDBCollator::getCollator(collate.isEmpty() ? "binary" : collate.convert<String>());
if (!collator)
// todo if new collation is enabled, should use "utf8mb4_bin"
// TODO: if new collation is enabled, should use "utf8mb4_bin"
collator = ITiDBCollator::getCollator("binary");
for (const auto & elem : elems)
{
Expand All @@ -252,15 +270,14 @@ Int64 ColumnInfo::getEnumIndex(const String & enum_id_or_text) const
return elem.second;
}
}
int num = std::stoi(enum_id_or_text);
return num;
return std::stoi(enum_id_or_text);
}

UInt64 ColumnInfo::getSetValue(const String & set_str) const
{
const auto * collator = ITiDBCollator::getCollator(collate.isEmpty() ? "binary" : collate.convert<String>());
if (!collator)
// todo if new collation is enabled, should use "utf8mb4_bin"
// TODO: if new collation is enabled, should use "utf8mb4_bin"
collator = ITiDBCollator::getCollator("binary");
std::string sort_key_container;
Poco::StringTokenizer string_tokens(set_str, ",");
Expand All @@ -283,7 +300,7 @@ UInt64 ColumnInfo::getSetValue(const String & set_str) const
if (marked.empty())
return value;

throw DB::Exception(std::string(__PRETTY_FUNCTION__) + ": can't parse set type value.");
return 0;
}

Int64 ColumnInfo::getTimeValue(const String & time_str)
Expand All @@ -308,7 +325,9 @@ Int64 ColumnInfo::getTimeValue(const String & time_str)

Int64 ColumnInfo::getYearValue(const String & val)
{
// do not check validation of the val because TiDB will do it
// make sure the year is a non-negative integer
if (val.empty() || !std::all_of(val.begin(), val.end(), ::isdigit))
return 0;
Int64 year = std::stol(val);
if (0 < year && year < 70)
return 2000 + year;
Expand Down
51 changes: 51 additions & 0 deletions tests/fullstack-test2/variables/set_sql_mode_ansi.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2024 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# use invalid default values for the int, float, decimal and year columns under non-strict sql_mode
mysql> set sql_mode='ansi'; CREATE TABLE test.t (id int, i int default '1.1', f float default 'a', d decimal(10,2) default 'b', y year default 'f');

mysql> set sql_mode='ansi'; insert into test.t (id) values (1),(2),(3),(4);
mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> set sql_mode='ansi'; insert into test.t (id) values (5),(6),(7),(8);
mysql> set sql_mode='ansi'; set session tidb_isolation_read_engines='tikv'; select * from test.t;
+------+------+------+------+------+
| id | i | f | d | y |
+------+------+------+------+------+
| 1 | 1 | 0 | 0.00 | 0000 |
| 2 | 1 | 0 | 0.00 | 0000 |
| 3 | 1 | 0 | 0.00 | 0000 |
| 4 | 1 | 0 | 0.00 | 0000 |
| 5 | 1 | 0 | 0.00 | 0000 |
| 6 | 1 | 0 | 0.00 | 0000 |
| 7 | 1 | 0 | 0.00 | 0000 |
| 8 | 1 | 0 | 0.00 | 0000 |
+------+------+------+------+------+
mysql> set sql_mode='ansi'; set session tidb_isolation_read_engines='tiflash'; select * from test.t;
+------+------+------+------+------+
| id | i | f | d | y |
+------+------+------+------+------+
| 1 | 1 | 0 | 0.00 | 0000 |
| 2 | 1 | 0 | 0.00 | 0000 |
| 3 | 1 | 0 | 0.00 | 0000 |
| 4 | 1 | 0 | 0.00 | 0000 |
| 5 | 1 | 0 | 0.00 | 0000 |
| 6 | 1 | 0 | 0.00 | 0000 |
| 7 | 1 | 0 | 0.00 | 0000 |
| 8 | 1 | 0 | 0.00 | 0000 |
+------+------+------+------+------+

mysql> drop table test.t;

0 comments on commit a5bdb67

Please sign in to comment.