Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cast decimal as string #1256

Merged
merged 27 commits into from
Dec 11, 2020
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
391 changes: 356 additions & 35 deletions dbms/src/Common/MyTime.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dbms/src/Common/MyTime.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ struct MyDate : public MyTimeBase
String toString() const { return dateFormat("%Y-%m-%d"); }
};

Field parseMyDateTime(const String & str);
Field parseMyDateTime(const String & str, int8_t fsp = 6);

void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to);

Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Common/TiFlashException.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ namespace DB
E(Internal, "MPP internal error.", \
"Please contact with developer, \n" \
"better providing information about your cluster(log, topology information etc.).", \
"");)
"");) \
C(Types, E(Truncated, "Data is truncated during conversion.", "", ""); E(WrongValue, "Input value is in wrong format", "", "");)

/// TiFlashError is core struct of standard error,
/// which contains all information about an error except message.
Expand Down
3 changes: 3 additions & 0 deletions dbms/src/Common/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,6 @@ target_link_libraries (persisted_container clickhouse_common_io)

add_executable(decimal_test_decimal_type gtest_decimal_type.cpp)
target_link_libraries(decimal_test_decimal_type clickhouse_common_io gtest_main)

add_executable(mytime_test gtest_mytime.cpp)
target_link_libraries(mytime_test clickhouse_common_io clickhouse_functions gtest_main)
101 changes: 101 additions & 0 deletions dbms/src/Common/tests/gtest_mytime.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include <Common/Exception.h>
#include <Common/MyTime.h>
#include <DataTypes/DataTypeMyDateTime.h>
#include <gtest/gtest.h>

#include <iostream>
#include <string>
#include <tuple>
#include <vector>

namespace DB
{
namespace tests
{

/// Test fixture for MyDateTime parsing.
/// It keeps no state; it only hosts the round-trip helper shared by the test cases.
class TestMyTime : public testing::Test
{
protected:
    void SetUp() override {}
    void TearDown() override {}

public:
    /// Parses `str` using the fractional-second precision of `type`, formats the parsed
    /// value back to text with the same precision, and asserts it equals `expected`.
    /// If anything throws, the offending input is printed to stderr and the exception
    /// is rethrown to the caller.
    static void checkParseMyDateTime(const std::string & str, const std::string & expected, const DataTypeMyDateTime & type)
    {
        try
        {
            Field parsed = parseMyDateTime(str, type.getFraction());
            UInt64 packed = parsed.template safeGet<UInt64>();
            MyDateTime datetime(packed);
            std::string formatted = datetime.toString(type.getFraction());
            ASSERT_EQ(formatted, expected) << "Original datetime string: " << str;
        }
        catch (...)
        {
            // Report which input failed before propagating the original exception.
            std::cerr << "Error occurs when parsing: \"" << str << "\"" << std::endl;
            throw;
        }
    }
};

/// Round-trips a set of datetime literals through parseMyDateTime and MyDateTime::toString.
/// Each pair is {input literal, expected formatted value}; the first table is checked with
/// a fractional-second precision of 6, the second with a precision of 0.
TEST_F(TestMyTime, ParseMyDateTime)
try
{
    // Checked against a DataTypeMyDateTime with fraction 6.
    const std::vector<std::tuple<std::string, std::string>> cases_with_fsp{
        {"2020-12-10 11:11:11.123456", "2020-12-10 11:11:11.123456"}, // YYYY-MM-DD HH:MM:SS.mmmmmm
        {"00-00-00 00:00:00.123", "2000-00-00 00:00:00.123000"},
        {"1701020304.1", "2017-01-02 03:04:01.000000"},
        {"1701020302.11", "2017-01-02 03:02:11.000000"},
        {"170102037.11", "2017-01-02 03:07:11.000000"},
        {"2018.01.01", "2018-01-01 00:00:00.000000"},
        {"2020.10.10 10.10.10", "2020-10-10 10:10:10.000000"},
        {"2020-10-10 10-10.10", "2020-10-10 10:10:10.000000"},
        {"2020-10-10 10.10", "2020-10-10 10:10:00.000000"},
    };
    // Checked against a DataTypeMyDateTime with fraction 0.
    const std::vector<std::tuple<std::string, std::string>> cases_without_fsp{
        {"2012-12-31 11:30:45", "2012-12-31 11:30:45"},
        {"0000-00-00 00:00:00", "0000-00-00 00:00:00"},
        {"0001-01-01 00:00:00", "0001-01-01 00:00:00"},
        {"00-12-31 11:30:45", "2000-12-31 11:30:45"},
        {"12-12-31 11:30:45", "2012-12-31 11:30:45"},
        {"2012-12-31", "2012-12-31 00:00:00"},
        {"20121231", "2012-12-31 00:00:00"},
        {"121231", "2012-12-31 00:00:00"},
        {"2012^12^31 11+30+45", "2012-12-31 11:30:45"},
        {"2012^12^31T11+30+45", "2012-12-31 11:30:45"},
        {"2012-2-1 11:30:45", "2012-02-01 11:30:45"},
        {"12-2-1 11:30:45", "2012-02-01 11:30:45"},
        {"20121231113045", "2012-12-31 11:30:45"},
        {"121231113045", "2012-12-31 11:30:45"},
        {"2012-02-29", "2012-02-29 00:00:00"},
        {"00-00-00", "0000-00-00 00:00:00"},
        {"11111111111", "2011-11-11 11:11:01"},
        {"1701020301.", "2017-01-02 03:01:00"},
        {"170102036", "2017-01-02 03:06:00"},
        {"170102039.", "2017-01-02 03:09:00"},
        {"2018-01-01 18", "2018-01-01 18:00:00"},
        {"18-01-01 18", "2018-01-01 18:00:00"},
        {"2018.01.01 00:00:00", "2018-01-01 00:00:00"},
        {"2018/01/01-00:00:00", "2018-01-01 00:00:00"},
        {"4710072", "2047-10-07 02:00:00"},
    };

    const DataTypeMyDateTime type_with_fraction(6);
    const DataTypeMyDateTime type_without_fraction(0);

    // Pass the type objects themselves: the helper takes `const DataTypeMyDateTime &`,
    // so handing it `getFraction()` would build a temporary type through an implicit
    // conversion for every single case.
    for (const auto & [str, expected] : cases_with_fsp)
    {
        checkParseMyDateTime(str, expected, type_with_fraction);
    }
    for (const auto & [str, expected] : cases_without_fsp)
    {
        checkParseMyDateTime(str, expected, type_without_fraction);
    }
}
catch (const Exception & e)
{
    // Surface the project exception text, then mark the test as failed.
    std::cerr << e.displayText() << std::endl;
    GTEST_FAIL();
}

} // namespace tests

} // namespace DB
11 changes: 4 additions & 7 deletions dbms/src/DataTypes/DataTypeMyDateTime.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeMyDateTime.h>
#include <common/DateLUT.h>

#include <IO/Operators.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>

#include <IO/WriteHelpers.h>
#include <Parsers/ASTLiteral.h>
#include <common/DateLUT.h>


namespace DB
Expand Down Expand Up @@ -102,7 +99,7 @@ bool DataTypeMyDateTime::equals(const IDataType & rhs) const
{
/// DateTime with different timezones are equal, because:
/// "all types with different time zones are equivalent and may be used interchangingly."
return typeid(rhs) == typeid(*this);
return typeid(rhs) == typeid(*this) && getFraction() == dynamic_cast<const DataTypeMyDateTime *>(&rhs)->getFraction();
}


Expand Down
34 changes: 34 additions & 0 deletions dbms/src/Debug/dbgFuncCoprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,22 @@ std::unordered_map<String, tipb::ScalarFuncSig> func_name_to_sig({
{"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal},
{"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal},
{"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal},
{"cast_int_string", tipb::ScalarFuncSig::CastIntAsString},
{"cast_real_string", tipb::ScalarFuncSig::CastRealAsString},
{"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString},
{"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString},
{"cast_string_string", tipb::ScalarFuncSig::CastStringAsString},
{"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime},
{"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime},
{"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime},
{"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime},
{"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime},
{"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime},
{"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime},
{"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime},
{"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime},
{"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime},

});

void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::unordered_set<String> & referred_columns,
Expand Down Expand Up @@ -399,6 +415,24 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un
expr->set_sig(tipb::ScalarFuncSig::DateFormatSig);
expr->mutable_field_type()->set_tp(TiDB::TypeString);
break;
case tipb::ScalarFuncSig::CastIntAsTime:
case tipb::ScalarFuncSig::CastRealAsTime:
case tipb::ScalarFuncSig::CastTimeAsTime:
case tipb::ScalarFuncSig::CastDecimalAsTime:
case tipb::ScalarFuncSig::CastStringAsTime:
{
    // The "cast_*_date" and "cast_*_datetime" debug function names map to the same
    // signature, so the result field type has to be derived from the function name.
    expr->set_sig(it_sig->second);
    auto * ft = expr->mutable_field_type();
    // find() returns a position, not a boolean: it is npos (non-zero) when the
    // substring is absent, so the result must be compared against npos explicitly.
    // Testing the raw position would select the datetime branch for "cast_*_date" too.
    const bool to_datetime = it_sig->first.find("datetime") != std::string::npos;
    if (to_datetime)
    {
        ft->set_tp(TiDB::TypeDatetime);
    }
    else
    {
        ft->set_tp(TiDB::TypeDate);
    }
    break;
}
default:
{
expr->set_sig(it_sig->second);
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ String DAGExpressionAnalyzer::convertToUInt8(ExpressionActionsPtr & actions, con
{
/// use tidb_cast to make it compatible with TiDB
tipb::FieldType field_type;
// TODO: Use TypeDouble as return type, to be compatible with TiDB
field_type.set_tp(TiDB::TypeLongLong);
tipb::Expr type_expr;
constructStringLiteralTiExpr(type_expr, "Nullable(Int64)");
Expand Down
9 changes: 9 additions & 0 deletions dbms/src/Functions/FunctionsConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,15 @@ struct FormatImpl<DataTypeMyDateTime>
}
};

/// Text formatting for decimal values: the value is written to the buffer
/// together with the scale reported by its data type.
template <typename DecimalType>
struct FormatImpl<DataTypeDecimal<DecimalType>>
{
    /// Writes `value` into `out` using the scale of `type`.
    /// The trailing DateLUTImpl pointer is not used by this specialization.
    static void execute(
        const typename DataTypeDecimal<DecimalType>::FieldType value,
        WriteBuffer & out,
        const DataTypeDecimal<DecimalType> * type,
        const DateLUTImpl *)
    {
        writeText(value, type->getScale(), out);
    }
};

template <typename FieldType>
struct FormatImpl<DataTypeEnum<FieldType>>
{
Expand Down
48 changes: 43 additions & 5 deletions dbms/src/Functions/FunctionsTiDBConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ struct TiDBConvertToString
auto col_to = ColumnString::create();
ColumnString::Chars_t & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
WriteBufferFromVector<ColumnString::Chars_t> write_buffer(data_to);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LittleFall This part is related to #1998


if constexpr (std::is_same_v<FromDataType, DataTypeString>)
{
Expand All @@ -113,6 +112,8 @@ struct TiDBConvertToString

offsets_to.resize(size);

WriteBufferFromVector<ColumnString::Chars_t> write_buffer(data_to);

size_t current_offset = 0;
for (size_t i = 0; i < size; i++)
{
Expand Down Expand Up @@ -148,6 +149,8 @@ struct TiDBConvertToString
container_per_element.resize(decimal_max_prec);
offsets_to.resize(size);

WriteBufferFromVector<ColumnString::Chars_t> write_buffer(data_to);

for (size_t i = 0; i < size; ++i)
{
WriteBufferFromVector<ColumnString::Chars_t> element_write_buffer(container_per_element);
Expand Down Expand Up @@ -189,6 +192,8 @@ struct TiDBConvertToString
}
offsets_to.resize(size);

WriteBufferFromVector<ColumnString::Chars_t> write_buffer(data_to);

for (size_t i = 0; i < size; ++i)
{
WriteBufferFromVector<ColumnString::Chars_t> element_write_buffer(container_per_element);
Expand Down Expand Up @@ -1172,6 +1177,13 @@ struct TiDBConvertToTime
const auto & col_with_type_and_name = block.getByPosition(arguments[0]);
const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);

// Fractional-second precision of the destination type.
// It stays 0 unless the destination is DataTypeMyDateTime.
int to_fsp [[maybe_unused]] = 0;
if constexpr (std::is_same_v<ToDataType, DataTypeMyDateTime>)
{
// Strip a possible Nullable wrapper from the result column type before reading its fraction.
// NOTE(review): the dynamic_cast result is dereferenced without a null check — this assumes
// the result type is always DataTypeMyDateTime on this branch; confirm against the callers.
const auto * tp = dynamic_cast<const DataTypeMyDateTime *>(removeNullable(block.getByPosition(result).type).get());
to_fsp = tp->getFraction();
}

if constexpr (return_nullable)
{
col_null_map_to = ColumnUInt8::create(size, 0);
Expand All @@ -1195,7 +1207,7 @@ struct TiDBConvertToTime
String string_value = string_ref.toString();
try
{
Field packed_uint_value = parseMyDateTime(string_value);
Field packed_uint_value = parseMyDateTime(string_value, to_fsp);
UInt64 packed_uint = packed_uint_value.template safeGet<UInt64>();
MyDateTime datetime(packed_uint);
if constexpr (std::is_same_v<ToDataType, DataTypeMyDate>)
Expand All @@ -1219,6 +1231,7 @@ struct TiDBConvertToTime
}
else if constexpr (std::is_same_v<FromDataType, DataTypeMyDate> || std::is_same_v<FromDataType, DataTypeMyDateTime>)
{
// cast time as time
const auto * col_from = checkAndGetColumn<ColumnUInt64>(block.getByPosition(arguments[0]).column.get());
const ColumnUInt64::Container & vec_from = col_from->getData();

Expand All @@ -1233,7 +1246,32 @@ struct TiDBConvertToTime
}
else
{
vec_to[i] = datetime.toPackedUInt();
// Fractional-second precision of the source value; it stays 0 unless the source
// type is DataTypeMyDateTime.
int from_fsp = 0;
if constexpr (std::is_same_v<FromDataType, DataTypeMyDateTime>)
{
    auto & from_type = static_cast<const DataTypeMyDateTime &>(type);
    from_fsp = from_type.getFraction();
}
UInt32 micro_second = datetime.micro_second;
UInt64 packed_uint = vec_from[i];
if (to_fsp < from_fsp)
{
    // Integer power of ten. std::pow works in floating point, which would drag the
    // microsecond arithmetic through double and back; an integer loop keeps it exact.
    // NOTE(review): assumes fsp values lie in [0, 6] — confirm against the type's validation.
    const auto pow10 = [](int exponent) {
        UInt32 value = 1;
        for (; exponent > 0; --exponent)
            value *= 10;
        return value;
    };

    // Keep one digit beyond the target precision, then round half up on that digit.
    micro_second = micro_second / pow10(6 - to_fsp - 1);
    micro_second = (micro_second + 5) / 10;
    // Overflow: rounding carried out of the fractional part, so clear the fraction
    // and advance the value by one second.
    if (micro_second >= pow10(to_fsp))
    {
        datetime.micro_second = 0;
        packed_uint = datetime.toPackedUInt();
        packed_uint = AddSecondsImpl::execute(packed_uint, 1, DateLUT::instance());
    }
    else
    {
        // Scale the rounded digits back up to microseconds.
        datetime.micro_second = micro_second * pow10(6 - to_fsp);
        packed_uint = datetime.toPackedUInt();
    }
}
vec_to[i] = packed_uint;
}
}
}
Expand Down Expand Up @@ -1292,7 +1330,7 @@ struct TiDBConvertToTime
{
try
{
Field packed_uint_value = parseMyDateTime(value_str);
Field packed_uint_value = parseMyDateTime(value_str, to_fsp);
UInt64 packed_uint = packed_uint_value.template safeGet<UInt64>();
MyDateTime datetime(packed_uint);
if constexpr (std::is_same_v<ToDataType, DataTypeMyDate>)
Expand Down Expand Up @@ -1325,7 +1363,7 @@ struct TiDBConvertToTime
String value_str = vec_from[i].toString(type.getScale());
try
{
Field value = parseMyDateTime(value_str);
Field value = parseMyDateTime(value_str, to_fsp);
MyDateTime datetime(value.template safeGet<UInt64>());
if constexpr (std::is_same_v<ToDataType, DataTypeMyDate>)
{
Expand Down
15 changes: 15 additions & 0 deletions errors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ error = '''
Encryption internal error.
'''

["FLASH:MPP:Internal"]
error = '''
MPP internal error.
'''

["FLASH:PageStorage:FileSizeNotMatch"]
error = '''
Some files' size don't match their metadata.
Expand All @@ -83,3 +88,13 @@ error = '''
Schema synchronize error.
'''

["FLASH:Types:Truncated"]
error = '''
Data is truncated during conversion.
'''

["FLASH:Types:WrongValue"]
error = '''
Input value is in wrong format
'''

Loading