Support cast string as double push down (#2038)

* support cast str as double (#1958)
* Fix cast string as double (#1966)
* support cast str as double
* fix
* fix
* fix
* fix test
* refine
* add test
* update
* fix test
* support cast str as double (#1958)
* fix
* fix

Co-authored-by: ti-srebot <[email protected]>
pingcap · Jun 3, 2021 · 65f3920 · 65f3920
1 parent 4f9be57
commit 65f3920
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 23 deletions.
diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -515,8 +515,7 @@ std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
     //{tipb::ScalarFuncSig::CastIntAsDuration, "cast"},
     //{tipb::ScalarFuncSig::CastIntAsJson, "cast"},
 
-    {tipb::ScalarFuncSig::CastRealAsInt, "tidb_cast"},
-    {tipb::ScalarFuncSig::CastRealAsReal, "tidb_cast"},
+    {tipb::ScalarFuncSig::CastRealAsInt, "tidb_cast"}, {tipb::ScalarFuncSig::CastRealAsReal, "tidb_cast"},
     {tipb::ScalarFuncSig::CastRealAsString, "tidb_cast"}, {tipb::ScalarFuncSig::CastRealAsDecimal, "tidb_cast"},
     {tipb::ScalarFuncSig::CastRealAsTime, "tidb_cast"},
     //{tipb::ScalarFuncSig::CastRealAsDuration, "cast"},
@@ -529,17 +528,16 @@ std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
     //{tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"},
     //{tipb::ScalarFuncSig::CastDecimalAsJson, "cast"},
 
-    {tipb::ScalarFuncSig::CastStringAsInt, "tidb_cast"},
-    //{tipb::ScalarFuncSig::CastStringAsReal, "cast"},
+    {tipb::ScalarFuncSig::CastStringAsInt, "tidb_cast"}, {tipb::ScalarFuncSig::CastStringAsReal, "tidb_cast"},
     {tipb::ScalarFuncSig::CastStringAsString, "tidb_cast"}, {tipb::ScalarFuncSig::CastStringAsDecimal, "tidb_cast"},
     {tipb::ScalarFuncSig::CastStringAsTime, "tidb_cast"},
     //{tipb::ScalarFuncSig::CastStringAsDuration, "cast"},
     //{tipb::ScalarFuncSig::CastStringAsJson, "cast"},
 
     {tipb::ScalarFuncSig::CastTimeAsInt, "tidb_cast"},
     //{tipb::ScalarFuncSig::CastTimeAsReal, "tidb_cast"},
-    //{tipb::ScalarFuncSig::CastTimeAsString, "tidb_cast"},
-    {tipb::ScalarFuncSig::CastTimeAsDecimal, "tidb_cast"}, {tipb::ScalarFuncSig::CastTimeAsTime, "tidb_cast"},
+    {tipb::ScalarFuncSig::CastTimeAsString, "tidb_cast"}, {tipb::ScalarFuncSig::CastTimeAsDecimal, "tidb_cast"},
+    {tipb::ScalarFuncSig::CastTimeAsTime, "tidb_cast"},
     //{tipb::ScalarFuncSig::CastTimeAsDuration, "cast"},
     //{tipb::ScalarFuncSig::CastTimeAsJson, "cast"},
 

diff --git a/dbms/src/Functions/FunctionsTiDBConversion.cpp b/dbms/src/Functions/FunctionsTiDBConversion.cpp
@@ -4,7 +4,7 @@
 namespace DB
 {
 
-StringRef trim(const StringRef & value)
+String trim(const StringRef & value)
 {
     StringRef ret;
     ret.size = 0;
@@ -22,10 +22,10 @@ StringRef trim(const StringRef & value)
             break;
     }
     if (start >= end)
-        return ret;
+        return ret.toString();
     ret.data = value.data + start;
     ret.size = end - start;
-    return ret;
+    return ret.toString();
 }
 
 void registerFunctionsTiDBConversion(FunctionFactory & factory) { factory.registerFunction<FunctionBuilderTiDBCast>(); }

diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h
@@ -48,7 +48,7 @@
 namespace DB
 {
 
-StringRef trim(const StringRef & value);
+String trim(const StringRef & value);
 
 enum CastError
 {
@@ -392,14 +392,14 @@ struct TiDBConvertToInteger
     static T strToInt(const StringRef & value, const Context & context)
     {
         // trim space
-        StringRef trim_string = trim(value);
-        if (trim_string.size == 0)
+        String trim_string = trim(value);
+        if (trim_string.size() == 0)
         {
             if (value.size != 0)
                 context.getDAGContext()->handleTruncateError("cast str as int");
             return static_cast<T>(0);
         }
-        StringRef int_string = getValidIntPrefix(trim_string);
+        StringRef int_string = getValidIntPrefix(StringRef(trim_string));
         if (int_string.size == 0)
         {
             if (value.size != 0)
@@ -588,8 +588,7 @@ struct TiDBConvertToFloat
     }
 
     template <typename T>
-    static std::enable_if_t<std::is_floating_point_v<T> || std::is_integral_v<T>, Float64> toFloat(
-        const T & value)
+    static std::enable_if_t<std::is_floating_point_v<T> || std::is_integral_v<T>, Float64> toFloat(const T & value)
     {
         return static_cast<Float64>(value);
     }
@@ -650,14 +649,24 @@ struct TiDBConvertToFloat
 
     static Float64 strToFloat(const StringRef & value, bool need_truncate, Float64 shift, Float64 max_f, const Context & context)
     {
-        StringRef trim_string = trim(value);
-        StringRef float_string = getValidFloatPrefix(trim_string);
-        if (trim_string.size == 0 && value.size != 0)
+        String trim_string = trim(value);
+        StringRef float_string = getValidFloatPrefix(StringRef(trim_string));
+        if (trim_string.size() == 0 && value.size != 0)
         {
-            context.getDAGContext()->handleTruncateError("cast str as real");
+            context.getDAGContext()->handleTruncateError("Truncated incorrect DOUBLE value");
             return 0.0;
         }
         Float64 f = strtod(float_string.data, nullptr);
+        if (f == std::numeric_limits<Float64>::infinity())
+        {
+            context.getDAGContext()->handleOverflowError("Truncated incorrect DOUBLE value", Errors::Types::Truncated);
+            return std::numeric_limits<Float64>::max();
+        }
+        if (f == -std::numeric_limits<double>::infinity())
+        {
+            context.getDAGContext()->handleOverflowError("Truncated incorrect DOUBLE value", Errors::Types::Truncated);
+            return -std::numeric_limits<Float64>::max();
+        }
         return produceTargetFloat64(f, need_truncate, shift, max_f, context);
     }
 
@@ -711,17 +720,16 @@ struct TiDBConvertToFloat
                     MyDateTime date_time(vec_from[i]);
                     if (type.getFraction() > 0)
                         vec_to[i] = toFloat(date_time.year * 10000000000ULL + date_time.month * 100000000ULL + date_time.day * 100000
-                                + date_time.hour * 1000 + date_time.minute * 100 + date_time.second + date_time.micro_second / 1000000.0);
+                            + date_time.hour * 1000 + date_time.minute * 100 + date_time.second + date_time.micro_second / 1000000.0);
                     else
                         vec_to[i] = toFloat(date_time.year * 10000000000ULL + date_time.month * 100000000ULL + date_time.day * 100000
-                                + date_time.hour * 1000 + date_time.minute * 100 + date_time.second);
+                            + date_time.hour * 1000 + date_time.minute * 100 + date_time.second);
                 }
             }
         }
         else if constexpr (std::is_same_v<FromDataType, DataTypeString>)
         {
             /// cast string as real
-            /// the implementation is quite different from TiDB/TiKV, so cast string as float will not be pushed to TiFlash
             const IColumn * col_from = block.getByPosition(arguments[0]).column.get();
             const ColumnString * col_from_string = checkAndGetColumn<ColumnString>(col_from);
             const ColumnString::Chars_t * chars = &col_from_string->getChars();
@@ -1315,7 +1323,8 @@ struct TiDBConvertToTime
                 {
                     // Cannot cast, fill with NULL
                     (*vec_null_map_to)[i] = 1;
-                    context.getDAGContext()->handleInvalidTime("Invalid time value: '" + toString(vec_from[i]) + "'", Errors::Types::WrongValue);
+                    context.getDAGContext()->handleInvalidTime(
+                        "Invalid time value: '" + toString(vec_from[i]) + "'", Errors::Types::WrongValue);
                 }
             }
         }

diff --git a/tests/fullstack-test/expr/cast_string_as_real.test b/tests/fullstack-test/expr/cast_string_as_real.test
@@ -0,0 +1,29 @@
+mysql> drop table if exists test.t
+mysql> create table test.t(a char(30))
+mysql> alter table test.t set tiflash replica 1
+mysql> insert into test.t values ('1.23'),('123'),('-123.99'),('+123.123-'),(0),(0.0),(NULL),('1.11.00'),('11xx'),('11.xx'),('xx.11'),('1e649'),('-1e649'),('9.9999999999999999'),('9.999999999999999')
+
+func> wait_table test t
+
+mysql> set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, b from (select a, cast(a as double) as b from test.t) t group by a, b order by a
++--------------------+-------------------------+
+| a                  | b                       |
++--------------------+-------------------------+
+| NULL               |                    NULL |
+| +123.123-          |                 123.123 |
+| -123.99            |                 -123.99 |
+| -1e649             | -1.7976931348623157e308 |
+| 0                  |                       0 |
+| 0.0                |                       0 |
+| 1.11.00            |                    1.11 |
+| 1.23               |                    1.23 |
+| 11.xx              |                      11 |
+| 11xx               |                      11 |
+| 123                |                     123 |
+| 1e649              |  1.7976931348623157e308 |
+| 9.999999999999999  |       9.999999999999998 |
+| 9.9999999999999999 |                      10 |
+| xx.11              |                       0 |
++--------------------+-------------------------+
+
+mysql> drop table if exists test.t