From f39985ef78d8b564d45cedc999a070510ba36110 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 20 Sep 2022 11:29:01 +0800 Subject: [PATCH] fix: the results of tikv and tiflash are different (#5839) (#5870) close pingcap/tiflash#5849, close pingcap/tidb#37258 --- dbms/src/Functions/FunctionsLogical.h | 18 ++++- dbms/src/Functions/tests/gtest_logical.cpp | 7 ++ format-diff.py | 79 ++++++++++++---------- tests/fullstack-test/expr/logical_op.test | 17 +++++ 4 files changed, 86 insertions(+), 35 deletions(-) diff --git a/dbms/src/Functions/FunctionsLogical.h b/dbms/src/Functions/FunctionsLogical.h index fe900ea2c5a..7f1813dd219 100644 --- a/dbms/src/Functions/FunctionsLogical.h +++ b/dbms/src/Functions/FunctionsLogical.h @@ -184,7 +184,23 @@ struct AssociativeOperationImpl { if (Op::isSaturable()) { - UInt8 a = vec[i]; + // cast a: UInt8 -> bool -> UInt8 is a trick + // TiFlash converts columns with non-UInt8 type to UInt8 type and sets value to 0 or 1 + // which correspond to false or true. However, for columns with UInt8 type, + // no more convertion will be executed on them and the values stored + // in them are 'origin' which means that they won't be converted to 0 or 1. + // For example: + // Input column with non-UInt8 type: + // column_values = {-2, 0, 2} + // then, they will be converted to: + // vec = {1, 0, 1} (here vec stores converted values) + // + // Input column with UInt8 type: + // column_values = {1, 0, 2} + // then, the vec will be: + // vec = {1, 0, 2} (error, we only want 0 or 1) + // See issue: https://github.com/pingcap/tidb/issues/37258 + bool a = static_cast(vec[i]); return Op::isSaturatedValue(a) ? a : continuation.apply(i); } else diff --git a/dbms/src/Functions/tests/gtest_logical.cpp b/dbms/src/Functions/tests/gtest_logical.cpp index d03c6c4b491..a536f07aac5 100644 --- a/dbms/src/Functions/tests/gtest_logical.cpp +++ b/dbms/src/Functions/tests/gtest_logical.cpp @@ -59,6 +59,13 @@ try func_name, createColumn>({0, 1, 0, 1, {}, 0}), createColumn>({0, 1, 1, 0, 1, {}}))); + // issue 5849 + ASSERT_COLUMN_EQ( + createColumn({0, 1, 1, 1}), + executeFunction( + func_name, + createColumn({0, 123, 0, 41}), + createColumn({0, 11, 221, 0}))); // column, const ASSERT_COLUMN_EQ( createColumn>({1, 1}), diff --git a/format-diff.py b/format-diff.py index 71ec6876cf5..f81814e7a69 100755 --- a/format-diff.py +++ b/format-diff.py @@ -1,44 +1,49 @@ -#!/usr/bin/python3 + +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import os import subprocess from os import path - import json - - def run_cmd(cmd, show_cmd=False): res = os.popen(cmd).readlines() if show_cmd: print("RUN CMD: {}".format(cmd)) return res - - def main(): default_suffix = ['.cpp', '.h', '.cc', '.hpp'] parser = argparse.ArgumentParser(description='TiFlash Code Format', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--repo_path', help='path of tics repository', + parser.add_argument('--repo_path', help='path of tiflash repository', default=os.path.dirname(os.path.abspath(__file__))) parser.add_argument('--suffix', help='suffix of files to format, split by space', default=' '.join(default_suffix)) - parser.add_argument('--ignore_suffix', help='ignore files with suffix, split by space') - parser.add_argument('--diff_from', help='commit hash/branch to check git diff', default='HEAD') - parser.add_argument('--check_formatted', help='exit -1 if NOT formatted', action='store_true') - parser.add_argument('--dump_diff_files_to', help='dump diff file names to specific path', default=None) - + parser.add_argument('--ignore_suffix', + help='ignore files with suffix, split by space') + parser.add_argument( + '--diff_from', help='commit hash/branch to check git diff', default='HEAD') + parser.add_argument('--check_formatted', + help='exit -1 if NOT formatted', action='store_true') + parser.add_argument('--dump_diff_files_to', + help='dump diff file names to specific path', default=None) args = parser.parse_args() default_suffix = args.suffix.strip().split(' ') if args.suffix else [] - ignore_suffix = args.ignore_suffix.strip().split(' ') if args.ignore_suffix else [] - tics_repo_path = args.repo_path - if not os.path.isabs(tics_repo_path): + ignore_suffix = args.ignore_suffix.strip().split( + ' ') if args.ignore_suffix else [] + tiflash_repo_path = args.repo_path + if not os.path.isabs(tiflash_repo_path): raise Exception("path of repo should be absolute") - assert tics_repo_path[-1] != '/' - - os.chdir(tics_repo_path) + assert tiflash_repo_path[-1] != '/' + os.chdir(tiflash_repo_path) files_to_check = run_cmd('git diff HEAD --name-only') if args.diff_from == 'HEAD' else run_cmd( 'git diff {} --name-only'.format(args.diff_from)) - files_to_check = [os.path.join(tics_repo_path, s.strip()) for s in files_to_check] + files_to_check = [os.path.join(tiflash_repo_path, s.strip()) + for s in files_to_check] files_to_format = [] for f in files_to_check: if not any([f.endswith(e) for e in default_suffix]): @@ -52,33 +57,39 @@ def main(): print('file {} can not be formatted'.format(file_path)) continue files_to_format.append(file_path) - if args.dump_diff_files_to: - da = [e[len(tics_repo_path):] for e in files_to_format] - json.dump({'files': da, 'repo': tics_repo_path}, open(args.dump_diff_files_to, 'w')) - print('dump {} modified files info to {}'.format(len(da), args.dump_diff_files_to)) - + da = [e[len(tiflash_repo_path):] for e in files_to_format] + json.dump({'files': da, 'repo': tiflash_repo_path}, + open(args.dump_diff_files_to, 'w')) + print('dump {} modified files info to {}'.format( + len(da), args.dump_diff_files_to)) if files_to_format: print('Files to format:\n {}'.format('\n '.join(files_to_format))) + for file in files_to_format: + cmd = 'clang-format -i {}'.format(file) + if subprocess.Popen(cmd, shell=True, cwd=tiflash_repo_path).wait(): + exit(-1) if args.check_formatted: - cmd = 'clang-format -i {}'.format(' '.join(files_to_format)) - if subprocess.Popen(cmd, shell=True, cwd=tics_repo_path).wait(): - exit(-1) diff_res = run_cmd('git diff --name-only') - if diff_res: + files_not_in_contrib = [f for f in diff_res if not f.startswith('contrib')] + files_contrib = [f for f in diff_res if f.startswith('contrib')] + if files_not_in_contrib: + print('') print('Error: found files NOT formatted') - print(''.join(diff_res)) + print(''.join(files_not_in_contrib)) exit(-1) + elif files_contrib: + print('') + print('Warn: found contrib changed') + print(''.join(files_contrib)) + print('') + print(''.join(run_cmd('git status'))) else: print("Format check passed") else: - cmd = 'clang-format -i {}'.format(' '.join(files_to_format)) - subprocess.Popen(cmd, shell=True, cwd=tics_repo_path).wait() print("Finish code format") else: print('No file to format') - - if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/tests/fullstack-test/expr/logical_op.test b/tests/fullstack-test/expr/logical_op.test index b64b4ff35e3..438f881d2cb 100644 --- a/tests/fullstack-test/expr/logical_op.test +++ b/tests/fullstack-test/expr/logical_op.test @@ -1,19 +1,24 @@ mysql> drop table if exists test.t1; mysql> drop table if exists test.t2; mysql> drop table if exists test.t3; +mysql> drop table if exists test.t4; mysql> create table test.t1(a char(20),b double); mysql> create table test.t2(a char(20)); mysql> create table test.t3(a int); +mysql> create table test.t4(a tinyint(45) unsigned NOT NULL, b bigint(20) NOT NULL); mysql> insert into test.t1 values(1,null),('j',0),(1,12.991),(0,0),(0,0),('they',1.009),('can',-99),(0,12.991),(1,-9.183),(null,1); mysql> insert into test.t2 values(0),(0),(0),(0),(0),(0),(1),('with'),('see'),(null); mysql> insert into test.t3 values(0),(1); +mysql> insert into test.t4 values(65, 1),(66, 2), (67, 3), (0, 0); mysql> alter table test.t1 set tiflash replica 1; mysql> alter table test.t2 set tiflash replica 1; mysql> alter table test.t3 set tiflash replica 1; +mysql> alter table test.t4 set tiflash replica 1; func> wait_table test t1 func> wait_table test t2 func> wait_table test t3 +func> wait_table test t4 mysql> set session tidb_isolation_read_engines='tiflash'; select count(*) from test.t1 where (b between null and 100) is null; +----------+ @@ -67,6 +72,18 @@ mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select count(*) from test.t3 group by a having ifnull(null,count(*)) and min(null); # empty +# issue 5849 +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select a or b from test.t4; ++--------+ +| a or b | ++--------+ +| 1 | +| 1 | +| 1 | +| 0 | ++--------+ + #mysql> drop table if exists test.t1; #mysql> drop table if exists test.t2; #mysql> drop table if exists test.t3; +#mysql> drop table if exists test.t4;