From f9e6e55b64e022e632a23efd37fbc0e152ed6516 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 3 Jul 2024 18:17:07 +0200 Subject: [PATCH] Expand reduce script and add simplification of case statements with multiple WHEN/THEN branches --- scripts/reduce_sql.py | 28 +++++++++++++++++++++++++--- src/statement_simplifier.cpp | 1 + 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py index 245a89b..d430d41 100644 --- a/scripts/reduce_sql.py +++ b/scripts/reduce_sql.py @@ -18,6 +18,7 @@ SELECT * FROM reduce_sql_statement('${QUERY}'); ''' +verbose = False class MultiStatementManager: delimiter = ';' @@ -33,7 +34,8 @@ def __init__(self, multi_statement): self.statements.append(stmt.strip() + ";") def is_multi_statement(sql_statement): - if len(sql_statement.split(';')) > 1: + splits = [x for x in sql_statement.split(';') if len(x.strip()) > 0] + if len(splits) > 1: return True return False @@ -45,10 +47,12 @@ def sanitize_error(err): err = re.sub(r'Error: near line \d+: ', '', err) err = err.replace(os.getcwd() + '/', '') err = err.replace(os.getcwd(), '') + err = re.sub(r'LINE \d+:.*\n', '', err) + err = re.sub(r' *\^ *', '', err) if 'AddressSanitizer' in err: match = re.search(r'[ \t]+[#]0 ([A-Za-z0-9]+) ([^\n]+)', err).groups()[1] err = 'AddressSanitizer error ' + match - return err + return err.strip() def run_shell_command(shell, cmd): @@ -69,7 +73,13 @@ def get_reduced_sql(shell, sql_query): raise Exception("Failed to reduce query") reduce_candidates = [] for line in stdout.split('\n'): - reduce_candidates.append(line.strip('"').replace('""', '"')) + if len(line) <= 2: + continue + if line[0] == '"': + line = line[1:] + if line[len(line) - 1] == '"': + line = line[:len(line) - 1] + reduce_candidates.append(line.replace('""', '"')) return reduce_candidates[1:] @@ -95,6 +105,15 @@ def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): print(sql_query) print("=======================") break + elif verbose: + print("Failed to reduce query") + print("=======================") + print(reduce_candidate) + print("=====Target error======") + print(error_msg) + print("=====Actual error======") + print(new_error) + print("=======================") if not found_new_candidate: break return sql_query @@ -273,6 +292,8 @@ def reduce_query_log(queries, shell, data_load=[], max_time_seconds=300): parser.add_argument( '--max-time', dest='max_time', action='store', help='Maximum time in seconds to run the reducer', default=300 ) + parser.add_argument( + '--verbose', dest='verbose', action='store_true', help='Verbose output') args = parser.parse_args() print("Starting reduce process") @@ -280,6 +301,7 @@ def reduce_query_log(queries, shell, data_load=[], max_time_seconds=300): shell = args.shell data_load = open(args.load).read() sql_query = open(args.exec).read() + verbose = args.verbose (stdout, stderr, returncode) = run_shell_command(shell, data_load + sql_query) expected_error = sanitize_error(stderr).strip() if len(expected_error) == 0: diff --git a/src/statement_simplifier.cpp b/src/statement_simplifier.cpp index 66662f4..a439a6e 100644 --- a/src/statement_simplifier.cpp +++ b/src/statement_simplifier.cpp @@ -308,6 +308,7 @@ void StatementSimplifier::SimplifyExpression(duckdb::unique_ptr