From 87b5eabbd04497dbd2b7b982033dd210e141d62c Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 2 Jul 2024 18:15:07 +0200 Subject: [PATCH 1/5] Point to the most recent version of duckdb submodule --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 70fd6a8..cd4e519 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 70fd6a8a2450c1e2a7d0547d4c0666a649dc378e +Subproject commit cd4e5194a7de618f8ac1d8bc1be423ab2fa1bd85 From c997853682a7b0bcf2e43e9bc4a8590e905fd0ee Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 4 Jul 2024 09:34:23 +0200 Subject: [PATCH 2/5] update duckdb submodule to resolve merge conflict --- duckdb | 2 +- scripts/extension-upload.sh | 90 --------- scripts/fuzzer_helper.py | 207 ++++++++++++++++++++ scripts/parser_test.py | 21 ++ scripts/reduce_sql.py | 368 ++++++++++++++++++++++++++++++++++++ scripts/run_fuzzer.py | 199 +++++++++++++++++++ scripts/run_sqlancer.py | 150 +++++++++++++++ scripts/run_test_list.py | 65 +++++++ scripts/runsqlsmith.py | 52 +++++ scripts/try_timeout.py | 48 +++++ 10 files changed, 1111 insertions(+), 91 deletions(-) delete mode 100755 scripts/extension-upload.sh create mode 100644 scripts/fuzzer_helper.py create mode 100644 scripts/parser_test.py create mode 100644 scripts/reduce_sql.py create mode 100644 scripts/run_fuzzer.py create mode 100644 scripts/run_sqlancer.py create mode 100644 scripts/run_test_list.py create mode 100644 scripts/runsqlsmith.py create mode 100644 scripts/try_timeout.py diff --git a/duckdb b/duckdb index cd4e519..7b2cdc7 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit cd4e5194a7de618f8ac1d8bc1be423ab2fa1bd85 +Subproject commit 7b2cdc786bf64ed776941a3e4a65722941b957a6 diff --git a/scripts/extension-upload.sh b/scripts/extension-upload.sh deleted file mode 100755 index 9fd5b39..0000000 --- a/scripts/extension-upload.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash - -# Extension upload script - -# Usage: ./extension-upload.sh -# : Name of the extension -# : Version (commit / version tag) of the extension -# : Version (commit / version tag) of DuckDB -# : Architecture target of the extension binary -# : S3 bucket to upload to -# : Set this as the latest version ("true" / "false", default: "false") -# : Set this as a versioned version that will prevent its deletion - -set -e - -if [[ $4 == wasm* ]]; then - ext="/tmp/extension/$1.duckdb_extension.wasm" -else - ext="/tmp/extension/$1.duckdb_extension" -fi - -echo $ext - -script_dir="$(dirname "$(readlink -f "$0")")" - -# calculate SHA256 hash of extension binary -cat $ext > $ext.append - -if [[ $4 == wasm* ]]; then - # 0 for custom section - # 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256) - # [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02] - echo -n -e '\x00' >> $ext.append - echo -n -e '\x93\x02' >> $ext.append - # 10 in hex = 16 in decimal, lenght of name, 1 byte - echo -n -e '\x10' >> $ext.append - echo -n -e 'duckdb_signature' >> $ext.append - # the name of the WebAssembly custom section, 16 bytes - # 100 in hex, 256 in decimal - # [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)], - # for a grand total of 2 bytes - echo -n -e '\x80\x02' >> $ext.append -fi - -# (Optionally) Sign binary -if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then - echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem - $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash - openssl pkeyutl 
-sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign - rm -f private.pem -fi - -# Signature is always there, potentially defaulting to 256 zeros -truncate -s 256 $ext.sign - -# append signature to extension binary -cat $ext.sign >> $ext.append - -# compress extension binary -if [[ $4 == wasm_* ]]; then - brotli < $ext.append > "$ext.compressed" -else - gzip < $ext.append > "$ext.compressed" -fi - -set -e - -# Abort if AWS key is not set -if [ -z "$AWS_ACCESS_KEY_ID" ]; then - echo "No AWS key found, skipping.." - exit 0 -fi - -# upload versioned version -if [[ $7 = 'true' ]]; then - if [[ $4 == wasm* ]]; then - aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" - else - aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read - fi -fi - -# upload to latest version -if [[ $6 = 'true' ]]; then - if [[ $4 == wasm* ]]; then - aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" - else - aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read - fi -fi diff --git a/scripts/fuzzer_helper.py b/scripts/fuzzer_helper.py new file mode 100644 index 0000000..dd82e06 --- /dev/null +++ b/scripts/fuzzer_helper.py @@ -0,0 +1,207 @@ +import json +import requests +import sys +import os +import subprocess +import reduce_sql +import fuzzer_helper + + +USERNAME = 'fuzzerofducks' + +REPO_OWNER = 'duckdb' +REPO_NAME = 'duckdb-fuzzer' + +fuzzer_desc = '''Issue found by ${FUZZER} on git commit hash [${SHORT_HASH}](https://github.com/duckdb/duckdb/commit/${FULL_HASH}) using seed ${SEED}. +''' + +header = '''### To Reproduce +```sql +''' + +middle = ''' +``` + +### Error Message +``` +''' + +footer = ''' +```''' + + +# github stuff +def issue_url(): + return 'https://api.github.com/repos/%s/%s/issues' % (REPO_OWNER, REPO_NAME) + + +def get_token(): + if 'FUZZEROFDUCKSKEY' not in os.environ: + print("FUZZEROFDUCKSKEY not found in environment variables") + exit(1) + token = os.environ['FUZZEROFDUCKSKEY'] + if len(token) == 0: + print("FUZZEROFDUCKSKEY is set but is empty") + exit(1) + + if len(token) != 40: + print("Incorrect length for FUZZEROFDUCKSKEY") + exit(1) + return token + + +def create_session(): + # Create an authenticated session to create the issue + session = requests.Session() + session.headers.update({'Authorization': 'token %s' % (get_token(),)}) + return session + + +def make_github_issue(title, body): + if len(title) > 240: + # avoid title is too long error (maximum is 256 characters) + title = title[:240] + '...' 
+ session = create_session() + url = issue_url() + issue = {'title': title, 'body': body} + r = session.post(url, json.dumps(issue)) + if r.status_code == 201: + print('Successfully created Issue "%s"' % title) + else: + print('Could not create Issue "%s"' % title) + print('Response:', r.content.decode('utf8')) + raise Exception("Failed to create issue") + + +def get_github_issues(page): + session = create_session() + url = issue_url() + '?per_page=100&page=' + str(page) + r = session.get(url) + if r.status_code != 200: + print('Failed to get list of issues') + print('Response:', r.content.decode('utf8')) + raise Exception("Failed to get list of issues") + return json.loads(r.content.decode('utf8')) + + +def close_github_issue(number): + session = create_session() + url = issue_url() + '/' + str(number) + params = {'state': 'closed'} + r = session.patch(url, json.dumps(params)) + if r.status_code == 200: + print(f'Successfully closed Issue "{number}"') + else: + print(f'Could not close Issue "{number}" (status code {r.status_code})') + print('Response:', r.content.decode('utf8')) + raise Exception("Failed to close issue") + + +def label_github_issue(number, label): + session = create_session() + url = issue_url() + '/' + str(number) + params = {'labels': [label]} + r = session.patch(url, json.dumps(params)) + if r.status_code == 200: + print(f'Successfully labeled Issue "{number}"') + else: + print(f'Could not label Issue "{number}" (status code {r.status_code})') + print('Response:', r.content.decode('utf8')) + raise Exception("Failed to label issue") + + +def extract_issue(body, nr): + try: + splits = body.split(middle) + sql = splits[0].split(header)[1] + error = splits[1][: -len(footer)] + return (sql, error) + except: + print(f"Failed to extract SQL/error message from issue {nr}") + print(body) + return None + + +def run_shell_command_batch(shell, cmd): + command = [shell, '--batch', '-init', '/dev/null'] + + try: + res = subprocess.run( + command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300 + ) + except subprocess.TimeoutExpired: + print(f"TIMEOUT... 
{cmd}") + return ("", "", 0, True) + stdout = res.stdout.decode('utf8').strip() + stderr = res.stderr.decode('utf8').strip() + return (stdout, stderr, res.returncode, False) + + +def test_reproducibility(shell, issue, current_errors, perform_check): + extract = extract_issue(issue['body'], issue['number']) + labels = issue['labels'] + label_timeout = False + for label in labels: + if label['name'] == 'timeout': + label_timeout = True + if extract is None: + # failed extract: leave the issue as-is + return True + sql = extract[0] + ';' + error = extract[1] + if perform_check is True and label_timeout is False: + print(f"Checking issue {issue['number']}...") + (stdout, stderr, returncode, is_timeout) = run_shell_command_batch(shell, sql) + if is_timeout: + label_github_issue(issue['number'], 'timeout') + else: + if returncode == 0: + return False + if not fuzzer_helper.is_internal_error(stderr): + return False + # issue is still reproducible + current_errors[error] = issue + return True + + +def extract_github_issues(shell, perform_check): + current_errors = dict() + for p in range(1, 10): + issues = get_github_issues(p) + for issue in issues: + # check if the github issue is still reproducible + if not test_reproducibility(shell, issue, current_errors, perform_check): + # the issue appears to be fixed - close the issue + print(f"Failed to reproduce issue {issue['number']}, closing...") + close_github_issue(int(issue['number'])) + return current_errors + + +def file_issue(cmd, error_msg, fuzzer, seed, hash): + # issue is new, file it + print("Filing new issue to Github") + + title = error_msg + body = ( + fuzzer_desc.replace("${FUZZER}", fuzzer) + .replace("${FULL_HASH}", hash) + .replace("${SHORT_HASH}", hash[:5]) + .replace("${SEED}", str(seed)) + ) + body += header + cmd + middle + error_msg + footer + print(title, body) + make_github_issue(title, body) + + +def is_internal_error(error): + if 'differs from original result' in error: + return True + if 'INTERNAL' in error: + return True + if 'signed integer overflow' in error: + return True + if 'Sanitizer' in error or 'sanitizer' in error: + return True + if 'runtime error' in error: + return True + return False diff --git a/scripts/parser_test.py b/scripts/parser_test.py new file mode 100644 index 0000000..e09fbe3 --- /dev/null +++ b/scripts/parser_test.py @@ -0,0 +1,21 @@ +from sqllogictest import SQLParserException, SQLLogicParser, SQLLogicTest + +from typing import Optional +import argparse + + +def main(): + parser = argparse.ArgumentParser(description="SQL Logic Parser") + parser.add_argument("filename", type=str, help="Path to the SQL logic file") + args = parser.parse_args() + + filename = args.filename + + parser = SQLLogicParser() + out: Optional[SQLLogicTest] = parser.parse(filename) + if not out: + raise SQLParserException(f"Test {filename} could not be parsed") + + +if __name__ == "__main__": + main() diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py new file mode 100644 index 0000000..245a89b --- /dev/null +++ b/scripts/reduce_sql.py @@ -0,0 +1,368 @@ +import re +import subprocess +import time +import os +import fuzzer_helper +import multiprocessing +import sqlite3 + +# this script can be used as a library, but can also be directly called +# example usage: +# python3 scripts/reduce_sql.py --load load.sql --exec exec.sql + +try: + multiprocessing.set_start_method('fork') +except RuntimeError: + pass +get_reduced_query = ''' +SELECT * FROM reduce_sql_statement('${QUERY}'); +''' + + +class MultiStatementManager: + 
delimiter = ';' + + def __init__(self, multi_statement): + # strip whitespace, then the final ';', and split on all ';' inbetween. + statements = list( + map(lambda x: x.strip(), multi_statement.strip().strip(';').split(MultiStatementManager.delimiter)) + ) + self.statements = [] + for stmt in statements: + if len(stmt) > 0: + self.statements.append(stmt.strip() + ";") + + def is_multi_statement(sql_statement): + if len(sql_statement.split(';')) > 1: + return True + return False + + def get_last_statement(self): + return self.statements[-1] + + +def sanitize_error(err): + err = re.sub(r'Error: near line \d+: ', '', err) + err = err.replace(os.getcwd() + '/', '') + err = err.replace(os.getcwd(), '') + if 'AddressSanitizer' in err: + match = re.search(r'[ \t]+[#]0 ([A-Za-z0-9]+) ([^\n]+)', err).groups()[1] + err = 'AddressSanitizer error ' + match + return err + + +def run_shell_command(shell, cmd): + command = [shell, '-csv', '--batch', '-init', '/dev/null'] + + res = subprocess.run(command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout = res.stdout.decode('utf8').strip() + stderr = res.stderr.decode('utf8').strip() + return (stdout, stderr, res.returncode) + + +def get_reduced_sql(shell, sql_query): + reduce_query = get_reduced_query.replace('${QUERY}', sql_query.replace("'", "''")) + (stdout, stderr, returncode) = run_shell_command(shell, reduce_query) + if returncode != 0: + print(stdout) + print(stderr) + raise Exception("Failed to reduce query") + reduce_candidates = [] + for line in stdout.split('\n'): + reduce_candidates.append(line.strip('"').replace('""', '"')) + return reduce_candidates[1:] + + +def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): + start = time.time() + while True: + found_new_candidate = False + reduce_candidates = get_reduced_sql(shell, sql_query) + for reduce_candidate in reduce_candidates: + if reduce_candidate == sql_query: + continue + current_time = time.time() + if current_time - start > max_time_seconds: + break + + (stdout, stderr, returncode) = run_shell_command(shell, data_load + reduce_candidate) + new_error = sanitize_error(stderr) + if new_error == error_msg: + sql_query = reduce_candidate + found_new_candidate = True + print("Found new reduced query") + print("=======================") + print(sql_query) + print("=======================") + break + if not found_new_candidate: + break + return sql_query + + +def is_ddl_query(query): + query = query.lower() + if 'create' in query or 'insert' in query or 'update' in query or 'delete' in query: + return True + return False + + +def initial_cleanup(query_log): + query_log = query_log.replace('SELECT * FROM pragma_version()\n', '') + return query_log + + +def run_queries_until_crash_mp(queries, result_file): + import duckdb + + con = duckdb.connect() + sqlite_con = sqlite3.connect(result_file) + sqlite_con.execute('CREATE TABLE queries(id INT, text VARCHAR)') + sqlite_con.execute('CREATE TABLE result(text VARCHAR)') + sqlite_con.execute("INSERT INTO result VALUES ('__CRASH__')") + id = 1 + is_internal_error = False + for q in queries: + # insert the current query into the database + # we do this pre-emptively in case the program crashes + sqlite_con.execute('INSERT INTO queries VALUES (?, ?)', (id, q)) + sqlite_con.commit() + + keep_query = False + try: + con.execute(q) + keep_query = is_ddl_query(q) + except Exception as e: + exception_error = str(e) + is_internal_error = fuzzer_helper.is_internal_error(exception_error) + if 
is_internal_error: + keep_query = True + sqlite_con.execute('UPDATE result SET text=?', (exception_error,)) + if not keep_query: + sqlite_con.execute('DELETE FROM queries WHERE id=?', (id,)) + if is_internal_error: + # found internal error: no need to try further queries + break + id += 1 + if not is_internal_error: + # failed to reproduce: delete result + sqlite_con.execute('DELETE FROM result') + sqlite_con.commit() + sqlite_con.close() + + +def run_queries_until_crash(queries): + sqlite_file = 'cleaned_queries.db' + if os.path.isfile(sqlite_file): + os.remove(sqlite_file) + # run the queries in a separate process because it might crash + p = multiprocessing.Process(target=run_queries_until_crash_mp, args=(queries, sqlite_file)) + p.start() + p.join() + + # read the queries back from the file + sqlite_con = sqlite3.connect(sqlite_file) + queries = sqlite_con.execute('SELECT text FROM queries ORDER BY id').fetchall() + results = sqlite_con.execute('SELECT text FROM result').fetchall() + sqlite_con.close() + if len(results) == 0: + # no internal error or crash found + return (None, None) + assert len(results) == 1 + return ([x[0] for x in queries], results[0][0]) + + +def cleanup_irrelevant_queries(query_log): + query_log = initial_cleanup(query_log) + + queries = [x for x in query_log.split(';\n') if len(x) > 0] + return run_queries_until_crash(queries) + + +# def reduce_internal(start, sql_query, data_load, queries_final, shell, error_msg, max_time_seconds=300): + + +def reduce_query_log_query(start, shell, queries, query_index, max_time_seconds): + new_query_list = queries[:] + sql_query = queries[query_index] + while True: + found_new_candidate = False + reduce_candidates = get_reduced_sql(shell, sql_query) + for reduce_candidate in reduce_candidates: + if reduce_candidate == sql_query: + continue + current_time = time.time() + if current_time - start > max_time_seconds: + break + + new_query_list[query_index] = reduce_candidate + (_, error) = run_queries_until_crash(new_query_list) + + if error is not None: + sql_query = reduce_candidate + found_new_candidate = True + print("Found new reduced query") + print("=======================") + print(sql_query) + print("========ERROR==========") + print(error) + print("=======================") + print("") + break + if not found_new_candidate: + break + return sql_query + + +def reduce_multi_statement(sql_queries, local_shell, local_data_load): + reducer = MultiStatementManager(sql_queries) + last_statement = reducer.get_last_statement() + print(f"testing if just last statement of multi statement creates the error") + (stdout, stderr, returncode) = run_shell_command(local_shell, local_data_load + last_statement) + expected_error = sanitize_error(stderr).strip() + if len(expected_error) > 0: + # reduce just the last statement + return reduce(last_statement, local_data_load, local_shell, expected_error, int(args.max_time)) + queries = reduce_query_log(reducer.statements, local_shell, [local_data_load]) + return "\n".join(queries) + + +def reduce_query_log(queries, shell, data_load=[], max_time_seconds=300): + start = time.time() + current_index = 0 + # first try to remove as many queries as possible + while current_index < len(queries): + print("Attempting to remove query at position %d (of %d total queries)" % (current_index, len(queries))) + current_time = time.time() + if current_time - start > max_time_seconds: + break + # remove the query at "current_index" + new_queries = queries[:current_index] + queries[current_index + 1 :] + 
new_queries_with_data = data_load + new_queries + # try to run the queries and check if we still get the same error + (new_queries_x, current_error) = run_queries_until_crash(new_queries_with_data) + if current_error is None: + # cannot remove this query without invalidating the test case + current_index += 1 + else: + # we can remove this query + queries = new_queries + # now try to reduce individual queries + for i in range(len(queries)): + if is_ddl_query(queries[i]): + continue + current_time = time.time() + if current_time - start > max_time_seconds: + break + queries[i] = reduce_query_log_query(start, shell, queries, i, max_time_seconds) + return queries + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='Reduce a problematic SQL query') + parser.add_argument( + '--shell', dest='shell', action='store', help='Path to the shell executable', default='build/debug/duckdb' + ) + parser.add_argument('--load', dest='load', action='store', help='Path to the data load script', required=True) + parser.add_argument('--exec', dest='exec', action='store', help='Path to the executable script', required=True) + parser.add_argument( + '--inplace', dest='inplace', action='store_true', help='If true, overrides the exec script with the final query' + ) + parser.add_argument( + '--max-time', dest='max_time', action='store', help='Maximum time in seconds to run the reducer', default=300 + ) + + args = parser.parse_args() + print("Starting reduce process") + + shell = args.shell + data_load = open(args.load).read() + sql_query = open(args.exec).read() + (stdout, stderr, returncode) = run_shell_command(shell, data_load + sql_query) + expected_error = sanitize_error(stderr).strip() + if len(expected_error) == 0: + print("===================================================") + print("Could not find expected error - no error encountered") + print("===================================================") + exit(1) + + print("===================================================") + print("Found expected error") + print("===================================================") + print(expected_error) + print("===================================================") + + if MultiStatementManager.is_multi_statement(sql_query): + final_query = reduce_multi_statement(sql_query, shell, data_load) + else: + final_query = reduce(sql_query, data_load, shell, expected_error, int(args.max_time)) + + print("Found final reduced query") + print("===================================================") + print(final_query) + print("===================================================") + if args.inplace: + print(f"Writing to file {args.exec}") + with open(args.exec, 'w+') as f: + f.write(final_query) + + +# Example usage: +# error_msg = 'INTERNAL Error: Assertion triggered in file "/Users/myth/Programs/duckdb-bugfix/src/common/types/data_chunk.cpp" on line 41: !types.empty()' +# shell = 'build/debug/duckdb' +# data_load = 'create table all_types as select * from test_all_types();' +# sql_query = ''' +# select +# subq_0.c0 as c0, +# contains( +# cast(cast(nullif( +# argmax( +# cast(case when 0 then (select varchar from main.all_types limit 1 offset 5) +# else (select varchar from main.all_types limit 1 offset 5) +# end +# as varchar), +# cast(decode( +# cast(cast(null as blob) as blob)) as varchar)) over (partition by subq_0.c1 order by subq_0.c1), +# current_schema()) as varchar) as varchar), +# cast(cast(nullif(cast(null as varchar), +# cast(null as varchar)) as varchar) as varchar)) as c1, +# 
(select min(time) from main.all_types) +# as c2, +# subq_0.c1 as c3, +# subq_0.c1 as c4, +# cast(nullif(subq_0.c1, +# subq_0.c1) as decimal(4,1)) as c5 +# from +# (select +# ref_0.timestamp_ns as c0, +# case when (EXISTS ( +# select +# ref_0.timestamp_ns as c0, +# ref_0.timestamp_ns as c1, +# (select timestamp_tz from main.all_types limit 1 offset 4) +# as c2, +# ref_1.int_array as c3, +# ref_1.dec_4_1 as c4, +# ref_0.utinyint as c5, +# ref_1.int as c6, +# ref_0.double as c7, +# ref_0.medium_enum as c8, +# ref_1.array_of_structs as c9, +# ref_1.varchar as c10 +# from +# main.all_types as ref_1 +# where ref_1.varchar ~~~ ref_1.varchar +# limit 28)) +# or (ref_0.varchar ~~~ ref_0.varchar) then ref_0.dec_4_1 else ref_0.dec_4_1 end +# as c1 +# from +# main.all_types as ref_0 +# where (0) +# and (ref_0.varchar ~~ ref_0.varchar)) as subq_0 +# where writefile() !~~* writefile() +# limit 88 +# ''' +# +# print(reduce(sql_query, data_load, shell, error_msg)) diff --git a/scripts/run_fuzzer.py b/scripts/run_fuzzer.py new file mode 100644 index 0000000..f2bb8f3 --- /dev/null +++ b/scripts/run_fuzzer.py @@ -0,0 +1,199 @@ +import json +import requests +import sys +import os +import subprocess +import reduce_sql +import fuzzer_helper +import random + +seed = -1 + +fuzzer = None +db = None +shell = None +perform_checks = True +dry = False +for param in sys.argv: + if param == '--sqlsmith': + fuzzer = 'sqlsmith' + elif param == '--duckfuzz': + fuzzer = 'duckfuzz' + elif param == '--duckfuzz_functions': + fuzzer = 'duckfuzz_functions' + elif param == '--alltypes': + db = 'alltypes' + elif param == '--tpch': + db = 'tpch' + elif param == '--emptyalltypes': + db = 'emptyalltypes' + elif param == '--no_checks': + perform_checks = False + elif param.startswith('--shell='): + shell = param.replace('--shell=', '') + elif param.startswith('--seed='): + seed = int(param.replace('--seed=', '')) + elif param.startswith('--dry'): + dry = True + +if fuzzer is None: + print("Unrecognized fuzzer to run, expected e.g. --sqlsmith or --duckfuzz") + exit(1) + +if db is None: + print("Unrecognized database to run on, expected either --tpch, --alltypes or --emptyalltypes") + exit(1) + +if shell is None: + print("Unrecognized path to shell, expected e.g. 
--shell=build/debug/duckdb") + exit(1) + +if seed < 0: + seed = random.randint(0, 2**30) + +git_hash = os.getenv('DUCKDB_HASH') + + +def create_db_script(db): + if db == 'alltypes': + return 'create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types();' + elif db == 'tpch': + return 'call dbgen(sf=0.1);' + elif db == 'emptyalltypes': + return 'create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types() limit 0;' + else: + raise Exception("Unknown database creation script") + + +def run_fuzzer_script(fuzzer): + if fuzzer == 'sqlsmith': + return "call sqlsmith(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" + elif fuzzer == 'duckfuzz': + return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" + elif fuzzer == 'duckfuzz_functions': + return "call fuzz_all_functions(seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" + else: + raise Exception("Unknown fuzzer type") + + +def get_fuzzer_name(fuzzer): + if fuzzer == 'sqlsmith': + return 'SQLSmith' + elif fuzzer == 'duckfuzz': + return 'DuckFuzz' + elif fuzzer == 'duckfuzz_functions': + return 'DuckFuzz (Functions)' + else: + return 'Unknown' + + +def run_shell_command(cmd): + command = [shell, '--batch', '-init', '/dev/null'] + + res = subprocess.run(command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout = res.stdout.decode('utf8', 'ignore').strip() + stderr = res.stderr.decode('utf8', 'ignore').strip() + return (stdout, stderr, res.returncode) + + +# first get a list of all github issues, and check if we can still reproduce them + +if dry: + current_errors = [] +else: + current_errors = fuzzer_helper.extract_github_issues(shell, perform_checks) + +max_queries = 2000 +last_query_log_file = 'sqlsmith.log' +complete_log_file = 'sqlsmith.complete.log' + +print( + f'''========================================== + RUNNING {fuzzer} on {db} +==========================================''' +) + +load_script = create_db_script(db) +fuzzer_name = get_fuzzer_name(fuzzer) +fuzzer = ( + run_fuzzer_script(fuzzer) + .replace('${MAX_QUERIES}', str(max_queries)) + .replace('${LAST_LOG_FILE}', last_query_log_file) + .replace('${COMPLETE_LOG_FILE}', complete_log_file) + .replace('${SEED}', str(seed)) +) + +print(load_script) +print(fuzzer) + +cmd = load_script + "\n" + fuzzer + +print("==========================================") + +(stdout, stderr, returncode) = run_shell_command(cmd) + +print( + f'''========================================== + FINISHED RUNNING +==========================================''' +) +print("============== STDOUT ================") +print(stdout) +print("============== STDERR =================") +print(stderr) +print("==========================================") + +print(returncode) +if returncode == 0: + print("============== SUCCESS ================") + exit(0) + +print("============== FAILURE ================") +print("Attempting to reproduce and file issue...") + +# run the last query, and see if the issue persists +with open(last_query_log_file, 'r') as f: + last_query = f.read() + +with open(complete_log_file, 'r') as f: + all_queries = f.read() + +(stdout, stderr, returncode) = run_shell_command(load_script + all_queries) +if returncode == 0: + print("Failed to reproduce the issue...") + exit(0) + 
+print("============== STDOUT ================") +print(stdout) +print("============== STDERR =================") +print(stderr) +print("==========================================") +if not fuzzer_helper.is_internal_error(stderr): + print("Failed to reproduce the internal error") + exit(0) + +error_msg = reduce_sql.sanitize_error(stderr) + +print("=========================================") +print(" Reproduced successfully ") +print("=========================================") + +# check if this is a duplicate issue +if error_msg in current_errors: + print("Skip filing duplicate issue") + print( + "Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/" + + str(current_errors[error_msg]['number']) + ) + exit(0) + +print("=========================================") +print(" Attempting to reduce query ") +print("=========================================") +# try to reduce the query as much as possible +# reduce_multi_statement checks just the last statement first as a heuristic to see if +# only the last statement causes the error. +required_queries = reduce_sql.reduce_multi_statement(all_queries, shell, load_script) +cmd = load_script + '\n' + last_query + "\n" + +fuzzer_helper.file_issue(cmd, error_msg, fuzzer_name, seed, git_hash) diff --git a/scripts/run_sqlancer.py b/scripts/run_sqlancer.py new file mode 100644 index 0000000..2646de4 --- /dev/null +++ b/scripts/run_sqlancer.py @@ -0,0 +1,150 @@ +import os +import random +import subprocess +import sys +import reduce_sql +import fuzzer_helper + +persistent = False +sqlancer_dir = 'sqlancer' +seed = None +timeout = 600 +threads = 1 +num_queries = 1000 +shell = None + +# python3 scripts/run_sqlancer.py --sqlancer=/Users/myth/Programs/sqlancer --shell=build/debug/duckdb --seed=0 +for arg in sys.argv: + if arg == '--persistent': + persistent = True + elif arg.startswith('--sqlancer='): + sqlancer_dir = arg.replace('--sqlancer=', '') + elif arg.startswith('--seed='): + seed = int(arg.replace('--seed=', '')) + elif arg.startswith('--timeout='): + timeout = int(arg.replace('--timeout=', '')) + elif arg.startswith('--threads='): + threads = int(arg.replace('--threads=', '')) + elif arg.startswith('--num-queries='): + num_queries = int(arg.replace('--num-queries=', '')) + elif arg.startswith('--shell='): + shell = arg.replace('--shell=', '') + +if shell is None: + print("Unrecognized path to shell, expected e.g. 
--shell=build/debug/duckdb") + exit(1) + +if not os.path.isfile(shell): + print(f"Could not find shell \"{shell}\"") + exit(1) + +if seed is None: + seed = random.randint(0, 2**30) + +git_hash = fuzzer_helper.get_github_hash() + +targetdir = os.path.join(sqlancer_dir, 'target') +filenames = os.listdir(targetdir) +found_filename = "" +for fname in filenames: + if 'sqlancer-' in fname.lower(): + found_filename = fname + break + +if not found_filename: + print("FAILED TO RUN SQLANCER") + print("Could not find target file sqlancer/target/sqlancer-*.jar") + exit(1) + +command_prefix = ['java'] +if persistent: + command_prefix += ['-Dduckdb.database.file=/tmp/lancer_duckdb_db'] +command_prefix += ['-jar', os.path.join(targetdir, found_filename)] + +seed_text = '' +if seed is not None: + seed_text = f'--random-seed {seed}' + +base_cmd = f'--num-queries {num_queries} --num-threads {threads} {seed_text} --log-each-select=true --timeout-seconds {timeout} duckdb' +command = [x for x in base_cmd.split(' ') if len(x) > 0] + +print('--------------------- RUNNING SQLANCER ----------------------') +print(' '.join(command_prefix + command)) + +subprocess = subprocess.Popen(command_prefix + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +out = subprocess.stdout.read() +err = subprocess.stderr.read() +subprocess.wait() + +if subprocess.returncode == 0: + print('--------------------- SQLANCER SUCCESS ----------------------') + print('SQLANCER EXITED WITH CODE ' + str(subprocess.returncode)) + exit(0) + +print('--------------------- SQLANCER FAILURE ----------------------') +print('SQLANCER EXITED WITH CODE ' + str(subprocess.returncode)) +print('--------------------- SQLANCER ERROR LOG ----------------------') +print(err.decode('utf8', 'ignore')) +print('--------------------- SQLancer Logs ----------------------') +print(out.decode('utf8', 'ignore')) +try: + with open('duckdb-queries.log', 'r') as f: + text = f.read() + print('--------------------- DuckDB Logs ----------------------') + print(text) +except: + pass + + +with open('duckdb-queries.log', 'r') as f: + query_log = f.read() + +# clean up any irrelevant SELECT statements and failing DDL statements +(queries, expected_error) = reduce_sql.cleanup_irrelevant_queries(query_log) +if queries is None: + print('----------------------------------------------') + print("Failed to reproduce SQLancer error!") + print('----------------------------------------------') + exit(0) + +print('----------------------------------------------') +print("Found query log that produces the following error") +print('----------------------------------------------') +if expected_error == '__CRASH__': + print('CRASH!') +else: + print(expected_error) + +print('----------------------------------------------') +print("Starting reduction process") +print('----------------------------------------------') + +# clean up queries from the query log by trying to remove queries one by one +queries = reduce_sql.reduce_query_log(queries, shell, []) + +reduced_test_case = ';\n'.join(queries) +print('----------------------------------------------') +print("Found reproducible test case") +print('----------------------------------------------') +print(reduced_test_case) + +(stdout, stderr, returncode) = reduce_sql.run_shell_command(shell, reduced_test_case) +error_msg = reduce_sql.sanitize_error(stderr) + +print('----------------------------------------------') +print("Fetching github issues") +print('----------------------------------------------') + +# first get a list of all github 
issues, and check if we can still reproduce them +current_errors = fuzzer_helper.extract_github_issues(shell) + +# check if this is a duplicate issue +if error_msg in current_errors: + print("Skip filing duplicate issue") + print( + "Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/" + + str(current_errors[error_msg]['number']) + ) + exit(0) + +fuzzer_helper.file_issue(reduced_test_case, error_msg, "SQLancer", seed, git_hash) diff --git a/scripts/run_test_list.py b/scripts/run_test_list.py new file mode 100644 index 0000000..3ba3426 --- /dev/null +++ b/scripts/run_test_list.py @@ -0,0 +1,65 @@ +import sys +import subprocess +import re +import os + +# wheth +no_exit = False +for i in range(len(sys.argv)): + if sys.argv[i] == '--no-exit': + no_exit = True + del sys.argv[i] + i -= 1 + +if len(sys.argv) < 2: + print("Expected usage: python3 scripts/run_test_list.py build/debug/test/unittest [--no-exit]") + exit(1) +unittest_program = sys.argv[1] +extra_args = [] +if len(sys.argv) > 2: + extra_args = [sys.argv[2]] + + +test_cases = [] +for line in sys.stdin: + if len(line.strip()) == 0: + continue + splits = line.rsplit('\t', 1) + test_cases.append(splits[0]) + +test_count = len(test_cases) +return_code = 0 +for test_number in range(test_count): + sys.stdout.write("[" + str(test_number) + "/" + str(test_count) + "]: " + test_cases[test_number]) + sys.stdout.flush() + res = subprocess.run([unittest_program, test_cases[test_number]], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout = res.stdout.decode('utf8') + stderr = res.stderr.decode('utf8') + if res.returncode is not None and res.returncode != 0: + print("FAILURE IN RUNNING TEST") + print( + """-------------------- +RETURNCODE +-------------------- +""" + ) + print(res.returncode) + print( + """-------------------- +STDOUT +-------------------- +""" + ) + print(stdout) + print( + """-------------------- +STDERR +-------------------- +""" + ) + print(stderr) + return_code = 1 + if not no_exit: + break + +exit(return_code) diff --git a/scripts/runsqlsmith.py b/scripts/runsqlsmith.py new file mode 100644 index 0000000..9569750 --- /dev/null +++ b/scripts/runsqlsmith.py @@ -0,0 +1,52 @@ +# run SQL smith and collect breaking queries +import os +import re +import subprocess +import sys +import sqlite3 +from python_helpers import open_utf8 + +sqlsmith_db = 'sqlsmith.db' +sqlsmith_test_dir = 'test/sqlsmith/queries' + +export_queries = False + +con = sqlite3.connect(sqlsmith_db) +c = con.cursor() + +if len(sys.argv) == 2: + if sys.argv[1] == '--export': + export_queries = True + elif sys.argv[1] == '--reset': + c.execute('DROP TABLE IF EXISTS sqlsmith_errors') + else: + print('Unknown query option ' + sys.argv[1]) + exit(1) + +if export_queries: + c.execute('SELECT query FROM sqlsmith_errors') + results = c.fetchall() + for fname in os.listdir(sqlsmith_test_dir): + os.remove(os.path.join(sqlsmith_test_dir, fname)) + + for i in range(len(results)): + with open(os.path.join(sqlsmith_test_dir, 'sqlsmith-%d.sql' % (i + 1)), 'w+') as f: + f.write(results[i][0] + "\n") + exit(0) + + +def run_sqlsmith(): + subprocess.call(['build/debug/third_party/sqlsmith/sqlsmith', '--duckdb=:memory:']) + + +c.execute('CREATE TABLE IF NOT EXISTS sqlsmith_errors(query VARCHAR)') + +while True: + # run SQL smith + run_sqlsmith() + # get the breaking query + with open_utf8('sqlsmith.log', 'r') as f: + text = re.sub('[ \t\n]+', ' ', f.read()) + + c.execute('INSERT INTO sqlsmith_errors VALUES (?)', (text,)) + con.commit() diff --git 
a/scripts/try_timeout.py b/scripts/try_timeout.py new file mode 100644 index 0000000..78bd321 --- /dev/null +++ b/scripts/try_timeout.py @@ -0,0 +1,48 @@ +import os +import sys +import subprocess +import threading + +if len(sys.argv) < 3: + print("Expected python3 scripts/try_timeout.py --timeout=[timeout] --retry=[retries] [cmd] [options...]") + print("Timeout should be given in seconds") + exit(1) + +timeout = int(sys.argv[1].replace("--timeout=", "")) +retries = int(sys.argv[2].replace("--retry=", "")) +cmd = sys.argv[3:] + + +class Command(object): + def __init__(self, cmd): + self.cmd = cmd + self.process = None + + def run(self, timeout): + self.process = None + + def target(): + self.process = subprocess.Popen(self.cmd) + self.process.communicate() + + thread = threading.Thread(target=target) + thread.start() + + thread.join(timeout) + if thread.is_alive(): + print('Terminating process: process exceeded timeout of ' + str(timeout) + ' seconds') + self.process.terminate() + thread.join() + if self.process is None: + return 1 + return self.process.returncode + + +for i in range(retries): + print("Attempting to run command \"" + ' '.join(cmd) + '"') + command = Command(cmd) + returncode = command.run(timeout) + if returncode == 0: + exit(0) + +exit(1) From d789c661081ab44cfe9988a905c8cbd5376e9c9e Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 4 Jul 2024 09:35:43 +0200 Subject: [PATCH 3/5] updated again --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 7b2cdc7..f108981 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 7b2cdc786bf64ed776941a3e4a65722941b957a6 +Subproject commit f1089810dfff9f560595a7662be3d1d8022bf665 From 4bc4c213717851157642d18488eb2347fc8d378d Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 4 Jul 2024 17:21:14 +0200 Subject: [PATCH 4/5] duckdb submodule update --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 71941d0..5be7060 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 71941d0417284dede04f623f45c1552f56515f4f +Subproject commit 5be70607225453016b36361f56c832517b1cdb8a From 13727e4b14692aa7002df3a4218b252f1b5e21b0 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 10 Jul 2024 13:18:26 +0200 Subject: [PATCH 5/5] Change MainDistributionPipeline.yml file to use shared credentials --- .../workflows/MainDistributionPipeline.yml | 10 +- .github/workflows/_extension_deploy.yml | 121 ------------------ 2 files changed, 6 insertions(+), 125 deletions(-) delete mode 100644 .github/workflows/_extension_deploy.yml diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index a88fc9c..1e26fae 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,15 +16,17 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.0.0 with: - duckdb_version: main + duckdb_version: v1.0.0 extension_name: sqlsmith + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' duckdb-stable-deploy: name: Deploy extension binaries needs: duckdb-stable-build - uses: ./.github/workflows/_extension_deploy.yml + uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.0.0 secrets: inherit with: - duckdb_version: main + duckdb_version: v1.0.0 extension_name: sqlsmith - deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || 
github.ref == 'refs/heads/main' }} + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' + deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} \ No newline at end of file diff --git a/.github/workflows/_extension_deploy.yml b/.github/workflows/_extension_deploy.yml deleted file mode 100644 index c408f90..0000000 --- a/.github/workflows/_extension_deploy.yml +++ /dev/null @@ -1,121 +0,0 @@ -# -# Reusable workflow that deploys the artifacts produced by github.com/duckdb/duckdb/.github/workflows/_extension_distribution.yml -# -# note: this workflow needs to be located in the extension repository, as it requires secrets to be passed to the -# deploy script. However, it should generally not be necessary to modify this workflow in your extension repository, as -# this workflow can be configured to use a custom deploy script. - - -name: Extension Deployment -on: - workflow_call: - inputs: - # The name of the extension - extension_name: - required: true - type: string - # DuckDB version to build against - duckdb_version: - required: true - type: string - # ';' separated list of architectures to exclude, for example: 'linux_amd64;osx_arm64' - exclude_archs: - required: false - type: string - default: "" - # Whether to upload this deployment as the latest. This may overwrite a previous deployment. - deploy_latest: - required: false - type: boolean - default: false - # Whether to upload this deployment under a versioned path. These will not be deleted automatically - deploy_versioned: - required: false - type: boolean - default: false - # Postfix added to artifact names. Can be used to guarantee unique names when this workflow is called multiple times - artifact_postfix: - required: false - type: string - default: "" - # Override the default deploy script with a custom script - deploy_script: - required: false - type: string - default: "./scripts/extension-upload.sh" - # Override the default matrix parse script with a custom script - matrix_parse_script: - required: false - type: string - default: "./duckdb/scripts/modify_distribution_matrix.py" - -jobs: - generate_matrix: - name: Generate matrix - runs-on: ubuntu-latest - outputs: - deploy_matrix: ${{ steps.parse-matrices.outputs.deploy_matrix }} - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - id: parse-matrices - run: | - python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --deploy_matrix --output deploy_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty - deploy_matrix="`cat deploy_matrix.json`" - echo deploy_matrix=$deploy_matrix >> $GITHUB_OUTPUT - echo `cat $GITHUB_OUTPUT` - - deploy: - name: Deploy - runs-on: ubuntu-latest - needs: generate_matrix - if: ${{ needs.generate_matrix.outputs.deploy_matrix != '{}' && needs.generate_matrix.outputs.deploy_matrix != '' }} - strategy: - matrix: ${{fromJson(needs.generate_matrix.outputs.deploy_matrix)}} - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - uses: actions/download-artifact@v2 - with: - name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}${{startsWith(matrix.duckdb, 'wasm') && '.wasm' || ''}} - path: | - /tmp/extension - 
- - name: Deploy - shell: bash - env: - AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEPLOY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEPLOY_KEY }} - AWS_DEFAULT_REGION: ${{ secrets.S3_REGION }} - BUCKET_NAME: ${{ secrets.S3_BUCKET }} - DUCKDB_EXTENSION_SIGNING_PK: ${{ secrets.S3_DUCKDB_ORG_EXTENSION_SIGNING_PK }} - run: | - pwd - python3 -m pip install pip awscli - git config --global --add safe.directory '*' - cd duckdb - git fetch --tags - export DUCKDB_VERSION=`git tag --points-at HEAD` - export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} - cd .. - git fetch --tags - export EXT_VERSION=`git tag --points-at HEAD` - export EXT_VERSION=${EXT_VERSION:=`git log -1 --format=%h`} - ${{ inputs.deploy_script }} ${{ inputs.extension_name }} $EXT_VERSION $DUCKDB_VERSION ${{ matrix.duckdb_arch }} $BUCKET_NAME ${{inputs.deploy_latest || 'true' && 'false'}} ${{inputs.deploy_versioned || 'true' && 'false'}}
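
Editor's note (not part of the patch series): the scripts added in patch 2 all follow one pattern — they feed SQL into a DuckDB shell build in batch mode, scrape stdout/stderr, and escalate to the reduce/file-issue path on failure. The sketch below is a minimal, hedged illustration of that pattern as `run_fuzzer.py` uses it. The shell path, seed, and query count are assumptions taken from the script defaults (`build/debug/duckdb`, a random seed, `max_queries = 2000`); the `sqlsmith(...)` call, log file names, and load script are quoted from the patch, but this snippet is only a sketch, not the CI entry point itself.

```python
# Minimal sketch of how the new fuzzer scripts drive the shell.
# Assumptions: a debug shell at build/debug/duckdb with the sqlsmith extension
# built in; 'sqlsmith.log' / 'sqlsmith.complete.log' are the log paths that
# run_fuzzer.py also uses.
import subprocess

SHELL = "build/debug/duckdb"   # assumed path, matches the scripts' default
SEED = 42                      # run_fuzzer.py normally picks a random seed
MAX_QUERIES = 2000             # default used by run_fuzzer.py

load_script = (
    "create table all_types as select * "
    "exclude(small_enum, medium_enum, large_enum) from test_all_types();"
)
fuzz_call = (
    f"call sqlsmith(max_queries={MAX_QUERIES}, seed={SEED}, verbose_output=1, "
    "log='sqlsmith.log', complete_log='sqlsmith.complete.log');"
)

# Same invocation style as run_shell_command() in run_fuzzer.py:
# batch mode, no init file, SQL fed through stdin.
res = subprocess.run(
    [SHELL, "--batch", "-init", "/dev/null"],
    input=(load_script + "\n" + fuzz_call).encode("utf8"),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

if res.returncode != 0:
    # A non-zero exit is what triggers the reproduce/reduce/file-issue path in
    # run_fuzzer.py (via reduce_sql.py and fuzzer_helper.py), starting from the
    # queries recorded in the log files.
    print(res.stderr.decode("utf8", "ignore"))
```

Running the generated SQL through a separate shell process, rather than an in-process connection, is what lets the scripts survive hard crashes and still recover the last logged query from `sqlsmith.log` for reduction.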