diff --git a/.github/actions/build_extensions/action.yml b/.github/actions/build_extensions/action.yml
index 30296866caa..4357cdf464d 100644
--- a/.github/actions/build_extensions/action.yml
+++ b/.github/actions/build_extensions/action.yml
@@ -116,17 +116,6 @@ runs:
ls -al
pwd
- - name: Fix for MSVC issue (see e.g. https://github.com/TileDB-Inc/TileDB/pull/4759)
- shell: bash
- if: inputs.deploy_as == 'windows_amd64'
- env:
- OVERLAY_TRIPLET_SRC: ${{ github.workspace }}/vcpkg/triplets/community/x64-windows-static-md.cmake
- OVERLAY_TRIPLET_DST: ${{ github.workspace }}/overlay_triplets/x64-windows-static-md.cmake
- run: |
- mkdir overlay_triplets
- cp $OVERLAY_TRIPLET_SRC $OVERLAY_TRIPLET_DST
- echo "set(VCPKG_PLATFORM_TOOLSET_VERSION "14.39")" >> $OVERLAY_TRIPLET_DST
-
- name: Set Openssl dir
if: inputs.openssl_path != ''
shell: bash
@@ -155,7 +144,6 @@ runs:
GEN: ${{ inputs.ninja == 1 && 'ninja' || '' }}
USE_MERGED_VCPKG_MANIFEST: 1
DUCKDB_PLATFORM: ${{ inputs.duckdb_arch }}
- VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/overlay_triplets"
run: |
ls
mkdir -p ~/.ssh
diff --git a/.github/config/bundled_extensions.cmake b/.github/config/bundled_extensions.cmake
index 12bd89a2820..dc9a0931078 100644
--- a/.github/config/bundled_extensions.cmake
+++ b/.github/config/bundled_extensions.cmake
@@ -25,5 +25,4 @@ duckdb_extension_load(autocomplete)
#
## Extensions that are not linked, but we do want to test them as part of the release build
#
-duckdb_extension_load(sqlsmith DONT_LINK)
duckdb_extension_load(tpcds DONT_LINK)
diff --git a/.github/config/in_tree_extensions.cmake b/.github/config/in_tree_extensions.cmake
index 151d62f8365..6b52aafe6d7 100644
--- a/.github/config/in_tree_extensions.cmake
+++ b/.github/config/in_tree_extensions.cmake
@@ -12,6 +12,5 @@ duckdb_extension_load(inet)
duckdb_extension_load(icu)
duckdb_extension_load(json)
duckdb_extension_load(parquet)
-duckdb_extension_load(sqlsmith)
duckdb_extension_load(tpcds)
duckdb_extension_load(tpch)
diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake
index f9b21d795a6..9d22632ff77 100644
--- a/.github/config/out_of_tree_extensions.cmake
+++ b/.github/config/out_of_tree_extensions.cmake
@@ -100,12 +100,18 @@ duckdb_extension_load(sqlite_scanner
GIT_TAG 50b7870be099186f195bc72bac5e9e11247ee2f9
)
+duckdb_extension_load(sqlsmith
+ GIT_URL https://github.com/duckdb/duckdb_sqlsmith
+ GIT_TAG 721460ff1f31ce1dc1e4a9c4a55c0faf0b466dcb
+ )
+
################# SUBSTRAIT
if (NOT WIN32)
duckdb_extension_load(substrait
LOAD_TESTS DONT_LINK
GIT_URL https://github.com/duckdb/substrait
GIT_TAG 237931391ebc7e6aee7aa81052fa1411f6c4128e
+ APPLY_PATCHES
)
endif()
diff --git a/.github/patches/extensions/substrait/pushdown_semi_anti.patch b/.github/patches/extensions/substrait/pushdown_semi_anti.patch
new file mode 100644
index 00000000000..6c4affbd966
--- /dev/null
+++ b/.github/patches/extensions/substrait/pushdown_semi_anti.patch
@@ -0,0 +1,17 @@
+diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp
+index 90f7a67..f252aa7 100644
+--- a/src/to_substrait.cpp
++++ b/src/to_substrait.cpp
+@@ -864,7 +864,11 @@ substrait::Rel *DuckDBToSubstrait::TransformComparisonJoin(LogicalOperator &dop)
+ auto left_col_count = dop.children[0]->types.size();
+ if (dop.children[0]->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
+ auto child_join = (LogicalComparisonJoin *)dop.children[0].get();
+- left_col_count = child_join->left_projection_map.size() + child_join->right_projection_map.size();
++ if (child_join->join_type != JoinType::SEMI && child_join->join_type != JoinType::ANTI) {
++ left_col_count = child_join->left_projection_map.size() + child_join->right_projection_map.size();
++ } else {
++ left_col_count = child_join->left_projection_map.size();
++ }
+ }
+ sjoin->set_allocated_expression(
+ CreateConjunction(djoin.conditions, [&](JoinCondition &in) { return TransformJoinCond(in, left_col_count); }));
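The patch above encodes a planner fact: a SEMI or ANTI join emits only the left side's columns, so a parent join computing its left column offset must not count the child's right projection map. A hedged, self-contained sketch of that rule (the types below are illustrative stand-ins, not DuckDB's):

```cpp
// Sketch only: computes how many columns a child join exposes to its parent.
#include <cstddef>
#include <vector>

enum class JoinType { INNER, SEMI, ANTI };

static size_t LeftColumnCount(JoinType type, const std::vector<int> &left_projection_map,
                              const std::vector<int> &right_projection_map) {
	// SEMI and ANTI joins only filter the left side; none of the right
	// side's columns survive, so they must not be counted.
	if (type == JoinType::SEMI || type == JoinType::ANTI) {
		return left_projection_map.size();
	}
	return left_projection_map.size() + right_projection_map.size();
}
```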
diff --git a/.github/regression/micro_extended.csv b/.github/regression/micro_extended.csv
index 7a711a978a8..b87e107bdf7 100644
--- a/.github/regression/micro_extended.csv
+++ b/.github/regression/micro_extended.csv
@@ -129,6 +129,7 @@ benchmark/micro/join/asof_join.benchmark
benchmark/micro/join/asof_join_small_probe.benchmark
benchmark/micro/join/blockwise_nl_join.benchmark
benchmark/micro/join/delim_join_no_blowup.benchmark
+benchmark/micro/join/hashjoin_dups_rhs.benchmark
benchmark/micro/join/hashjoin_highcardinality.benchmark
benchmark/micro/join/hashjoin_lhsarithmetic.benchmark
benchmark/micro/join/iejoin_employees.benchmark
diff --git a/.github/workflows/LinuxRelease.yml b/.github/workflows/LinuxRelease.yml
index 1ab848d53ff..27579b67ad8 100644
--- a/.github/workflows/LinuxRelease.yml
+++ b/.github/workflows/LinuxRelease.yml
@@ -167,6 +167,10 @@ jobs:
ccache: 1
aarch64_cross_compile: 1
+ - name: Checkout (again)
+ shell: bash
+ run: git checkout ${{ inputs.git_ref }}
+
- name: Install unixODBC
shell: bash
run: | # we need an x86 odbc_config tool to run cmake. fun.
@@ -221,6 +225,10 @@ jobs:
openssl: 1
ccache: 1
+ - name: Checkout (again)
+ shell: bash
+ run: git checkout ${{ inputs.git_ref }}
+
- uses: ./.github/actions/build_extensions
with:
vcpkg_target_triplet: x64-linux
@@ -261,6 +269,10 @@ jobs:
aarch64_cross_compile: 1
ccache: 1
+ - name: Checkout (again)
+ shell: bash
+ run: git checkout ${{ inputs.git_ref }}
+
- uses: ./.github/actions/build_extensions
with:
vcpkg_target_triplet: arm64-linux
diff --git a/.github/workflows/OnTag.yml b/.github/workflows/OnTag.yml
index a88b6eb3df6..b94e7a05bda 100644
--- a/.github/workflows/OnTag.yml
+++ b/.github/workflows/OnTag.yml
@@ -13,10 +13,10 @@ jobs:
uses: ./.github/workflows/TwineUpload.yml
secrets: inherit
with:
- override_git_describe: ${{ inputs.override_git_describe || github.event.release.tag_name }}
+ override_git_describe: ${{ inputs.override_git_describe || github.ref_name }}
staged_upload:
uses: ./.github/workflows/StagedUpload.yml
secrets: inherit
with:
- override_git_describe: ${{ inputs.override_git_describe || github.event.release.tag_name }}
+ override_git_describe: ${{ inputs.override_git_describe || github.ref_name }}
diff --git a/.github/workflows/Windows.yml b/.github/workflows/Windows.yml
index 664d209a847..c34e53ca8e9 100644
--- a/.github/workflows/Windows.yml
+++ b/.github/workflows/Windows.yml
@@ -60,6 +60,7 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
+ ref: ${{ inputs.git_ref }}
- uses: actions/setup-python@v5
with:
@@ -189,6 +190,7 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
+ ref: ${{ inputs.git_ref }}
- uses: actions/setup-python@v5
with:
@@ -227,6 +229,8 @@ jobs:
needs: win-release-64
steps:
- uses: actions/checkout@v3
+ with:
+ ref: ${{ inputs.git_ref }}
- uses: msys2/setup-msys2@v2
with:
msystem: MINGW64
@@ -276,6 +280,7 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
+ ref: ${{ inputs.git_ref }}
- uses: actions/setup-python@v5
with:
diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml
index bfb79998329..93a37770c52 100644
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -50,7 +50,6 @@ jobs:
BUILD_HTTPFS: 1
BUILD_JSON: 1
BUILD_INET: 1
- BUILD_SQLSMITH: 1
- name: Upload the result
run: |
diff --git a/.sanitizer-thread-suppressions.txt b/.sanitizer-thread-suppressions.txt
index 3ff0939b380..5d3c828970b 100644
--- a/.sanitizer-thread-suppressions.txt
+++ b/.sanitizer-thread-suppressions.txt
@@ -1,5 +1,6 @@
deadlock:InitializeIndexes
race:NextInnerJoin
+race:NextRightSemiOrAntiJoin
race:duckdb_moodycamel
race:duckdb_jemalloc
race:AddToEvictionQueue
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ec66b571939..fa0f5eeb3dc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -269,7 +269,7 @@ if (OVERRIDE_GIT_DESCRIBE)
if (GIT_RESULT)
message(WARNING "git is available (at ${GIT_EXECUTABLE}) but has failed to execute 'log -1 --format=%h'. Consider providing explicit GIT_COMMIT_HASH")
set(GIT_DESCRIBE "${OVERRIDE_GIT_DESCRIBE}-0-g0123456789")
- endif()
+ endif()
else()
set(GIT_DESCRIBE "${OVERRIDE_GIT_DESCRIBE}-0-g0123456789")
endif()
@@ -289,7 +289,7 @@ else()
if (GIT_RESULT)
message(WARNING "git is available (at ${GIT_EXECUTABLE}) but has failed to execute 'log -1 --format=%h'. Consider providing explicit GIT_COMMIT_HASH or OVERRIDE_GIT_DESCRIBE")
set(GIT_COMMIT_HASH "0123456789")
- endif()
+ endif()
endif()
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --long
@@ -499,6 +499,37 @@ else()
endif()
endif()
+function(is_number input_string return_var)
+ if("${input_string}" MATCHES "^[0-9]+$")
+ set(${return_var} TRUE PARENT_SCOPE)
+ else()
+ set(${return_var} FALSE PARENT_SCOPE)
+ endif()
+endfunction()
+
+set(STANDARD_VECTOR_SIZE "" CACHE STRING "Set a custom STANDARD_VECTOR_SIZE at compile time")
+set(BLOCK_ALLOC_SIZE "" CACHE STRING "Set a custom BLOCK_ALLOC_SIZE at compile time")
+
+if(DEFINED STANDARD_VECTOR_SIZE AND NOT STANDARD_VECTOR_SIZE STREQUAL "")
+ is_number(${STANDARD_VECTOR_SIZE} is_number_result)
+ if(is_number_result)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTANDARD_VECTOR_SIZE=${STANDARD_VECTOR_SIZE}")
+ message(STATUS "STANDARD_VECTOR_SIZE is set to ${STANDARD_VECTOR_SIZE}")
+ else()
+ message(FATAL_ERROR "STANDARD_VECTOR_SIZE must be a number, not ${STANDARD_VECTOR_SIZE}")
+ endif()
+endif()
+
+if(DEFINED BLOCK_ALLOC_SIZE AND NOT BLOCK_ALLOC_SIZE STREQUAL "")
+ is_number(${BLOCK_ALLOC_SIZE} is_number_result)
+ if(is_number_result)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDUCKDB_BLOCK_ALLOC_SIZE=${BLOCK_ALLOC_SIZE}")
+ message(STATUS "BLOCK_ALLOC_SIZE is set to ${BLOCK_ALLOC_SIZE}")
+ else()
+ message(FATAL_ERROR "BLOCK_ALLOC_SIZE must be a number, not ${BLOCK_ALLOC_SIZE}")
+ endif()
+endif()
+
if(CUSTOM_LINKER)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${CUSTOM_LINKER}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${CUSTOM_LINKER}")
@@ -537,7 +568,7 @@ if(NOT MSVC)
endif()
else()
set(CMAKE_CXX_WINDOWS_FLAGS
- "/wd4244 /wd4267 /wd4200 /wd26451 /wd26495 /D_CRT_SECURE_NO_WARNINGS /utf-8")
+ "/wd4244 /wd4267 /wd4200 /wd26451 /wd26495 /D_CRT_SECURE_NO_WARNINGS /utf-8 /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR")
if(TREAT_WARNINGS_AS_ERRORS)
set(CMAKE_CXX_WINDOWS_FLAGS "${CMAKE_CXX_WINDOWS_FLAGS} /WX")
endif()
@@ -1260,7 +1291,7 @@ if(BUILD_PYTHON)
DUCKDB_BINARY_DIR=${PROJECT_BINARY_DIR}
DUCKDB_COMPILE_FLAGS=${ALL_COMPILE_FLAGS}
DUCKDB_LIBS="${duckdb_libs}"
- )
+ )
if(PYTHON_EDITABLE_BUILD)
set(PIP_COMMAND ${PIP_COMMAND} python3 -m pip install --editable .)
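The new CMake logic only forwards `-DSTANDARD_VECTOR_SIZE=...` and `-DDUCKDB_BLOCK_ALLOC_SIZE=...` as compile definitions; the C++ side picks them up via the preprocessor. A minimal sketch of how such an override is typically consumed, assuming a 2048 default and a power-of-two constraint (both stated here as assumptions, not quoted from DuckDB's headers):

```cpp
// Illustrative consumption of the -DSTANDARD_VECTOR_SIZE=... define.
#include <cstdint>

#ifndef STANDARD_VECTOR_SIZE
#define STANDARD_VECTOR_SIZE 2048 // assumed default when no override is given
#endif

// Assumed constraint: vectorized operators rely on a power-of-two size.
static_assert(STANDARD_VECTOR_SIZE > 0 && (STANDARD_VECTOR_SIZE & (STANDARD_VECTOR_SIZE - 1)) == 0,
              "STANDARD_VECTOR_SIZE must be a power of two");

constexpr uint64_t kVectorSize = STANDARD_VECTOR_SIZE;
```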
diff --git a/Makefile b/Makefile
index 542e17d8dca..2769f00b396 100644
--- a/Makefile
+++ b/Makefile
@@ -131,9 +131,6 @@ endif
ifeq (${STATIC_OPENSSL}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DOPENSSL_USE_STATIC_LIBS=1
endif
-ifeq (${BUILD_SQLSMITH}, 1)
- BUILD_EXTENSIONS:=${BUILD_EXTENSIONS};sqlsmith
-endif
ifeq (${BUILD_TPCE}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DBUILD_TPCE=1
endif
@@ -238,6 +235,14 @@ ifdef DEBUG_STACKTRACE
CMAKE_VARS:=${CMAKE_VARS} -DDEBUG_STACKTRACE=1
endif
+# Optional overrides
+ifneq (${STANDARD_VECTOR_SIZE}, )
+ CMAKE_VARS:=${CMAKE_VARS} -DSTANDARD_VECTOR_SIZE=${STANDARD_VECTOR_SIZE}
+endif
+ifneq (${BLOCK_ALLOC_SIZE}, )
+ CMAKE_VARS:=${CMAKE_VARS} -DBLOCK_ALLOC_SIZE=${BLOCK_ALLOC_SIZE}
+endif
+
# Enable VCPKG for this build
ifneq ("${VCPKG_TOOLCHAIN_PATH}", "")
CMAKE_VARS_BUILD:=${CMAKE_VARS_BUILD} -DCMAKE_TOOLCHAIN_FILE='${VCPKG_TOOLCHAIN_PATH}' -DVCPKG_BUILD=1
diff --git a/README.md b/README.md
index 008093b62cb..d40913d46ca 100644
--- a/README.md
+++ b/README.md
@@ -14,12 +14,19 @@
## DuckDB
-DuckDB is a high-performance analytical database system. It is designed to be fast, reliable, portable, and easy to use. DuckDB provides a rich SQL dialect, with support far beyond basic SQL. DuckDB supports arbitrary and nested correlated subqueries, window functions, collations, complex types (arrays, structs), and more. For more information on using DuckDB, please refer to the [DuckDB documentation](https://duckdb.org/docs/).
+
+DuckDB is a high-performance analytical database system. It is designed to be fast, reliable, portable, and easy to use. DuckDB provides a rich SQL dialect, with support far beyond basic SQL. DuckDB supports arbitrary and nested correlated subqueries, window functions, collations, complex types (arrays, structs, maps), and [several extensions designed to make SQL easier to use](https://duckdb.org/docs/guides/sql_features/friendly_sql).
+
+DuckDB is available as a [standalone CLI application](https://duckdb.org/docs/api/cli/overview) and has clients for [Python](https://duckdb.org/docs/api/python/overview), [R](https://duckdb.org/docs/api/r), [Java](https://duckdb.org/docs/api/java), [Wasm](https://duckdb.org/docs/api/wasm/overview), etc., with deep integrations with packages such as [pandas](https://duckdb.org/docs/guides/python/sql_on_pandas) and [dplyr](https://duckdblabs.github.io/duckplyr/).
+
+For more information on using DuckDB, please refer to the [DuckDB documentation](https://duckdb.org/docs/).
## Installation
-If you want to install and use DuckDB, please see [our website](https://www.duckdb.org) for installation and usage instructions.
+
+If you want to install DuckDB, please see [our installation page](https://www.duckdb.org/docs/installation) for instructions.
## Data Import
+
For CSV files and Parquet files, data import is as simple as referencing the file in the FROM clause:
```sql
@@ -30,12 +37,15 @@ SELECT * FROM 'myfile.parquet';
Refer to our [Data Import](https://duckdb.org/docs/data/overview) section for more information.
## SQL Reference
-The [website](https://duckdb.org/docs/sql/introduction) contains a reference of functions and SQL constructs available in DuckDB.
+
+The documentation contains a [SQL introduction and reference](https://duckdb.org/docs/sql/introduction).
## Development
+
For development, DuckDB requires [CMake](https://cmake.org), Python3 and a `C++11` compliant compiler. Run `make` in the root directory to compile the sources. For development, use `make debug` to build a non-optimized debug version. You should run `make unit` and `make allunit` to verify that your version works properly after making changes. To test performance, you can run `BUILD_BENCHMARK=1 BUILD_TPCH=1 make` and then perform several standard benchmarks from the root directory by executing `./build/release/benchmark/benchmark_runner`. The details of benchmarks are in our [Benchmark Guide](benchmark/README.md).
Please also refer to our [Build Guide](https://duckdb.org/dev/building) and [Contribution Guide](CONTRIBUTING.md).
## Support
+
See the [Support Options](https://duckdblabs.com/support/) page.
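For readers embedding DuckDB rather than using the CLI, the README's one-liner data import works the same way through the C++ API. A minimal sketch (assumes the Parquet reader is available in the build; `myfile.parquet` is the README's placeholder file):

```cpp
// Minimal embedded usage sketch of the DuckDB C++ API.
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr); // in-memory database
	duckdb::Connection con(db);

	auto result = con.Query("SELECT * FROM 'myfile.parquet'");
	if (result->HasError()) {
		return 1;
	}
	result->Print();
	return 0;
}
```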
diff --git a/benchmark/benchmark_runner.cpp b/benchmark/benchmark_runner.cpp
index 04549b0c3ab..65f5b9af787 100644
--- a/benchmark/benchmark_runner.cpp
+++ b/benchmark/benchmark_runner.cpp
@@ -59,10 +59,12 @@ atomic<bool> is_active;
atomic<bool> timeout;
void sleep_thread(Benchmark *benchmark, BenchmarkRunner *runner, BenchmarkState *state, bool hotrun,
- int timeout_duration) {
- if (timeout_duration < 0) {
+ const optional_idx &optional_timeout) {
+ if (!optional_timeout.IsValid()) {
return;
}
+ auto timeout_duration = optional_timeout.GetIndex();
+
// timeout is given in seconds
// we wait 10ms per iteration, so timeout * 100 gives us the amount of
// iterations
@@ -130,7 +132,8 @@ void BenchmarkRunner::RunBenchmark(Benchmark *benchmark) {
}
is_active = true;
timeout = false;
- std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun, benchmark->Timeout());
+ std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun,
+ benchmark->Timeout(configuration));
profiler.Start();
benchmark->Run(state.get());
@@ -183,6 +186,8 @@ void print_help() {
fprintf(stderr, " --query Prints query of the benchmark\n");
fprintf(stderr, " --root-dir Sets the root directory for where to store temp data and "
"look for the 'benchmarks' directory\n");
+ fprintf(stderr, " --disable-timeout Disables killing the run after a certain amount of time has "
+ "passed (30 seconds by default)\n");
fprintf(stderr,
" [name_pattern] Run only the benchmark which names match the specified name pattern, "
"e.g., DS.* for TPC-DS benchmarks\n");
@@ -253,6 +258,8 @@ void parse_arguments(const int arg_counter, char const *const *arg_values) {
} else if (arg == "--query") {
// write group of benchmark
instance.configuration.meta = BenchmarkMetaType::QUERY;
+ } else if (arg == "--disable-timeout") {
+ instance.configuration.timeout_duration = optional_idx();
} else if (StringUtil::StartsWith(arg, "--out=") || StringUtil::StartsWith(arg, "--log=")) {
auto splits = StringUtil::Split(arg, '=');
if (splits.size() != 2) {
diff --git a/benchmark/include/benchmark.hpp b/benchmark/include/benchmark.hpp
index b1d8efe660c..ece2dda3d3d 100644
--- a/benchmark/include/benchmark.hpp
+++ b/benchmark/include/benchmark.hpp
@@ -29,8 +29,6 @@ struct BenchmarkState {
//! new benchmarks
class Benchmark {
constexpr static size_t DEFAULT_NRUNS = 5;
- constexpr static size_t DEFAULT_TIMEOUT = 30;
-
Benchmark(Benchmark &) = delete;
public:
@@ -87,8 +85,8 @@ class Benchmark {
return DEFAULT_NRUNS;
}
//! The timeout for this benchmark (in seconds)
- virtual size_t Timeout() {
- return DEFAULT_TIMEOUT;
+ virtual optional_idx Timeout(const BenchmarkConfiguration &config) {
+ return config.timeout_duration;
}
};
diff --git a/benchmark/include/benchmark_configuration.hpp b/benchmark/include/benchmark_configuration.hpp
index f8ed43de8d1..02faf77c5ca 100644
--- a/benchmark/include/benchmark_configuration.hpp
+++ b/benchmark/include/benchmark_configuration.hpp
@@ -12,6 +12,7 @@
#include "duckdb/common/string.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/helper.hpp"
+#include "duckdb/common/optional_idx.hpp"
namespace duckdb {
@@ -19,9 +20,14 @@ enum class BenchmarkMetaType { NONE, INFO, QUERY };
enum class BenchmarkProfileInfo { NONE, NORMAL, DETAILED };
struct BenchmarkConfiguration {
+public:
+ constexpr static size_t DEFAULT_TIMEOUT = 30;
+
+public:
string name_pattern {};
BenchmarkMetaType meta = BenchmarkMetaType::NONE;
BenchmarkProfileInfo profile_info = BenchmarkProfileInfo::NONE;
+ optional_idx timeout_duration = optional_idx(DEFAULT_TIMEOUT);
};
} // namespace duckdb
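Taken together, these changes replace the fixed 30-second constant with an `optional_idx` that flows from the configuration: a valid value carries the timeout in seconds, while an invalid one (set by `--disable-timeout`) makes the interrupt thread return immediately. A small sketch of the resulting check; `ShouldInterrupt` is an illustrative helper, not part of the runner:

```cpp
// Sketch of the new timeout semantics, reusing duckdb::optional_idx as the diff does.
#include "duckdb/common/optional_idx.hpp"

using duckdb::idx_t;
using duckdb::optional_idx;

static bool ShouldInterrupt(const optional_idx &timeout_duration, idx_t elapsed_seconds) {
	if (!timeout_duration.IsValid()) {
		return false; // --disable-timeout: never interrupt the run
	}
	return elapsed_seconds >= timeout_duration.GetIndex();
}
```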
diff --git a/benchmark/micro/append_mix.cpp b/benchmark/micro/append_mix.cpp
index 13446f79499..33801e9f657 100644
--- a/benchmark/micro/append_mix.cpp
+++ b/benchmark/micro/append_mix.cpp
@@ -62,7 +62,7 @@ using namespace duckdb;
string BenchmarkInfo() override { \
return "Append 10M rows to a table using an Appender"; \
} \
- size_t Timeout() override { \
+ optional_idx Timeout(const BenchmarkConfiguration &config) override { \
return 600; \
}
diff --git a/benchmark/micro/join/hashjoin_dups_rhs.benchmark b/benchmark/micro/join/hashjoin_dups_rhs.benchmark
new file mode 100644
index 00000000000..9b9ef9cdc49
--- /dev/null
+++ b/benchmark/micro/join/hashjoin_dups_rhs.benchmark
@@ -0,0 +1,16 @@
+# name: benchmark/micro/join/hashjoin_dups_rhs.benchmark
+# description: Inner hash join using string comparisons with 4x duplicates on the lhs and 4096x duplicates on the rhs
+# group: [join]
+
+name Inner Join (dups on rhs)
+group join
+
+load
+create table t1 as select 'verylargestring' || range % 32768 i from range(131072);
+create table t2 as select 'verylargestring' || range % 32768 i from range(134217728);
+
+run
+select count(*) from t1 join t2 using (i)
+
+result I
+536870912
\ No newline at end of file
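The expected result follows from the cardinalities in the `load` block: t1 has 131072 rows over 32768 distinct keys (4x duplicates) and t2 has 134217728 rows over the same keys (4096x duplicates), so each t1 row matches 4096 t2 rows. A quick arithmetic check:

```cpp
// Sanity check of the benchmark's expected cardinality.
#include <cassert>
#include <cstdint>

int main() {
	const uint64_t t1_rows = 131072;    // 131072 / 32768 = 4x duplicates
	const uint64_t t2_rows = 134217728; // 134217728 / 32768 = 4096x duplicates
	const uint64_t keys = 32768;
	const uint64_t expected = t1_rows * (t2_rows / keys);
	assert(expected == 536870912ULL); // matches the benchmark's result line
	return 0;
}
```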
diff --git a/data/json/12188.ndjson b/data/json/12188.ndjson
new file mode 100644
index 00000000000..2c5b5e06f9b
--- /dev/null
+++ b/data/json/12188.ndjson
@@ -0,0 +1,2 @@
+{"field1": "value1", "field2": {"subfield1": "subvalue1"}}
+{"field1": "value2", "field2": {"subfield2": "subvalue2"}}
diff --git a/data/parquet-testing/parquet_with_json.parquet b/data/parquet-testing/parquet_with_json.parquet
new file mode 100644
index 00000000000..e37dfa0337f
Binary files /dev/null and b/data/parquet-testing/parquet_with_json.parquet differ
diff --git a/extension/delta/CMakeLists.txt b/extension/delta/CMakeLists.txt
new file mode 100644
index 00000000000..c2fa100c362
--- /dev/null
+++ b/extension/delta/CMakeLists.txt
@@ -0,0 +1,131 @@
+cmake_minimum_required(VERSION 2.8.12)
+include(ExternalProject)
+
+# Core config
+set(TARGET_NAME delta)
+
+set(EXTENSION_NAME ${TARGET_NAME}_extension)
+set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
+
+project(${TARGET_NAME})
+
+include_directories(src/include)
+
+set(EXTENSION_SOURCES src/delta_extension.cpp src/delta_functions.cpp
+ src/delta_utils.cpp src/functions/delta_scan.cpp)
+
+# Custom config TODO: figure out if we really need this?
+if(APPLE)
+ set(PLATFORM_LIBS
+ m
+ c
+ System
+ resolv
+ "-framework Corefoundation -framework SystemConfiguration -framework Security"
+ )
+elseif(UNIX)
+ set(PLATFORM_LIBS m c resolv)
+elseif(WIN32)
+ set(PLATFORM_LIBS ws2_32 userenv advapi32)
+else()
+ message(STATUS "UNKNOWN OS")
+endif()
+
+# Setup delta-kernel-rs dependency
+set(KERNEL_NAME delta_kernel)
+
+# Set default ExternalProject root directory
+set_directory_properties(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/rust)
+
+# Propagate arch to rust build for CI
+set(RUST_PLATFORM_TARGET "")
+if("${OS_NAME}" STREQUAL "linux")
+ if("${OS_ARCH}" STREQUAL "arm64")
+ set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
+ else()
+ set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu")
+ endif()
+elseif("${OS_NAME}" STREQUAL "osx")
+ # TODO: clean up upstream; we are not correctly setting OS_ARCH for cross
+ # compile
+ if("${OSX_BUILD_ARCH}" STREQUAL "arm64")
+ set(RUST_PLATFORM_TARGET "aarch64-apple-darwin")
+ elseif("${OSX_BUILD_ARCH}" STREQUAL "x86_64")
+ set(RUST_PLATFORM_TARGET "x86_64-apple-darwin")
+ elseif("${OS_ARCH}" STREQUAL "arm64")
+ set(RUST_PLATFORM_TARGET "aarch64-apple-darwin")
+ else()
+ set(RUST_PLATFORM_TARGET "x86_64-apple-darwin")
+ endif()
+endif()
+
+# Add rust_example as a CMake target
+ExternalProject_Add(
+ ${KERNEL_NAME}
+ GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
+ GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8
+ CONFIGURE_COMMAND ""
+ UPDATE_COMMAND ""
+ BUILD_IN_SOURCE 1
+ # Build debug build
+ BUILD_COMMAND cargo build --package delta_kernel_ffi --workspace
+ --all-features --target=${RUST_PLATFORM_TARGET}
+ # Build release build
+ COMMAND cargo build --package delta_kernel_ffi --workspace --all-features
+ --release --target=${RUST_PLATFORM_TARGET}
+ # Build DATs
+ COMMAND
+ cargo build
+ --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
+ BUILD_BYPRODUCTS
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
+ BUILD_BYPRODUCTS
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
+ BUILD_BYPRODUCTS
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h"
+ BUILD_BYPRODUCTS
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp"
+ INSTALL_COMMAND ""
+ LOG_BUILD ON)
+
+build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
+build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
+
+include_directories(
+ ${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers)
+include_directories(
+ ${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers)
+
+# Hides annoying linker warnings
+set(CMAKE_OSX_DEPLOYMENT_TARGET
+ 13.3
+ CACHE STRING "Minimum OS X deployment version" FORCE)
+
+# Add the default client
+add_compile_definitions(DEFINE_DEFAULT_ENGINE)
+
+# Link delta-kernel-rs to static lib
+target_link_libraries(
+ ${EXTENSION_NAME}
+ debug
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
+ optimized
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
+ ${PLATFORM_LIBS})
+add_dependencies(${EXTENSION_NAME} delta_kernel)
+
+# Link delta-kernel-rs to dynamic lib
+target_link_libraries(
+ ${LOADABLE_EXTENSION_NAME}
+ debug
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
+ optimized
+ "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
+ ${PLATFORM_LIBS})
+add_dependencies(${LOADABLE_EXTENSION_NAME} delta_kernel)
+
+install(
+ TARGETS ${EXTENSION_NAME}
+ EXPORT "${DUCKDB_EXPORT_SET}"
+ LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
+ ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
diff --git a/extension/delta/Makefile b/extension/delta/Makefile
new file mode 100644
index 00000000000..05db9579f22
--- /dev/null
+++ b/extension/delta/Makefile
@@ -0,0 +1,25 @@
+PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+
+# Configuration of extension
+EXT_NAME=deltatable
+EXT_CONFIG=${PROJ_DIR}extension_config.cmake
+
+# Set test paths
+test_release: export DELTA_KERNEL_TESTS_PATH=./build/release/rust/src/delta_kernel/kernel/tests/data
+test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/tests/dat
+
+test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data
+test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat
+
+# Include the Makefile from extension-ci-tools
+include extension-ci-tools/makefiles/duckdb_extension.Makefile
+
+reldebug:
+ mkdir -p build/reldebug && \
+ cmake $(GENERATOR) $(BUILD_FLAGS) $(EXT_RELEASE_FLAGS) -DCMAKE_BUILD_TYPE=RelWithDebInfo -S ./duckdb/ -B build/reldebug && \
+ cmake --build build/reldebug --config RelWithDebInfo
+
+# Generate some test data to test with
+generate-data:
+ python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta
+ python3 scripts/generate_test_data.py
diff --git a/extension/delta/README.md b/extension/delta/README.md
new file mode 100644
index 00000000000..910c002b7dd
--- /dev/null
+++ b/extension/delta/README.md
@@ -0,0 +1,71 @@
+# DuckDB Delta Extension
+This is the experimental DuckDB extension for [Delta](https://delta.io/). It is built using the (also experimental)
+[Delta Kernel](https://github.com/delta-incubator/delta-kernel-rs). The extension (currently) offers **read** support for delta
+tables, both local and remote.
+
+# Supported platforms
+The supported platforms are:
+- `linux_amd64` and `linux_amd64_gcc4`
+- `osx_amd64` and `osx_arm64`
+
+Support for the [other](https://duckdb.org/docs/extensions/working_with_extensions#platforms) DuckDB platforms is
+work-in-progress.
+
+# How to use
+**NOTE: this extension requires DuckDB v0.10.3 or higher**
+
+This extension is distributed as a binary extension. To use it, simply use one of its functions from DuckDB and the extension will be autoloaded:
+```SQL
+FROM delta_scan('s3://some/delta/table');
+```
+
+Note that using DuckDB [Secrets](https://duckdb.org/docs/configuration/secrets_manager.html) for S3 authentication is supported:
+
+```SQL
+CREATE SECRET (TYPE S3, provider credential_chain);
+FROM delta_scan('s3://some/delta/table/with/auth');
+```
+
+To scan a local table, use the full path, prefixed with `file://`:
+```SQL
+FROM delta_scan('file:///some/path/on/local/machine');
+```
+
+# Features
+While still experimental, many (scanning) features/optimizations are already supported in this extension as it reuses most of DuckDB's
+regular parquet scanning logic:
+- multithreaded scans and parquet metadata reading
+- data skipping/filter pushdown
+ - skipping row-groups within a file (based on parquet metadata)
+ - skipping complete files (based on delta partition info)
+- projection pushdown
+- scanning tables with deletion vectors
+- all primitive types
+- structs
+- S3 support with secrets
+
+More features coming soon!
+
+# Building
+See the [Extension Template](https://github.com/duckdb/extension-template) for generic build instructions
+
+# Running tests
+There are various tests available for the delta extension:
+1. Delta Acceptance Test (DAT) based tests in `/test/sql/dat`
+2. delta-kernel-rs based tests in `/test/sql/delta_kernel_rs`
+3. Generated data based tests in `tests/sql/generated` (generated using [delta-rs](https://delta-io.github.io/delta-rs/), [PySpark](https://spark.apache.org/docs/latest/api/python/index.html), and DuckDB)
+
+To run the first 2 sets of tests:
+```shell
+make test_debug
+```
+or in release mode
+```shell
+make test
+```
+
+To also run the tests on generated data:
+```shell
+make generate-data
+GENERATED_DATA_AVAILABLE=1 make test
+```
\ No newline at end of file
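The same `delta_scan` call can be issued from an embedding application. A hedged sketch via the C++ API, assuming the delta extension is installed and autoloading is enabled; the S3 path is the README's placeholder:

```cpp
// Embedded usage sketch of delta_scan through the DuckDB C++ API.
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	auto result = con.Query("FROM delta_scan('s3://some/delta/table')");
	if (result->HasError()) {
		return 1;
	}
	result->Print();
	return 0;
}
```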
diff --git a/extension/delta/extension_config.cmake b/extension/delta/extension_config.cmake
new file mode 100644
index 00000000000..46e7a27b769
--- /dev/null
+++ b/extension/delta/extension_config.cmake
@@ -0,0 +1,14 @@
+# This file is included by DuckDB's build system. It specifies which extension to load
+
+# Extension from this repo
+duckdb_extension_load(delta
+ SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
+ LOAD_TESTS
+)
+
+# Build the httpfs extension to test with s3/http
+duckdb_extension_load(httpfs)
+
+# Build the tpch and tpcds extension for testing/benchmarking
+duckdb_extension_load(tpch)
+duckdb_extension_load(tpcds)
diff --git a/extension/delta/src/delta_extension.cpp b/extension/delta/src/delta_extension.cpp
new file mode 100644
index 00000000000..e1f464a6b05
--- /dev/null
+++ b/extension/delta/src/delta_extension.cpp
@@ -0,0 +1,43 @@
+#define DUCKDB_EXTENSION_MAIN
+
+#include "delta_extension.hpp"
+#include "delta_functions.hpp"
+
+#include "duckdb.hpp"
+#include "duckdb/common/exception.hpp"
+#include "duckdb/main/extension_util.hpp"
+
+namespace duckdb {
+
+static void LoadInternal(DatabaseInstance &instance) {
+ // Load functions
+ for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) {
+ ExtensionUtil::RegisterFunction(instance, function);
+ }
+}
+
+void DeltaExtension::Load(DuckDB &db) {
+ LoadInternal(*db.instance);
+}
+
+std::string DeltaExtension::Name() {
+ return "delta";
+}
+
+} // namespace duckdb
+
+extern "C" {
+
+DUCKDB_EXTENSION_API void delta_init(duckdb::DatabaseInstance &db) {
+ duckdb::DuckDB db_wrapper(db);
+ db_wrapper.LoadExtension();
+}
+
+DUCKDB_EXTENSION_API const char *delta_version() {
+ return duckdb::DuckDB::LibraryVersion();
+}
+}
+
+#ifndef DUCKDB_EXTENSION_MAIN
+#error DUCKDB_EXTENSION_MAIN not defined
+#endif
diff --git a/extension/delta/src/delta_functions.cpp b/extension/delta/src/delta_functions.cpp
new file mode 100644
index 00000000000..da80b05b4f3
--- /dev/null
+++ b/extension/delta/src/delta_functions.cpp
@@ -0,0 +1,17 @@
+#include "delta_functions.hpp"
+
+#include "duckdb.hpp"
+#include "duckdb/main/extension_util.hpp"
+#include <string>
+
+namespace duckdb {
+
+vector<TableFunctionSet> DeltaFunctions::GetTableFunctions(DatabaseInstance &instance) {
+ vector<TableFunctionSet> functions;
+
+ functions.push_back(GetDeltaScanFunction(instance));
+
+ return functions;
+}
+
+}; // namespace duckdb
diff --git a/extension/delta/src/delta_utils.cpp b/extension/delta/src/delta_utils.cpp
new file mode 100644
index 00000000000..5f909f19f79
--- /dev/null
+++ b/extension/delta/src/delta_utils.cpp
@@ -0,0 +1,322 @@
+#include "delta_utils.hpp"
+
+#include "duckdb.hpp"
+#include "duckdb/main/extension_util.hpp"
+#include <iostream>
+
+namespace duckdb {
+
+unique_ptr<SchemaVisitor::FieldList> SchemaVisitor::VisitSnapshotSchema(ffi::SharedSnapshot *snapshot) {
+ SchemaVisitor state;
+ ffi::EngineSchemaVisitor visitor;
+
+ visitor.data = &state;
+ visitor.make_field_list = (uintptr_t(*)(void *, uintptr_t)) & MakeFieldList;
+ visitor.visit_struct = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uintptr_t)) & VisitStruct;
+ visitor.visit_array = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) & VisitArray;
+ visitor.visit_map = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) & VisitMap;
+ visitor.visit_decimal = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uint8_t, uint8_t)) & VisitDecimal;
+ visitor.visit_string = VisitSimpleType<LogicalType::VARCHAR>();
+ visitor.visit_long = VisitSimpleType<LogicalType::BIGINT>();
+ visitor.visit_integer = VisitSimpleType<LogicalType::INTEGER>();
+ visitor.visit_short = VisitSimpleType<LogicalType::SMALLINT>();
+ visitor.visit_byte = VisitSimpleType<LogicalType::TINYINT>();
+ visitor.visit_float = VisitSimpleType<LogicalType::FLOAT>();
+ visitor.visit_double = VisitSimpleType<LogicalType::DOUBLE>();
+ visitor.visit_boolean = VisitSimpleType<LogicalType::BOOLEAN>();
+ visitor.visit_binary = VisitSimpleType<LogicalType::BLOB>();
+ visitor.visit_date = VisitSimpleType<LogicalType::DATE>();
+ visitor.visit_timestamp = VisitSimpleType<LogicalType::TIMESTAMP_TZ>();
+ visitor.visit_timestamp_ntz = VisitSimpleType<LogicalType::TIMESTAMP>();
+
+ uintptr_t result = visit_schema(snapshot, &visitor);
+ return state.TakeFieldList(result);
+}
+
+void SchemaVisitor::VisitDecimal(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ uint8_t precision, uint8_t scale) {
+ state->AppendToList(sibling_list_id, name, LogicalType::DECIMAL(precision, scale));
+}
+
+uintptr_t SchemaVisitor::MakeFieldList(SchemaVisitor *state, uintptr_t capacity_hint) {
+ return state->MakeFieldListImpl(capacity_hint);
+}
+
+void SchemaVisitor::VisitStruct(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ uintptr_t child_list_id) {
+ auto children = state->TakeFieldList(child_list_id);
+ state->AppendToList(sibling_list_id, name, LogicalType::STRUCT(std::move(*children)));
+}
+
+void SchemaVisitor::VisitArray(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ bool contains_null, uintptr_t child_list_id) {
+ auto children = state->TakeFieldList(child_list_id);
+
+ D_ASSERT(children->size() == 1);
+ state->AppendToList(sibling_list_id, name, LogicalType::LIST(children->front().second));
+}
+
+void SchemaVisitor::VisitMap(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ bool contains_null, uintptr_t child_list_id) {
+ auto children = state->TakeFieldList(child_list_id);
+
+ D_ASSERT(children->size() == 2);
+ state->AppendToList(sibling_list_id, name, LogicalType::MAP(LogicalType::STRUCT(std::move(*children))));
+}
+
+uintptr_t SchemaVisitor::MakeFieldListImpl(uintptr_t capacity_hint) {
+ uintptr_t id = next_id++;
+ auto list = make_uniq<FieldList>();
+ if (capacity_hint > 0) {
+ list->reserve(capacity_hint);
+ }
+ inflight_lists.emplace(id, std::move(list));
+ return id;
+}
+
+void SchemaVisitor::AppendToList(uintptr_t id, ffi::KernelStringSlice name, LogicalType &&child) {
+ auto it = inflight_lists.find(id);
+ if (it == inflight_lists.end()) {
+ // TODO... some error...
+ throw InternalException("WEIRD SHIT");
+ } else {
+ it->second->emplace_back(std::make_pair(string(name.ptr, name.len), std::move(child)));
+ }
+}
+
+unique_ptr<SchemaVisitor::FieldList> SchemaVisitor::TakeFieldList(uintptr_t id) {
+ auto it = inflight_lists.find(id);
+ if (it == inflight_lists.end()) {
+ // TODO: Raise some kind of error.
+ throw InternalException("WEIRD SHIT 2");
+ }
+ auto rval = std::move(it->second);
+ inflight_lists.erase(it);
+ return rval;
+}
+
+ffi::EngineError *DuckDBEngineError::AllocateError(ffi::KernelError etype, ffi::KernelStringSlice msg) {
+ auto error = new DuckDBEngineError;
+ error->etype = etype;
+ error->error_message = string(msg.ptr, msg.len);
+ return error;
+}
+
+string DuckDBEngineError::KernelErrorEnumToString(ffi::KernelError err) {
+ const char *KERNEL_ERROR_ENUM_STRINGS[] = {
+ "UnknownError",
+ "FFIError",
+ "ArrowError",
+ "EngineDataTypeError",
+ "ExtractError",
+ "GenericError",
+ "IOErrorError",
+ "ParquetError",
+ "ObjectStoreError",
+ "ObjectStorePathError",
+ "Reqwest",
+ "FileNotFoundError",
+ "MissingColumnError",
+ "UnexpectedColumnTypeError",
+ "MissingDataError",
+ "MissingVersionError",
+ "DeletionVectorError",
+ "InvalidUrlError",
+ "MalformedJsonError",
+ "MissingMetadataError",
+ "MissingProtocolError",
+ "MissingMetadataAndProtocolError",
+ "ParseError",
+ "JoinFailureError",
+ "Utf8Error",
+ "ParseIntError",
+ "InvalidColumnMappingMode",
+ "InvalidTableLocation",
+ "InvalidDecimalError",
+ };
+
+ static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::InvalidDecimalError,
+ "KernelErrorEnumStrings mismatched with kernel");
+
+ if ((int)err < sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *)) {
+ return KERNEL_ERROR_ENUM_STRINGS[(int)err];
+ }
+
+ return StringUtil::Format("EnumOutOfRange (enum val out of range: %d)", (int)err);
+}
+
+void DuckDBEngineError::Throw(string from_where) {
+ // Make copies before calling delete this
+ auto etype_copy = etype;
+ auto message_copy = error_message;
+
+ // Consume error by calling delete this (remember this error is created by kernel using AllocateError)
+ delete this;
+ throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error: %u (%s) with message (%s)", from_where.c_str(),
+ etype_copy, KernelErrorEnumToString(etype_copy), message_copy);
+}
+
+ffi::KernelStringSlice KernelUtils::ToDeltaString(const string &str) {
+ return {str.data(), str.size()};
+}
+
+string KernelUtils::FromDeltaString(const struct ffi::KernelStringSlice slice) {
+ return {slice.ptr, slice.len};
+}
+
+vector<bool> KernelUtils::FromDeltaBoolSlice(const struct ffi::KernelBoolSlice slice) {
+ vector<bool> result;
+ result.assign(slice.ptr, slice.ptr + slice.len);
+ return result;
+}
+
+PredicateVisitor::PredicateVisitor(const vector<string> &column_names, optional_ptr<TableFilterSet> filters)
+ : EnginePredicate {.predicate = this,
+ .visitor = (uintptr_t(*)(void *, ffi::KernelExpressionVisitorState *)) & VisitPredicate} {
+ if (filters) {
+ for (auto &filter : filters->filters) {
+ column_filters[column_names[filter.first]] = filter.second.get();
+ }
+ }
+}
+
+// Template wrapper function that implements get_next for EngineIteratorFromCallable.
+template <typename Callable>
+static auto GetNextFromCallable(Callable *callable) -> decltype(std::declval<Callable>()()) {
+ return callable->operator()();
+}
+
+// Wraps a callable object (e.g. C++11 lambda) as an EngineIterator.
+template <typename Callable>
+ffi::EngineIterator EngineIteratorFromCallable(Callable &callable) {
+ auto *get_next = &GetNextFromCallable<Callable>;
+ return {.data = &callable, .get_next = (const void *(*)(void *))get_next};
+};
+
+// Helper function to prevent pushing down filters the kernel can't handle
+// TODO: remove once kernel handles this properly?
+static bool CanHandleFilter(TableFilter *filter) {
+ switch (filter->filter_type) {
+ case TableFilterType::CONSTANT_COMPARISON:
+ return true;
+ case TableFilterType::CONJUNCTION_AND: {
+ auto &conjunction = static_cast<ConjunctionAndFilter &>(*filter);
+ bool can_handle = true;
+ for (const auto &child : conjunction.child_filters) {
+ can_handle = can_handle && CanHandleFilter(child.get());
+ }
+ return can_handle;
+ }
+
+ default:
+ return false;
+ }
+}
+
+// Prunes the list of predicates to ones that we can handle
+static unordered_map<string, TableFilter *> PrunePredicates(unordered_map<string, TableFilter *> predicates) {
+ unordered_map<string, TableFilter *> result;
+ for (const auto &predicate : predicates) {
+ if (CanHandleFilter(predicate.second)) {
+ result[predicate.first] = predicate.second;
+ }
+ }
+ return result;
+}
+
+uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor *predicate, ffi::KernelExpressionVisitorState *state) {
+ auto filters = PrunePredicates(predicate->column_filters);
+
+ auto it = filters.begin();
+ auto end = filters.end();
+ auto get_next = [predicate, state, &it, &end]() -> uintptr_t {
+ if (it == end) {
+ return 0;
+ }
+ auto &filter = *it++;
+ return predicate->VisitFilter(filter.first, *filter.second, state);
+ };
+ auto eit = EngineIteratorFromCallable(get_next);
+
+ // TODO: this should be fixed upstream?
+ try {
+ return visit_expression_and(state, &eit);
+ } catch (...) {
+ return ~0;
+ }
+}
+
+uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const ConstantFilter &filter,
+ ffi::KernelExpressionVisitorState *state) {
+ auto maybe_left =
+ ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
+ uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitConstantFilter failed to visit_expression_column");
+
+ uintptr_t right = ~0;
+ auto &value = filter.constant;
+ switch (value.type().id()) {
+ case LogicalType::BIGINT:
+ right = visit_expression_literal_long(state, BigIntValue::Get(value));
+ break;
+
+ case LogicalType::VARCHAR: {
+ // WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str())
+ auto str = StringValue::Get(value);
+ auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(str),
+ DuckDBEngineError::AllocateError);
+ right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string");
+ break;
+ }
+
+ default:
+ break; // unsupported type
+ }
+
+ // TODO support other comparison types?
+ switch (filter.comparison_type) {
+ case ExpressionType::COMPARE_LESSTHAN:
+ return visit_expression_lt(state, left, right);
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
+ return visit_expression_le(state, left, right);
+ case ExpressionType::COMPARE_GREATERTHAN:
+ return visit_expression_gt(state, left, right);
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
+ return visit_expression_ge(state, left, right);
+ case ExpressionType::COMPARE_EQUAL:
+ return visit_expression_eq(state, left, right);
+
+ default:
+ std::cout << " Unsupported operation: " << (int)filter.comparison_type << std::endl;
+ return ~0; // Unsupported operation
+ }
+}
+
+uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter,
+ ffi::KernelExpressionVisitorState *state) {
+ auto it = filter.child_filters.begin();
+ auto end = filter.child_filters.end();
+ auto get_next = [this, col_name, state, &it, &end]() -> uintptr_t {
+ if (it == end) {
+ return 0;
+ }
+ auto &child_filter = *it++;
+ return VisitFilter(col_name, *child_filter, state);
+ };
+ auto eit = EngineIteratorFromCallable(get_next);
+ return visit_expression_and(state, &eit);
+}
+
+uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilter &filter,
+ ffi::KernelExpressionVisitorState *state) {
+ switch (filter.filter_type) {
+ case TableFilterType::CONSTANT_COMPARISON:
+ return VisitConstantFilter(col_name, static_cast<const ConstantFilter &>(filter), state);
+ case TableFilterType::CONJUNCTION_AND:
+ return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter &>(filter), state);
+ default:
+ throw NotImplementedException("Attempted to push down unimplemented filter type: '%s'",
+ EnumUtil::ToString(filter.filter_type));
+ }
+}
+
+}; // namespace duckdb
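`EngineIteratorFromCallable` above is a general trick: it erases a C++ lambda into the C-style `(data, get_next)` pair that the kernel FFI expects, with `0` signalling exhaustion. A standalone sketch of the same technique (the `CIterator` struct is illustrative, not the ffi type):

```cpp
// Standalone sketch of the lambda-to-C-iterator technique.
#include <cstdint>
#include <iostream>

struct CIterator {
	void *data;
	uintptr_t (*get_next)(void *data); // returns 0 when exhausted
};

template <typename Callable>
static uintptr_t GetNextThunk(void *data) {
	return (*static_cast<Callable *>(data))();
}

template <typename Callable>
CIterator MakeCIterator(Callable &callable) {
	// The callable must outlive the iterator: only a pointer is stored.
	return {&callable, &GetNextThunk<Callable>};
}

int main() {
	uintptr_t i = 3;
	auto next = [&i]() -> uintptr_t { return i > 0 ? i-- : 0; };
	auto it = MakeCIterator(next);
	for (uintptr_t v = it.get_next(it.data); v != 0; v = it.get_next(it.data)) {
		std::cout << v << "\n"; // prints 3 2 1
	}
	return 0;
}
```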
diff --git a/extension/delta/src/functions/delta_scan.cpp b/extension/delta/src/functions/delta_scan.cpp
new file mode 100644
index 00000000000..a01bd5f4c42
--- /dev/null
+++ b/extension/delta/src/functions/delta_scan.cpp
@@ -0,0 +1,628 @@
+#include "duckdb/function/table_function.hpp"
+
+#include "delta_functions.hpp"
+#include "functions/delta_scan.hpp"
+#include "duckdb/optimizer/filter_combiner.hpp"
+#include "duckdb/planner/operator/logical_get.hpp"
+#include "duckdb/main/extension_util.hpp"
+#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
+#include "duckdb/common/local_file_system.hpp"
+#include "duckdb/common/types/data_chunk.hpp"
+#include "duckdb/parser/expression/constant_expression.hpp"
+#include "duckdb/parser/expression/function_expression.hpp"
+#include "duckdb/parser/parsed_expression.hpp"
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/planner/binder.hpp"
+#include "duckdb/main/secret/secret_manager.hpp"
+
+#include <string>
+#include <numeric>
+
+namespace duckdb {
+
+static void *allocate_string(const struct ffi::KernelStringSlice slice) {
+ return new string(slice.ptr, slice.len);
+}
+
+static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size,
+ const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) {
+ auto context = (DeltaSnapshot *)engine_context;
+ auto path_string = context->GetPath();
+ StringUtil::RTrim(path_string, "/");
+ path_string += "/" + KernelUtils::FromDeltaString(path);
+
+ // First we append the file to our resolved files
+ context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string));
+ context->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
+
+ D_ASSERT(context->resolved_files.size() == context->metadata.size());
+
+ // Initialize the file metadata
+ context->metadata.back()->delta_snapshot_version = context->version;
+ context->metadata.back()->file_number = context->resolved_files.size() - 1;
+
+ // Fetch the deletion vector
+ auto selection_vector_res =
+ ffi::selection_vector_from_dv(dv_info, context->extern_engine.get(), context->global_state.get());
+ auto selection_vector =
+ KernelUtils::UnpackResult(selection_vector_res, "selection_vector_from_dv for path " + context->GetPath());
+ if (selection_vector.ptr) {
+ context->metadata.back()->selection_vector = selection_vector;
+ }
+
+ // Lookup all columns for potential hits in the constant map
+ case_insensitive_map_t<string> constant_map;
+ for (const auto &col : context->names) {
+ auto key = KernelUtils::ToDeltaString(col);
+ auto *partition_val = (string *)ffi::get_from_map(partition_values, key, allocate_string);
+ if (partition_val) {
+ constant_map[col] = *partition_val;
+ delete partition_val;
+ }
+ }
+ context->metadata.back()->partition_map = std::move(constant_map);
+}
+
+static void visit_data(void *engine_context, ffi::EngineData *engine_data,
+ const struct ffi::KernelBoolSlice selection_vec) {
+ ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback);
+}
+
+static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &path) {
+ ffi::EngineBuilder *builder;
+
+ // For "regular" paths we early out with the default builder config
+ if (!StringUtil::StartsWith(path, "s3://")) {
+ auto interface_builder_res =
+ ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
+ return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
+ }
+
+ auto end_of_container = path.find('/', 5);
+
+ if (end_of_container == string::npos) {
+ throw IOException("Invalid s3 url passed to delta scan: %s", path);
+ }
+ auto bucket = path.substr(5, end_of_container - 5);
+ auto path_in_bucket = path.substr(end_of_container);
+
+ auto interface_builder_res =
+ ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
+ builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
+
+ // For S3 paths we need to trim the url, set the container, and fetch a potential secret
+ auto &secret_manager = SecretManager::Get(context);
+ auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context);
+
+ auto secret_match = secret_manager.LookupSecret(transaction, path, "s3");
+
+ // No secret: nothing left to do here!
+ if (!secret_match.HasMatch()) {
+ return builder;
+ }
+ const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_match.secret_entry->secret);
+
+ auto key_id = kv_secret.TryGetValue("key_id").ToString();
+ auto secret = kv_secret.TryGetValue("secret").ToString();
+ auto session_token = kv_secret.TryGetValue("session_token").ToString();
+ auto region = kv_secret.TryGetValue("region").ToString();
+
+ if (key_id.empty() && secret.empty()) {
+ ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"),
+ KernelUtils::ToDeltaString("true"));
+ }
+
+ if (!key_id.empty()) {
+ ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"),
+ KernelUtils::ToDeltaString(key_id));
+ }
+ if (!secret.empty()) {
+ ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"),
+ KernelUtils::ToDeltaString(secret));
+ }
+ if (!session_token.empty()) {
+ ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"),
+ KernelUtils::ToDeltaString(session_token));
+ }
+ ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region));
+
+ return builder;
+}
+
+DeltaSnapshot::DeltaSnapshot(ClientContext &context_p, const string &path)
+ : MultiFileList({ToDeltaPath(path)}, FileGlobOptions::ALLOW_EMPTY), context(context_p) {
+}
+
+string DeltaSnapshot::GetPath() {
+ return GetPaths()[0];
+}
+
+string DeltaSnapshot::ToDuckDBPath(const string &raw_path) {
+ if (StringUtil::StartsWith(raw_path, "file://")) {
+ return raw_path.substr(7);
+ }
+ return raw_path;
+}
+
+string DeltaSnapshot::ToDeltaPath(const string &raw_path) {
+ string path;
+ if (StringUtil::StartsWith(raw_path, "./")) {
+ LocalFileSystem fs;
+ path = fs.JoinPath(fs.GetWorkingDirectory(), raw_path.substr(2));
+ path = "file://" + path;
+ } else {
+ path = raw_path;
+ }
+
+ // Paths always end in a slash (kernel likes it that way for now)
+ if (path[path.size() - 1] != '/') {
+ path = path + '/';
+ }
+
+ return path;
+}
+
+void DeltaSnapshot::Bind(vector &return_types, vector &names) {
+ if (!initialized) {
+ InitializeFiles();
+ }
+ auto schema = SchemaVisitor::VisitSnapshotSchema(snapshot.get());
+ for (const auto &field : *schema) {
+ names.push_back(field.first);
+ return_types.push_back(field.second);
+ }
+ // Store the bound names for resolving the complex filter pushdown later
+ this->names = names;
+}
+
+string DeltaSnapshot::GetFile(idx_t i) {
+ if (!initialized) {
+ InitializeFiles();
+ }
+ // We already have this file
+ if (i < resolved_files.size()) {
+ return resolved_files[i];
+ }
+
+ if (files_exhausted) {
+ return "";
+ }
+
+ while (i >= resolved_files.size()) {
+ auto have_scan_data_res = ffi::kernel_scan_data_next(scan_data_iterator.get(), this, visit_data);
+
+ auto have_scan_data = TryUnpackKernelResult(have_scan_data_res);
+
+ // kernel has indicated that we have no more data to scan
+ if (!have_scan_data) {
+ files_exhausted = true;
+ return "";
+ }
+ }
+
+ // The kernel scan visitor should have resolved a file OR returned
+ if (i >= resolved_files.size()) {
+ throw IOException("Delta Kernel seems to have failed to resolve a new file");
+ }
+
+ return resolved_files[i];
+}
+
+void DeltaSnapshot::InitializeFiles() {
+ auto path_slice = KernelUtils::ToDeltaString(paths[0]);
+
+ // Register engine
+ auto interface_builder = CreateBuilder(context, paths[0]);
+ extern_engine = TryUnpackKernelResult(ffi::builder_build(interface_builder));
+
+ // Initialize Snapshot
+ snapshot = TryUnpackKernelResult(ffi::snapshot(path_slice, extern_engine.get()));
+
+ // Create Scan
+ PredicateVisitor visitor(names, &table_filters);
+ scan = TryUnpackKernelResult(ffi::scan(snapshot.get(), extern_engine.get(), &visitor));
+
+ // Create GlobalState
+ global_state = ffi::get_global_scan_state(scan.get());
+
+ // Set version
+ this->version = ffi::version(snapshot.get());
+
+ // Create scan data iterator
+ scan_data_iterator = TryUnpackKernelResult(ffi::kernel_scan_data_init(extern_engine.get(), scan.get()));
+
+ initialized = true;
+}
+
+unique_ptr<MultiFileList> DeltaSnapshot::ComplexFilterPushdown(ClientContext &context,
+ const MultiFileReaderOptions &options, LogicalGet &get,
+ vector<unique_ptr<Expression>> &filters) {
+ FilterCombiner combiner(context);
+ for (const auto &filter : filters) {
+ combiner.AddFilter(filter->Copy());
+ }
+ auto filterstmp = combiner.GenerateTableScanFilters(get.column_ids);
+
+ // TODO: can/should we figure out if this filtered anything?
+ auto filtered_list = make_uniq<DeltaSnapshot>(context, paths[0]);
+ filtered_list->table_filters = std::move(filterstmp);
+ filtered_list->names = names;
+
+ return std::move(filtered_list);
+}
+
+vector<string> DeltaSnapshot::GetAllFiles() {
+ idx_t i = resolved_files.size();
+ // TODO: this can probably be improved
+ while (!GetFile(i).empty()) {
+ i++;
+ }
+ return resolved_files;
+}
+
+FileExpandResult DeltaSnapshot::GetExpandResult() {
+ // GetFile(1) will ensure at least the first 2 files are expanded if they are available
+ GetFile(1);
+
+ if (resolved_files.size() > 1) {
+ return FileExpandResult::MULTIPLE_FILES;
+ } else if (resolved_files.size() == 1) {
+ return FileExpandResult::SINGLE_FILE;
+ }
+
+ return FileExpandResult::NO_FILES;
+}
+
+idx_t DeltaSnapshot::GetTotalFileCount() {
+ // TODO: this can probably be improved
+ idx_t i = resolved_files.size();
+ while (!GetFile(i).empty()) {
+ i++;
+ }
+ return resolved_files.size();
+}
+
+unique_ptr<MultiFileReader> DeltaMultiFileReader::CreateInstance() {
+ return std::move(make_uniq<DeltaMultiFileReader>());
+}
+
+bool DeltaMultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &files,
+ vector<LogicalType> &return_types, vector<string> &names,
+ MultiFileReaderBindData &bind_data) {
+ auto &delta_snapshot = dynamic_cast<DeltaSnapshot &>(files);
+
+ delta_snapshot.Bind(return_types, names);
+
+ // We need to parse this option
+ bool file_row_number_enabled = options.custom_options.find("file_row_number") != options.custom_options.end();
+ if (file_row_number_enabled) {
+ bind_data.file_row_number_idx = names.size();
+ return_types.emplace_back(LogicalType::BIGINT);
+ names.emplace_back("file_row_number");
+ } else {
+ // TODO: this is a bogus ID? Change for flag indicating it should be enabled?
+ bind_data.file_row_number_idx = names.size();
+ }
+
+ return true;
+};
+
+void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList &files,
+ vector<LogicalType> &return_types, vector<string> &names,
+ MultiFileReaderBindData &bind_data) {
+
+ // Disable all other multifilereader options
+ options.auto_detect_hive_partitioning = false;
+ options.hive_partitioning = false;
+ options.union_by_name = false;
+
+ MultiFileReader::BindOptions(options, files, return_types, names, bind_data);
+
+ auto demo_gen_col_opt = options.custom_options.find("delta_file_number");
+ if (demo_gen_col_opt != options.custom_options.end()) {
+ if (demo_gen_col_opt->second.GetValue<bool>()) {
+ names.push_back("delta_file_number");
+ return_types.push_back(LogicalType::UBIGINT);
+ }
+ }
+}
+
+void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options,
+ const MultiFileReaderBindData &options, const string &filename,
+ const vector<string> &local_names, const vector<LogicalType> &global_types,
+ const vector<string> &global_names, const vector<column_t> &global_column_ids,
+ MultiFileReaderData &reader_data, ClientContext &context,
+ optional_ptr<MultiFileReaderGlobalState> global_state) {
+ MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names,
+ global_column_ids, reader_data, context, global_state);
+
+ // Handle custom delta option set in MultiFileReaderOptions::custom_options
+ auto file_number_opt = file_options.custom_options.find("delta_file_number");
+ if (file_number_opt != file_options.custom_options.end()) {
+ if (file_number_opt->second.GetValue<bool>()) {
+ D_ASSERT(global_state);
+ auto &delta_global_state = global_state->Cast<DeltaMultiFileReaderGlobalState>();
+ D_ASSERT(delta_global_state.delta_file_number_idx != DConstants::INVALID_INDEX);
+
+ // We add the constant column for the delta_file_number option
+ // NOTE: we add a placeholder here, to demonstrate how we can also populate extra columns in the
+ // FinalizeChunk
+ reader_data.constant_map.emplace_back(delta_global_state.delta_file_number_idx, Value::UBIGINT(0));
+ }
+ }
+
+ // Get the metadata for this file
+ D_ASSERT(global_state->file_list);
+ const auto &snapshot = dynamic_cast<const DeltaSnapshot &>(*global_state->file_list);
+ auto &file_metadata = snapshot.metadata[reader_data.file_list_idx.GetIndex()];
+
+ if (!file_metadata->partition_map.empty()) {
+ for (idx_t i = 0; i < global_column_ids.size(); i++) {
+ column_t col_id = global_column_ids[i];
+ auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]);
+ if (col_partition_entry != file_metadata->partition_map.end()) {
+ // Todo: use https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization
+ auto maybe_value = Value(col_partition_entry->second).DefaultCastAs(global_types[i]);
+ reader_data.constant_map.emplace_back(i, maybe_value);
+ }
+ }
+ }
+}
+
+unique_ptr<MultiFileList> DeltaMultiFileReader::CreateFileList(ClientContext &context, const vector<string> &paths,
+ FileGlobOptions options) {
+ if (paths.size() != 1) {
+ throw BinderException("'delta_scan' only supports single path as input");
+ }
+
+ return make_uniq<DeltaSnapshot>(context, paths[0]);
+}
+
+// Generate the correct Selection Vector Based on the Raw delta KernelBoolSlice dv and the row_id_column
+// TODO: this probably is slower than needed (we can do with less branches in the for loop for most cases)
+static SelectionVector DuckSVFromDeltaSV(const ffi::KernelBoolSlice &dv, Vector row_id_column, idx_t count,
+ idx_t &select_count) {
+ D_ASSERT(row_id_column.GetType() == LogicalType::BIGINT);
+
+ UnifiedVectorFormat data;
+ row_id_column.ToUnifiedFormat(count, data);
+ auto row_ids = UnifiedVectorFormat::GetData<int64_t>(data);
+
+ SelectionVector result {count};
+ idx_t current_select = 0;
+ for (idx_t i = 0; i < count; i++) {
+ auto row_id = row_ids[data.sel->get_index(i)];
+
+ // TODO: why are deletion vectors not spanning whole data?
+ if (row_id >= dv.len || dv.ptr[row_id]) {
+ result.data()[current_select] = i;
+ current_select++;
+ }
+ }
+
+ select_count = current_select;
+
+ return result;
+}
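
To make the remapping concrete, here is a standalone sketch of the same loop over plain std containers: rows whose file_row_number maps to true in the kernel slice survive, and rows past the end of the slice are kept, mirroring the `row_id >= dv.len` branch above (all values illustrative):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
	std::vector<bool> dv = {true, false, true};  // kernel selection slice
	std::vector<int64_t> row_ids = {0, 1, 2, 3}; // file_row_number column
	std::vector<uint64_t> sel;                   // DuckDB-style selection vector
	for (uint64_t i = 0; i < row_ids.size(); i++) {
		auto row_id = static_cast<uint64_t>(row_ids[i]);
		if (row_id >= dv.size() || dv[row_id]) {
			sel.push_back(i);
		}
	}
	for (auto idx : sel) {
		std::cout << idx << " "; // 0 2 3: row 1 was deleted
	}
}
```
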
+
+// Stores the result-chunk index of a column that is used internally by the delta extension
+void DeltaMultiFileReaderGlobalState::SetColumnIdx(const string &column, idx_t idx) {
+ if (column == "file_row_number") {
+ file_row_number_idx = idx;
+ return;
+ } else if (column == "delta_file_number") {
+ delta_file_number_idx = idx;
+ return;
+ }
+ throw IOException("Unknown column '%s' found as required by the DeltaMultiFileReader");
+}
+
+unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalState(
+ duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options,
+ const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list,
+ const vector<LogicalType> &global_types, const vector<string> &global_names,
+ const vector<column_t> &global_column_ids) {
+ vector<LogicalType> extra_columns;
+ vector<pair<string, idx_t>> mapped_columns;
+
+ // Create a map of the columns that are in the projection
+ case_insensitive_map_t<idx_t> selected_columns;
+ for (idx_t i = 0; i < global_column_ids.size(); i++) {
+ auto global_id = global_column_ids[i];
+ if (IsRowIdColumnId(global_id)) {
+ continue;
+ }
+
+ auto &global_name = global_names[global_id];
+ selected_columns.insert({global_name, i});
+ }
+
+ // TODO: only add file_row_number column if there are deletes
+ case_insensitive_map_t<LogicalType> columns_to_map = {
+ {"file_row_number", LogicalType::BIGINT},
+ };
+
+ // Add the delta_file_number column to the columns to map
+ auto demo_gen_col_opt = file_options.custom_options.find("delta_file_number");
+ if (demo_gen_col_opt != file_options.custom_options.end()) {
+ if (demo_gen_col_opt->second.GetValue<bool>()) {
+ columns_to_map.insert({"delta_file_number", LogicalType::UBIGINT});
+ }
+ }
+
+ // Map every column to either a column in the projection, or add it to the extra columns if it doesn't exist
+ idx_t col_offset = 0;
+ for (const auto &required_column : columns_to_map) {
+ // First check if the column is in the projection
+ auto res = selected_columns.find(required_column.first);
+ if (res != selected_columns.end()) {
+ // The column is in the projection, no special handling is required; we simply store the index
+ mapped_columns.push_back({required_column.first, res->second});
+ continue;
+ }
+
+ // The column is NOT in the projection: it needs to be added as an extra_column
+
+ // Calculate the index of the added column (extra columns are added after all other columns)
+ idx_t current_col_idx = global_column_ids.size() + col_offset++;
+
+ // Add column to the map, to ensure the MultiFileReader can find it when processing the Chunk
+ mapped_columns.push_back({required_column.first, current_col_idx});
+
+ // Ensure the result DataChunk has a vector of the correct type to store this column
+ extra_columns.push_back(required_column.second);
+ }
+
+ auto res = make_uniq<DeltaMultiFileReaderGlobalState>(extra_columns, &file_list);
+
+ // Parse all the mapped columns into the DeltaMultiFileReaderGlobalState for easy use
+ for (const auto &mapped_column : mapped_columns) {
+ res->SetColumnIdx(mapped_column.first, mapped_column.second);
+ }
+
+ return std::move(res);
+}
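
The index arithmetic above is the subtle part: a required column that is already projected keeps its projection slot, while the rest are appended after all projected columns. A standalone sketch with a hypothetical three-column projection:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
	// hypothetical projection: column name -> slot in the result chunk
	std::map<std::string, size_t> projection = {{"a", 0}, {"file_row_number", 1}, {"b", 2}};
	size_t projected_count = projection.size();
	size_t col_offset = 0;
	for (const std::string required : {"file_row_number", "delta_file_number"}) {
		auto it = projection.find(required);
		// already projected: reuse its slot; otherwise: append after all projected columns
		size_t idx = (it != projection.end()) ? it->second : projected_count + col_offset++;
		std::cout << required << " -> " << idx << "\n"; // 1, then 3
	}
}
```
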
+
+void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
+ const vector<string> &local_names, const vector<LogicalType> &global_types,
+ const vector<string> &global_names,
+ const vector<column_t> &global_column_ids,
+ MultiFileReaderData &reader_data, const string &initial_file,
+ optional_ptr<MultiFileReaderGlobalState> global_state) {
+ // First call the base implementation to do most mapping
+ MultiFileReader::CreateNameMapping(file_name, local_types, local_names, global_types, global_names,
+ global_column_ids, reader_data, initial_file, global_state);
+
+ // Then we handle delta specific mapping
+ D_ASSERT(global_state);
+ auto &delta_global_state = global_state->Cast<DeltaMultiFileReaderGlobalState>();
+
+ // Check if the file_row_number column is an "extra_column" which is not part of the projection
+ if (delta_global_state.file_row_number_idx >= global_column_ids.size()) {
+ D_ASSERT(delta_global_state.file_row_number_idx != DConstants::INVALID_INDEX);
+
+ // Build the name map
+ case_insensitive_map_t<idx_t> name_map;
+ for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) {
+ name_map[local_names[col_idx]] = col_idx;
+ }
+
+ // Lookup the required column in the local map
+ auto entry = name_map.find("file_row_number");
+ if (entry == name_map.end()) {
+ throw IOException("Failed to find the file_row_number column");
+ }
+
+ // Register the column to be scanned from this file
+ reader_data.column_ids.push_back(entry->second);
+ reader_data.column_mapping.push_back(delta_global_state.file_row_number_idx);
+ }
+
+ // This may have changed: update it
+ reader_data.empty_columns = reader_data.column_ids.empty();
+}
+
+void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFileReaderBindData &bind_data,
+ const MultiFileReaderData &reader_data, DataChunk &chunk,
+ optional_ptr<MultiFileReaderGlobalState> global_state) {
+ // Base class finalization first
+ MultiFileReader::FinalizeChunk(context, bind_data, reader_data, chunk, global_state);
+
+ D_ASSERT(global_state);
+ auto &delta_global_state = global_state->Cast<DeltaMultiFileReaderGlobalState>();
+ D_ASSERT(delta_global_state.file_list);
+
+ // Get the metadata for this file
+ const auto &snapshot = dynamic_cast<const DeltaSnapshot &>(*global_state->file_list);
+ auto &metadata = snapshot.metadata[reader_data.file_list_idx.GetIndex()];
+
+ if (metadata->selection_vector.ptr && chunk.size() != 0) {
+ D_ASSERT(delta_global_state.file_row_number_idx != DConstants::INVALID_INDEX);
+ auto &file_row_number_column = chunk.data[delta_global_state.file_row_number_idx];
+
+ // Construct the selection vector using the file_row_number column and the raw selection vector from delta
+ idx_t select_count;
+ auto sv = DuckSVFromDeltaSV(metadata->selection_vector, file_row_number_column, chunk.size(), select_count);
+ chunk.Slice(sv, select_count);
+ }
+
+ // Note: this demo function shows how we can use DuckDB's Binder to create expression-based generated columns
+ if (delta_global_state.delta_file_number_idx != DConstants::INVALID_INDEX) {
+ //! Create Dummy expression (0 + file_number)
+ vector<unique_ptr<ParsedExpression>> child_expr;
+ child_expr.push_back(make_uniq<ConstantExpression>(Value::UBIGINT(0)));
+ child_expr.push_back(make_uniq<ConstantExpression>(Value::UBIGINT(7)));
+ unique_ptr<ParsedExpression> expr =
+ make_uniq<FunctionExpression>("+", std::move(child_expr), nullptr, nullptr, false, true);
+
+ //! Bind the dummy expression
+ auto binder = Binder::CreateBinder(context);
+ ExpressionBinder expr_binder(*binder, context);
+ auto bound_expr = expr_binder.Bind(expr, nullptr);
+
+ //! Execute dummy expression into result column
+ ExpressionExecutor expr_executor(context);
+ expr_executor.AddExpression(*bound_expr);
+
+ //! Execute the expression directly into the output Chunk
+ expr_executor.ExecuteExpression(chunk.data[delta_global_state.delta_file_number_idx]);
+ }
+}
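
The bind-then-execute pattern used above generalizes to any expression-backed generated column. A standalone analogue with a toy expression tree (in the real code, ConstantExpression/FunctionExpression form the tree, ExpressionBinder resolves it, and ExpressionExecutor materializes it into the chunk):

```cpp
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

struct Expr {
	virtual ~Expr() = default;
	virtual uint64_t Eval() const = 0;
};

struct Const : Expr {
	uint64_t value;
	explicit Const(uint64_t value) : value(value) {}
	uint64_t Eval() const override { return value; }
};

struct Add : Expr {
	std::vector<std::unique_ptr<Expr>> children;
	uint64_t Eval() const override {
		uint64_t sum = 0;
		for (auto &child : children) {
			sum += child->Eval();
		}
		return sum;
	}
};

int main() {
	// build the dummy expression (0 + 7), mirroring the diff above
	Add expr;
	expr.children.push_back(std::make_unique<Const>(0));
	expr.children.push_back(std::make_unique<Const>(7));
	// "ExecuteExpression": evaluate once and broadcast into the output column
	uint64_t column_value = expr.Eval();
	std::cout << column_value << "\n"; // 7
}
```
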
+
+bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options,
+ ClientContext &context) {
+ auto loption = StringUtil::Lower(key);
+
+ if (loption == "delta_file_number") {
+ options.custom_options[loption] = val;
+ return true;
+ }
+
+ // We need to capture this one to know whether to emit the file_row_number column
+ if (loption == "file_row_number") {
+ options.custom_options[loption] = val;
+ return true;
+ }
+
+ return MultiFileReader::ParseOption(key, val, options, context);
+}
+
+TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) {
+ // The delta_scan function is constructed by grabbing the parquet scan from the Catalog, then injecting the
+ // DeltaMultiFileReader into it to create a Delta-based multi file read
+
+ auto &parquet_scan = ExtensionUtil::GetTableFunction(instance, "parquet_scan");
+ auto parquet_scan_copy = parquet_scan.functions;
+
+ for (auto &function : parquet_scan_copy.functions) {
+ // Register the MultiFileReader as the driver for reads
+ function.get_multi_file_reader = DeltaMultiFileReader::CreateInstance;
+
+ // Unset all of these: they are either broken or very inefficient.
+ // TODO: implement/fix these
+ function.serialize = nullptr;
+ function.deserialize = nullptr;
+ function.statistics = nullptr;
+ function.table_scan_progress = nullptr;
+ function.cardinality = nullptr;
+ function.get_bind_info = nullptr;
+
+ // Schema param is just confusing here
+ function.named_parameters.erase("schema");
+
+ // Demonstration of a generated column based on information from DeltaSnapshot
+ function.named_parameters["delta_file_number"] = LogicalType::BOOLEAN;
+
+ function.name = "delta_scan";
+ }
+
+ parquet_scan_copy.name = "delta_scan";
+ return parquet_scan_copy;
+}
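
Once registered, delta_scan is used like any other table function. A hedged usage sketch through DuckDB's C++ API (the path is hypothetical; delta_file_number is the demo option registered above):

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Plain scan of a Delta table directory
	con.Query("SELECT * FROM delta_scan('./my_delta_table')")->Print();
	// With the demo generated column enabled
	con.Query("SELECT *, delta_file_number "
	          "FROM delta_scan('./my_delta_table', delta_file_number = true)")->Print();
}
```
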
+
+} // namespace duckdb
diff --git a/extension/delta/src/include/delta_extension.hpp b/extension/delta/src/include/delta_extension.hpp
new file mode 100644
index 00000000000..d6b13f236b8
--- /dev/null
+++ b/extension/delta/src/include/delta_extension.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "duckdb.hpp"
+
+namespace duckdb {
+
+class DeltaExtension : public Extension {
+public:
+ void Load(DuckDB &db) override;
+ std::string Name() override;
+};
+
+} // namespace duckdb
diff --git a/extension/delta/src/include/delta_functions.hpp b/extension/delta/src/include/delta_functions.hpp
new file mode 100644
index 00000000000..4f819cbfd63
--- /dev/null
+++ b/extension/delta/src/include/delta_functions.hpp
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+// DuckDB
+//
+// delta_functions.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
+
+namespace duckdb {
+
+class DeltaFunctions {
+public:
+ static vector<TableFunctionSet> GetTableFunctions(DatabaseInstance &instance);
+
+private:
+ static TableFunctionSet GetDeltaScanFunction(DatabaseInstance &instance);
+};
+} // namespace duckdb
diff --git a/extension/delta/src/include/delta_utils.hpp b/extension/delta/src/include/delta_utils.hpp
new file mode 100644
index 00000000000..4b478c998f6
--- /dev/null
+++ b/extension/delta/src/include/delta_utils.hpp
@@ -0,0 +1,155 @@
+#pragma once
+
+#include "delta_kernel_ffi.hpp"
+#include "duckdb/planner/filter/constant_filter.hpp"
+#include "duckdb/planner/filter/conjunction_filter.hpp"
+#include "duckdb/common/enum_util.hpp"
+#include <iostream>
+
+// TODO: clean up this file as we go
+
+namespace duckdb {
+
+// SchemaVisitor is used to parse the schema of a Delta table from the Kernel
+class SchemaVisitor {
+public:
+ using FieldList = child_list_t<LogicalType>;
+
+ static unique_ptr<FieldList> VisitSnapshotSchema(ffi::SharedSnapshot *snapshot);
+
+private:
+ unordered_map<uintptr_t, unique_ptr<FieldList>> inflight_lists;
+ uintptr_t next_id = 1;
+
+ typedef void(SimpleTypeVisitorFunction)(void *, uintptr_t, ffi::KernelStringSlice);
+
+ template <LogicalTypeId TypeId>
+ static SimpleTypeVisitorFunction *VisitSimpleType() {
+ return (SimpleTypeVisitorFunction *)&VisitSimpleTypeImpl<TypeId>;
+ }
+ template <LogicalTypeId TypeId>
+ static void VisitSimpleTypeImpl(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name) {
+ state->AppendToList(sibling_list_id, name, TypeId);
+ }
+
+ static void VisitDecimal(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ uint8_t precision, uint8_t scale);
+ static uintptr_t MakeFieldList(SchemaVisitor *state, uintptr_t capacity_hint);
+ static void VisitStruct(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ uintptr_t child_list_id);
+ static void VisitArray(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ bool contains_null, uintptr_t child_list_id);
+ static void VisitMap(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
+ bool contains_null, uintptr_t child_list_id);
+
+ uintptr_t MakeFieldListImpl(uintptr_t capacity_hint);
+ void AppendToList(uintptr_t id, ffi::KernelStringSlice name, LogicalType &&child);
+ unique_ptr<FieldList> TakeFieldList(uintptr_t id);
+};
+
+// Allocator for errors that the kernel might throw
+struct DuckDBEngineError : ffi::EngineError {
+ // Allocate a DuckDBEngineError, function ptr passed to kernel for error allocation
+ static ffi::EngineError *AllocateError(ffi::KernelError etype, ffi::KernelStringSlice msg);
+ // Convert a kernel error enum to a string
+ static string KernelErrorEnumToString(ffi::KernelError err);
+
+ // Throw the error as an IOException
+ [[noreturn]] void Throw(string from_info);
+
+ // The error message from Kernel
+ string error_message;
+};
+
+// RAII wrapper that returns ownership of a kernel pointer to kernel when it goes out of
+// scope. Similar to std::unique_ptr, but does not define operator->() and does not require the
+// kernel type to be complete.
+template <typename KernelType>
+struct UniqueKernelPointer {
+ UniqueKernelPointer() : ptr(nullptr), free(nullptr) {
+ }
+
+ // Takes ownership of a pointer with associated deleter.
+ UniqueKernelPointer(KernelType *ptr, void (*free)(KernelType *)) : ptr(ptr), free(free) {
+ }
+
+ // movable but not copyable
+ UniqueKernelPointer(UniqueKernelPointer &&other) : ptr(other.ptr), free(other.free) {
+ other.ptr = nullptr;
+ other.free = nullptr;
+ }
+ UniqueKernelPointer &operator=(UniqueKernelPointer &&other) {
+ std::swap(ptr, other.ptr);
+ std::swap(free, other.free);
+ return *this;
+ }
+ UniqueKernelPointer(const UniqueKernelPointer &) = delete;
+ UniqueKernelPointer &operator=(const UniqueKernelPointer &) = delete;
+
+ ~UniqueKernelPointer() {
+ if (ptr && free) {
+ free(ptr);
+ }
+ }
+
+ KernelType *get() const {
+ return ptr;
+ }
+
+private:
+ KernelType *ptr;
+ void (*free)(KernelType *) = nullptr;
+};
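
A standalone sketch of the ownership semantics above, with a stand-in C-style API (kernel_object and kernel_free are hypothetical): the deleter travels with the pointer and runs exactly once, when the final owner leaves scope.

```cpp
#include <cstdio>
#include <utility>

struct kernel_object {
	int payload;
};

static void kernel_free(kernel_object *obj) {
	std::printf("freed %d\n", obj->payload);
	delete obj;
}

template <typename T>
struct UniquePtrSketch {
	UniquePtrSketch(T *ptr, void (*free_fn)(T *)) : ptr(ptr), free_fn(free_fn) {
	}
	// move transfers both the pointer and the deleter
	UniquePtrSketch(UniquePtrSketch &&other) : ptr(other.ptr), free_fn(other.free_fn) {
		other.ptr = nullptr;
		other.free_fn = nullptr;
	}
	~UniquePtrSketch() {
		if (ptr && free_fn) {
			free_fn(ptr);
		}
	}
	T *ptr;
	void (*free_fn)(T *);
};

int main() {
	UniquePtrSketch<kernel_object> a(new kernel_object {42}, kernel_free);
	UniquePtrSketch<kernel_object> b(std::move(a)); // a no longer owns anything
} // prints "freed 42" exactly once
```
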
+
+// Syntactic sugar around the different kernel types
+template <typename KernelType, void (*DeleteFunction)(KernelType *)>
+struct TemplatedUniqueKernelPointer : public UniqueKernelPointer<KernelType> {
+ TemplatedUniqueKernelPointer() : UniqueKernelPointer<KernelType>() {};
+ TemplatedUniqueKernelPointer(KernelType *ptr) : UniqueKernelPointer<KernelType>(ptr, DeleteFunction) {};
+};
+
+typedef TemplatedUniqueKernelPointer<ffi::SharedSnapshot, ffi::free_snapshot> KernelSnapshot;
+typedef TemplatedUniqueKernelPointer<ffi::SharedExternEngine, ffi::free_engine> KernelExternEngine;
+typedef TemplatedUniqueKernelPointer<ffi::SharedScan, ffi::free_scan> KernelScan;
+typedef TemplatedUniqueKernelPointer<ffi::SharedGlobalScanState, ffi::free_global_scan_state> KernelGlobalScanState;
+typedef TemplatedUniqueKernelPointer<ffi::SharedScanDataIterator, ffi::free_kernel_scan_data> KernelScanDataIterator;
+
+struct KernelUtils {
+ static ffi::KernelStringSlice ToDeltaString(const string &str);
+ static string FromDeltaString(const struct ffi::KernelStringSlice slice);
+ static vector<bool> FromDeltaBoolSlice(const struct ffi::KernelBoolSlice slice);
+
+ // TODO: all kernel results need to be unpacked; not doing so will result in an error. This should be cleaned up.
+ template <class T>
+ static T UnpackResult(ffi::ExternResult<T> result, const string &from_where) {
+ if (result.tag == ffi::ExternResult<T>::Tag::Err) {
+ if (result.err._0) {
+ auto error_cast = static_cast<DuckDBEngineError *>(result.err._0);
+ error_cast->Throw(from_where);
+ } else {
+ throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error, but error was nullptr",
+ from_where.c_str());
+ }
+ } else if (result.tag == ffi::ExternResult<T>::Tag::Ok) {
+ return result.ok._0;
+ }
+ throw IOException("Invalid error ExternResult tag found!");
+ }
+};
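
The same unpack-or-throw shape, reduced to a standalone analogue with stand-in types (the kernel's actual ExternResult layout differs in detail): an FFI-style tagged union either carries a value or an error, and unpacking converts the error arm into a C++ exception.

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

template <class T>
struct ExternResultSketch {
	enum class Tag { Ok, Err } tag;
	T ok;
	const char *err;
};

template <class T>
T Unpack(ExternResultSketch<T> result, const std::string &from) {
	if (result.tag == ExternResultSketch<T>::Tag::Err) {
		throw std::runtime_error("kernel error (from: " + from + "): " +
		                         (result.err ? result.err : "nullptr"));
	}
	return result.ok;
}

int main() {
	ExternResultSketch<int> ok {ExternResultSketch<int>::Tag::Ok, 7, nullptr};
	std::cout << Unpack(ok, "demo") << "\n"; // 7
	ExternResultSketch<int> err {ExternResultSketch<int>::Tag::Err, 0, "boom"};
	try {
		Unpack(err, "demo");
	} catch (const std::exception &e) {
		std::cout << e.what() << "\n"; // kernel error (from: demo): boom
	}
}
```
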
+
+class PredicateVisitor : public ffi::EnginePredicate {
+public:
+ PredicateVisitor(const vector<string> &column_names, optional_ptr<TableFilterSet> filters);
+
+private:
+ unordered_map<string, TableFilter *> column_filters;
+
+ static uintptr_t VisitPredicate(PredicateVisitor *predicate, ffi::KernelExpressionVisitorState *state);
+
+ uintptr_t VisitConstantFilter(const string &col_name, const ConstantFilter &filter,
+ ffi::KernelExpressionVisitorState *state);
+ uintptr_t VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter,
+ ffi::KernelExpressionVisitorState *state);
+ uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState *state);
+};
+
+} // namespace duckdb
diff --git a/extension/delta/src/include/functions/delta_scan.hpp b/extension/delta/src/include/functions/delta_scan.hpp
new file mode 100644
index 00000000000..32d681fa3db
--- /dev/null
+++ b/extension/delta/src/include/functions/delta_scan.hpp
@@ -0,0 +1,146 @@
+//===----------------------------------------------------------------------===//
+// DuckDB
+//
+// functions/delta_scan.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "delta_utils.hpp"
+#include "duckdb/common/multi_file_reader.hpp"
+
+namespace duckdb {
+
+struct DeltaFileMetaData {
+ DeltaFileMetaData() {};
+
+ // No copying allowed
+ DeltaFileMetaData(const DeltaFileMetaData &) = delete;
+ DeltaFileMetaData &operator=(const DeltaFileMetaData &) = delete;
+
+ ~DeltaFileMetaData() {
+ if (selection_vector.ptr) {
+ ffi::drop_bool_slice(selection_vector);
+ }
+ }
+
+ idx_t delta_snapshot_version = DConstants::INVALID_INDEX;
+ idx_t file_number = DConstants::INVALID_INDEX;
+ ffi::KernelBoolSlice selection_vector = {nullptr, 0};
+ case_insensitive_map_t<string> partition_map;
+};
+
+//! The DeltaSnapshot implements the MultiFileList API to allow injecting it into the regular DuckDB parquet scan
+struct DeltaSnapshot : public MultiFileList {
+ DeltaSnapshot(ClientContext &context, const string &path);
+ string GetPath();
+ static string ToDuckDBPath(const string &raw_path);
+ static string ToDeltaPath(const string &raw_path);
+
+ //! MultiFileList API
+public:
+ void Bind(vector<LogicalType> &return_types, vector<string> &names);
+ unique_ptr<MultiFileList> ComplexFilterPushdown(ClientContext &context, const MultiFileReaderOptions &options,
+ LogicalGet &get, vector<unique_ptr<Expression>> &filters) override;
+ vector<string> GetAllFiles() override;
+ FileExpandResult GetExpandResult() override;
+ idx_t GetTotalFileCount() override;
+
+protected:
+ //! Get the i-th expanded file
+ string GetFile(idx_t i) override;
+
+protected:
+ // TODO: How to guarantee we only call this after the filter pushdown?
+ void InitializeFiles();
+
+ template <class T>
+ T TryUnpackKernelResult(ffi::ExternResult<T> result) {
+ return KernelUtils::UnpackResult(
+ result, StringUtil::Format("While trying to read from delta table: '%s'", paths[0]));
+ }
+
+ // TODO: change back to protected
+public:
+ idx_t version;
+
+ //! Delta Kernel Structures
+ KernelSnapshot snapshot;
+ KernelExternEngine extern_engine;
+ KernelScan scan;
+ KernelGlobalScanState global_state;
+ KernelScanDataIterator scan_data_iterator;
+
+ //! Names
+ vector<string> names;
+
+ //! Metadata map for files
+ vector<unique_ptr<DeltaFileMetaData>> metadata;
+
+ //! Current file list resolution state
+ bool initialized = false;
+ bool files_exhausted = false;
+ vector<string> resolved_files;
+ TableFilterSet table_filters;
+
+ ClientContext &context;
+};
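
A standalone sketch of the lazy file-list contract assumed here: consumers pull files by index and an empty string signals exhaustion, so the snapshot only has to materialize as many files as are actually requested.

```cpp
#include <iostream>
#include <string>
#include <vector>

struct FileListSketch {
	std::vector<std::string> resolved = {"part-0.parquet", "part-1.parquet"};
	// empty string marks the end of the list (assumed contract)
	std::string GetFile(size_t i) {
		return i < resolved.size() ? resolved[i] : std::string();
	}
};

int main() {
	FileListSketch list;
	for (size_t i = 0;; i++) {
		auto file = list.GetFile(i);
		if (file.empty()) {
			break;
		}
		std::cout << file << "\n";
	}
}
```
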
+
+struct DeltaMultiFileReaderGlobalState : public MultiFileReaderGlobalState {
+ DeltaMultiFileReaderGlobalState(vector<LogicalType> extra_columns_p, optional_ptr<MultiFileList> file_list_p)
+ : MultiFileReaderGlobalState(extra_columns_p, file_list_p) {
+ }
+ //! The idx of the file number column in the result chunk
+ idx_t delta_file_number_idx = DConstants::INVALID_INDEX;
+ //! The idx of the file_row_number column in the result chunk
+ idx_t file_row_number_idx = DConstants::INVALID_INDEX;
+
+ void SetColumnIdx(const string &column, idx_t idx);
+};
+
+struct DeltaMultiFileReader : public MultiFileReader {
+ static unique_ptr<MultiFileReader> CreateInstance();
+ //! Return a DeltaSnapshot
+ unique_ptr<MultiFileList> CreateFileList(ClientContext &context, const vector<string> &paths,
+ FileGlobOptions options) override;
+
+ //! Override the regular parquet bind using the MultiFileReader Bind. The bind produced here is what DuckDB's
+ //! file readers will try to read
+ bool Bind(MultiFileReaderOptions &options, MultiFileList &files, vector<LogicalType> &return_types,
+ vector<string> &names, MultiFileReaderBindData &bind_data) override;
+
+ //! Override the Options bind
+ void BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector<LogicalType> &return_types,
+ vector<string> &names, MultiFileReaderBindData &bind_data) override;
+
+ void CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
+ const vector<string> &local_names, const vector<LogicalType> &global_types,
+ const vector<string> &global_names, const vector<column_t> &global_column_ids,
+ MultiFileReaderData &reader_data, const string &initial_file,
+ optional_ptr<MultiFileReaderGlobalState> global_state) override;
+
+ unique_ptr<MultiFileReaderGlobalState>
+ InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options,
+ const MultiFileReaderBindData &bind_data, const MultiFileList &file_list,
+ const vector<LogicalType> &global_types, const vector<string> &global_names,
+ const vector<column_t> &global_column_ids) override;
+
+ void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
+ const string &filename, const vector<string> &local_names,
+ const vector<LogicalType> &global_types, const vector<string> &global_names,
+ const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
+ ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) override;
+
+ //! Override the FinalizeChunk method
+ void FinalizeChunk(ClientContext &context, const MultiFileReaderBindData &bind_data,
+ const MultiFileReaderData &reader_data, DataChunk &chunk,
+ optional_ptr<MultiFileReaderGlobalState> global_state) override;
+
+ //! Override the ParseOption call to parse delta_scan specific options
+ bool ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options,
+ ClientContext &context) override;
+};
+
+} // namespace duckdb
diff --git a/extension/delta/vcpkg.json b/extension/delta/vcpkg.json
new file mode 100644
index 00000000000..85936bf44cc
--- /dev/null
+++ b/extension/delta/vcpkg.json
@@ -0,0 +1,5 @@
+{
+ "dependencies": [
+ "openssl"
+ ]
+}
\ No newline at end of file
diff --git a/extension/fts/fts_indexing.cpp b/extension/fts/fts_indexing.cpp
index 0673dcef0bd..c098f194901 100644
--- a/extension/fts/fts_indexing.cpp
+++ b/extension/fts/fts_indexing.cpp
@@ -191,7 +191,7 @@ static string IndexingScript(ClientContext &context, QualifiedName &qname, const
term_tf.termid,
tf,
df,
- (log(((SELECT num_docs FROM %fts_schema%.stats) - df + 0.5) / (df + 0.5))* ((tf * (k + 1)/(tf + k * (1 - b + b * (len / (SELECT avgdl FROM %fts_schema%.stats))))))) AS subscore
+ (log(((SELECT num_docs FROM %fts_schema%.stats) - df + 0.5) / (df + 0.5) + 1) * ((tf * (k + 1)/(tf + k * (1 - b + b * (len / (SELECT avgdl FROM %fts_schema%.stats))))))) AS subscore
FROM term_tf,
cdocs,
%fts_schema%.docs AS docs,
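
The one-line change above adds `+ 1` inside the `log`, switching the IDF factor to the non-negative variant (the form used by Lucene's BM25). With $N$ the number of documents and $df_t$ the term's document frequency:

\[
\mathrm{idf}(t) \;=\; \ln\!\left(\frac{N - df_t + 0.5}{df_t + 0.5} + 1\right)
\]

Without the $+1$, any term occurring in more than half the documents ($df_t > N/2$) drives the log argument below 1 and the subscore negative: for $N = 10$, $df_t = 9$ the old form gives $\ln(1.5/9.5) \approx -1.85$, while the fixed form gives $\ln(1 + 1.5/9.5) \approx 0.15$.
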
diff --git a/extension/httpfs/include/httpfs.hpp b/extension/httpfs/include/httpfs.hpp
index 1c49889b521..1ef1c11278a 100644
--- a/extension/httpfs/include/httpfs.hpp
+++ b/extension/httpfs/include/httpfs.hpp
@@ -10,7 +10,7 @@
namespace duckdb_httplib_openssl {
struct Response;
-struct Result;
+class Result;
class Client;
} // namespace duckdb_httplib_openssl
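
The forward declaration now matches httplib's definition of Result as a class. A self-contained illustration of the underlying rule: mismatched class-keys are accepted by GCC/Clang (class and struct name the same type) but raise warning C4099 under MSVC (names here are illustrative):

```cpp
// If this forward declaration said `struct Result;` while the definition
// below says `class Result`, the program would still be well-formed, but
// MSVC would emit warning C4099; matching the class-keys silences it.
class Result; // class-key agrees with the definition

class Result {
public:
	int status = 200;
};

int main() {
	Result r;
	return r.status == 200 ? 0 : 1;
}
```
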
diff --git a/extension/icu/icu-strptime.cpp b/extension/icu/icu-strptime.cpp
index d54d15a3700..60ad0c0de46 100644
--- a/extension/icu/icu-strptime.cpp
+++ b/extension/icu/icu-strptime.cpp
@@ -158,7 +158,7 @@ struct ICUStrptime : public ICUDateFunc {
}
}
- static bind_scalar_function_t bind_strptime;
+ static bind_scalar_function_t bind_strptime; // NOLINT
static duckdb::unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, ScalarFunction &bound_function,
vector<unique_ptr<Expression>> &arguments) {
@@ -194,7 +194,7 @@ struct ICUStrptime : public ICUDateFunc {
throw InvalidInputException("strptime format list must not be empty");
}
vector<StrpTimeFormat> formats;
- bool has_tz = true;
+ bool has_tz = false;
for (const auto &child : children) {
format_string = child.ToString();
format.format_specifier = format_string;
@@ -341,7 +341,7 @@ struct ICUStrptime : public ICUDateFunc {
}
};
-bind_scalar_function_t ICUStrptime::bind_strptime = nullptr;
+bind_scalar_function_t ICUStrptime::bind_strptime = nullptr; // NOLINT
struct ICUStrftime : public ICUDateFunc {
static void ParseFormatSpecifier(string_t &format_str, StrfTimeFormat &format) {
diff --git a/extension/icu/icu-table-range.cpp b/extension/icu/icu-table-range.cpp
index 9f466a7d656..f7efd856ad9 100644
--- a/extension/icu/icu-table-range.cpp
+++ b/extension/icu/icu-table-range.cpp
@@ -13,14 +13,13 @@ namespace duckdb {
struct ICUTableRange {
using CalendarPtr = unique_ptr<icu::Calendar>;
- struct BindData : public TableFunctionData {
- BindData(const BindData &other)
+ struct ICURangeBindData : public TableFunctionData {
+ ICURangeBindData(const ICURangeBindData &other)
: TableFunctionData(other), tz_setting(other.tz_setting), cal_setting(other.cal_setting),
- calendar(other.calendar->clone()), start(other.start), end(other.end), increment(other.increment),
- inclusive_bound(other.inclusive_bound), greater_than_check(other.greater_than_check) {
+ calendar(other.calendar->clone()) {
}
- explicit BindData(ClientContext &context) {
+ explicit ICURangeBindData(ClientContext &context) {
Value tz_value;
if (context.TryGetCurrentSetting("TimeZone", tz_value)) {
tz_setting = tz_value.ToString();
@@ -48,6 +47,15 @@ struct ICUTableRange {
string tz_setting;
string cal_setting;
CalendarPtr calendar;
+ };
+
+ struct ICURangeLocalState : public LocalTableFunctionState {
+ ICURangeLocalState() {
+ }
+
+ bool initialized_row = false;
+ idx_t current_input_row = 0;
+ timestamp_t current_state;
timestamp_t start;
timestamp_t end;
@@ -55,17 +63,6 @@ struct ICUTableRange {
bool inclusive_bound;
bool greater_than_check;
- bool Equals(const FunctionData &other_p) const override {
- auto &other = other_p.Cast<BindData>();
- return other.start == start && other.end == end && other.increment == increment &&
- other.inclusive_bound == inclusive_bound && other.greater_than_check == greater_than_check &&
- *calendar == *other.calendar;
- }
-
- unique_ptr Copy() const override {
- return make_uniq<BindData>(*this);
- }
-
bool Finished(timestamp_t current_value) const {
if (greater_than_check) {
if (inclusive_bound) {
@@ -84,107 +81,129 @@ struct ICUTableRange {
};
template <bool GENERATE_SERIES>
- static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
- vector<LogicalType> &return_types, vector<string> &names) {
- auto result = make_uniq<BindData>(context);
-
- auto &inputs = input.inputs;
- D_ASSERT(inputs.size() == 3);
- for (const auto &value : inputs) {
- if (value.IsNull()) {
- throw BinderException("RANGE with NULL bounds is not supported");
+ static void GenerateRangeDateTimeParameters(DataChunk &input, idx_t row_id, ICURangeLocalState &result) {
+ input.Flatten();
+ for (idx_t c = 0; c < input.ColumnCount(); c++) {
+ if (FlatVector::IsNull(input.data[c], row_id)) {
+ result.start = timestamp_t(0);
+ result.end = timestamp_t(0);
+ result.increment = interval_t();
+ result.greater_than_check = true;
+ result.inclusive_bound = false;
+ return;
}
}
- result->start = inputs[0].GetValue<timestamp_t>();
- result->end = inputs[1].GetValue<timestamp_t>();
- result->increment = inputs[2].GetValue<interval_t>();
+
+ result.start = FlatVector::GetValue<timestamp_t>(input.data[0], row_id);
+ result.end = FlatVector::GetValue<timestamp_t>(input.data[1], row_id);
+ result.increment = FlatVector::GetValue<interval_t>(input.data[2], row_id);
// Infinities either cause errors or infinite loops, so just ban them
- if (!Timestamp::IsFinite(result->start) || !Timestamp::IsFinite(result->end)) {
+ if (!Timestamp::IsFinite(result.start) || !Timestamp::IsFinite(result.end)) {
throw BinderException("RANGE with infinite bounds is not supported");
}
- if (result->increment.months == 0 && result->increment.days == 0 && result->increment.micros == 0) {
+ if (result.increment.months == 0 && result.increment.days == 0 && result.increment.micros == 0) {
throw BinderException("interval cannot be 0!");
}
// all elements should point in the same direction
- if (result->increment.months > 0 || result->increment.days > 0 || result->increment.micros > 0) {
- if (result->increment.months < 0 || result->increment.days < 0 || result->increment.micros < 0) {
+ if (result.increment.months > 0 || result.increment.days > 0 || result.increment.micros > 0) {
+ if (result.increment.months < 0 || result.increment.days < 0 || result.increment.micros < 0) {
throw BinderException("RANGE with composite interval that has mixed signs is not supported");
}
- result->greater_than_check = true;
- if (result->start > result->end) {
+ result.greater_than_check = true;
+ if (result.start > result.end) {
throw BinderException(
"start is bigger than end, but increment is positive: cannot generate infinite series");
}
} else {
- result->greater_than_check = false;
- if (result->start < result->end) {
+ result.greater_than_check = false;
+ if (result.start < result.end) {
throw BinderException(
"start is smaller than end, but increment is negative: cannot generate infinite series");
}
}
- return_types.push_back(inputs[0].type());
+ result.inclusive_bound = GENERATE_SERIES;
+ }
+
+ template <bool GENERATE_SERIES>
+ static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
+ vector<LogicalType> &return_types, vector<string> &names) {
+ auto result = make_uniq<ICURangeBindData>(context);
+
+ return_types.push_back(LogicalType::TIMESTAMP_TZ);
if (GENERATE_SERIES) {
- // generate_series has inclusive bounds on the RHS
- result->inclusive_bound = true;
names.emplace_back("generate_series");
} else {
- result->inclusive_bound = false;
names.emplace_back("range");
}
return std::move(result);
}
- struct State : public GlobalTableFunctionState {
- explicit State(timestamp_t start_p) : current_state(start_p) {
- }
-
- timestamp_t current_state;
- bool finished = false;
- };
-
- static unique_ptr<GlobalTableFunctionState> Init(ClientContext &context, TableFunctionInitInput &input) {
- auto &bind_data = input.bind_data->Cast<BindData>();
- return make_uniq<State>(bind_data.start);
+ static unique_ptr<LocalTableFunctionState> RangeDateTimeLocalInit(ExecutionContext &context,
+ TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state) {
+ return make_uniq<ICURangeLocalState>();
}
- static void ICUTableRangeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
- auto &bind_data = data_p.bind_data->Cast<BindData>();
+ template <bool GENERATE_SERIES>
+ static OperatorResultType ICUTableRangeFunction(ExecutionContext &context, TableFunctionInput &data_p,
+ DataChunk &input, DataChunk &output) {
+ auto &bind_data = data_p.bind_data->Cast<ICURangeBindData>();
+ auto &state = data_p.local_state->Cast<ICURangeLocalState>();
CalendarPtr calendar_ptr(bind_data.calendar->clone());
auto calendar = calendar_ptr.get();
- auto &state = data_p.global_state->Cast<State>();
- if (state.finished) {
- return;
- }
-
- idx_t size = 0;
- auto data = FlatVector::GetData<timestamp_t>(output.data[0]);
while (true) {
- data[size++] = state.current_state;
- state.current_state = ICUDateFunc::Add(calendar, state.current_state, bind_data.increment);
- if (bind_data.Finished(state.current_state)) {
- state.finished = true;
- break;
+ if (!state.initialized_row) {
+ // initialize for the current input row
+ if (state.current_input_row >= input.size()) {
+ // ran out of rows
+ state.current_input_row = 0;
+ state.initialized_row = false;
+ return OperatorResultType::NEED_MORE_INPUT;
+ }
+ GenerateRangeDateTimeParameters(input, state.current_input_row, state);
+ state.initialized_row = true;
+ state.current_state = state.start;
+ }
+ idx_t size = 0;
+ auto data = FlatVector::GetData<timestamp_t>(output.data[0]);
+ while (true) {
+ if (state.Finished(state.current_state)) {
+ break;
+ }
+ data[size++] = state.current_state;
+ state.current_state = ICUDateFunc::Add(calendar, state.current_state, state.increment);
+ if (size >= STANDARD_VECTOR_SIZE) {
+ break;
+ }
}
- if (size >= STANDARD_VECTOR_SIZE) {
- break;
+ if (size == 0) {
+ // move to next row
+ state.current_input_row++;
+ state.initialized_row = false;
+ continue;
}
+ output.SetCardinality(size);
+ return OperatorResultType::HAVE_MORE_OUTPUT;
}
- output.SetCardinality(size);
}
static void AddICUTableRangeFunction(DatabaseInstance &db) {
TableFunctionSet range("range");
- range.AddFunction(TableFunction({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
- ICUTableRangeFunction, Bind<false>, Init));
+ TableFunction range_function({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
+ nullptr, Bind<false>, nullptr, RangeDateTimeLocalInit);
+ range_function.in_out_function = ICUTableRangeFunction<false>;
+ range.AddFunction(range_function);
ExtensionUtil::AddFunctionOverload(db, range);
// generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS
TableFunctionSet generate_series("generate_series");
- generate_series.AddFunction(
- TableFunction({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
- ICUTableRangeFunction, Bind<true>, Init));
+ TableFunction generate_series_function(
+ {LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL}, nullptr, Bind<true>, nullptr,
+ RangeDateTimeLocalInit);
+ generate_series_function.in_out_function = ICUTableRangeFunction<true>;
+ generate_series.AddFunction(generate_series_function);
ExtensionUtil::AddFunctionOverload(db, generate_series);
}
};
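
The rewrite turns range/generate_series into in-out table functions driven once per input row. A standalone sketch of the protocol assumed above (enum names mirror DuckDB's OperatorResultType; the tiny output capacity stands in for STANDARD_VECTOR_SIZE):

```cpp
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

enum class ResultSketch { HAVE_MORE_OUTPUT, NEED_MORE_INPUT };

struct RangeState {
	size_t current_row = 0;
	bool initialized = false;
	int value = 0, end = 0;
};

ResultSketch Produce(const std::vector<std::pair<int, int>> &input, RangeState &state,
                     std::vector<int> &output) {
	output.clear();
	while (true) {
		if (!state.initialized) {
			if (state.current_row >= input.size()) {
				// every input row exhausted: ask the pipeline for the next batch
				state.current_row = 0;
				return ResultSketch::NEED_MORE_INPUT;
			}
			state.value = input[state.current_row].first;
			state.end = input[state.current_row].second;
			state.initialized = true;
		}
		while (state.value < state.end && output.size() < 2 /* tiny vector size */) {
			output.push_back(state.value++);
		}
		if (output.empty()) {
			// current row produced nothing more: advance to the next input row
			state.current_row++;
			state.initialized = false;
			continue;
		}
		return ResultSketch::HAVE_MORE_OUTPUT;
	}
}

int main() {
	std::vector<std::pair<int, int>> input = {{0, 3}, {10, 11}};
	RangeState state;
	std::vector<int> out;
	while (Produce(input, state, out) == ResultSketch::HAVE_MORE_OUTPUT) {
		for (int v : out) {
			std::cout << v << " "; // 0 1 2 10
		}
	}
}
```
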
diff --git a/extension/jemalloc/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h b/extension/jemalloc/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h
index 46bff2794c5..e20bff79423 100644
--- a/extension/jemalloc/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h
+++ b/extension/jemalloc/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h
@@ -2,7 +2,7 @@
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
-#if defined(__GNUC__) && !defined(__clang__)
+#if defined(__GNUC__)
#define _GNU_SOURCE
#endif
@@ -453,12 +453,12 @@
/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
/* GNU specific sched_getcpu support */
-#if defined(__GNUC__) && !defined(__clang__)
+#if defined(__GNUC__)
#define JEMALLOC_HAVE_SCHED_GETCPU
#endif
/* GNU specific sched_setaffinity support */
-#if defined(__GNUC__) && !defined(__clang__)
+#if defined(__GNUC__)
#define JEMALLOC_HAVE_SCHED_SETAFFINITY
#endif
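
These hunks rely on the fact that Clang also defines __GNUC__ (for GNU compatibility), so dropping !defined(__clang__) enables _GNU_SOURCE and the sched_getcpu/sched_setaffinity feature flags for Clang builds as well. A quick self-contained check:

```cpp
#include <cstdio>

int main() {
#if defined(__GNUC__)
	// Both GCC and Clang take this branch
	std::printf("__GNUC__ is defined: %d\n", __GNUC__);
#else
	std::printf("__GNUC__ is not defined\n");
#endif
}
```
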
diff --git a/extension/parquet/parquet_reader.cpp b/extension/parquet/parquet_reader.cpp
index 75017ea3584..1c2e88ef86e 100644
--- a/extension/parquet/parquet_reader.cpp
+++ b/extension/parquet/parquet_reader.cpp
@@ -242,7 +242,7 @@ LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele, bool bi
case ConvertedType::INTERVAL:
return LogicalType::INTERVAL;
case ConvertedType::JSON:
- return LogicalType::VARCHAR;
+ return LogicalType::JSON();
case ConvertedType::NULL_TYPE:
return LogicalTypeId::SQLNULL;
case ConvertedType::MAP:
diff --git a/extension/parquet/parquet_writer.cpp b/extension/parquet/parquet_writer.cpp
index 10c3386136e..172bbc8f8bf 100644
--- a/extension/parquet/parquet_writer.cpp
+++ b/extension/parquet/parquet_writer.cpp
@@ -8,6 +8,8 @@
#ifndef DUCKDB_AMALGAMATION
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/serializer/buffered_file_writer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/function/table_function.hpp"
@@ -15,8 +17,6 @@
#include "duckdb/main/connection.hpp"
#include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
-#include "duckdb/common/serializer/serializer.hpp"
-#include "duckdb/common/serializer/deserializer.hpp"
#endif
namespace duckdb {
@@ -216,6 +216,11 @@ CopyTypeSupport ParquetWriter::TypeIsSupported(const LogicalType &type) {
void ParquetWriter::SetSchemaProperties(const LogicalType &duckdb_type,
duckdb_parquet::format::SchemaElement &schema_ele) {
+ if (duckdb_type.IsJSONType()) {
+ schema_ele.converted_type = ConvertedType::JSON;
+ schema_ele.__isset.converted_type = true;
+ return;
+ }
switch (duckdb_type.id()) {
case LogicalTypeId::TINYINT:
schema_ele.converted_type = ConvertedType::INT_8;
@@ -578,7 +583,7 @@ void ParquetWriter::Finalize() {
}
// flush to disk
- writer->Sync();
+ writer->Close();
writer.reset();
}
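
Taken together with the reader hunk above, JSON columns now round-trip through Parquet: the writer stamps ConvertedType::JSON on JSON logical types, and the reader maps that converted type back to JSON instead of VARCHAR. A hedged sketch through DuckDB's C++ API (file path hypothetical; requires the json extension):

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	con.Query("COPY (SELECT '{\"a\": 1}'::JSON AS j) TO 'out.parquet'");
	// Before this change typeof(j) came back VARCHAR; now it reads back as JSON
	con.Query("SELECT typeof(j) FROM 'out.parquet'")->Print();
}
```
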
diff --git a/extension/sqlsmith/CMakeLists.txt b/extension/sqlsmith/CMakeLists.txt
deleted file mode 100644
index 309e2e60d5d..00000000000
--- a/extension/sqlsmith/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-cmake_minimum_required(VERSION 2.8.12...3.29)
-
-project(SQLSmithExtension)
-
-include_directories(include)
-include_directories(third_party/sqlsmith/include)
-add_subdirectory(third_party)
-
-set(SQLSMITH_SOURCES
- sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp
- fuzzyduck.cpp ${SQLSMITH_OBJECT_FILES})
-
-build_static_extension(sqlsmith ${SQLSMITH_SOURCES})
-set(PARAMETERS "-warnings")
-build_loadable_extension(sqlsmith ${PARAMETERS} ${SQLSMITH_SOURCES})
-
-install(
- TARGETS sqlsmith_extension
- EXPORT "${DUCKDB_EXPORT_SET}"
- LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
- ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
diff --git a/extension/sqlsmith/fuzzyduck.cpp b/extension/sqlsmith/fuzzyduck.cpp
deleted file mode 100644
index 7d4f0b932e6..00000000000
--- a/extension/sqlsmith/fuzzyduck.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-#include "fuzzyduck.hpp"
-#include "duckdb/common/random_engine.hpp"
-#include "statement_generator.hpp"
-#include <algorithm>
-#include <random>
-#include <thread>
-
-namespace duckdb {
-
-FuzzyDuck::FuzzyDuck(ClientContext &context) : context(context) {
-}
-
-FuzzyDuck::~FuzzyDuck() {
-}
-
-void FuzzyDuck::BeginFuzzing() {
- auto &random_engine = RandomEngine::Get(context);
- if (seed == 0) {
- seed = random_engine.NextRandomInteger();
- }
- if (max_queries == 0) {
- throw BinderException("Provide a max_queries argument greater than 0");
- }
- if (!complete_log.empty()) {
- auto &fs = FileSystem::GetFileSystem(context);
- TryRemoveFile(complete_log);
- complete_log_handle =
- fs.OpenFile(complete_log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
- }
-}
-
-void FuzzyDuck::EndFuzzing() {
- if (complete_log_handle) {
- complete_log_handle->Close();
- }
-}
-
-void FuzzyDuck::Fuzz() {
- BeginFuzzing();
- for (idx_t i = 0; i < max_queries; i++) {
- LogMessage("Query " + to_string(i) + "\n");
- auto query = GenerateQuery();
- RunQuery(std::move(query));
- }
- EndFuzzing();
-}
-
-void FuzzyDuck::FuzzAllFunctions() {
- StatementGenerator generator(context);
- auto queries = generator.GenerateAllFunctionCalls();
-
- if (max_queries == 0) {
- max_queries = queries.size();
- }
-
- std::default_random_engine e(seed);
- std::shuffle(std::begin(queries), std::end(queries), e);
- BeginFuzzing();
- for (auto &query : queries) {
- RunQuery(std::move(query));
- }
- EndFuzzing();
-}
-
-string FuzzyDuck::GenerateQuery() {
- LogTask("Generating query with seed " + to_string(seed));
- auto &engine = RandomEngine::Get(context);
- // set the seed
- engine.SetSeed(seed);
- // get the next seed
- seed = engine.NextRandomInteger();
-
- // generate the statement
- StatementGenerator generator(context);
- auto statement = generator.GenerateStatement();
- return statement->ToString();
-}
-
-void sleep_thread(Connection *con, atomic<bool> *is_active, atomic<bool> *timed_out, idx_t timeout_duration) {
- // timeout is given in seconds
- // we wait 10ms per iteration, so timeout * 100 gives us the amount of
- // iterations
- for (size_t i = 0; i < (size_t)(timeout_duration * 100) && *is_active; i++) {
- std::this_thread::sleep_for(std::chrono::milliseconds(10));
- }
- if (*is_active) {
- *timed_out = true;
- con->Interrupt();
- }
-}
-
-void FuzzyDuck::RunQuery(string query) {
- LogQuery(query + ";");
-
- Connection con(*context.db);
- atomic<bool> is_active(true);
- atomic<bool> timed_out(false);
- std::thread interrupt_thread(sleep_thread, &con, &is_active, &timed_out, timeout);
-
- auto result = con.Query(query);
- is_active = false;
- interrupt_thread.join();
- if (timed_out) {
- LogMessage("TIMEOUT");
- } else if (result->HasError()) {
- LogMessage("EXECUTION ERROR: " + result->GetError());
- } else {
- LogMessage("EXECUTION SUCCESS!");
- }
-}
-
-void FuzzyDuck::TryRemoveFile(const string &path) {
- auto &fs = FileSystem::GetFileSystem(context);
- if (fs.FileExists(path)) {
- fs.RemoveFile(path);
- }
-}
-
-void FuzzyDuck::LogMessage(const string &message) {
- if (!verbose_output) {
- return;
- }
- Printer::Print(message);
-}
-
-void FuzzyDuck::LogTask(const string &message) {
- if (verbose_output) {
- LogMessage(message + "\n");
- }
- LogToCurrent(message);
-}
-
-void FuzzyDuck::LogQuery(const string &message) {
- if (verbose_output) {
- LogMessage(message + "\n");
- }
- LogToCurrent(message);
- LogToComplete(message);
-}
-
-void FuzzyDuck::LogToCurrent(const string &message) {
- if (log.empty()) {
- return;
- }
- auto &fs = FileSystem::GetFileSystem(context);
- TryRemoveFile(log);
- auto file = fs.OpenFile(log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
- file->Write((void *)message.c_str(), message.size());
- file->Sync();
- file->Close();
-}
-void FuzzyDuck::LogToComplete(const string &message) {
- if (!complete_log_handle) {
- return;
- }
- complete_log_handle->Write((void *)message.c_str(), message.size());
- complete_log_handle->Write((void *)"\n", 1);
- complete_log_handle->Sync();
-}
-
-} // namespace duckdb
diff --git a/extension/sqlsmith/include/fuzzyduck.hpp b/extension/sqlsmith/include/fuzzyduck.hpp
deleted file mode 100644
index 4393e87bc35..00000000000
--- a/extension/sqlsmith/include/fuzzyduck.hpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===----------------------------------------------------------------------===//
-// DuckDB
-//
-// fuzzyduck.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "duckdb.hpp"
-#include "duckdb/parser/query_node.hpp"
-
-namespace duckdb {
-struct FileHandle;
-
-class FuzzyDuck {
-public:
- FuzzyDuck(ClientContext &context);
- ~FuzzyDuck();
-
- ClientContext &context;
- uint32_t seed = 0;
- idx_t max_queries = 0;
- string complete_log;
- string log;
- bool verbose_output = false;
- idx_t timeout = 30;
-
-public:
- void Fuzz();
- void FuzzAllFunctions();
-
-private:
- void BeginFuzzing();
- void EndFuzzing();
-
- string GenerateQuery();
- void RunQuery(string query);
-
- void LogMessage(const string &message);
- void LogTask(const string &message);
- void LogQuery(const string &message);
-
- void LogToCurrent(const string &message);
- void LogToComplete(const string &message);
-
- void TryRemoveFile(const string &path);
-
-private:
- unique_ptr<FileHandle> complete_log_handle;
-};
-
-} // namespace duckdb
diff --git a/extension/sqlsmith/include/sqlsmith_extension.hpp b/extension/sqlsmith/include/sqlsmith_extension.hpp
deleted file mode 100644
index 4cd16f77f26..00000000000
--- a/extension/sqlsmith/include/sqlsmith_extension.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===----------------------------------------------------------------------===//
-// DuckDB
-//
-// sqlsmith_extension.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "duckdb.hpp"
-
-namespace duckdb {
-
-class SqlsmithExtension : public Extension {
-public:
- void Load(DuckDB &db) override;
- std::string Name() override;
- std::string Version() const override;
-};
-
-} // namespace duckdb
diff --git a/extension/sqlsmith/include/statement_generator.hpp b/extension/sqlsmith/include/statement_generator.hpp
deleted file mode 100644
index bc96b609727..00000000000
--- a/extension/sqlsmith/include/statement_generator.hpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//===----------------------------------------------------------------------===//
-// DuckDB
-//
-// statement_generator.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "duckdb.hpp"
-#include "duckdb/parser/query_node.hpp"
-
-namespace duckdb {
-class SQLStatement;
-class SelectStatement;
-class InsertStatement;
-class UpdateStatement;
-class DeleteStatement;
-class TableRef;
-class SelectNode;
-class SetOperationNode;
-class QueryNode;
-class ParsedExpression;
-class ResultModifier;
-class OrderModifier;
-class UpdateSetInfo;
-
-struct GeneratorContext;
-
-class StatementGenerator {
-public:
- constexpr static idx_t MAX_DEPTH = 10;
- constexpr static idx_t MAX_EXPRESSION_DEPTH = 50;
-
- friend class ExpressionDepthChecker;
- friend class AggregateChecker;
- friend class WindowChecker;
-
-public:
- StatementGenerator(ClientContext &context);
- StatementGenerator(StatementGenerator &parent);
- ~StatementGenerator();
-
-public:
- unique_ptr<SQLStatement> GenerateStatement();
-
- vector<string> GenerateAllFunctionCalls();
-
-private:
- unique_ptr<SQLStatement> GenerateStatement(StatementType type);
-
- unique_ptr<SelectStatement> GenerateSelect();
- unique_ptr<CreateStatement> GenerateCreate();
- unique_ptr<QueryNode> GenerateQueryNode();
-
- unique_ptr<CreateInfo> GenerateCreateInfo();
-
- void GenerateCTEs(QueryNode &node);
- unique_ptr<TableRef> GenerateTableRef();
- unique_ptr<ParsedExpression> GenerateExpression();
-
- unique_ptr<TableRef> GenerateBaseTableRef();
- unique_ptr<TableRef> GenerateExpressionListRef();
- unique_ptr<TableRef> GenerateJoinRef();
- unique_ptr<TableRef> GenerateSubqueryRef();
- unique_ptr<TableRef> GenerateTableFunctionRef();
- unique_ptr<TableRef> GeneratePivotRef();
-
- unique_ptr<ParsedExpression> GenerateConstant();
- unique_ptr<ParsedExpression> GenerateColumnRef();
- unique_ptr<ParsedExpression> GenerateFunction();
- unique_ptr<ParsedExpression> GenerateOperator();
- unique_ptr<ParsedExpression> GenerateWindowFunction(optional_ptr<AggregateFunction> function = nullptr);
- unique_ptr<ParsedExpression> GenerateConjunction();
- unique_ptr<ParsedExpression> GenerateStar();
- unique_ptr<ParsedExpression> GenerateLambda();
- unique_ptr<ParsedExpression> GenerateSubquery();
- unique_ptr<ParsedExpression> GenerateCast();
- unique_ptr<ParsedExpression> GenerateBetween();
- unique_ptr<ParsedExpression> GenerateComparison();
- unique_ptr<ParsedExpression> GeneratePositionalReference();
- unique_ptr<ParsedExpression> GenerateCase();
-
- unique_ptr<OrderModifier> GenerateOrderBy();
-
- LogicalType GenerateLogicalType();
-
- void GenerateAllScalar(ScalarFunctionCatalogEntry &scalar_function, vector<string> &result);
- void GenerateAllAggregate(AggregateFunctionCatalogEntry &aggregate_function, vector<string> &result);
- string GenerateTestAllTypes(BaseScalarFunction &base_function);
- string GenerateTestVectorTypes(BaseScalarFunction &base_function);
- string GenerateCast(const LogicalType &target, const string &source_name, bool add_varchar);
- bool FunctionArgumentsAlwaysNull(const string &name);
-
- idx_t RandomValue(idx_t max);
- bool RandomBoolean();
- //! Returns true with a percentage chance (0-100)
- bool RandomPercentage(idx_t percentage);
- string RandomString(idx_t length);
- unique_ptr<ParsedExpression> RandomExpression(idx_t percentage);
-
- //! Generate identifier for a column or parent using "t" or "c" prefixes. ie. t0, or c0
- string GenerateIdentifier();
- string GenerateTableIdentifier();
- string GenerateSchemaIdentifier();
- string GenerateViewIdentifier();
-
- //! using the parent generate a relation name. ie. t0
- string GenerateRelationName();
- //! using the parent, generate a valid column name. ie. c0
- string GenerateColumnName();
- idx_t GetIndex();
-
- Value GenerateConstantValue();
-
- ExpressionType GenerateComparisonType();
-
- //! used to create columns when creating new tables;
-
-private:
- ClientContext &context;
- optional_ptr parent;
- unique_ptr<SQLStatement> current_statement;
- vector<string> current_relation_names;
- vector<string> current_column_names;
-
- std::shared_ptr<GeneratorContext> generator_context;
- idx_t index = 0;
- idx_t depth = 0;
- idx_t expression_depth = 0;
-
- bool in_window = false;
- bool in_aggregate = false;
-
- std::shared_ptr<GeneratorContext> GetDatabaseState(ClientContext &context);
- vector<unique_ptr<ParsedExpression>> GenerateChildren(idx_t min, idx_t max);
-
- template <class T>
- const T &Choose(const vector<T> &entries) {
- if (entries.empty()) {
- throw InternalException("Attempting to choose from an empty vector");
- }
- return entries[RandomValue(entries.size())];
- }
-};
-
-} // namespace duckdb
diff --git a/extension/sqlsmith/include/statement_simplifier.hpp b/extension/sqlsmith/include/statement_simplifier.hpp
deleted file mode 100644
index 0f238437fe8..00000000000
--- a/extension/sqlsmith/include/statement_simplifier.hpp
+++ /dev/null
@@ -1,89 +0,0 @@
-//===----------------------------------------------------------------------===//
-// DuckDB
-//
-// statement_simplifier.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "duckdb.hpp"
-#include "duckdb/parser/query_node.hpp"
-
-namespace duckdb {
-class SQLStatement;
-class SelectStatement;
-class InsertStatement;
-class UpdateStatement;
-class DeleteStatement;
-class TableRef;
-class SelectNode;
-class SetOperationNode;
-class QueryNode;
-class ParsedExpression;
-class ResultModifier;
-class OrderModifier;
-class UpdateSetInfo;
-class GroupByNode;
-
-class StatementSimplifier {
-public:
- StatementSimplifier(SQLStatement &statement_p, vector<string> &result_p);
-
- SQLStatement &statement;
- vector<string> &result;
-
-public:
- void Simplify(SQLStatement &stmt);
-
-private:
- void Simplify(SelectStatement &stmt);
- void Simplify(InsertStatement &stmt);
- void Simplify(UpdateStatement &stmt);
- void Simplify(DeleteStatement &stmt);
-
- void Simplification();
-
- template <class T>