Skip to content

Commit

Permalink
Merge branch 'feature' into summarize-temporal
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Wesley committed Jun 10, 2024
2 parents d99e22e + 889dd13 commit 8bedb9e
Show file tree
Hide file tree
Showing 391 changed files with 47,685 additions and 44,742 deletions.
12 changes: 0 additions & 12 deletions .github/actions/build_extensions/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,6 @@ runs:
ls -al
pwd
- name: Fix for MSVC issue (see e.g. https://github.com/TileDB-Inc/TileDB/pull/4759)
shell: bash
if: inputs.deploy_as == 'windows_amd64'
env:
OVERLAY_TRIPLET_SRC: ${{ github.workspace }}/vcpkg/triplets/community/x64-windows-static-md.cmake
OVERLAY_TRIPLET_DST: ${{ github.workspace }}/overlay_triplets/x64-windows-static-md.cmake
run: |
mkdir overlay_triplets
cp $OVERLAY_TRIPLET_SRC $OVERLAY_TRIPLET_DST
echo "set(VCPKG_PLATFORM_TOOLSET_VERSION "14.39")" >> $OVERLAY_TRIPLET_DST
- name: Set Openssl dir
if: inputs.openssl_path != ''
shell: bash
Expand Down Expand Up @@ -155,7 +144,6 @@ runs:
GEN: ${{ inputs.ninja == 1 && 'ninja' || '' }}
USE_MERGED_VCPKG_MANIFEST: 1
DUCKDB_PLATFORM: ${{ inputs.duckdb_arch }}
VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/overlay_triplets"
run: |
ls
mkdir -p ~/.ssh
Expand Down
1 change: 0 additions & 1 deletion .github/config/bundled_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ duckdb_extension_load(autocomplete)
#
## Extensions that are not linked, but we do want to test them as part of the release build
#
duckdb_extension_load(sqlsmith DONT_LINK)
duckdb_extension_load(tpcds DONT_LINK)
1 change: 0 additions & 1 deletion .github/config/in_tree_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@ duckdb_extension_load(inet)
duckdb_extension_load(icu)
duckdb_extension_load(json)
duckdb_extension_load(parquet)
duckdb_extension_load(sqlsmith)
duckdb_extension_load(tpcds)
duckdb_extension_load(tpch)
6 changes: 6 additions & 0 deletions .github/config/out_of_tree_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,18 @@ duckdb_extension_load(sqlite_scanner
GIT_TAG 50b7870be099186f195bc72bac5e9e11247ee2f9
)

duckdb_extension_load(sqlsmith
GIT_URL https://github.com/duckdb/duckdb_sqlsmith
GIT_TAG 721460ff1f31ce1dc1e4a9c4a55c0faf0b466dcb
)

################# SUBSTRAIT
if (NOT WIN32)
duckdb_extension_load(substrait
LOAD_TESTS DONT_LINK
GIT_URL https://github.com/duckdb/substrait
GIT_TAG 237931391ebc7e6aee7aa81052fa1411f6c4128e
APPLY_PATCHES
)
endif()

Expand Down
17 changes: 17 additions & 0 deletions .github/patches/extensions/substrait/pushdown_semi_anti.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp
index 90f7a67..f252aa7 100644
--- a/src/to_substrait.cpp
+++ b/src/to_substrait.cpp
@@ -864,7 +864,11 @@ substrait::Rel *DuckDBToSubstrait::TransformComparisonJoin(LogicalOperator &dop)
auto left_col_count = dop.children[0]->types.size();
if (dop.children[0]->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
auto child_join = (LogicalComparisonJoin *)dop.children[0].get();
- left_col_count = child_join->left_projection_map.size() + child_join->right_projection_map.size();
+ if (child_join->join_type != JoinType::SEMI && child_join->join_type != JoinType::ANTI) {
+ left_col_count = child_join->left_projection_map.size() + child_join->right_projection_map.size();
+ } else {
+ left_col_count = child_join->left_projection_map.size();
+ }
}
sjoin->set_allocated_expression(
CreateConjunction(djoin.conditions, [&](JoinCondition &in) { return TransformJoinCond(in, left_col_count); }));
1 change: 1 addition & 0 deletions .github/regression/micro_extended.csv
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ benchmark/micro/join/asof_join.benchmark
benchmark/micro/join/asof_join_small_probe.benchmark
benchmark/micro/join/blockwise_nl_join.benchmark
benchmark/micro/join/delim_join_no_blowup.benchmark
benchmark/micro/join/hashjoin_dups_rhs.benchmark
benchmark/micro/join/hashjoin_highcardinality.benchmark
benchmark/micro/join/hashjoin_lhsarithmetic.benchmark
benchmark/micro/join/iejoin_employees.benchmark
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/LinuxRelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ jobs:
ccache: 1
aarch64_cross_compile: 1

- name: Checkout (again)
shell: bash
run: git checkout ${{ inputs.git_ref }}

- name: Install unixODBC
shell: bash
run: | # we need an x86 odbc_config tool to run cmake. fun.
Expand Down Expand Up @@ -221,6 +225,10 @@ jobs:
openssl: 1
ccache: 1

- name: Checkout (again)
shell: bash
run: git checkout ${{ inputs.git_ref }}

- uses: ./.github/actions/build_extensions
with:
vcpkg_target_triplet: x64-linux
Expand Down Expand Up @@ -261,6 +269,10 @@ jobs:
aarch64_cross_compile: 1
ccache: 1

- name: Checkout (again)
shell: bash
run: git checkout ${{ inputs.git_ref }}

- uses: ./.github/actions/build_extensions
with:
vcpkg_target_triplet: arm64-linux
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/OnTag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ jobs:
uses: ./.github/workflows/TwineUpload.yml
secrets: inherit
with:
override_git_describe: ${{ inputs.override_git_describe || github.event.release.tag_name }}
override_git_describe: ${{ inputs.override_git_describe || github.ref_name }}

staged_upload:
uses: ./.github/workflows/StagedUpload.yml
secrets: inherit
with:
override_git_describe: ${{ inputs.override_git_describe || github.event.release.tag_name }}
override_git_describe: ${{ inputs.override_git_describe || github.ref_name }}
5 changes: 5 additions & 0 deletions .github/workflows/Windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ inputs.git_ref }}

- uses: actions/setup-python@v5
with:
Expand Down Expand Up @@ -189,6 +190,7 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ inputs.git_ref }}

- uses: actions/setup-python@v5
with:
Expand Down Expand Up @@ -227,6 +229,8 @@ jobs:
needs: win-release-64
steps:
- uses: actions/checkout@v3
with:
ref: ${{ inputs.git_ref }}
- uses: msys2/setup-msys2@v2
with:
msystem: MINGW64
Expand Down Expand Up @@ -276,6 +280,7 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ inputs.git_ref }}

- uses: actions/setup-python@v5
with:
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/coverity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ jobs:
BUILD_HTTPFS: 1
BUILD_JSON: 1
BUILD_INET: 1
BUILD_SQLSMITH: 1

- name: Upload the result
run: |
Expand Down
1 change: 1 addition & 0 deletions .sanitizer-thread-suppressions.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
deadlock:InitializeIndexes
race:NextInnerJoin
race:NextRightSemiOrAntiJoin
race:duckdb_moodycamel
race:duckdb_jemalloc
race:AddToEvictionQueue
Expand Down
39 changes: 35 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ if (OVERRIDE_GIT_DESCRIBE)
if (GIT_RESULT)
message(WARNING "git is available (at ${GIT_EXECUTABLE}) but has failed to execute 'log -1 --format=%h'. Consider providing explicit GIT_COMMIT_HASH")
set(GIT_DESCRIBE "${OVERRIDE_GIT_DESCRIBE}-0-g0123456789")
endif()
endif()
else()
set(GIT_DESCRIBE "${OVERRIDE_GIT_DESCRIBE}-0-g0123456789")
endif()
Expand All @@ -289,7 +289,7 @@ else()
if (GIT_RESULT)
message(WARNING "git is available (at ${GIT_EXECUTABLE}) but has failed to execute 'log -1 --format=%h'. Consider providing explicit GIT_COMMIT_HASH or OVERRIDE_GIT_DESCRIBE")
set(GIT_COMMIT_HASH "0123456789")
endif()
endif()
endif()
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --long
Expand Down Expand Up @@ -499,6 +499,37 @@ else()
endif()
endif()

# is_number(<input_string> <return_var>)
#
# Stores TRUE in <return_var> (in the caller's scope) when <input_string> is a
# non-empty sequence of decimal digits, and FALSE for anything else.
function(is_number input_string return_var)
  # Guard clause: anything that is not purely digits is rejected up front.
  if(NOT "${input_string}" MATCHES "^[0-9]+$")
    set(${return_var} FALSE PARENT_SCOPE)
    return()
  endif()
  set(${return_var} TRUE PARENT_SCOPE)
endfunction()

set(STANDARD_VECTOR_SIZE "" CACHE STRING "Set a custom STANDARD_VECTOR_SIZE at compile time")
set(BLOCK_ALLOC_SIZE "" CACHE STRING "Set a custom BLOCK_ALLOC_SIZE at compile time")

# Validate the optional compile-time overrides and, when valid, forward them to
# the compiler as preprocessor definitions.
#
# The value is passed to is_number() as a quoted argument. An unquoted
# reference is split on ';', so a value such as "1;2" would shift the
# arguments: is_number() would write its result to the wrong variable and the
# check below would read whatever is_number_result held before the call.
if(DEFINED STANDARD_VECTOR_SIZE AND NOT STANDARD_VECTOR_SIZE STREQUAL "")
  is_number("${STANDARD_VECTOR_SIZE}" is_number_result)
  if(is_number_result)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTANDARD_VECTOR_SIZE=${STANDARD_VECTOR_SIZE}")
    message(STATUS "STANDARD_VECTOR_SIZE is set to ${STANDARD_VECTOR_SIZE}")
  else()
    message(FATAL_ERROR "STANDARD_VECTOR_SIZE must be a number, not ${STANDARD_VECTOR_SIZE}")
  endif()
endif()

# Note the different macro name: BLOCK_ALLOC_SIZE is exposed to the sources as
# DUCKDB_BLOCK_ALLOC_SIZE.
if(DEFINED BLOCK_ALLOC_SIZE AND NOT BLOCK_ALLOC_SIZE STREQUAL "")
  is_number("${BLOCK_ALLOC_SIZE}" is_number_result)
  if(is_number_result)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDUCKDB_BLOCK_ALLOC_SIZE=${BLOCK_ALLOC_SIZE}")
    message(STATUS "BLOCK_ALLOC_SIZE is set to ${BLOCK_ALLOC_SIZE}")
  else()
    message(FATAL_ERROR "BLOCK_ALLOC_SIZE must be a number, not ${BLOCK_ALLOC_SIZE}")
  endif()
endif()

if(CUSTOM_LINKER)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${CUSTOM_LINKER}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${CUSTOM_LINKER}")
Expand Down Expand Up @@ -537,7 +568,7 @@ if(NOT MSVC)
endif()
else()
set(CMAKE_CXX_WINDOWS_FLAGS
"/wd4244 /wd4267 /wd4200 /wd26451 /wd26495 /D_CRT_SECURE_NO_WARNINGS /utf-8")
"/wd4244 /wd4267 /wd4200 /wd26451 /wd26495 /D_CRT_SECURE_NO_WARNINGS /utf-8 /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR")
if(TREAT_WARNINGS_AS_ERRORS)
set(CMAKE_CXX_WINDOWS_FLAGS "${CMAKE_CXX_WINDOWS_FLAGS} /WX")
endif()
Expand Down Expand Up @@ -1260,7 +1291,7 @@ if(BUILD_PYTHON)
DUCKDB_BINARY_DIR=${PROJECT_BINARY_DIR}
DUCKDB_COMPILE_FLAGS=${ALL_COMPILE_FLAGS}
DUCKDB_LIBS="${duckdb_libs}"
)
)

if(PYTHON_EDITABLE_BUILD)
set(PIP_COMMAND ${PIP_COMMAND} python3 -m pip install --editable .)
Expand Down
11 changes: 8 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,6 @@ endif
ifeq (${STATIC_OPENSSL}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DOPENSSL_USE_STATIC_LIBS=1
endif
ifeq (${BUILD_SQLSMITH}, 1)
BUILD_EXTENSIONS:=${BUILD_EXTENSIONS};sqlsmith
endif
ifeq (${BUILD_TPCE}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DBUILD_TPCE=1
endif
Expand Down Expand Up @@ -238,6 +235,14 @@ ifdef DEBUG_STACKTRACE
CMAKE_VARS:=${CMAKE_VARS} -DDEBUG_STACKTRACE=1
endif

# Optional overrides
# When STANDARD_VECTOR_SIZE or BLOCK_ALLOC_SIZE is set to a non-empty value,
# it is appended to CMAKE_VARS as a -D option of the same name. When unset or
# empty, nothing is added to CMAKE_VARS for that variable.
ifneq (${STANDARD_VECTOR_SIZE}, )
CMAKE_VARS:=${CMAKE_VARS} -DSTANDARD_VECTOR_SIZE=${STANDARD_VECTOR_SIZE}
endif
ifneq (${BLOCK_ALLOC_SIZE}, )
CMAKE_VARS:=${CMAKE_VARS} -DBLOCK_ALLOC_SIZE=${BLOCK_ALLOC_SIZE}
endif

# Enable VCPKG for this build
ifneq ("${VCPKG_TOOLCHAIN_PATH}", "")
CMAKE_VARS_BUILD:=${CMAKE_VARS_BUILD} -DCMAKE_TOOLCHAIN_FILE='${VCPKG_TOOLCHAIN_PATH}' -DVCPKG_BUILD=1
Expand Down
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,19 @@
</p>

## DuckDB
DuckDB is a high-performance analytical database system. It is designed to be fast, reliable, portable, and easy to use. DuckDB provides a rich SQL dialect, with support far beyond basic SQL. DuckDB supports arbitrary and nested correlated subqueries, window functions, collations, complex types (arrays, structs), and more. For more information on using DuckDB, please refer to the [DuckDB documentation](https://duckdb.org/docs/).

DuckDB is a high-performance analytical database system. It is designed to be fast, reliable, portable, and easy to use. DuckDB provides a rich SQL dialect, with support far beyond basic SQL. DuckDB supports arbitrary and nested correlated subqueries, window functions, collations, complex types (arrays, structs, maps), and [several extensions designed to make SQL easier to use](https://duckdb.org/docs/guides/sql_features/friendly_sql).

DuckDB is available as a [standalone CLI application](https://duckdb.org/docs/api/cli/overview) and has clients for [Python](https://duckdb.org/docs/api/python/overview), [R](https://duckdb.org/docs/api/r), [Java](https://duckdb.org/docs/api/java), [Wasm](https://duckdb.org/docs/api/wasm/overview), etc., with deep integrations with packages such as [pandas](https://duckdb.org/docs/guides/python/sql_on_pandas) and [dplyr](https://duckdblabs.github.io/duckplyr/).

For more information on using DuckDB, please refer to the [DuckDB documentation](https://duckdb.org/docs/).

## Installation
If you want to install and use DuckDB, please see [our website](https://www.duckdb.org) for installation and usage instructions.

If you want to install DuckDB, please see [our installation page](https://www.duckdb.org/docs/installation) for instructions.

## Data Import

For CSV files and Parquet files, data import is as simple as referencing the file in the FROM clause:

```sql
Expand All @@ -30,12 +37,15 @@ SELECT * FROM 'myfile.parquet';
Refer to our [Data Import](https://duckdb.org/docs/data/overview) section for more information.

## SQL Reference
The [website](https://duckdb.org/docs/sql/introduction) contains a reference of functions and SQL constructs available in DuckDB.

The documentation contains a [SQL introduction and reference](https://duckdb.org/docs/sql/introduction).

## Development

For development, DuckDB requires [CMake](https://cmake.org), Python 3, and a `C++11`-compliant compiler. Run `make` in the root directory to compile the sources. Use `make debug` to build a non-optimized debug version. You should run `make unit` and `make allunit` to verify that your version works properly after making changes. To test performance, you can run `BUILD_BENCHMARK=1 BUILD_TPCH=1 make` and then perform several standard benchmarks from the root directory by executing `./build/release/benchmark/benchmark_runner`. The details of benchmarks are in our [Benchmark Guide](benchmark/README.md).

Please also refer to our [Build Guide](https://duckdb.org/dev/building) and [Contribution Guide](CONTRIBUTING.md).

## Support

See the [Support Options](https://duckdblabs.com/support/) page.
13 changes: 10 additions & 3 deletions benchmark/benchmark_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@ atomic<bool> is_active;
atomic<bool> timeout;

void sleep_thread(Benchmark *benchmark, BenchmarkRunner *runner, BenchmarkState *state, bool hotrun,
int timeout_duration) {
if (timeout_duration < 0) {
const optional_idx &optional_timeout) {
if (!optional_timeout.IsValid()) {
return;
}
auto timeout_duration = optional_timeout.GetIndex();

// timeout is given in seconds
// we wait 10ms per iteration, so timeout * 100 gives us the amount of
// iterations
Expand Down Expand Up @@ -130,7 +132,8 @@ void BenchmarkRunner::RunBenchmark(Benchmark *benchmark) {
}
is_active = true;
timeout = false;
std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun, benchmark->Timeout());
std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun,
benchmark->Timeout(configuration));

profiler.Start();
benchmark->Run(state.get());
Expand Down Expand Up @@ -183,6 +186,8 @@ void print_help() {
fprintf(stderr, " --query Prints query of the benchmark\n");
fprintf(stderr, " --root-dir Sets the root directory for where to store temp data and "
"look for the 'benchmarks' directory\n");
fprintf(stderr, " --disable-timeout Disables killing the run after a certain amount of time has "
"passed (30 seconds by default)\n");
fprintf(stderr,
" [name_pattern] Run only the benchmark which names match the specified name pattern, "
"e.g., DS.* for TPC-DS benchmarks\n");
Expand Down Expand Up @@ -253,6 +258,8 @@ void parse_arguments(const int arg_counter, char const *const *arg_values) {
} else if (arg == "--query") {
// write group of benchmark
instance.configuration.meta = BenchmarkMetaType::QUERY;
} else if (arg == "--disable-timeout") {
instance.configuration.timeout_duration = optional_idx();
} else if (StringUtil::StartsWith(arg, "--out=") || StringUtil::StartsWith(arg, "--log=")) {
auto splits = StringUtil::Split(arg, '=');
if (splits.size() != 2) {
Expand Down
6 changes: 2 additions & 4 deletions benchmark/include/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ struct BenchmarkState {
//! new benchmarks
class Benchmark {
constexpr static size_t DEFAULT_NRUNS = 5;
constexpr static size_t DEFAULT_TIMEOUT = 30;

Benchmark(Benchmark &) = delete;

public:
Expand Down Expand Up @@ -87,8 +85,8 @@ class Benchmark {
return DEFAULT_NRUNS;
}
//! The timeout for this benchmark (in seconds)
virtual size_t Timeout() {
return DEFAULT_TIMEOUT;
virtual optional_idx Timeout(const BenchmarkConfiguration &config) {
return config.timeout_duration;
}
};

Expand Down
Loading

0 comments on commit 8bedb9e

Please sign in to comment.