Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into nl_join_reorder
Browse files Browse the repository at this point in the history
  • Loading branch information
korowa committed Mar 31, 2024
2 parents 2da33f4 + cd7a00b commit b04e2a7
Show file tree
Hide file tree
Showing 311 changed files with 18,362 additions and 11,007 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.github/ export-ignore
datafusion/proto/src/generated/prost.rs linguist-generated
datafusion/proto/src/generated/pbjson.rs linguist-generated
4 changes: 2 additions & 2 deletions .github/actions/setup-windows-builder/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ runs:
- name: Setup Rust toolchain
shell: bash
run: |
rustup update stable
rustup toolchain install stable
# Skip rustup's self-update to prevent CI failures: https://github.com/apache/arrow-datafusion/issues/9653
rustup toolchain install stable --no-self-update
rustup default stable
rustup component add rustfmt
- name: Configure rust runtime env
Expand Down
9 changes: 7 additions & 2 deletions .github/workflows/pr_benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ jobs:
cd benchmarks
mkdir data
# Setup the TPC-H data set with a scale factor of 10
# Set up the TPC-H data sets for scale factors 1 and 10
./bench.sh data tpch
./bench.sh data tpch10
- name: Generate unique result names
run: |
echo "HEAD_LONG_SHA=$(git log -1 --format='%H')" >> "$GITHUB_ENV"
Expand All @@ -44,6 +45,8 @@ jobs:
cd benchmarks
./bench.sh run tpch
./bench.sh run tpch_mem
./bench.sh run tpch10
# For some reason this step doesn't seem to propagate the env var down into the script
if [ -d "results/HEAD" ]; then
Expand All @@ -64,6 +67,8 @@ jobs:
cd benchmarks
./bench.sh run tpch
./bench.sh run tpch_mem
./bench.sh run tpch10
echo ${{ github.event.issue.number }} > pr
Expand Down
78 changes: 64 additions & 14 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,42 +65,73 @@ jobs:
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-benchmark-${{ hashFiles('datafusion/**/Cargo.toml', 'benchmarks/Cargo.toml', 'datafusion-cli/Cargo.toml') }}

- name: Check workspace without default features
- name: Check datafusion without default features
# Some of the test binaries require the parquet feature still
#run: cargo check --all-targets --no-default-features -p datafusion
run: cargo check --no-default-features -p datafusion

- name: Check datafusion-common without default features
run: cargo check --tests --no-default-features -p datafusion-common
run: cargo check --all-targets --no-default-features -p datafusion-common

- name: Check datafusion-functions
run: cargo check --all-targets --no-default-features -p datafusion-functions

- name: Check workspace in debug mode
run: cargo check

- name: Check workspace with all features
- name: Check workspace with avro,json features
run: cargo check --workspace --benches --features avro,json

- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory
# and check in the updated Cargo.lock file.
cargo check --manifest-path datafusion-cli/Cargo.toml --locked
# Ensure that the datafusion crate can be built with only a subset of the function
# packages enabled.
- name: Check function packages (array_expressions)
- name: Check datafusion (array_expressions)
run: cargo check --no-default-features --features=array_expressions -p datafusion

- name: Check function packages (datetime_expressions)
- name: Check datafusion (crypto)
run: cargo check --no-default-features --features=crypto_expressions -p datafusion

- name: Check datafusion (datetime_expressions)
run: cargo check --no-default-features --features=datetime_expressions -p datafusion

- name: Check function packages (encoding_expressions)
- name: Check datafusion (encoding_expressions)
run: cargo check --no-default-features --features=encoding_expressions -p datafusion

- name: Check function packages (math_expressions)
- name: Check datafusion (math_expressions)
run: cargo check --no-default-features --features=math_expressions -p datafusion

- name: Check function packages (regex_expressions)
- name: Check datafusion (regex_expressions)
run: cargo check --no-default-features --features=regex_expressions -p datafusion

- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory
# and check in the updated Cargo.lock file.
cargo check --manifest-path datafusion-cli/Cargo.toml --locked
- name: Check datafusion (string_expressions)
run: cargo check --no-default-features --features=string_expressions -p datafusion

# Ensure that the datafusion-functions crate can be built with only a subset of the function
# packages enabled.
- name: Check datafusion-functions (crypto)
run: cargo check --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions

- name: Check datafusion-functions (datetime_expressions)
run: cargo check --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions

- name: Check datafusion-functions (encoding_expressions)
run: cargo check --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions

# test the crate
- name: Check datafusion-functions (math_expressions)
run: cargo check --all-targets --no-default-features --features=math_expressions -p datafusion-functions

- name: Check datafusion-functions (regex_expressions)
run: cargo check --all-targets --no-default-features --features=regex_expressions -p datafusion-functions

- name: Check datafusion-functions (string_expressions)
run: cargo check --all-targets --no-default-features --features=string_expressions -p datafusion-functions

# Run tests
linux-test:
name: cargo test (amd64)
needs: [ linux-build-lib ]
Expand Down Expand Up @@ -164,6 +195,25 @@ jobs:
- name: Verify Working Directory Clean
run: git diff --exit-code

depcheck:
name: circular dependency check
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Check dependencies
run: |
cd dev/depcheck
cargo run
# Run `cargo test doc` (test documentation examples)
linux-test-doc:
name: cargo test doc (amd64)
Expand Down
54 changes: 27 additions & 27 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
# under the License.

[workspace]
exclude = ["datafusion-cli"]
exclude = ["datafusion-cli", "dev/depcheck"]
members = [
"datafusion/common",
"datafusion/common_runtime",
"datafusion/common-runtime",
"datafusion/core",
"datafusion/expr",
"datafusion/execution",
Expand Down Expand Up @@ -49,42 +49,42 @@ license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.72"
version = "36.0.0"
version = "37.0.0"

[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
#
# For more details, see: https://github.com/rust-lang/cargo/issues/11329
arrow = { version = "50.0.0", features = ["prettyprint"] }
arrow-array = { version = "50.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "50.0.0", default-features = false }
arrow-flight = { version = "50.0.0", features = ["flight-sql-experimental"] }
arrow-ipc = { version = "50.0.0", default-features = false, features = ["lz4"] }
arrow-ord = { version = "50.0.0", default-features = false }
arrow-schema = { version = "50.0.0", default-features = false }
arrow-string = { version = "50.0.0", default-features = false }
arrow = { version = "51.0.0", features = ["prettyprint"] }
arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "51.0.0", default-features = false }
arrow-flight = { version = "51.0.0", features = ["flight-sql-experimental"] }
arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4"] }
arrow-ord = { version = "51.0.0", default-features = false }
arrow-schema = { version = "51.0.0", default-features = false }
arrow-string = { version = "51.0.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "=0.4.1"
bytes = "1.4"
chrono = { version = "0.4.34", default-features = false }
ctor = "0.2.0"
dashmap = "5.4.0"
datafusion = { path = "datafusion/core", version = "36.0.0", default-features = false }
datafusion-common = { path = "datafusion/common", version = "36.0.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common_runtime", version = "36.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "36.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "36.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "36.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "36.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "36.0.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "36.0.0", default-features = false }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "36.0.0" }
datafusion-proto = { path = "datafusion/proto", version = "36.0.0" }
datafusion-sql = { path = "datafusion/sql", version = "36.0.0" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "36.0.0" }
datafusion-substrait = { path = "datafusion/substrait", version = "36.0.0" }
datafusion = { path = "datafusion/core", version = "37.0.0", default-features = false }
datafusion-common = { path = "datafusion/common", version = "37.0.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common-runtime", version = "37.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "37.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "37.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "37.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "37.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "37.0.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "37.0.0", default-features = false }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "37.0.0" }
datafusion-proto = { path = "datafusion/proto", version = "37.0.0" }
datafusion-sql = { path = "datafusion/sql", version = "37.0.0" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "37.0.0" }
datafusion-substrait = { path = "datafusion/substrait", version = "37.0.0" }
doc-comment = "0.3"
env_logger = "0.11"
futures = "0.3"
Expand All @@ -93,9 +93,9 @@ indexmap = "2.0.0"
itertools = "0.12"
log = "^0.4"
num_cpus = "1.13.0"
object_store = { version = "0.9.0", default-features = false }
object_store = { version = "0.9.1", default-features = false }
parking_lot = "0.12"
parquet = { version = "50.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
Expand Down
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ Optional features:
[apache avro]: https://avro.apache.org/
[apache parquet]: https://parquet.apache.org/

## Rust Version Compatibility
## Rust Version Compatibility Policy

Datafusion crate is tested with the [minimum required stable Rust version](https://github.com/search?q=repo%3Aapache%2Farrow-datafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
DataFusion's Minimum Required Stable Rust Version (MSRV) policy is to support
each stable Rust version for 6 months after it is
[released](https://github.com/rust-lang/rust/blob/master/RELEASES.md). This
generally translates to support for the most recent 3 to 4 stable Rust versions.

We enforce this policy using an [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Farrow-datafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code).
4 changes: 2 additions & 2 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ run_tpch() {
fi
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"

RESULTS_FILE="${RESULTS_DIR}/tpch.json"
RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch benchmark..."
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --format parquet -o ${RESULTS_FILE}
Expand All @@ -329,7 +329,7 @@ run_tpch_mem() {
fi
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"

RESULTS_FILE="${RESULTS_DIR}/tpch_mem.json"
RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch_mem benchmark..."
# -m means in memory
Expand Down
2 changes: 0 additions & 2 deletions conbench/.flake8

This file was deleted.

Loading

0 comments on commit b04e2a7

Please sign in to comment.