From 5d52b32a7d8a2a58c7de1a35a20e1c3e08b55ca3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 12 May 2022 17:13:05 -0600 Subject: [PATCH] Prepare for datafusion 8.0.0 , ballista 0.7.0 release (#2490) * bump versions * changelog 7.0.0 to 8.0.0 * update versions in docs * revert changelog * re-generate changelogs * revert ballista changelog * update version missed by script and update script * regenerate ballista changelog with correct version --- ballista-cli/Cargo.toml | 8 +- ballista-examples/Cargo.toml | 2 +- ballista/CHANGELOG.md | 283 +++++++++++++++ ballista/rust/client/Cargo.toml | 10 +- ballista/rust/core/Cargo.toml | 6 +- ballista/rust/executor/Cargo.toml | 6 +- ballista/rust/scheduler/Cargo.toml | 8 +- benchmarks/docker-compose.yaml | 6 +- datafusion-cli/Cargo.toml | 4 +- datafusion/CHANGELOG.md | 328 +++++++++++++++++- datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 14 +- datafusion/data-access/Cargo.toml | 2 +- datafusion/expr/Cargo.toml | 4 +- datafusion/jit/Cargo.toml | 6 +- datafusion/physical-expr/Cargo.toml | 8 +- datafusion/proto/Cargo.toml | 4 +- datafusion/row/Cargo.toml | 6 +- dev/release/README.md | 3 +- dev/release/update_change_log-all.sh | 33 -- dev/update_ballista_versions.py | 1 + dev/update_datafusion_versions.py | 1 + docs/source/cli/index.rst | 2 +- docs/source/user-guide/cli.md | 2 +- .../user-guide/distributed/clients/cli.rst | 2 +- .../distributed/deployment/docker-compose.md | 12 +- .../distributed/deployment/docker.md | 20 +- .../distributed/deployment/kubernetes.md | 14 +- docs/source/user-guide/example-usage.md | 2 +- docs/source/user-guide/library.md | 2 +- 30 files changed, 682 insertions(+), 119 deletions(-) delete mode 100755 dev/release/update_change_log-all.sh diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml index d921f9b0e908..7537dc4b2b83 100644 --- a/ballista-cli/Cargo.toml +++ b/ballista-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ballista-cli" description = "Command Line 
Client for Ballista distributed query engine." -version = "0.6.0" +version = "0.7.0" authors = ["Apache Arrow "] edition = "2021" keywords = [ "ballista", "cli", ] @@ -30,10 +30,10 @@ readme = "README.md" [dependencies] arrow = { version = "13" } -ballista = { path = "../ballista/rust/client", version = "0.6.0" } +ballista = { path = "../ballista/rust/client", version = "0.7.0" } clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "7.0.0" } -datafusion-cli = { path = "../datafusion-cli", version = "7.0.0" } +datafusion = { path = "../datafusion/core", version = "8.0.0" } +datafusion-cli = { path = "../datafusion-cli", version = "8.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "*", default-features = false } diff --git a/ballista-examples/Cargo.toml b/ballista-examples/Cargo.toml index afdd4862b459..f119401b062e 100644 --- a/ballista-examples/Cargo.toml +++ b/ballista-examples/Cargo.toml @@ -35,7 +35,7 @@ path = "examples/test_sql.rs" required-features = ["ballista/standalone"] [dependencies] -ballista = { path = "../ballista/rust/client", version = "0.6.0" } +ballista = { path = "../ballista/rust/client", version = "0.7.0" } datafusion = { path = "../datafusion/core" } futures = "0.3" num_cpus = "1.13.0" diff --git a/ballista/CHANGELOG.md b/ballista/CHANGELOG.md index b8268fc6537f..07ce062a4f6e 100644 --- a/ballista/CHANGELOG.md +++ b/ballista/CHANGELOG.md @@ -19,6 +19,289 @@ # Changelog +## [ballista-0.7.0](https://github.com/apache/arrow-datafusion/tree/ballista-0.7.0) (2022-05-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.1.0-rc1...ballista-0.7.0) + +**Breaking changes:** + +- Make `ExecutionPlan::execute` Sync [\#2434](https://github.com/apache/arrow-datafusion/pull/2434) ([tustvold](https://github.com/tustvold)) +- Add `Expr::Exists` to represent EXISTS subquery expression [\#2339](https://github.com/apache/arrow-datafusion/pull/2339) 
([andygrove](https://github.com/andygrove)) +- Remove dependency from `LogicalPlan::TableScan` to `ExecutionPlan` [\#2284](https://github.com/apache/arrow-datafusion/pull/2284) ([andygrove](https://github.com/andygrove)) +- Move logical expression type-coercion code from `physical-expr` crate to `expr` crate [\#2257](https://github.com/apache/arrow-datafusion/pull/2257) ([andygrove](https://github.com/andygrove)) +- feat: 2061 create external table ddl table partition cols [\#2099](https://github.com/apache/arrow-datafusion/pull/2099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) +- Reorganize the project folders [\#2081](https://github.com/apache/arrow-datafusion/pull/2081) ([yahoNanJing](https://github.com/yahoNanJing)) +- Support more ScalarFunction in Ballista [\#2008](https://github.com/apache/arrow-datafusion/pull/2008) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Merge dataframe and dataframe imp [\#1998](https://github.com/apache/arrow-datafusion/pull/1998) ([vchag](https://github.com/vchag)) +- Rename `ExecutionContext` to `SessionContext`, `ExecutionContextState` to `SessionState`, add `TaskContext` to support multi-tenancy configurations - Part 1 [\#1987](https://github.com/apache/arrow-datafusion/pull/1987) ([mingmwang](https://github.com/mingmwang)) +- Add Coalesce function [\#1969](https://github.com/apache/arrow-datafusion/pull/1969) ([msathis](https://github.com/msathis)) +- Add Create Schema functionality in SQL [\#1959](https://github.com/apache/arrow-datafusion/pull/1959) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- remove sync constraint of SendableRecordBatchStream [\#1884](https://github.com/apache/arrow-datafusion/pull/1884) ([doki23](https://github.com/doki23)) + +**Implemented enhancements:** + +- Add `CREATE VIEW` [\#2279](https://github.com/apache/arrow-datafusion/pull/2279) 
([matthewmturner](https://github.com/matthewmturner)) +- \[Ballista\] Support Union in ballista. [\#2098](https://github.com/apache/arrow-datafusion/pull/2098) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add missing aggr\_expr to PhysicalExprNode for Ballista. [\#1989](https://github.com/apache/arrow-datafusion/pull/1989) ([Ted-Jiang](https://github.com/Ted-Jiang)) + +**Fixed bugs:** + +- Ballista integration tests no longer work [\#2440](https://github.com/apache/arrow-datafusion/issues/2440) +- Ballista crates cannot be released from DafaFusion 7.0.0 source release [\#1980](https://github.com/apache/arrow-datafusion/issues/1980) +- protobuf OctetLength should be deserialized as octet\_length, not length [\#1834](https://github.com/apache/arrow-datafusion/pull/1834) ([carols10cents](https://github.com/carols10cents)) + +**Documentation updates:** + +- MINOR: Make crate READMEs consistent [\#2437](https://github.com/apache/arrow-datafusion/pull/2437) ([andygrove](https://github.com/andygrove)) +- docs: Update the Ballista dev env instructions [\#2419](https://github.com/apache/arrow-datafusion/pull/2419) ([haoxins](https://github.com/haoxins)) +- Revise document of installing ballista pinned to specified version [\#2034](https://github.com/apache/arrow-datafusion/pull/2034) ([WinkerDu](https://github.com/WinkerDu)) +- Fix typos \(Datafusion -\> DataFusion\) [\#1993](https://github.com/apache/arrow-datafusion/pull/1993) ([andygrove](https://github.com/andygrove)) + +**Performance improvements:** + +- Introduce StageManager for managing tasks stage by stage [\#1983](https://github.com/apache/arrow-datafusion/pull/1983) ([yahoNanJing](https://github.com/yahoNanJing)) + +**Closed issues:** + +- Make expected result string in unit tests more readable [\#2412](https://github.com/apache/arrow-datafusion/issues/2412) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2399](https://github.com/apache/arrow-datafusion/issues/2399) +- split 
`distinct_expression.rs` into `count_distinct.rs` and `array_agg_distinct.rs` [\#2385](https://github.com/apache/arrow-datafusion/issues/2385) +- move sql tests in `context.rs` to corresponding test files in `datafustion/core/tests/sql` [\#2328](https://github.com/apache/arrow-datafusion/issues/2328) +- Date32/Date64 as join keys for merge join [\#2314](https://github.com/apache/arrow-datafusion/issues/2314) +- Error precision and scale for decimal coercion in logic comparison [\#2232](https://github.com/apache/arrow-datafusion/issues/2232) +- Support Multiple row layout [\#2188](https://github.com/apache/arrow-datafusion/issues/2188) +- Discussion: Is Ballista a standalone system or framework [\#1916](https://github.com/apache/arrow-datafusion/issues/1916) + +**Merged pull requests:** + +- MINOR: Enable multi-statement benchmark queries [\#2507](https://github.com/apache/arrow-datafusion/pull/2507) ([andygrove](https://github.com/andygrove)) +- Persist session configs in scheduler [\#2501](https://github.com/apache/arrow-datafusion/pull/2501) ([thinkharderdev](https://github.com/thinkharderdev)) +- Update to `sqlparser` `0.17.0` [\#2500](https://github.com/apache/arrow-datafusion/pull/2500) ([alamb](https://github.com/alamb)) +- Limit cpu cores used when generating changelog [\#2494](https://github.com/apache/arrow-datafusion/pull/2494) ([andygrove](https://github.com/andygrove)) +- MINOR: Parameterize changelog script [\#2484](https://github.com/apache/arrow-datafusion/pull/2484) ([jychen7](https://github.com/jychen7)) +- Fix stage key extraction [\#2472](https://github.com/apache/arrow-datafusion/pull/2472) ([thinkharderdev](https://github.com/thinkharderdev)) +- Add support for list\_dir\(\) on local fs [\#2467](https://github.com/apache/arrow-datafusion/pull/2467) ([wjones127](https://github.com/wjones127)) +- minor: update versions and paths in changelog scripts [\#2429](https://github.com/apache/arrow-datafusion/pull/2429) 
([andygrove](https://github.com/andygrove)) +- Fix Ballista executing during plan [\#2428](https://github.com/apache/arrow-datafusion/pull/2428) ([tustvold](https://github.com/tustvold)) +- Re-organize and rename aggregates physical plan [\#2388](https://github.com/apache/arrow-datafusion/pull/2388) ([yjshen](https://github.com/yjshen)) +- Upgrade to arrow 13 [\#2382](https://github.com/apache/arrow-datafusion/pull/2382) ([alamb](https://github.com/alamb)) +- Grouped Aggregate in row format [\#2375](https://github.com/apache/arrow-datafusion/pull/2375) ([yjshen](https://github.com/yjshen)) +- Stop optimizing queries twice [\#2369](https://github.com/apache/arrow-datafusion/pull/2369) ([andygrove](https://github.com/andygrove)) +- Bump follow-redirects from 1.13.2 to 1.14.9 in /ballista/ui/scheduler [\#2325](https://github.com/apache/arrow-datafusion/pull/2325) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Move FileType enum from sql module to logical\_plan module [\#2290](https://github.com/apache/arrow-datafusion/pull/2290) ([andygrove](https://github.com/andygrove)) +- Add BatchPartitioner \(\#2285\) [\#2287](https://github.com/apache/arrow-datafusion/pull/2287) ([tustvold](https://github.com/tustvold)) +- Update uuid requirement from 0.8 to 1.0 [\#2280](https://github.com/apache/arrow-datafusion/pull/2280) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump async from 2.6.3 to 2.6.4 in /ballista/ui/scheduler [\#2277](https://github.com/apache/arrow-datafusion/pull/2277) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump minimist from 1.2.5 to 1.2.6 in /ballista/ui/scheduler [\#2276](https://github.com/apache/arrow-datafusion/pull/2276) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump url-parse from 1.5.1 to 1.5.10 in /ballista/ui/scheduler [\#2275](https://github.com/apache/arrow-datafusion/pull/2275) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump nanoid from 3.1.20 to 3.3.3 in 
/ballista/ui/scheduler [\#2274](https://github.com/apache/arrow-datafusion/pull/2274) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update to Arrow 12.0.0, update tonic and prost [\#2253](https://github.com/apache/arrow-datafusion/pull/2253) ([alamb](https://github.com/alamb)) +- Add ExecutorMetricsCollector interface [\#2234](https://github.com/apache/arrow-datafusion/pull/2234) ([thinkharderdev](https://github.com/thinkharderdev)) +- minor: add editor config file [\#2224](https://github.com/apache/arrow-datafusion/pull/2224) ([jackwener](https://github.com/jackwener)) +- \[Ballista\] Enable ApproxPercentileWithWeight in Ballista and fill UT [\#2192](https://github.com/apache/arrow-datafusion/pull/2192) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- make nightly clippy happy [\#2186](https://github.com/apache/arrow-datafusion/pull/2186) ([xudong963](https://github.com/xudong963)) +- \[Ballista\]Make PhysicalAggregateExprNode has repeated PhysicalExprNode [\#2184](https://github.com/apache/arrow-datafusion/pull/2184) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add LogicalPlan::SubqueryAlias [\#2172](https://github.com/apache/arrow-datafusion/pull/2172) ([andygrove](https://github.com/andygrove)) +- Implement fast path of with\_new\_children\(\) in ExecutionPlan [\#2168](https://github.com/apache/arrow-datafusion/pull/2168) ([mingmwang](https://github.com/mingmwang)) +- \[MINOR\] ignore suspicious slow test in Ballista [\#2167](https://github.com/apache/arrow-datafusion/pull/2167) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- enable explain for ballista [\#2163](https://github.com/apache/arrow-datafusion/pull/2163) ([doki23](https://github.com/doki23)) +- Add delimiter for create external table [\#2162](https://github.com/apache/arrow-datafusion/pull/2162) ([matthewmturner](https://github.com/matthewmturner)) +- Update sqlparser requirement from 0.15 to 0.16 [\#2152](https://github.com/apache/arrow-datafusion/pull/2152) 
([dependabot[bot]](https://github.com/apps/dependabot)) +- Add IF NOT EXISTS to `CREATE TABLE` and `CREATE EXTERNAL TABLE` [\#2143](https://github.com/apache/arrow-datafusion/pull/2143) ([matthewmturner](https://github.com/matthewmturner)) +- Update quarterly roadmap for Q2 [\#2133](https://github.com/apache/arrow-datafusion/pull/2133) ([matthewmturner](https://github.com/matthewmturner)) +- \[Ballista\] Add ballista plugin manager and UDF plugin [\#2131](https://github.com/apache/arrow-datafusion/pull/2131) ([gaojun2048](https://github.com/gaojun2048)) +- Serialize scalar UDFs in physical plan [\#2130](https://github.com/apache/arrow-datafusion/pull/2130) ([thinkharderdev](https://github.com/thinkharderdev)) +- doc: update release schedule [\#2110](https://github.com/apache/arrow-datafusion/pull/2110) ([jychen7](https://github.com/jychen7)) +- Reduce repetition in Decimal binary kernels, upgrade to arrow 11.1 [\#2107](https://github.com/apache/arrow-datafusion/pull/2107) ([alamb](https://github.com/alamb)) +- update zlib version to 1.2.12 [\#2106](https://github.com/apache/arrow-datafusion/pull/2106) ([waitingkuo](https://github.com/waitingkuo)) +- Add CREATE DATABASE command to SQL [\#2094](https://github.com/apache/arrow-datafusion/pull/2094) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- Refactor SessionContext, BallistaContext to support multi-tenancy configurations - Part 3 [\#2091](https://github.com/apache/arrow-datafusion/pull/2091) ([mingmwang](https://github.com/mingmwang)) +- Remove dependency of common for the storage crate [\#2076](https://github.com/apache/arrow-datafusion/pull/2076) ([yahoNanJing](https://github.com/yahoNanJing)) +- [MINOR] fix doc in `EXTRACT\(field FROM source\) [\#2074](https://github.com/apache/arrow-datafusion/pull/2074) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Bug\]\[Datafusion\] fix TaskContext session\_config bug 
[\#2070](https://github.com/apache/arrow-datafusion/pull/2070) ([gaojun2048](https://github.com/gaojun2048)) +- Short-circuit evaluation for `CaseWhen` [\#2068](https://github.com/apache/arrow-datafusion/pull/2068) ([yjshen](https://github.com/yjshen)) +- split datafusion-object-store module [\#2065](https://github.com/apache/arrow-datafusion/pull/2065) ([yahoNanJing](https://github.com/yahoNanJing)) +- Change log level for noisy logs [\#2060](https://github.com/apache/arrow-datafusion/pull/2060) ([thinkharderdev](https://github.com/thinkharderdev)) +- Update to arrow/parquet 11.0 [\#2048](https://github.com/apache/arrow-datafusion/pull/2048) ([alamb](https://github.com/alamb)) +- minor: format comments \(`//` to `// `\) [\#2047](https://github.com/apache/arrow-datafusion/pull/2047) ([jackwener](https://github.com/jackwener)) +- use cargo-tomlfmt to check Cargo.toml formatting in CI [\#2033](https://github.com/apache/arrow-datafusion/pull/2033) ([WinkerDu](https://github.com/WinkerDu)) +- Refactor SessionContext, SessionState and SessionConfig to support multi-tenancy configurations - Part 2 [\#2029](https://github.com/apache/arrow-datafusion/pull/2029) ([mingmwang](https://github.com/mingmwang)) +- Simplify prerequisites for running examples [\#2028](https://github.com/apache/arrow-datafusion/pull/2028) ([doki23](https://github.com/doki23)) +- Use SessionContext to parse Expr protobuf [\#2024](https://github.com/apache/arrow-datafusion/pull/2024) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix stuck issue for the load testing of Push-based task scheduling [\#2006](https://github.com/apache/arrow-datafusion/pull/2006) ([yahoNanJing](https://github.com/yahoNanJing)) +- Fixing a typo in documentation [\#1997](https://github.com/apache/arrow-datafusion/pull/1997) ([psvri](https://github.com/psvri)) +- Fix minor clippy issue [\#1995](https://github.com/apache/arrow-datafusion/pull/1995) ([alamb](https://github.com/alamb)) +- Make it possible to only scan 
part of a parquet file in a partition [\#1990](https://github.com/apache/arrow-datafusion/pull/1990) ([yjshen](https://github.com/yjshen)) +- Update Dockerfile to fix integration tests [\#1982](https://github.com/apache/arrow-datafusion/pull/1982) ([andygrove](https://github.com/andygrove)) +- Update sqlparser requirement from 0.14 to 0.15 [\#1966](https://github.com/apache/arrow-datafusion/pull/1966) ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix logical conflict with protobuf [\#1958](https://github.com/apache/arrow-datafusion/pull/1958) ([alamb](https://github.com/alamb)) +- Update to arrow 10.0.0, pyo3 0.16 [\#1957](https://github.com/apache/arrow-datafusion/pull/1957) ([alamb](https://github.com/alamb)) +- update jit-related dependencies [\#1953](https://github.com/apache/arrow-datafusion/pull/1953) ([xudong963](https://github.com/xudong963)) +- Allow different types of query variables \(`@@var`\) rather than just string [\#1943](https://github.com/apache/arrow-datafusion/pull/1943) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([maxburke](https://github.com/maxburke)) +- Pruning serialization [\#1941](https://github.com/apache/arrow-datafusion/pull/1941) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix select from EmptyExec always return 0 row after optimizer passes [\#1938](https://github.com/apache/arrow-datafusion/pull/1938) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Introduce Ballista query stage scheduler [\#1935](https://github.com/apache/arrow-datafusion/pull/1935) ([yahoNanJing](https://github.com/yahoNanJing)) +- Add db benchmark script [\#1928](https://github.com/apache/arrow-datafusion/pull/1928) ([matthewmturner](https://github.com/matthewmturner)) +- fix a typo [\#1919](https://github.com/apache/arrow-datafusion/pull/1919) ([vchag](https://github.com/vchag)) +- \[MINOR\] Update copyright year in Docs [\#1918](https://github.com/apache/arrow-datafusion/pull/1918) ([alamb](https://github.com/alamb)) 
+- add metadata to DFSchema, close \#1806. [\#1914](https://github.com/apache/arrow-datafusion/pull/1914) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Refactor scheduler state mod [\#1913](https://github.com/apache/arrow-datafusion/pull/1913) ([yahoNanJing](https://github.com/yahoNanJing)) +- Refactor the event channel [\#1912](https://github.com/apache/arrow-datafusion/pull/1912) ([yahoNanJing](https://github.com/yahoNanJing)) +- Refactor scheduler server [\#1911](https://github.com/apache/arrow-datafusion/pull/1911) ([yahoNanJing](https://github.com/yahoNanJing)) +- Clippy fix on nightly [\#1907](https://github.com/apache/arrow-datafusion/pull/1907) ([yjshen](https://github.com/yjshen)) +- Updated Rust version to 1.59 in all the files [\#1903](https://github.com/apache/arrow-datafusion/pull/1903) ([NaincyKumariKnoldus](https://github.com/NaincyKumariKnoldus)) +- Remove uneeded Mutex in Ballista Client [\#1898](https://github.com/apache/arrow-datafusion/pull/1898) ([alamb](https://github.com/alamb)) +- Create a `datafusion-proto` crate for datafusion protobuf serialization [\#1887](https://github.com/apache/arrow-datafusion/pull/1887) ([carols10cents](https://github.com/carols10cents)) +- Fix clippy lints [\#1885](https://github.com/apache/arrow-datafusion/pull/1885) ([HaoYang670](https://github.com/HaoYang670)) +- Separate cpu-bound \(query-execution\) and IO-bound\(heartbeat\) to … [\#1883](https://github.com/apache/arrow-datafusion/pull/1883) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Minor\] Clean up DecimalArray API Usage [\#1869](https://github.com/apache/arrow-datafusion/pull/1869) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Changes after went through "Datafusion as a library section" [\#1868](https://github.com/apache/arrow-datafusion/pull/1868) ([nonontb](https://github.com/nonontb)) +- Remove allow unused imports from 
ballista-core, then fix all warnings [\#1853](https://github.com/apache/arrow-datafusion/pull/1853) ([carols10cents](https://github.com/carols10cents)) +- Update to arrow 9.1.0 [\#1851](https://github.com/apache/arrow-datafusion/pull/1851) ([alamb](https://github.com/alamb)) +- move some tests out of context and into sql [\#1846](https://github.com/apache/arrow-datafusion/pull/1846) ([alamb](https://github.com/alamb)) +- Fix compiling ballista in standalone mode, add build to CI [\#1839](https://github.com/apache/arrow-datafusion/pull/1839) ([alamb](https://github.com/alamb)) +- Update documentation example for change in API [\#1812](https://github.com/apache/arrow-datafusion/pull/1812) ([alamb](https://github.com/alamb)) +- Refactor scheduler state with different management policy for volatile and stable states [\#1810](https://github.com/apache/arrow-datafusion/pull/1810) ([yahoNanJing](https://github.com/yahoNanJing)) +- DataFusion + Conbench Integration [\#1791](https://github.com/apache/arrow-datafusion/pull/1791) ([dianaclarke](https://github.com/dianaclarke)) +- Enable periodic cleanup of work\_dir directories in ballista executor [\#1783](https://github.com/apache/arrow-datafusion/pull/1783) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow [\#1475](https://github.com/apache/arrow-datafusion/pull/1475) ([alamb](https://github.com/alamb)) + +## [7.1.0-rc1](https://github.com/apache/arrow-datafusion/tree/7.1.0-rc1) (2022-04-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.0.0-rc2...7.1.0-rc1) + +**Implemented enhancements:** + +- Support substring with three arguments: \(str, from, for\) for DataFrame API and Ballista [\#2092](https://github.com/apache/arrow-datafusion/issues/2092) +- UnionAll support for Ballista [\#2032](https://github.com/apache/arrow-datafusion/issues/2032) +- Separate cpu-bound and IO-bound work in ballista-executor by using 
diff tokio runtime. [\#1770](https://github.com/apache/arrow-datafusion/issues/1770) +- \[Ballista\] Introduce DAGScheduler for better managing the stage-based task scheduling [\#1704](https://github.com/apache/arrow-datafusion/issues/1704) +- \[Ballista\] Support to better manage cluster state, like alive executors, executor available task slots, etc [\#1703](https://github.com/apache/arrow-datafusion/issues/1703) + +**Closed issues:** + +- Optimize memory usage pattern to avoid "double memory" behavior [\#2149](https://github.com/apache/arrow-datafusion/issues/2149) +- Document approx\_percentile\_cont\_with\_weight in users guide [\#2078](https://github.com/apache/arrow-datafusion/issues/2078) +- \[follow up\]cleaning up statements.remove\(0\) [\#1986](https://github.com/apache/arrow-datafusion/issues/1986) +- Formatting error on documentation for Python [\#1873](https://github.com/apache/arrow-datafusion/issues/1873) +- Remove duplicate tests from `test_const_evaluator_scalar_functions` [\#1727](https://github.com/apache/arrow-datafusion/issues/1727) +- Question: Is the Ballista project providing value to the overall DataFusion project? 
[\#1273](https://github.com/apache/arrow-datafusion/issues/1273) + +## [7.0.0-rc2](https://github.com/apache/arrow-datafusion/tree/7.0.0-rc2) (2022-02-14) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.0.0...7.0.0-rc2) + +## [7.0.0](https://github.com/apache/arrow-datafusion/tree/7.0.0) (2022-02-14) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/6.0.0-rc0...7.0.0) + +**Breaking changes:** + +- Update `ExecutionPlan` to know about sortedness and repartitioning optimizer pass respect the invariants [\#1776](https://github.com/apache/arrow-datafusion/pull/1776) ([alamb](https://github.com/alamb)) +- Update to `arrow 8.0.0` [\#1673](https://github.com/apache/arrow-datafusion/pull/1673) ([alamb](https://github.com/alamb)) + +**Implemented enhancements:** + +- Task assignment between Scheduler and Executors [\#1221](https://github.com/apache/arrow-datafusion/issues/1221) +- Add `approx_median()` aggregate function [\#1729](https://github.com/apache/arrow-datafusion/pull/1729) ([realno](https://github.com/realno)) +- \[Ballista\] Add Decimal128, Date64, TimestampSecond, TimestampMillisecond, Interv… [\#1659](https://github.com/apache/arrow-datafusion/pull/1659) ([gaojun2048](https://github.com/gaojun2048)) +- Add `corr` aggregate function [\#1561](https://github.com/apache/arrow-datafusion/pull/1561) ([realno](https://github.com/realno)) +- Add `covar`, `covar_pop` and `covar_samp` aggregate functions [\#1551](https://github.com/apache/arrow-datafusion/pull/1551) ([realno](https://github.com/realno)) +- Add `approx_quantile()` aggregation function [\#1539](https://github.com/apache/arrow-datafusion/pull/1539) ([domodwyer](https://github.com/domodwyer)) +- Initial MemoryManager and DiskManager APIs for query execution + External Sort implementation [\#1526](https://github.com/apache/arrow-datafusion/pull/1526) ([yjshen](https://github.com/yjshen)) +- Add `stddev` and `variance` 
[\#1525](https://github.com/apache/arrow-datafusion/pull/1525) ([realno](https://github.com/realno)) +- Add `rem` operation for Expr [\#1467](https://github.com/apache/arrow-datafusion/pull/1467) ([liukun4515](https://github.com/liukun4515)) +- Implement `array_agg` aggregate function [\#1300](https://github.com/apache/arrow-datafusion/pull/1300) ([viirya](https://github.com/viirya)) + +**Fixed bugs:** + +- Ballista context::tests::test\_standalone\_mode test fails [\#1020](https://github.com/apache/arrow-datafusion/issues/1020) +- \[Ballista\] Fix scheduler state mod bug [\#1655](https://github.com/apache/arrow-datafusion/pull/1655) ([gaojun2048](https://github.com/gaojun2048)) +- Pass local address host so we do not get mismatch between IPv4 and IP… [\#1466](https://github.com/apache/arrow-datafusion/pull/1466) ([thinkharderdev](https://github.com/thinkharderdev)) +- Add Timezone to Scalar::Time\* types, and better timezone awareness to Datafusion's time types [\#1455](https://github.com/apache/arrow-datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) + +**Documentation updates:** + +- Add dependencies to ballista example documentation [\#1346](https://github.com/apache/arrow-datafusion/pull/1346) ([jgoday](https://github.com/jgoday)) +- \[MINOR\] Fix some typos. 
[\#1310](https://github.com/apache/arrow-datafusion/pull/1310) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix some clippy warnings from nightly channel [\#1277](https://github.com/apache/arrow-datafusion/pull/1277) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) + +**Performance improvements:** + +- Introduce push-based task scheduling for Ballista [\#1560](https://github.com/apache/arrow-datafusion/pull/1560) ([yahoNanJing](https://github.com/yahoNanJing)) + +**Closed issues:** + +- Track memory usage in Non Limited Operators [\#1569](https://github.com/apache/arrow-datafusion/issues/1569) +- \[Question\] Why does ballista store tables in the client instead of in the SchedulerServer [\#1473](https://github.com/apache/arrow-datafusion/issues/1473) +- Why use the expr types before coercion to get the result type? [\#1358](https://github.com/apache/arrow-datafusion/issues/1358) +- A problem about the projection\_push\_down optimizer gathers valid columns [\#1312](https://github.com/apache/arrow-datafusion/issues/1312) +- apply constant folding to `LogicalPlan::Values` [\#1170](https://github.com/apache/arrow-datafusion/issues/1170) +- reduce usage of `IntoIterator` in logical plan builder window fn [\#372](https://github.com/apache/arrow-datafusion/issues/372) + +**Merged pull requests:** + +- Fix verification scripts for 7.0.0 release [\#1830](https://github.com/apache/arrow-datafusion/pull/1830) ([alamb](https://github.com/alamb)) +- update README for ballista [\#1817](https://github.com/apache/arrow-datafusion/pull/1817) ([liukun4515](https://github.com/liukun4515)) +- Fix logical conflict [\#1801](https://github.com/apache/arrow-datafusion/pull/1801) ([alamb](https://github.com/alamb)) +- Improve the error message and UX of tpch benchmark program [\#1800](https://github.com/apache/arrow-datafusion/pull/1800) ([alamb](https://github.com/alamb)) +- Update to sqlparser 0.14 
[\#1796](https://github.com/apache/arrow-datafusion/pull/1796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Update datafusion versions [\#1793](https://github.com/apache/arrow-datafusion/pull/1793) ([matthewmturner](https://github.com/matthewmturner)) +- Update datafusion to use arrow 9.0.0 [\#1775](https://github.com/apache/arrow-datafusion/pull/1775) ([alamb](https://github.com/alamb)) +- Update parking\_lot requirement from 0.11 to 0.12 [\#1735](https://github.com/apache/arrow-datafusion/pull/1735) ([dependabot[bot]](https://github.com/apps/dependabot)) +- substitute `parking_lot::Mutex` for `std::sync::Mutex` [\#1720](https://github.com/apache/arrow-datafusion/pull/1720) ([xudong963](https://github.com/xudong963)) +- Create ListingTableConfig which includes file format and schema inference [\#1715](https://github.com/apache/arrow-datafusion/pull/1715) ([matthewmturner](https://github.com/matthewmturner)) +- Support `create_physical_expr` and `ExecutionContextState` or `DefaultPhysicalPlanner` for faster speed [\#1700](https://github.com/apache/arrow-datafusion/pull/1700) ([alamb](https://github.com/alamb)) +- Use NamedTempFile rather than `String` in DiskManager [\#1680](https://github.com/apache/arrow-datafusion/pull/1680) ([alamb](https://github.com/alamb)) +- Abstract over logical and physical plan representations in Ballista [\#1677](https://github.com/apache/arrow-datafusion/pull/1677) ([thinkharderdev](https://github.com/thinkharderdev)) +- upgrade clap to version 3 [\#1672](https://github.com/apache/arrow-datafusion/pull/1672) ([Jimexist](https://github.com/Jimexist)) +- Improve configuration and resource use of `MemoryManager` and `DiskManager` [\#1668](https://github.com/apache/arrow-datafusion/pull/1668) ([alamb](https://github.com/alamb)) +- Make `MemoryManager` and `MemoryStream` public [\#1664](https://github.com/apache/arrow-datafusion/pull/1664) ([yjshen](https://github.com/yjshen)) +- 
Consolidate Schema and RecordBatch projection [\#1638](https://github.com/apache/arrow-datafusion/pull/1638) ([alamb](https://github.com/alamb)) +- Update hashbrown requirement from 0.11 to 0.12 [\#1631](https://github.com/apache/arrow-datafusion/pull/1631) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update etcd-client requirement from 0.7 to 0.8 [\#1626](https://github.com/apache/arrow-datafusion/pull/1626) ([dependabot[bot]](https://github.com/apps/dependabot)) +- update nightly version [\#1597](https://github.com/apache/arrow-datafusion/pull/1597) ([Jimexist](https://github.com/Jimexist)) +- Add support show tables and show columns for ballista [\#1593](https://github.com/apache/arrow-datafusion/pull/1593) ([gaojun2048](https://github.com/gaojun2048)) +- minor: improve the benchmark readme [\#1567](https://github.com/apache/arrow-datafusion/pull/1567) ([xudong963](https://github.com/xudong963)) +- Consolidate `batch_size` configuration in `ExecutionConfig`, `RuntimeConfig` and `PhysicalPlanConfig` [\#1562](https://github.com/apache/arrow-datafusion/pull/1562) ([yjshen](https://github.com/yjshen)) +- Update to rust 1.58 [\#1557](https://github.com/apache/arrow-datafusion/pull/1557) ([xudong963](https://github.com/xudong963)) +- support mathematics operation for decimal data type [\#1554](https://github.com/apache/arrow-datafusion/pull/1554) ([liukun4515](https://github.com/liukun4515)) +- Make call SchedulerServer::new once in ballista-scheduler process [\#1537](https://github.com/apache/arrow-datafusion/pull/1537) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add load test command in tpch.rs. 
[\#1530](https://github.com/apache/arrow-datafusion/pull/1530) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove one copy of ballista datatype serialization code [\#1524](https://github.com/apache/arrow-datafusion/pull/1524) ([alamb](https://github.com/alamb)) +- Update to arrow-7.0.0 [\#1523](https://github.com/apache/arrow-datafusion/pull/1523) ([alamb](https://github.com/alamb)) +- Workaround build failure: Pin quote to 1.0.10 [\#1499](https://github.com/apache/arrow-datafusion/pull/1499) ([alamb](https://github.com/alamb)) +- add rfcs for datafusion [\#1490](https://github.com/apache/arrow-datafusion/pull/1490) ([xudong963](https://github.com/xudong963)) +- support comparison for decimal data type and refactor the binary coercion rule [\#1483](https://github.com/apache/arrow-datafusion/pull/1483) ([liukun4515](https://github.com/liukun4515)) +- Update arrow-rs to 6.4.0 and replace boolean comparison in datafusion with arrow compute kernel [\#1446](https://github.com/apache/arrow-datafusion/pull/1446) ([xudong963](https://github.com/xudong963)) +- support cast/try\_cast for decimal: signed numeric to decimal [\#1442](https://github.com/apache/arrow-datafusion/pull/1442) ([liukun4515](https://github.com/liukun4515)) +- use 0.13 sql parser [\#1435](https://github.com/apache/arrow-datafusion/pull/1435) ([Jimexist](https://github.com/Jimexist)) +- Clarify communication on bi-weekly sync [\#1427](https://github.com/apache/arrow-datafusion/pull/1427) ([alamb](https://github.com/alamb)) +- Minimize features [\#1399](https://github.com/apache/arrow-datafusion/pull/1399) ([carols10cents](https://github.com/carols10cents)) +- Update rust vesion to 1.57 [\#1395](https://github.com/apache/arrow-datafusion/pull/1395) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Add coercion rules for AggregateFunctions [\#1387](https://github.com/apache/arrow-datafusion/pull/1387) 
([liukun4515](https://github.com/liukun4515)) +- upgrade the arrow-rs version [\#1385](https://github.com/apache/arrow-datafusion/pull/1385) ([liukun4515](https://github.com/liukun4515)) +- Extract logical plan: rename the plan name \(follow up\) [\#1354](https://github.com/apache/arrow-datafusion/pull/1354) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- upgrade arrow-rs to 6.2.0 [\#1334](https://github.com/apache/arrow-datafusion/pull/1334) ([liukun4515](https://github.com/liukun4515)) +- Update release instructions [\#1331](https://github.com/apache/arrow-datafusion/pull/1331) ([alamb](https://github.com/alamb)) +- Extract Aggregate, Sort, and Join to struct from AggregatePlan [\#1326](https://github.com/apache/arrow-datafusion/pull/1326) ([matthewmturner](https://github.com/matthewmturner)) +- Extract `EmptyRelation`, `Limit`, `Values` from `LogicalPlan` [\#1325](https://github.com/apache/arrow-datafusion/pull/1325) ([liukun4515](https://github.com/liukun4515)) +- Extract CrossJoin, Repartition, Union in LogicalPlan [\#1322](https://github.com/apache/arrow-datafusion/pull/1322) ([liukun4515](https://github.com/liukun4515)) +- Extract Explain, Analyze, Extension in LogicalPlan as independent struct [\#1317](https://github.com/apache/arrow-datafusion/pull/1317) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Extract CreateMemoryTable, DropTable, CreateExternalTable in LogicalPlan as independent struct [\#1311](https://github.com/apache/arrow-datafusion/pull/1311) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Extract Projection, Filter, Window in LogicalPlan as independent struct [\#1309](https://github.com/apache/arrow-datafusion/pull/1309) ([ic4y](https://github.com/ic4y)) +- Add PSQL comparison tests for except, intersect 
[\#1292](https://github.com/apache/arrow-datafusion/pull/1292) ([mrob95](https://github.com/mrob95)) +- Extract logical plans in LogicalPlan as independent struct: TableScan [\#1290](https://github.com/apache/arrow-datafusion/pull/1290) ([xudong963](https://github.com/xudong963)) + +## [6.0.0-rc0](https://github.com/apache/arrow-datafusion/tree/6.0.0-rc0) (2021-11-14) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/6.0.0...6.0.0-rc0) + +## [6.0.0](https://github.com/apache/arrow-datafusion/tree/6.0.0) (2021-11-14) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/ballista-0.6.0...6.0.0) + + ## [ballista-0.6.0](https://github.com/apache/arrow-datafusion/tree/ballista-0.6.0) (2021-11-13) [Full Changelog](https://github.com/apache/arrow-datafusion/compare/ballista-0.5.0...ballista-0.6.0) diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml index e9805a7b0f4e..828044450b8e 100644 --- a/ballista/rust/client/Cargo.toml +++ b/ballista/rust/client/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "0.6.0" +version = "0.7.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -28,11 +28,11 @@ edition = "2021" rust-version = "1.59" [dependencies] -ballista-core = { path = "../core", version = "0.6.0" } -ballista-executor = { path = "../executor", version = "0.6.0", optional = true } -ballista-scheduler = { path = "../scheduler", version = "0.6.0", optional = true } +ballista-core = { path = "../core", version = "0.7.0" } +ballista-executor = { path = "../executor", version = "0.7.0", optional = true } +ballista-scheduler = { path = "../scheduler", version = "0.7.0", optional = true } -datafusion = { path = "../../../datafusion/core", version = "7.0.0" } +datafusion = { path = "../../../datafusion/core", version = "8.0.0" } futures = "0.3" log = 
"0.4" parking_lot = "0.12" diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml index 05a6f1922c73..db2b60ca4182 100644 --- a/ballista/rust/core/Cargo.toml +++ b/ballista/rust/core/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-core" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "0.6.0" +version = "0.7.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -39,8 +39,8 @@ arrow-flight = { version = "13" } async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../../../datafusion/core", version = "7.0.0" } -datafusion-proto = { path = "../../../datafusion/proto", version = "7.0.0" } +datafusion = { path = "../../../datafusion/core", version = "8.0.0" } +datafusion-proto = { path = "../../../datafusion/proto", version = "8.0.0" } futures = "0.3" hashbrown = "0.12" diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml index 98e87335d11e..3a2b45c5c20f 100644 --- a/ballista/rust/executor/Cargo.toml +++ b/ballista/rust/executor/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-executor" description = "Ballista Distributed Compute - Executor" license = "Apache-2.0" -version = "0.6.0" +version = "0.7.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -37,10 +37,10 @@ anyhow = "1" arrow = { version = "13" } arrow-flight = { version = "13" } async-trait = "0.1.41" -ballista-core = { path = "../core", version = "0.6.0" } +ballista-core = { path = "../core", version = "0.7.0" } chrono = { version = "0.4", default-features = false } configure_me = "0.4.0" -datafusion = { path = "../../../datafusion/core", version = "7.0.0" } +datafusion = { path = "../../../datafusion/core", version = "8.0.0" } env_logger = "0.9" futures = 
"0.3" hyper = "0.14.4" diff --git a/ballista/rust/scheduler/Cargo.toml b/ballista/rust/scheduler/Cargo.toml index 07ed56f688d1..50509f1eea8d 100644 --- a/ballista/rust/scheduler/Cargo.toml +++ b/ballista/rust/scheduler/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-scheduler" description = "Ballista Distributed Compute - Scheduler" license = "Apache-2.0" -version = "0.6.0" +version = "0.7.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,10 +38,10 @@ sled = ["sled_package", "tokio-stream"] anyhow = "1" async-recursion = "1.0.0" async-trait = "0.1.41" -ballista-core = { path = "../core", version = "0.6.0" } +ballista-core = { path = "../core", version = "0.7.0" } clap = { version = "3", features = ["derive", "cargo"] } configure_me = "0.4.0" -datafusion = { path = "../../../datafusion/core", version = "7.0.0" } +datafusion = { path = "../../../datafusion/core", version = "8.0.0" } env_logger = "0.9" etcd-client = { version = "0.9", optional = true } futures = "0.3" @@ -62,7 +62,7 @@ tower = { version = "0.4" } warp = "0.3" [dev-dependencies] -ballista-core = { path = "../core", version = "0.6.0" } +ballista-core = { path = "../core", version = "0.7.0" } uuid = { version = "1.0", features = ["v4"] } [build-dependencies] diff --git a/benchmarks/docker-compose.yaml b/benchmarks/docker-compose.yaml index 437b461a0f10..1aa8da505cc6 100644 --- a/benchmarks/docker-compose.yaml +++ b/benchmarks/docker-compose.yaml @@ -20,7 +20,7 @@ services: image: quay.io/coreos/etcd:v3.4.9 command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379" ballista-scheduler: - image: ballista:0.6.0 + image: ballista:0.7.0 command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --bind-port 50050" environment: - RUST_LOG=ballista=debug @@ -29,7 +29,7 @@ services: depends_on: - etcd ballista-executor: - image: ballista:0.6.0 + image: 
ballista:0.7.0 command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --scheduler-host ballista-scheduler" scale: 2 environment: @@ -39,7 +39,7 @@ services: depends_on: - ballista-scheduler ballista-client: - image: ballista:0.6.0 + image: ballista:0.7.0 command: "/bin/sh" # do nothing environment: - RUST_LOG=info diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 08dc44fe7c2f..82fadcd0d072 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "7.0.0" +version = "8.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = [ "arrow", "datafusion", "ballista", "query", "sql" ] @@ -31,7 +31,7 @@ readme = "README.md" [dependencies] arrow = { version = "13" } clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "7.0.0" } +datafusion = { path = "../datafusion/core", version = "8.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "*", default-features = false } diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index ad3d09c6a839..abc322e23413 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,6 +19,316 @@ # Changelog +## [8.0.0](https://github.com/apache/arrow-datafusion/tree/8.0.0) (2022-05-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.1.0-rc1...8.0.0) + +**Breaking changes:** + +- Add SQL planner support for `ROLLUP` and `CUBE` grouping set expressions [\#2446](https://github.com/apache/arrow-datafusion/pull/2446) ([andygrove](https://github.com/andygrove)) +- Make `ExecutionPlan::execute` Sync [\#2434](https://github.com/apache/arrow-datafusion/pull/2434) ([tustvold](https://github.com/tustvold)) +- Introduce new `DataFusionError::SchemaError` type [\#2371](https://github.com/apache/arrow-datafusion/pull/2371) ([andygrove](https://github.com/andygrove)) +- Add 
`Expr::InSubquery` and `Expr::ScalarSubquery` [\#2342](https://github.com/apache/arrow-datafusion/pull/2342) ([andygrove](https://github.com/andygrove)) +- Add `Expr::Exists` to represent EXISTS subquery expression [\#2339](https://github.com/apache/arrow-datafusion/pull/2339) ([andygrove](https://github.com/andygrove)) +- Move `LogicalPlan` enum to `datafusion-expr` crate [\#2294](https://github.com/apache/arrow-datafusion/pull/2294) ([andygrove](https://github.com/andygrove)) +- Remove dependency from `LogicalPlan::TableScan` to `ExecutionPlan` [\#2284](https://github.com/apache/arrow-datafusion/pull/2284) ([andygrove](https://github.com/andygrove)) +- Move logical expression type-coercion code from `physical-expr` crate to `expr` crate [\#2257](https://github.com/apache/arrow-datafusion/pull/2257) ([andygrove](https://github.com/andygrove)) +- feat: 2061 create external table ddl table partition cols [\#2099](https://github.com/apache/arrow-datafusion/pull/2099) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) +- Reorganize the project folders [\#2081](https://github.com/apache/arrow-datafusion/pull/2081) ([yahoNanJing](https://github.com/yahoNanJing)) +- Support more ScalarFunction in Ballista [\#2008](https://github.com/apache/arrow-datafusion/pull/2008) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Merge dataframe and dataframe imp [\#1998](https://github.com/apache/arrow-datafusion/pull/1998) ([vchag](https://github.com/vchag)) +- Rename `ExecutionContext` to `SessionContext`, `ExecutionContextState` to `SessionState`, add `TaskContext` to support multi-tenancy configurations - Part 1 [\#1987](https://github.com/apache/arrow-datafusion/pull/1987) ([mingmwang](https://github.com/mingmwang)) +- Add Coalesce function [\#1969](https://github.com/apache/arrow-datafusion/pull/1969) ([msathis](https://github.com/msathis)) +- Add Create Schema functionality in SQL 
[\#1959](https://github.com/apache/arrow-datafusion/pull/1959) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- omit some clone when converting sql to logical plan [\#1945](https://github.com/apache/arrow-datafusion/pull/1945) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- \[split/16\] move physical plan expressions folder to datafusion-physical-expr crate [\#1889](https://github.com/apache/arrow-datafusion/pull/1889) ([Jimexist](https://github.com/Jimexist)) +- remove sync constraint of SendableRecordBatchStream [\#1884](https://github.com/apache/arrow-datafusion/pull/1884) ([doki23](https://github.com/doki23)) +- \[split/15\] move built in window expr and partition evaluator [\#1865](https://github.com/apache/arrow-datafusion/pull/1865) ([Jimexist](https://github.com/Jimexist)) + +**Implemented enhancements:** + +- Include `Expr` to `datafusion::prelude` [\#2347](https://github.com/apache/arrow-datafusion/issues/2347) +- Implement `Serialization` API for DataFusion [\#2340](https://github.com/apache/arrow-datafusion/issues/2340) +- Implement `power` function [\#1493](https://github.com/apache/arrow-datafusion/issues/1493) +- allow `lit` python function to support `boolean` and other types [\#1136](https://github.com/apache/arrow-datafusion/issues/1136) +- Automate dependency updates [\#37](https://github.com/apache/arrow-datafusion/issues/37) +- Add `CREATE VIEW` [\#2279](https://github.com/apache/arrow-datafusion/pull/2279) ([matthewmturner](https://github.com/matthewmturner)) +- \[Ballista\] Support Union in ballista. 
[\#2098](https://github.com/apache/arrow-datafusion/pull/2098) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Change the DataFusion explain plans to make it clearer in the predicate/filter [\#2063](https://github.com/apache/arrow-datafusion/pull/2063) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add `write_json`, `read_json`, `register_json`, and `JsonFormat` to `CREATE EXTERNAL TABLE` functionality [\#2023](https://github.com/apache/arrow-datafusion/pull/2023) ([matthewmturner](https://github.com/matthewmturner)) +- Qualified wildcard [\#2012](https://github.com/apache/arrow-datafusion/pull/2012) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- support bitwise or/'|' operation [\#1876](https://github.com/apache/arrow-datafusion/pull/1876) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) +- Introduce JIT code generation [\#1849](https://github.com/apache/arrow-datafusion/pull/1849) ([yjshen](https://github.com/yjshen)) + +**Fixed bugs:** + +- CASE expr with NULL literals panics `'WHEN expression did not return a BooleanArray'` [\#1189](https://github.com/apache/arrow-datafusion/issues/1189) +- Function calls with NULL literals do not work [\#1188](https://github.com/apache/arrow-datafusion/issues/1188) +- Add SQL planner support for calling `round` function with two arguments [\#2503](https://github.com/apache/arrow-datafusion/pull/2503) ([andygrove](https://github.com/andygrove)) +- nested query fix [\#2402](https://github.com/apache/arrow-datafusion/pull/2402) ([comphead](https://github.com/comphead)) +- fix issue\#2058 file\_format/json.rs attempt to subtract with overflow [\#2066](https://github.com/apache/arrow-datafusion/pull/2066) ([silence-coding](https://github.com/silence-coding)) +- fix bug the optimizer rule filter push down [\#2039](https://github.com/apache/arrow-datafusion/pull/2039) ([jackwener](https://github.com/jackwener)) +- fix: 
replace `ExecutionContex` and `ExecutionConfig` with `SessionContext` and `SessionConfig` [\#2030](https://github.com/apache/arrow-datafusion/pull/2030) ([xudong963](https://github.com/xudong963)) +- Fixed parquet path partitioning when only selecting partitioned columns [\#2000](https://github.com/apache/arrow-datafusion/pull/2000) ([pjmore](https://github.com/pjmore)) +- Fix ambiguous reference error in filter plan [\#1925](https://github.com/apache/arrow-datafusion/pull/1925) ([jonmmease](https://github.com/jonmmease)) +- platform aware partition parsing [\#1867](https://github.com/apache/arrow-datafusion/pull/1867) ([korowa](https://github.com/korowa)) +- Fix incorrect aggregation in case that GROUP BY contains duplicate column names [\#1855](https://github.com/apache/arrow-datafusion/pull/1855) ([alex-natzka](https://github.com/alex-natzka)) + +**Documentation updates:** + +- MINOR: Make crate READMEs consistent [\#2437](https://github.com/apache/arrow-datafusion/pull/2437) ([andygrove](https://github.com/andygrove)) +- minor: Improve documentation for DFSchema join and merge functions [\#2367](https://github.com/apache/arrow-datafusion/pull/2367) ([andygrove](https://github.com/andygrove)) +- Change the code location and add annotation [\#2037](https://github.com/apache/arrow-datafusion/pull/2037) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix typos \(Datafusion -\> DataFusion\) [\#1993](https://github.com/apache/arrow-datafusion/pull/1993) ([andygrove](https://github.com/andygrove)) +- Add examples to use MemTable and TableProvider \(\#1864\) [\#1946](https://github.com/apache/arrow-datafusion/pull/1946) ([PierreZ](https://github.com/PierreZ)) +- Add doc for building `datafusion-cli` when connect the ballista [\#1866](https://github.com/apache/arrow-datafusion/pull/1866) ([liukun4515](https://github.com/liukun4515)) +- Add benchmarks section to DEVELOPERS.md 
[\#1838](https://github.com/apache/arrow-datafusion/pull/1838) ([tustvold](https://github.com/tustvold)) + +**Performance improvements:** + +- Avoid an Arc::clone per row in benchmark [\#1975](https://github.com/apache/arrow-datafusion/pull/1975) ([jhorstmann](https://github.com/jhorstmann)) +- Update datafusion-cli allocator [\#1878](https://github.com/apache/arrow-datafusion/pull/1878) ([matthewmturner](https://github.com/matthewmturner)) + +**Closed issues:** + +- Make expected result string in unit tests more readable [\#2412](https://github.com/apache/arrow-datafusion/issues/2412) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2399](https://github.com/apache/arrow-datafusion/issues/2399) +- split `distinct_expression.rs` into `count_distinct.rs` and `array_agg_distinct.rs` [\#2385](https://github.com/apache/arrow-datafusion/issues/2385) +- move sql tests in `context.rs` to corresponding test files in `datafustion/core/tests/sql` [\#2328](https://github.com/apache/arrow-datafusion/issues/2328) +- Date32/Date64 as join keys for merge join [\#2314](https://github.com/apache/arrow-datafusion/issues/2314) +- Error precision and scale for decimal coercion in logic comparison [\#2232](https://github.com/apache/arrow-datafusion/issues/2232) +- Support Multiple row layout [\#2188](https://github.com/apache/arrow-datafusion/issues/2188) +- TPC-H Query 18 [\#169](https://github.com/apache/arrow-datafusion/issues/169) +- TPC-H Query 16 [\#167](https://github.com/apache/arrow-datafusion/issues/167) +- Implement Sort-Merge Join [\#141](https://github.com/apache/arrow-datafusion/issues/141) +- Split logical expressions out into separate source files [\#114](https://github.com/apache/arrow-datafusion/issues/114) + +**Merged pull requests:** + +- Minor: remove code that is now included in arrow-rs [\#2511](https://github.com/apache/arrow-datafusion/pull/2511) ([alamb](https://github.com/alamb)) +- MINOR: Enable multi-statement benchmark queries 
[\#2507](https://github.com/apache/arrow-datafusion/pull/2507) ([andygrove](https://github.com/andygrove)) +- MINOR: Add ignored tests for all remaining benchmark queries [\#2506](https://github.com/apache/arrow-datafusion/pull/2506) ([andygrove](https://github.com/andygrove)) +- Update to `sqlparser` `0.17.0` [\#2500](https://github.com/apache/arrow-datafusion/pull/2500) ([alamb](https://github.com/alamb)) +- Add metrics for ParquetExec [\#2499](https://github.com/apache/arrow-datafusion/pull/2499) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Limit cpu cores used when generating changelog [\#2494](https://github.com/apache/arrow-datafusion/pull/2494) ([andygrove](https://github.com/andygrove)) +- Optimize MergeJoin by storing joined indices instead of creating small record batches for each match [\#2492](https://github.com/apache/arrow-datafusion/pull/2492) ([richox](https://github.com/richox)) +- Add SQL planner support for `grouping()` aggregate expressions [\#2486](https://github.com/apache/arrow-datafusion/pull/2486) ([andygrove](https://github.com/andygrove)) +- MINOR: Parameterize changelog script [\#2484](https://github.com/apache/arrow-datafusion/pull/2484) ([jychen7](https://github.com/jychen7)) +- Numeric, String, Boolean comparisons with literal `NULL` [\#2481](https://github.com/apache/arrow-datafusion/pull/2481) ([WinkerDu](https://github.com/WinkerDu)) +- Adds unit test cases of mathematical expressions working with `null` literal [\#2478](https://github.com/apache/arrow-datafusion/pull/2478) ([WinkerDu](https://github.com/WinkerDu)) +- Minor: Move test code from `context.rs` into `sql_integration` [\#2473](https://github.com/apache/arrow-datafusion/pull/2473) ([alamb](https://github.com/alamb)) +- Minor: Use ExprVisitor to find columns referenced by expr [\#2471](https://github.com/apache/arrow-datafusion/pull/2471) ([alamb](https://github.com/alamb)) +- minor: remove expr dependency from the row crate, update crate-deps.dot/svg 
[\#2470](https://github.com/apache/arrow-datafusion/pull/2470) ([yjshen](https://github.com/yjshen)) +- Fix `read_from_registered_table_with_glob_path` fails if path contains // \#2465 [\#2468](https://github.com/apache/arrow-datafusion/pull/2468) ([timvw](https://github.com/timvw)) +- Add support for list\_dir\(\) on local fs [\#2467](https://github.com/apache/arrow-datafusion/pull/2467) ([wjones127](https://github.com/wjones127)) +- MINOR: Partial fix for SQL aggregate queries with aliases [\#2464](https://github.com/apache/arrow-datafusion/pull/2464) ([andygrove](https://github.com/andygrove)) +- minor: move struct definition out of `aggregate/mod.rs`, etc [\#2458](https://github.com/apache/arrow-datafusion/pull/2458) ([WinkerDu](https://github.com/WinkerDu)) +- Fix bugs in SQL planner with GROUP BY scalar function and alias [\#2457](https://github.com/apache/arrow-datafusion/pull/2457) ([andygrove](https://github.com/andygrove)) +- feat: Support CompoundIdentifier as GetIndexedField access [\#2454](https://github.com/apache/arrow-datafusion/pull/2454) ([ovr](https://github.com/ovr)) +- Table provider error propagation [\#2438](https://github.com/apache/arrow-datafusion/pull/2438) ([jdye64](https://github.com/jdye64)) +- MINOR: Improve error messages for GROUP BY / HAVING queries [\#2435](https://github.com/apache/arrow-datafusion/pull/2435) ([andygrove](https://github.com/andygrove)) +- minor: remove redundant code [\#2432](https://github.com/apache/arrow-datafusion/pull/2432) ([jackwener](https://github.com/jackwener)) +- minor: update versions and paths in changelog scripts [\#2429](https://github.com/apache/arrow-datafusion/pull/2429) ([andygrove](https://github.com/andygrove)) +- Fix Ballista executing during plan [\#2428](https://github.com/apache/arrow-datafusion/pull/2428) ([tustvold](https://github.com/tustvold)) +- minor: format table result vec & remove some unnecessary semicolons [\#2425](https://github.com/apache/arrow-datafusion/pull/2425) 
([WinkerDu](https://github.com/WinkerDu)) +- Basic support for `IN` and `NOT IN` Subqueries by rewriting them to `SEMI` / `ANTI` Join [\#2421](https://github.com/apache/arrow-datafusion/pull/2421) ([korowa](https://github.com/korowa)) +- Allow subqueries without aliases [\#2418](https://github.com/apache/arrow-datafusion/pull/2418) ([andygrove](https://github.com/andygrove)) +- Fix bug in subquery join filters referencing outer query [\#2416](https://github.com/apache/arrow-datafusion/pull/2416) ([andygrove](https://github.com/andygrove)) +- MINOR: remove duplicated function `format_state_name()` [\#2414](https://github.com/apache/arrow-datafusion/pull/2414) ([WinkerDu](https://github.com/WinkerDu)) +- Make expected result string in unit tests more readable [\#2413](https://github.com/apache/arrow-datafusion/pull/2413) ([WinkerDu](https://github.com/WinkerDu)) +- `sum(distinct)` support [\#2405](https://github.com/apache/arrow-datafusion/pull/2405) ([WinkerDu](https://github.com/WinkerDu)) +- Update ordered-float requirement from 2.10 to 3.0 [\#2403](https://github.com/apache/arrow-datafusion/pull/2403) ([dependabot[bot]](https://github.com/apps/dependabot)) +- remove duplicated `fn aggregate()` in aggregate expression tests [\#2400](https://github.com/apache/arrow-datafusion/pull/2400) ([WinkerDu](https://github.com/WinkerDu)) +- Support type-coercion from Decimal to Float64 [\#2396](https://github.com/apache/arrow-datafusion/pull/2396) ([comphead](https://github.com/comphead)) +- minor: SchemaError code cleanup and improvements [\#2391](https://github.com/apache/arrow-datafusion/pull/2391) ([andygrove](https://github.com/andygrove)) +- Support struct\_expr generate struct in sql [\#2389](https://github.com/apache/arrow-datafusion/pull/2389) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Re-organize and rename aggregates physical plan [\#2388](https://github.com/apache/arrow-datafusion/pull/2388) ([yjshen](https://github.com/yjshen)) +- refactor 
`distinct_expressions.rs` and split into `count_distinct.rs` and `array_agg_distinct.rs` [\#2386](https://github.com/apache/arrow-datafusion/pull/2386) ([WinkerDu](https://github.com/WinkerDu)) +- Allow CTEs to be referenced from subquery expressions [\#2384](https://github.com/apache/arrow-datafusion/pull/2384) ([andygrove](https://github.com/andygrove)) +- Upgrade to arrow 13 [\#2382](https://github.com/apache/arrow-datafusion/pull/2382) ([alamb](https://github.com/alamb)) +- Grouped Aggregate in row format [\#2375](https://github.com/apache/arrow-datafusion/pull/2375) ([yjshen](https://github.com/yjshen)) +- Fix bugs with CTE aliasing and normalize all identifiers in the SQL planner [\#2373](https://github.com/apache/arrow-datafusion/pull/2373) ([andygrove](https://github.com/andygrove)) +- Stop optimizing queries twice [\#2369](https://github.com/apache/arrow-datafusion/pull/2369) ([andygrove](https://github.com/andygrove)) +- feat: Support casting to arrays to primitive type [\#2366](https://github.com/apache/arrow-datafusion/pull/2366) ([ovr](https://github.com/ovr)) +- Add proper support for `null` literal by introducing `ScalarValue::Null` [\#2364](https://github.com/apache/arrow-datafusion/pull/2364) ([WinkerDu](https://github.com/WinkerDu)) +- minor: fix duplicate column bug in subquery support [\#2362](https://github.com/apache/arrow-datafusion/pull/2362) ([andygrove](https://github.com/andygrove)) +- Normalize subquery aliases [\#2359](https://github.com/apache/arrow-datafusion/pull/2359) ([andygrove](https://github.com/andygrove)) +- Implement physical planner support for DATE +/- INTERVAL [\#2357](https://github.com/apache/arrow-datafusion/pull/2357) ([andygrove](https://github.com/andygrove)) +- Add SQL query planner support for Scalar Subqueries [\#2354](https://github.com/apache/arrow-datafusion/pull/2354) ([andygrove](https://github.com/andygrove)) +- Add SQL query planner support for IN subqueries 
[\#2352](https://github.com/apache/arrow-datafusion/pull/2352) ([andygrove](https://github.com/andygrove)) +- Add `Expr` to prelude [\#2348](https://github.com/apache/arrow-datafusion/pull/2348) ([alamb](https://github.com/alamb)) +- Add SQL planner support for EXISTS subqueries [\#2344](https://github.com/apache/arrow-datafusion/pull/2344) ([andygrove](https://github.com/andygrove)) +- Add public Serialization/Deserialization API for `Expr` to/from bytes [\#2341](https://github.com/apache/arrow-datafusion/pull/2341) ([alamb](https://github.com/alamb)) +- Support for date32 and date64 in sort merge join [\#2336](https://github.com/apache/arrow-datafusion/pull/2336) ([hntd187](https://github.com/hntd187)) +- \[physical-expr\] move aggregate exprs and window exprs to their own modules [\#2335](https://github.com/apache/arrow-datafusion/pull/2335) ([yjshen](https://github.com/yjshen)) +- fix: union schema [\#2334](https://github.com/apache/arrow-datafusion/pull/2334) ([gandronchik](https://github.com/gandronchik)) +- Improve sql integration test organization [\#2333](https://github.com/apache/arrow-datafusion/pull/2333) ([alamb](https://github.com/alamb)) +- Support scalar values for func Array [\#2332](https://github.com/apache/arrow-datafusion/pull/2332) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- move sql tests from `context.rs` to corresponding test files in `tests/sql` [\#2329](https://github.com/apache/arrow-datafusion/pull/2329) ([WinkerDu](https://github.com/WinkerDu)) +- deprecate `index_of` and make `index_of_column_by_name` public [\#2320](https://github.com/apache/arrow-datafusion/pull/2320) ([jdye64](https://github.com/jdye64)) +- Fix HashJoin evaluating during plan [\#2317](https://github.com/apache/arrow-datafusion/pull/2317) ([tustvold](https://github.com/tustvold)) +- minor: remove two source files that only had re-exports [\#2313](https://github.com/apache/arrow-datafusion/pull/2313) ([andygrove](https://github.com/andygrove)) +- Don't sort 
batches during plan [\#2312](https://github.com/apache/arrow-datafusion/pull/2312) ([tustvold](https://github.com/tustvold)) +- Move case/when expressions to datafusion-expr crate [\#2311](https://github.com/apache/arrow-datafusion/pull/2311) ([andygrove](https://github.com/andygrove)) +- Fix CrossJoinExec evaluating during plan [\#2310](https://github.com/apache/arrow-datafusion/pull/2310) ([tustvold](https://github.com/tustvold)) +- Make SortPreservingMerge Usable Outside Tokio \(\#2201\) [\#2305](https://github.com/apache/arrow-datafusion/pull/2305) ([tustvold](https://github.com/tustvold)) +- chore: update cranelift to 0.83.0 [\#2304](https://github.com/apache/arrow-datafusion/pull/2304) ([yjshen](https://github.com/yjshen)) +- Always increment timer on record [\#2298](https://github.com/apache/arrow-datafusion/pull/2298) ([tustvold](https://github.com/tustvold)) +- Remove unnecessary env var for parquet\_sql example [\#2297](https://github.com/apache/arrow-datafusion/pull/2297) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) +- Simplify sort streams [\#2296](https://github.com/apache/arrow-datafusion/pull/2296) ([tustvold](https://github.com/tustvold)) +- MINOR: beautify code with neat idents [\#2295](https://github.com/apache/arrow-datafusion/pull/2295) ([WinkerDu](https://github.com/WinkerDu)) +- Move FileType enum from sql module to logical\_plan module [\#2290](https://github.com/apache/arrow-datafusion/pull/2290) ([andygrove](https://github.com/andygrove)) +- Remove Parquet Empty Projection Workaround [\#2289](https://github.com/apache/arrow-datafusion/pull/2289) ([tustvold](https://github.com/tustvold)) +- Add BatchPartitioner \(\#2285\) [\#2287](https://github.com/apache/arrow-datafusion/pull/2287) ([tustvold](https://github.com/tustvold)) +- Make row its crate to make it accessible from physical-expr [\#2283](https://github.com/apache/arrow-datafusion/pull/2283) ([yjshen](https://github.com/yjshen)) +- Enable filter pushdown when using 
In\_list on parquet [\#2282](https://github.com/apache/arrow-datafusion/pull/2282) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update uuid requirement from 0.8 to 1.0 [\#2280](https://github.com/apache/arrow-datafusion/pull/2280) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add bytes scanned metric to ParquetExec [\#2273](https://github.com/apache/arrow-datafusion/pull/2273) ([thinkharderdev](https://github.com/thinkharderdev)) +- Fix outer join output with all-null indices on empty batch [\#2272](https://github.com/apache/arrow-datafusion/pull/2272) ([yjshen](https://github.com/yjshen)) +- Re-export DataFusion crates [\#2264](https://github.com/apache/arrow-datafusion/pull/2264) ([andygrove](https://github.com/andygrove)) +- rewrite approx\_median to approx\_percentile\_cont while planning phase [\#2262](https://github.com/apache/arrow-datafusion/pull/2262) ([korowa](https://github.com/korowa)) +- Introduce RowLayout to represent rows for different purposes [\#2261](https://github.com/apache/arrow-datafusion/pull/2261) ([yjshen](https://github.com/yjshen)) +- fix string coercion missing in Eq/NotEq operator [\#2258](https://github.com/apache/arrow-datafusion/pull/2258) ([WinkerDu](https://github.com/WinkerDu)) +- Update to Arrow 12.0.0, update tonic and prost [\#2253](https://github.com/apache/arrow-datafusion/pull/2253) ([alamb](https://github.com/alamb)) +- minor: move field\_util from `physical-expr` crate to `expr` crate [\#2250](https://github.com/apache/arrow-datafusion/pull/2250) ([andygrove](https://github.com/andygrove)) +- Move identifier case tests to `sql_integ`, add negative cases, Debug for `DataFrame` [\#2243](https://github.com/apache/arrow-datafusion/pull/2243) ([alamb](https://github.com/alamb)) +- Implement sort-merge join [\#2242](https://github.com/apache/arrow-datafusion/pull/2242) ([richox](https://github.com/richox)) +- fix: find the right wider decimal datatype for comparison operation
[\#2241](https://github.com/apache/arrow-datafusion/pull/2241) ([liukun4515](https://github.com/liukun4515)) +- Fix join without constraints [\#2240](https://github.com/apache/arrow-datafusion/pull/2240) ([Dandandan](https://github.com/Dandandan)) +- Add type coercion rule for date + interval [\#2235](https://github.com/apache/arrow-datafusion/pull/2235) ([andygrove](https://github.com/andygrove)) +- support array with scalar arithmetic operation for decimal data type [\#2233](https://github.com/apache/arrow-datafusion/pull/2233) ([liukun4515](https://github.com/liukun4515)) +- chore: add `debug!` log in some execution operators [\#2231](https://github.com/apache/arrow-datafusion/pull/2231) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- Introduce new optional scheduler, using Morsel-driven Parallelism + rayon \(\#2199\) [\#2226](https://github.com/apache/arrow-datafusion/pull/2226) ([tustvold](https://github.com/tustvold)) +- minor: add editor config file [\#2224](https://github.com/apache/arrow-datafusion/pull/2224) ([jackwener](https://github.com/jackwener)) +- minor: Refactor to avoid repeated code in replace\_qualifier [\#2222](https://github.com/apache/arrow-datafusion/pull/2222) ([andygrove](https://github.com/andygrove)) +- update cli readme [\#2220](https://github.com/apache/arrow-datafusion/pull/2220) ([liukun4515](https://github.com/liukun4515)) +- Use `filter` \(filter\_record\_batch\) instead of `take` to avoid using indices [\#2218](https://github.com/apache/arrow-datafusion/pull/2218) ([Dandandan](https://github.com/Dandandan)) +- Add single line description of ExecutionPlan \(\#2216\) [\#2217](https://github.com/apache/arrow-datafusion/pull/2217) ([tustvold](https://github.com/tustvold)) +- Remove tokio::spawn from HashAggregateExec \(\#2201\) [\#2215](https://github.com/apache/arrow-datafusion/pull/2215) ([tustvold](https://github.com/tustvold)) +- Remove tokio::spawn from WindowAggExec \(\#2201\) 
[\#2203](https://github.com/apache/arrow-datafusion/pull/2203) ([tustvold](https://github.com/tustvold)) +- Make ParquetExec usable outside of a tokio runtime \(\#2201\) [\#2202](https://github.com/apache/arrow-datafusion/pull/2202) ([tustvold](https://github.com/tustvold)) +- add sql level test for decimal data type [\#2200](https://github.com/apache/arrow-datafusion/pull/2200) ([liukun4515](https://github.com/liukun4515)) +- `case when` supports `NULL` constant [\#2197](https://github.com/apache/arrow-datafusion/pull/2197) ([WinkerDu](https://github.com/WinkerDu)) +- feat: Support simple Arrays with Literals [\#2194](https://github.com/apache/arrow-datafusion/pull/2194) ([ovr](https://github.com/ovr)) +- \[Ballista\] Enable ApproxPercentileWithWeight in Ballista and fill UT [\#2192](https://github.com/apache/arrow-datafusion/pull/2192) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- refactor: simplify `prepare_select_exprs` [\#2190](https://github.com/apache/arrow-datafusion/pull/2190) ([jackwener](https://github.com/jackwener)) +- Multiple row-layout support, part-1: Restructure code for clearness [\#2189](https://github.com/apache/arrow-datafusion/pull/2189) ([yjshen](https://github.com/yjshen)) +- make nightly clippy happy [\#2186](https://github.com/apache/arrow-datafusion/pull/2186) ([xudong963](https://github.com/xudong963)) +- \[Ballista\]Make PhysicalAggregateExprNode has repeated PhysicalExprNode [\#2184](https://github.com/apache/arrow-datafusion/pull/2184) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- MINOR: handle `NULL` in advance to avoid value copy in `string_concat` [\#2183](https://github.com/apache/arrow-datafusion/pull/2183) ([WinkerDu](https://github.com/WinkerDu)) +- fix: Sort with a lot of repetition values [\#2182](https://github.com/apache/arrow-datafusion/pull/2182) ([yjshen](https://github.com/yjshen)) +- cli: update lockfile [\#2178](https://github.com/apache/arrow-datafusion/pull/2178) ([happysalada](https://github.com/happysalada)) +- 
Add LogicalPlan::SubqueryAlias [\#2172](https://github.com/apache/arrow-datafusion/pull/2172) ([andygrove](https://github.com/andygrove)) +- minor: Avoid per cell evaluation in Coalesce, use zip in CaseWhen [\#2171](https://github.com/apache/arrow-datafusion/pull/2171) ([yjshen](https://github.com/yjshen)) +- Handle merged schemas in parquet pruning [\#2170](https://github.com/apache/arrow-datafusion/pull/2170) ([thinkharderdev](https://github.com/thinkharderdev)) +- Implement fast path of with\_new\_children\(\) in ExecutionPlan [\#2168](https://github.com/apache/arrow-datafusion/pull/2168) ([mingmwang](https://github.com/mingmwang)) +- enable explain for ballista [\#2163](https://github.com/apache/arrow-datafusion/pull/2163) ([doki23](https://github.com/doki23)) +- Add delimiter for create external table [\#2162](https://github.com/apache/arrow-datafusion/pull/2162) ([matthewmturner](https://github.com/matthewmturner)) +- \[MINOR\] enable `EXTRACT week` and add test \(after sqlparser update to 0.16\) [\#2157](https://github.com/apache/arrow-datafusion/pull/2157) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Optimize the evaluation of `IN` for large lists using InSet [\#2156](https://github.com/apache/arrow-datafusion/pull/2156) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update sqlparser requirement from 0.15 to 0.16 [\#2152](https://github.com/apache/arrow-datafusion/pull/2152) ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix `not(null)` with constant `null` [\#2144](https://github.com/apache/arrow-datafusion/pull/2144) ([WinkerDu](https://github.com/WinkerDu)) +- Add IF NOT EXISTS to `CREATE TABLE` and `CREATE EXTERNAL TABLE` [\#2143](https://github.com/apache/arrow-datafusion/pull/2143) ([matthewmturner](https://github.com/matthewmturner)) +- implement 'StringConcat' operator to support sql like "select 'aa' || 'b' " [\#2142](https://github.com/apache/arrow-datafusion/pull/2142) ([WinkerDu](https://github.com/WinkerDu)) +- \#2109 By default, 
use only 1000 rows to infer the schema [\#2139](https://github.com/apache/arrow-datafusion/pull/2139) ([jychen7](https://github.com/jychen7)) +- \[CLI\] Add show tables in ballista for datafusion-cli [\#2137](https://github.com/apache/arrow-datafusion/pull/2137) ([gaojun2048](https://github.com/gaojun2048)) +- fix: incorrect memory usage track for sort [\#2135](https://github.com/apache/arrow-datafusion/pull/2135) ([yjshen](https://github.com/yjshen)) +- Update quarterly roadmap for Q2 [\#2133](https://github.com/apache/arrow-datafusion/pull/2133) ([matthewmturner](https://github.com/matthewmturner)) +- Reduce SortExec memory usage by avoiding construction of a single huge batch [\#2132](https://github.com/apache/arrow-datafusion/pull/2132) ([yjshen](https://github.com/yjshen)) +- MINOR: fix concat\_ws corner bug [\#2128](https://github.com/apache/arrow-datafusion/pull/2128) ([WinkerDu](https://github.com/WinkerDu)) +- Minor: add clarifying comment in parquet [\#2127](https://github.com/apache/arrow-datafusion/pull/2127) ([alamb](https://github.com/alamb)) +- Minor: make disk\_manager public [\#2126](https://github.com/apache/arrow-datafusion/pull/2126) ([yjshen](https://github.com/yjshen)) +- JIT-compile DataFusion expression with column name [\#2124](https://github.com/apache/arrow-datafusion/pull/2124) ([Dandandan](https://github.com/Dandandan)) +- minor: replace array\_equals in case evaluation with eq\_dyn from arrow-rs [\#2121](https://github.com/apache/arrow-datafusion/pull/2121) ([alamb](https://github.com/alamb)) +- Serialize timezone in timestamp scalar values [\#2120](https://github.com/apache/arrow-datafusion/pull/2120) ([thinkharderdev](https://github.com/thinkharderdev)) +- minor: fix some clippy warnings from nightly rust [\#2119](https://github.com/apache/arrow-datafusion/pull/2119) ([alamb](https://github.com/alamb)) +- Fix case evaluation with NULLs [\#2118](https://github.com/apache/arrow-datafusion/pull/2118) ([alamb](https://github.com/alamb)) +-
issue\#1967 ignore channel close [\#2113](https://github.com/apache/arrow-datafusion/pull/2113) ([silence-coding](https://github.com/silence-coding)) +- cli: add cargo.lock [\#2112](https://github.com/apache/arrow-datafusion/pull/2112) ([happysalada](https://github.com/happysalada)) +- doc: update release schedule [\#2110](https://github.com/apache/arrow-datafusion/pull/2110) ([jychen7](https://github.com/jychen7)) +- fix df union all bug [\#2108](https://github.com/apache/arrow-datafusion/pull/2108) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([WinkerDu](https://github.com/WinkerDu)) +- Reduce repetition in Decimal binary kernels, upgrade to arrow 11.1 [\#2107](https://github.com/apache/arrow-datafusion/pull/2107) ([alamb](https://github.com/alamb)) +- update zlib version to 1.2.12 [\#2106](https://github.com/apache/arrow-datafusion/pull/2106) ([waitingkuo](https://github.com/waitingkuo)) +- Create jit-expression from datafusion expression [\#2103](https://github.com/apache/arrow-datafusion/pull/2103) ([Dandandan](https://github.com/Dandandan)) +- Add CREATE DATABASE command to SQL [\#2094](https://github.com/apache/arrow-datafusion/pull/2094) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([matthewmturner](https://github.com/matthewmturner)) +- Refactor SessionContext, BallistaContext to support multi-tenancy configurations - Part 3 [\#2091](https://github.com/apache/arrow-datafusion/pull/2091) ([mingmwang](https://github.com/mingmwang)) +- minor: remove duplicate test [\#2089](https://github.com/apache/arrow-datafusion/pull/2089) ([jackwener](https://github.com/jackwener)) +- minor: remove repeated test [\#2085](https://github.com/apache/arrow-datafusion/pull/2085) ([jackwener](https://github.com/jackwener)) +- Fix lost filters and projections in ParquetExec, CSVExec etc [\#2077](https://github.com/apache/arrow-datafusion/pull/2077) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove dependency of common for the storage crate 
[\#2076](https://github.com/apache/arrow-datafusion/pull/2076) ([yahoNanJing](https://github.com/yahoNanJing)) +- \[MINOR\] fix doc in `EXTRACT(field FROM source)` [\#2074](https://github.com/apache/arrow-datafusion/pull/2074) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- \[Bug\]\[Datafusion\] fix TaskContext session\_config bug [\#2070](https://github.com/apache/arrow-datafusion/pull/2070) ([gaojun2048](https://github.com/gaojun2048)) +- Short-circuit evaluation for `CaseWhen` [\#2068](https://github.com/apache/arrow-datafusion/pull/2068) ([yjshen](https://github.com/yjshen)) +- split datafusion-object-store module [\#2065](https://github.com/apache/arrow-datafusion/pull/2065) ([yahoNanJing](https://github.com/yahoNanJing)) +- Allow `CatalogProvider::register_catalog` to return an error [\#2052](https://github.com/apache/arrow-datafusion/pull/2052) ([alamb](https://github.com/alamb)) +- Add test in register\_catalog and change to use named symbolic constants [\#2050](https://github.com/apache/arrow-datafusion/pull/2050) ([alamb](https://github.com/alamb)) +- Update to arrow/parquet 11.0 [\#2048](https://github.com/apache/arrow-datafusion/pull/2048) ([alamb](https://github.com/alamb)) +- minor: format comments \(`//` to `// `\) [\#2047](https://github.com/apache/arrow-datafusion/pull/2047) ([jackwener](https://github.com/jackwener)) +- use cargo-tomlfmt to check Cargo.toml formatting in CI [\#2033](https://github.com/apache/arrow-datafusion/pull/2033) ([WinkerDu](https://github.com/WinkerDu)) +- feat: \#2004 approx percentile with weight [\#2031](https://github.com/apache/arrow-datafusion/pull/2031) ([jychen7](https://github.com/jychen7)) +- Refactor SessionContext, SessionState and SessionConfig to support multi-tenancy configurations - Part 2 [\#2029](https://github.com/apache/arrow-datafusion/pull/2029) ([mingmwang](https://github.com/mingmwang)) +- Simplify prerequisites for running examples [\#2028](https://github.com/apache/arrow-datafusion/pull/2028)
([doki23](https://github.com/doki23)) +- Replace usage of `println!` with logger macros [\#2020](https://github.com/apache/arrow-datafusion/pull/2020) ([silence-coding](https://github.com/silence-coding)) +- Automatically test examples in user guide [\#2018](https://github.com/apache/arrow-datafusion/pull/2018) ([vchag](https://github.com/vchag)) +- return VecDeque for DFParser::parse\_sql [\#2017](https://github.com/apache/arrow-datafusion/pull/2017) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- Eliminate the scalar value filter [\#2002](https://github.com/apache/arrow-datafusion/pull/2002) ([jackwener](https://github.com/jackwener)) +- Fixing a typo in documentation [\#1997](https://github.com/apache/arrow-datafusion/pull/1997) ([psvri](https://github.com/psvri)) +- Correct documentation of ExprVisitor [\#1996](https://github.com/apache/arrow-datafusion/pull/1996) ([alamb](https://github.com/alamb)) +- Make it possible to only scan part of a parquet file in a partition [\#1990](https://github.com/apache/arrow-datafusion/pull/1990) ([yjshen](https://github.com/yjshen)) +- Update Dockerfile to fix integration tests [\#1982](https://github.com/apache/arrow-datafusion/pull/1982) ([andygrove](https://github.com/andygrove)) +- Remove some more unnecessary cloning in sql\_expr\_to\_logical\_expr [\#1981](https://github.com/apache/arrow-datafusion/pull/1981) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add ticket reference to clippy allow [\#1978](https://github.com/apache/arrow-datafusion/pull/1978) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Implement EXTRACT expression with week, month, day, hour [\#1974](https://github.com/apache/arrow-datafusion/pull/1974) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Address typo in ExprVisitable trait documentation
[\#1970](https://github.com/apache/arrow-datafusion/pull/1970) ([jdye64](https://github.com/jdye64)) +- Update sqlparser requirement from 0.14 to 0.15 [\#1966](https://github.com/apache/arrow-datafusion/pull/1966) ([dependabot[bot]](https://github.com/apps/dependabot)) +- PruningPredicate should take owned Expr [\#1960](https://github.com/apache/arrow-datafusion/pull/1960) ([thinkharderdev](https://github.com/thinkharderdev)) +- Update to arrow 10.0.0, pyo3 0.16 [\#1957](https://github.com/apache/arrow-datafusion/pull/1957) ([alamb](https://github.com/alamb)) +- update jit-related dependencies [\#1953](https://github.com/apache/arrow-datafusion/pull/1953) ([xudong963](https://github.com/xudong963)) +- minor code refinement: `if_exists` name change, wildcard field for logical plan, etc. [\#1951](https://github.com/apache/arrow-datafusion/pull/1951) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([xudong963](https://github.com/xudong963)) +- Allow different types of query variables \(`@@var`\) rather than just string [\#1943](https://github.com/apache/arrow-datafusion/pull/1943) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([maxburke](https://github.com/maxburke)) +- Pruning serialization [\#1941](https://github.com/apache/arrow-datafusion/pull/1941) ([thinkharderdev](https://github.com/thinkharderdev)) +- Add write\_parquet to `DataFrame` [\#1940](https://github.com/apache/arrow-datafusion/pull/1940) ([matthewmturner](https://github.com/matthewmturner)) +- Fix select from EmptyExec always return 0 row after optimizer passes [\#1938](https://github.com/apache/arrow-datafusion/pull/1938) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add debug log when waiting for spilling on other consumers [\#1933](https://github.com/apache/arrow-datafusion/pull/1933) ([viirya](https://github.com/viirya)) +- Add db benchmark script [\#1928](https://github.com/apache/arrow-datafusion/pull/1928) ([matthewmturner](https://github.com/matthewmturner)) +- 
Add write\_csv to DataFrame [\#1922](https://github.com/apache/arrow-datafusion/pull/1922) ([matthewmturner](https://github.com/matthewmturner)) +- \[MINOR\] Update copyright year in Docs [\#1918](https://github.com/apache/arrow-datafusion/pull/1918) ([alamb](https://github.com/alamb)) +- add metadata to DFSchema, close \#1806. [\#1914](https://github.com/apache/arrow-datafusion/pull/1914) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jiacai2050](https://github.com/jiacai2050)) +- Clippy fix on nightly [\#1907](https://github.com/apache/arrow-datafusion/pull/1907) ([yjshen](https://github.com/yjshen)) +- Updated Rust version to 1.59 in all the files [\#1903](https://github.com/apache/arrow-datafusion/pull/1903) ([NaincyKumariKnoldus](https://github.com/NaincyKumariKnoldus)) +- support extract second and minute in expr. [\#1901](https://github.com/apache/arrow-datafusion/pull/1901) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update crate descriptions [\#1899](https://github.com/apache/arrow-datafusion/pull/1899) ([alamb](https://github.com/alamb)) +- Remove unneeded Mutex in Ballista Client [\#1898](https://github.com/apache/arrow-datafusion/pull/1898) ([alamb](https://github.com/alamb)) +- \[split/17\] move the rest of physical expr to datafusion-physical-expr crate [\#1892](https://github.com/apache/arrow-datafusion/pull/1892) ([Jimexist](https://github.com/Jimexist)) +- Avoid unnecessary branching in row read/write if schema is null-free [\#1891](https://github.com/apache/arrow-datafusion/pull/1891) ([yjshen](https://github.com/yjshen)) +- Make parquet support optional for datafusion-common crate [\#1886](https://github.com/apache/arrow-datafusion/pull/1886) ([jonmmease](https://github.com/jonmmease)) +- Fix clippy lints [\#1885](https://github.com/apache/arrow-datafusion/pull/1885) ([HaoYang670](https://github.com/HaoYang670)) +- Add support for `~/.datafusionrc` and cli option for overriding it to datafusion-cli
[\#1875](https://github.com/apache/arrow-datafusion/pull/1875) ([matthewmturner](https://github.com/matthewmturner)) +- \[Minor\] Clean up DecimalArray API Usage [\#1869](https://github.com/apache/arrow-datafusion/pull/1869) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Changes after went through "Datafusion as a library section" [\#1868](https://github.com/apache/arrow-datafusion/pull/1868) ([nonontb](https://github.com/nonontb)) +- Enhance MemorySchemaProvider to support `register_listing_table` [\#1863](https://github.com/apache/arrow-datafusion/pull/1863) ([matthewmturner](https://github.com/matthewmturner)) +- Increase default partition column type from Dict\(UInt8\) to Dict\(UInt16\) [\#1860](https://github.com/apache/arrow-datafusion/pull/1860) ([Igosuki](https://github.com/Igosuki)) +- Update to arrow 9.1.0 [\#1851](https://github.com/apache/arrow-datafusion/pull/1851) ([alamb](https://github.com/alamb)) +- move some tests out of context and into sql [\#1846](https://github.com/apache/arrow-datafusion/pull/1846) ([alamb](https://github.com/alamb)) +- \[split/14\] create `datafusion-physical-expr` module [\#1843](https://github.com/apache/arrow-datafusion/pull/1843) ([Jimexist](https://github.com/Jimexist)) +- Return `Error` when parquet reader fails rather than no data with `println!` [\#1837](https://github.com/apache/arrow-datafusion/pull/1837) ([alamb](https://github.com/alamb)) +- determine build side in hash join by `total_byte_size` instead of `num_rows` [\#1831](https://github.com/apache/arrow-datafusion/pull/1831) ([xudong963](https://github.com/xudong963)) +- Make ballista support an optional feature to datafusion-cli [\#1816](https://github.com/apache/arrow-datafusion/pull/1816) ([alamb](https://github.com/alamb)) +- Update documentation example for change in API [\#1812](https://github.com/apache/arrow-datafusion/pull/1812) ([alamb](https://github.com/alamb)) +- rename references of expr in 
physical plan module after datafusion-expr split [\#1798](https://github.com/apache/arrow-datafusion/pull/1798) ([Jimexist](https://github.com/Jimexist)) +- DataFusion + Conbench Integration [\#1791](https://github.com/apache/arrow-datafusion/pull/1791) ([dianaclarke](https://github.com/dianaclarke)) +- The returned path value of get\_by\_uri should be self-described with entire path [\#1779](https://github.com/apache/arrow-datafusion/pull/1779) ([yahoNanJing](https://github.com/yahoNanJing)) +- Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow [\#1475](https://github.com/apache/arrow-datafusion/pull/1475) ([alamb](https://github.com/alamb)) + + +## [7.1.0](https://github.com/apache/arrow-datafusion/tree/7.1.0) (2022-04-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/7.0.0...7.1.0) + +**Fixed bugs:** + +- By default, use only 1000 rows to infer the schema [\#2159](https://github.com/apache/arrow-datafusion/pull/2159) + ## [7.0.0](https://github.com/apache/arrow-datafusion/tree/7.0.0) (2022-02-14) [Full Changelog](https://github.com/apache/arrow-datafusion/compare/6.0.0...7.0.0) @@ -32,7 +342,7 @@ - Remove non idiomatic `DataFusionError::into_arrow_external_error` in favor of From conversion [\#1645](https://github.com/apache/arrow-datafusion/pull/1645) ([alamb](https://github.com/alamb)) - Remove `Accumulator::update` and `Accumulator::merge` [\#1582](https://github.com/apache/arrow-datafusion/pull/1582) ([Jimexist](https://github.com/Jimexist)) - implement `Hash` for various types and replace `PartialOrd` [\#1580](https://github.com/apache/arrow-datafusion/pull/1580) ([Jimexist](https://github.com/Jimexist)) -- Replace `DataFusionError` with `GenericError` in `ObjectStore` interface [\#1541](https://github.com/apache/arrow-datafusion/pull/1541) ([matthewmturner](https://github.com/matthewmturner)) +- Replace `DatafusionError` with `GenericError` in `ObjectStore` interface 
[\#1541](https://github.com/apache/arrow-datafusion/pull/1541) ([matthewmturner](https://github.com/matthewmturner)) - Make `FLOAT` SQL type map to `Float32` rather than `Float64` [\#1423](https://github.com/apache/arrow-datafusion/pull/1423) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([liukun4515](https://github.com/liukun4515)) - Map `REAL` SQL type to `Float32` rather than `Float64` to be consistent with pg [\#1390](https://github.com/apache/arrow-datafusion/pull/1390) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([hntd187](https://github.com/hntd187)) @@ -79,7 +389,7 @@ - Add support for `ORDER BY` on unprojected columns [\#1415](https://github.com/apache/arrow-datafusion/pull/1415) ([viirya](https://github.com/viirya)) - Support decimal for `min` and `max` aggregate [\#1407](https://github.com/apache/arrow-datafusion/pull/1407) ([liukun4515](https://github.com/liukun4515)) - Consolidate `ConstantFolding` and `SimplifyExpression` [\#1375](https://github.com/apache/arrow-datafusion/pull/1375) ([alamb](https://github.com/alamb)) -- DataFusion cli quiet mode command to contain option bool [\#1345](https://github.com/apache/arrow-datafusion/pull/1345) ([Jimexist](https://github.com/Jimexist)) +- Datafusion cli quiet mode command to contain option bool [\#1345](https://github.com/apache/arrow-datafusion/pull/1345) ([Jimexist](https://github.com/Jimexist)) - Implement `array_agg` aggregate function [\#1300](https://github.com/apache/arrow-datafusion/pull/1300) ([viirya](https://github.com/viirya)) - Add a command to switch output format in cli [\#1284](https://github.com/apache/arrow-datafusion/pull/1284) ([capkurmagati](https://github.com/capkurmagati)) - Support `=`, `<`, `<=`, `>`, `>=`, `!=`, `is distinct from`, `is not distinct from` for `BooleanArray` [\#1163](https://github.com/apache/arrow-datafusion/pull/1163) ([alamb](https://github.com/alamb)) @@ -94,7 +404,7 @@ - CTE/WITH .. 
UNION ALL confuses name resolution in WHERE [\#1509](https://github.com/apache/arrow-datafusion/issues/1509) - ORDER BY min\(x\) results in error `Plan("No field named 'foo.x'. Valid fields are 'MIN(foo.x)'.")` [\#1479](https://github.com/apache/arrow-datafusion/issues/1479) - Sort discards field metadata on the output schema [\#1476](https://github.com/apache/arrow-datafusion/issues/1476) -- DataFusion should not strip out timezone information from existing types [\#1454](https://github.com/apache/arrow-datafusion/issues/1454) +- Datafusion should not strip out timezone information from existing types [\#1454](https://github.com/apache/arrow-datafusion/issues/1454) - Error on some queries: "column types must match schema types, expected XXX but found YYY" [\#1447](https://github.com/apache/arrow-datafusion/issues/1447) - Query failing to return any results when filter is an equality check on strings \(bad statistics in parquet\) [\#1433](https://github.com/apache/arrow-datafusion/issues/1433) - Field names containing period such as `f.c1` cannot be named in SQL query [\#1432](https://github.com/apache/arrow-datafusion/issues/1432) @@ -111,7 +421,7 @@ - Fix single\_distinct\_to\_groupby for arbitrary expressions [\#1519](https://github.com/apache/arrow-datafusion/pull/1519) ([james727](https://github.com/james727)) - Fix SortExec discards field metadata on the output schema [\#1477](https://github.com/apache/arrow-datafusion/pull/1477) ([alamb](https://github.com/alamb)) - fix calculate in many\_to\_many\_hash\_partition test. 
[\#1463](https://github.com/apache/arrow-datafusion/pull/1463) ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add Timezone to Scalar::Time\* types, and better timezone awareness to DataFusion's time types [\#1455](https://github.com/apache/arrow-datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) +- Add Timezone to Scalar::Time\* types, and better timezone awareness to Datafusion's time types [\#1455](https://github.com/apache/arrow-datafusion/pull/1455) ([maxburke](https://github.com/maxburke)) - Support identifiers with `.` in them [\#1449](https://github.com/apache/arrow-datafusion/pull/1449) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) - Fixes for working with functions in dataframes, additional documentation [\#1430](https://github.com/apache/arrow-datafusion/pull/1430) ([tobyhede](https://github.com/tobyhede)) - \[Minor\] Fix `send_time` metric for hash-repartition [\#1421](https://github.com/apache/arrow-datafusion/pull/1421) ([Dandandan](https://github.com/Dandandan)) @@ -130,7 +440,7 @@ - Clarify docs about `Accumulator::update` and `Accumulator::update_batch` [\#1542](https://github.com/apache/arrow-datafusion/pull/1542) ([alamb](https://github.com/alamb)) - Fix duplicated `cargo run --example parquet_sql` [\#1482](https://github.com/apache/arrow-datafusion/pull/1482) ([sergey-melnychuk](https://github.com/sergey-melnychuk)) -- add documentation to DataFusion cli's new commands [\#1348](https://github.com/apache/arrow-datafusion/pull/1348) ([liukun4515](https://github.com/liukun4515)) +- add documentation to Datafusion cli's new commands [\#1348](https://github.com/apache/arrow-datafusion/pull/1348) ([liukun4515](https://github.com/liukun4515)) - fix some clippy warnings from nightly channel [\#1277](https://github.com/apache/arrow-datafusion/pull/1277) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) **Performance improvements:** @@ 
-470,7 +780,7 @@ - delete redundant code [\#973](https://github.com/apache/arrow-datafusion/issues/973) - How to build DataFusion python wheel [\#853](https://github.com/apache/arrow-datafusion/issues/853) - Add support for partition pruning [\#204](https://github.com/apache/arrow-datafusion/issues/204) -- \[DataFusion\] Support joins on TimestampMillisecond columns [\#187](https://github.com/apache/arrow-datafusion/issues/187) +- \[Datafusion\] Support joins on TimestampMillisecond columns [\#187](https://github.com/apache/arrow-datafusion/issues/187) - TPC-H Query 21 [\#173](https://github.com/apache/arrow-datafusion/issues/173) - TPC-H Query 13 [\#164](https://github.com/apache/arrow-datafusion/issues/164) - TPC-H Query 8 [\#162](https://github.com/apache/arrow-datafusion/issues/162) @@ -509,7 +819,7 @@ For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/ar - Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb)) - JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1)) - Show the result of all optimizer passes in EXPLAIN VERBOSE [\#759](https://github.com/apache/arrow-datafusion/pull/759) ([alamb](https://github.com/alamb)) -- \#723 DataFusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) +- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) - Update API for extension planning to include logical plan [\#643](https://github.com/apache/arrow-datafusion/pull/643) ([alamb](https://github.com/alamb)) - Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) 
([andygrove](https://github.com/andygrove)) - fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) @@ -520,7 +830,7 @@ For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/ar - Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/arrow-datafusion/pull/395) ([alamb](https://github.com/alamb)) - Return Vec\ from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/arrow-datafusion/pull/370) ([alamb](https://github.com/alamb)) - Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder [\#365](https://github.com/apache/arrow-datafusion/pull/365) ([alamb](https://github.com/alamb)) -- \[DataFusion\] NOW\(\) function support [\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis)) +- \[Datafusion\] NOW\(\) function support [\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis)) - Implement select distinct [\#262](https://github.com/apache/arrow-datafusion/pull/262) ([Dandandan](https://github.com/Dandandan)) - Refactor datafusion/src/physical\_plan/common.rs build\_file\_list to take less param and reuse code [\#253](https://github.com/apache/arrow-datafusion/pull/253) ([Jimexist](https://github.com/Jimexist)) - Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp)) @@ -718,7 +1028,7 @@ For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/ar - RFC Roadmap for 2021 \(DataFusion\) [\#140](https://github.com/apache/arrow-datafusion/issues/140) - Implement hash partitioning [\#131](https://github.com/apache/arrow-datafusion/issues/131) - Grouping by column position [\#110](https://github.com/apache/arrow-datafusion/issues/110) -- \[DataFusion\] GROUP BY 
with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107) +- \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107) - \[Rust\] Add support for JSON data sources [\#103](https://github.com/apache/arrow-datafusion/issues/103) - \[Rust\] Implement metrics framework [\#95](https://github.com/apache/arrow-datafusion/issues/95) - Publically export Arrow crate from datafusion [\#36](https://github.com/apache/arrow-datafusion/issues/36) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 4520847a4cdf..652dde5fbd5e 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-common" description = "Common functionality for DataFusion query engine" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index ef1592b85d44..259095c29dad 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "../README.md" @@ -59,12 +59,12 @@ arrow = { version = "13", features = ["prettyprint"] } async-trait = "0.1.41" avro-rs = { version = "0.13", features = ["snappy"], optional = true } chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "7.0.0", features = ["parquet"] } -datafusion-data-access = { path = "../data-access", version = "7.0.0" } -datafusion-expr = { path = "../expr", version = "7.0.0" } -datafusion-jit = 
{ path = "../jit", version = "7.0.0", optional = true } -datafusion-physical-expr = { path = "../physical-expr", version = "7.0.0" } -datafusion-row = { path = "../row", version = "7.0.0" } +datafusion-common = { path = "../common", version = "8.0.0", features = ["parquet"] } +datafusion-data-access = { path = "../data-access", version = "8.0.0" } +datafusion-expr = { path = "../expr", version = "8.0.0" } +datafusion-jit = { path = "../jit", version = "8.0.0", optional = true } +datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" } +datafusion-row = { path = "../row", version = "8.0.0" } futures = "0.3" hashbrown = { version = "0.12", features = ["raw"] } lazy_static = { version = "^1.4.0" } diff --git a/datafusion/data-access/Cargo.toml b/datafusion/data-access/Cargo.toml index 4a68ca4866f5..7bf447f04cfe 100644 --- a/datafusion/data-access/Cargo.toml +++ b/datafusion/data-access/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-data-access" description = "General data access layer currently mainly based on the object store interfaces" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 8f84be35ea62..25786e670987 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-expr" description = "Logical plan and expression representation for DataFusion query engine" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -37,5 +37,5 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.7", default-features = false } arrow = { version = "13", features = ["prettyprint"] } -datafusion-common = { path = "../common", version = "7.0.0" } +datafusion-common = { path = 
"../common", version = "8.0.0" } sqlparser = "0.17" diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 047693482fc2..e3278be6d296 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-jit" description = "Just In Time (JIT) compilation support for DataFusion query engine" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,7 +41,7 @@ cranelift = "0.83.0" cranelift-jit = "0.83.0" cranelift-module = "0.83.0" cranelift-native = "0.83.0" -datafusion-common = { path = "../common", version = "7.0.0", features = ["jit"] } -datafusion-expr = { path = "../expr", version = "7.0.0" } +datafusion-common = { path = "../common", version = "8.0.0", features = ["jit"] } +datafusion-expr = { path = "../expr", version = "8.0.0" } parking_lot = "0.12" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index d64ecb07b714..f9a472555060 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -44,9 +44,9 @@ arrow = { version = "13", features = ["prettyprint"] } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4", default-features = false } -datafusion-common = { path = "../common", version = "7.0.0" } -datafusion-expr = { path = "../expr", version = "7.0.0" } -datafusion-row = { path = "../row", version = "7.0.0" } +datafusion-common = { path = "../common", version = "8.0.0" } +datafusion-expr = { path = "../expr", version = 
"8.0.0" } +datafusion-row = { path = "../row", version = "8.0.0" } hashbrown = { version = "0.12", features = ["raw"] } lazy_static = { version = "^1.4.0" } md-5 = { version = "^0.10.0", optional = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index c7e338b77242..57466e03be02 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -35,7 +35,7 @@ path = "src/lib.rs" [features] [dependencies] -datafusion = { path = "../core", version = "7.0.0" } +datafusion = { path = "../core", version = "8.0.0" } prost = "0.10" [build-dependencies] diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml index de9fc8c88364..13b485fb7cd5 100644 --- a/datafusion/row/Cargo.toml +++ b/datafusion/row/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-row" description = "Row backed by raw bytes for DataFusion query engine" -version = "7.0.0" +version = "8.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,7 +38,7 @@ jit = ["datafusion-jit"] [dependencies] arrow = { version = "13" } -datafusion-common = { path = "../common", version = "7.0.0" } -datafusion-jit = { path = "../jit", version = "7.0.0", optional = true } +datafusion-common = { path = "../common", version = "8.0.0" } +datafusion-jit = { path = "../jit", version = "8.0.0", optional = true } paste = "^1.0" rand = "0.8" diff --git a/dev/release/README.md b/dev/release/README.md index 6437353dc1bd..74faf25e0ccc 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -122,7 +122,8 @@ to generate one if you do not already have one. 
```bash # create the changelog -CHANGELOG_GITHUB_TOKEN=<TOKEN> ./dev/release/update_change_log-all.sh master 8.0.0 7.0.0 +CHANGELOG_GITHUB_TOKEN=<TOKEN> ./dev/release/update_change_log-datafusion.sh master 8.0.0 7.0.0 +CHANGELOG_GITHUB_TOKEN=<TOKEN> ./dev/release/update_change_log-ballista.sh master ballista-0.7.0 ballista-0.6.0 # review change log / edit issues and labels if needed, rerun until you are happy with the result git commit -a -m 'Create changelog for release' ``` diff --git a/dev/release/update_change_log-all.sh b/dev/release/update_change_log-all.sh deleted file mode 100755 index d9bb88f73384..000000000000 --- a/dev/release/update_change_log-all.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -set -e - -# Usage: -# CHANGELOG_GITHUB_TOKEN= ./update_change_log-all.sh -# Example: -# CHANGELOG_GITHUB_TOKEN= ./update_change_log-all.sh master 8.0.0 7.1.0 -# CHANGELOG_GITHUB_TOKEN= ./update_change_log-all.sh maint-7.x 7.1.0 7.0.0 - -SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" - -${SOURCE_DIR}/update_change_log-datafusion.sh $1 $2 $3 -${SOURCE_DIR}/update_change_log-ballista.sh $1 $2 $3 diff --git a/dev/update_ballista_versions.py b/dev/update_ballista_versions.py index fa2c8bc18f10..0fb3f4bf8c5a 100755 --- a/dev/update_ballista_versions.py +++ b/dev/update_ballista_versions.py @@ -44,6 +44,7 @@ def update_cargo_toml(cargo_toml: str, new_version: str): 'ballista-core', 'ballista-executor', 'ballista-scheduler', + 'ballista-cli', ) for ballista_dep in ballista_deps: dep = doc.get('dependencies', {}).get(ballista_dep) diff --git a/dev/update_datafusion_versions.py b/dev/update_datafusion_versions.py index 2b40c186ae01..177c0b61dcdc 100755 --- a/dev/update_datafusion_versions.py +++ b/dev/update_datafusion_versions.py @@ -41,6 +41,7 @@ } ballista_crates = { + 'ballista-cli': 'ballista-cli/Cargo.toml', 'core': 'ballista/rust/core/Cargo.toml', 'client': 'ballista/rust/client/Cargo.toml', 'executor': 'ballista/rust/executor/Cargo.toml', diff --git a/docs/source/cli/index.rst b/docs/source/cli/index.rst index bc22bf6c8348..c10db36dfd63 100644 --- a/docs/source/cli/index.rst +++ b/docs/source/cli/index.rst @@ -58,7 +58,7 @@ Use the following commands to clone this repository and build a Docker image con .. code-block:: bash git clone https://github.com/apache/arrow-datafusion - git checkout 7.0.0 + git checkout 8.0.0 cd arrow-datafusion docker build -f datafusion-cli/Dockerfile . 
--tag datafusion-cli docker run -it -v $(your_data_location):/data datafusion-cli diff --git a/docs/source/user-guide/cli.md b/docs/source/user-guide/cli.md index ec38aad9a92a..2ec9c8d49937 100644 --- a/docs/source/user-guide/cli.md +++ b/docs/source/user-guide/cli.md @@ -51,7 +51,7 @@ $ echo "1,2" > data.csv ```bash $ datafusion-cli -DataFusion CLI v7.0.0 +DataFusion CLI v8.0.0 > CREATE EXTERNAL TABLE foo (a INT, b INT) STORED AS CSV LOCATION 'data.csv'; 0 rows in set. Query took 0.001 seconds. diff --git a/docs/source/user-guide/distributed/clients/cli.rst b/docs/source/user-guide/distributed/clients/cli.rst index 71518508222b..d5cf30b8a024 100644 --- a/docs/source/user-guide/distributed/clients/cli.rst +++ b/docs/source/user-guide/distributed/clients/cli.rst @@ -39,7 +39,7 @@ Use the following commands to clone this repository and build a Docker image con .. code-block:: bash git clone https://github.com/apache/arrow-datafusion - git checkout 7.0.0 + git checkout 8.0.0 cd arrow-datafusion docker build -f ballista-cli/Dockerfile . --tag ballista-cli docker run -it -v $(your_data_location):/data ballista-cli diff --git a/docs/source/user-guide/distributed/deployment/docker-compose.md b/docs/source/user-guide/distributed/deployment/docker-compose.md index 1b010b560b2c..c43e775ae1d1 100644 --- a/docs/source/user-guide/distributed/deployment/docker-compose.md +++ b/docs/source/user-guide/distributed/deployment/docker-compose.md @@ -28,12 +28,12 @@ There is no officially published Docker image so it is currently necessary to bu Run the following commands to clone the source repository and build the Docker image. ```bash -git clone git@github.com:apache/arrow-datafusion.git -b 5.1.0 +git clone git@github.com:apache/arrow-datafusion.git -b 8.0.0 cd arrow-datafusion ./dev/build-ballista-docker.sh ``` -This will create an image with the tag `ballista:0.6.0`. +This will create an image with the tag `ballista:0.7.0`. 
## Start a cluster @@ -48,7 +48,7 @@ services: image: quay.io/coreos/etcd:v3.4.9 command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379" ballista-scheduler: - image: ballista:0.6.0 + image: ballista:0.7.0 command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --bind-port 50050" ports: - "50050:50050" @@ -59,7 +59,7 @@ services: depends_on: - etcd ballista-executor: - image: ballista:0.6.0 + image: ballista:0.7.0 command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --scheduler-host ballista-scheduler" ports: - "50051:50051" @@ -90,8 +90,8 @@ Attaching to ballista-benchmarks_etcd_1, ballista-benchmarks_ballista-scheduler_ ballista-executor_1 | [2021-08-28T15:55:22Z INFO ballista_executor] Running with config: ballista-executor_1 | [2021-08-28T15:55:22Z INFO ballista_executor] work_dir: /tmp/.tmpLVx39c ballista-executor_1 | [2021-08-28T15:55:22Z INFO ballista_executor] concurrent_tasks: 4 -ballista-scheduler_1 | [2021-08-28T15:55:22Z INFO ballista_scheduler] Ballista v0.6.0 Scheduler listening on 0.0.0.0:50050 -ballista-executor_1 | [2021-08-28T15:55:22Z INFO ballista_executor] Ballista v0.6.0 Rust Executor listening on 0.0.0.0:50051 +ballista-scheduler_1 | [2021-08-28T15:55:22Z INFO ballista_scheduler] Ballista v0.7.0 Scheduler listening on 0.0.0.0:50050 +ballista-executor_1 | [2021-08-28T15:55:22Z INFO ballista_executor] Ballista v0.7.0 Rust Executor listening on 0.0.0.0:50051 ``` The scheduler listens on port 50050 and this is the port that clients will need to connect to. 
diff --git a/docs/source/user-guide/distributed/deployment/docker.md b/docs/source/user-guide/distributed/deployment/docker.md index 541a884684db..e00acac98cbb 100644 --- a/docs/source/user-guide/distributed/deployment/docker.md +++ b/docs/source/user-guide/distributed/deployment/docker.md @@ -26,12 +26,12 @@ There is no officially published Docker image so it is currently necessary to bu Run the following commands to clone the source repository and build the Docker image. ```bash -git clone git@github.com:apache/arrow-datafusion.git -b 5.1.0 +git clone git@github.com:apache/arrow-datafusion.git -b 8.0.0 cd arrow-datafusion ./dev/build-ballista-docker.sh ``` -This will create an image with the tag `ballista:0.6.0`. +This will create an image with the tag `ballista:0.7.0`. ### Start a Scheduler @@ -39,7 +39,7 @@ Start a scheduler using the following syntax: ```bash docker run --network=host \ - -d ballista:0.6.0 \ + -d ballista:0.7.0 \ /scheduler --bind-port 50050 ``` @@ -48,14 +48,14 @@ Run `docker ps` to check that the process is running: ``` $ docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -1f3f8b5ed93a ballista:0.6.0 "/scheduler --bind-p…" 2 seconds ago Up 1 second tender_archimedes +1f3f8b5ed93a ballista:0.7.0 "/scheduler --bind-p…" 2 seconds ago Up 1 second tender_archimedes ``` Run `docker logs CONTAINER_ID` to check the output from the process: ``` $ docker logs 1f3f8b5ed93a -[2021-08-28T15:45:11Z INFO ballista_scheduler] Ballista v0.6.0 Scheduler listening on 0.0.0.0:50050 +[2021-08-28T15:45:11Z INFO ballista_scheduler] Ballista v0.7.0 Scheduler listening on 0.0.0.0:50050 ``` ### Start executors @@ -64,7 +64,7 @@ Start one or more executor processes. 
Each executor process will need to listen ```bash docker run --network=host \ - -d ballista:0.6.0 \ + -d ballista:0.7.0 \ /executor --external-host localhost --bind-port 50051 ``` @@ -73,8 +73,8 @@ Use `docker ps` to check that both the scheduer and executor(s) are now running: ``` $ docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -7c6941bb8dc0 ballista:0.6.0 "/executor --externa…" 3 seconds ago Up 2 seconds tender_goldberg -1f3f8b5ed93a ballista:0.6.0 "/scheduler --bind-p…" 50 seconds ago Up 49 seconds tender_archimedes +7c6941bb8dc0 ballista:0.7.0 "/executor --externa…" 3 seconds ago Up 2 seconds tender_goldberg +1f3f8b5ed93a ballista:0.7.0 "/scheduler --bind-p…" 50 seconds ago Up 49 seconds tender_archimedes ``` Use `docker logs CONTAINER_ID` to check the output from the executor(s): @@ -84,7 +84,7 @@ $ docker logs 7c6941bb8dc0 [2021-08-28T15:45:58Z INFO ballista_executor] Running with config: [2021-08-28T15:45:58Z INFO ballista_executor] work_dir: /tmp/.tmpeyEM76 [2021-08-28T15:45:58Z INFO ballista_executor] concurrent_tasks: 4 -[2021-08-28T15:45:58Z INFO ballista_executor] Ballista v0.6.0 Rust Executor listening on 0.0.0.0:50051 +[2021-08-28T15:45:58Z INFO ballista_executor] Ballista v0.7.0 Rust Executor listening on 0.0.0.0:50051 ``` ### Using etcd as backing store @@ -96,7 +96,7 @@ to launch the scheduler with this option enabled. 
```bash docker run --network=host \ - -d ballista:0.6.0 \ + -d ballista:0.7.0 \ /scheduler --bind-port 50050 \ --config-backend etcd \ --etcd-urls etcd:2379 diff --git a/docs/source/user-guide/distributed/deployment/kubernetes.md b/docs/source/user-guide/distributed/deployment/kubernetes.md index 9a8e29b4f910..3047f325336d 100644 --- a/docs/source/user-guide/distributed/deployment/kubernetes.md +++ b/docs/source/user-guide/distributed/deployment/kubernetes.md @@ -55,20 +55,20 @@ There is no officially published Docker image so it is currently necessary to bu Run the following commands to clone the source repository and build the Docker image. ```bash -git clone git@github.com:apache/arrow-datafusion.git -b 5.1.0 +git clone git@github.com:apache/arrow-datafusion.git -b 8.0.0 cd arrow-datafusion ./dev/build-ballista-docker.sh ``` -This will create an image with the tag `ballista:0.6.0`. +This will create an image with the tag `ballista:0.7.0`. ## Publishing your images Once the images have been built, you can retag them and can push them to your favourite docker registry. 
```bash -docker tag ballista:0.6.0 /ballista:0.6.0 -docker push /ballista:0.6.0 +docker tag ballista:0.7.0 /ballista:0.7.0 +docker push /ballista:0.7.0 ``` ## Create Persistent Volume and Persistent Volume Claim @@ -154,7 +154,7 @@ spec: spec: containers: - name: ballista-scheduler - image: /ballista:0.6.0 + image: /ballista:0.7.0 command: ["/scheduler"] args: ["--bind-port=50050"] ports: @@ -185,7 +185,7 @@ spec: spec: containers: - name: ballista-executor - image: /ballista:0.6.0 + image: /ballista:0.7.0 command: ["/executor"] args: - "--bind-port=50051" @@ -229,7 +229,7 @@ You can view the scheduler logs with `kubectl logs ballista-scheduler-0`: ``` $ kubectl logs ballista-scheduler-0 -[2021-02-19T00:24:01Z INFO scheduler] Ballista v0.6.0 Scheduler listening on 0.0.0.0:50050 +[2021-02-19T00:24:01Z INFO scheduler] Ballista v0.7.0 Scheduler listening on 0.0.0.0:50050 [2021-02-19T00:24:16Z INFO ballista::scheduler] Received register_executor request for ExecutorMetadata { id: "b5e81711-1c5c-46ec-8522-d8b359793188", host: "10.1.23.149", port: 50051 } [2021-02-19T00:24:17Z INFO ballista::scheduler] Received register_executor request for ExecutorMetadata { id: "816e4502-a876-4ed8-b33f-86d243dcf63f", host: "10.1.23.150", port: 50051 } ``` diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 6be802400a77..de8cae295579 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -24,7 +24,7 @@ Add the following to your `Cargo.toml` file: ```toml -datafusion = "7.0.0" +datafusion = "8.0.0" tokio = "1.0" ``` diff --git a/docs/source/user-guide/library.md b/docs/source/user-guide/library.md index cb9ca48e78f8..422c9d6d1ec4 100644 --- a/docs/source/user-guide/library.md +++ b/docs/source/user-guide/library.md @@ -44,7 +44,7 @@ To get started, add the following to your `Cargo.toml` file: ```toml [dependencies] -datafusion = "7.0.0" +datafusion = "8.0.0" ``` ## Create a main function