From 08d1f5507bd5b7991b36364b0d148a80244d2aa3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 2 Dec 2022 09:43:56 -0700 Subject: [PATCH] Prepare for 15.0.0 Release (#4470) * update version * update versions for test-utils and parquet-test-utils * CHANGELOG * revert changing test-utils versions * update Cargo.lock --- .gitignore | 2 + benchmarks/Cargo.toml | 6 +- datafusion-cli/Cargo.lock | 16 +- datafusion-cli/Cargo.toml | 4 +- datafusion-examples/Cargo.toml | 2 +- datafusion/CHANGELOG.md | 317 ++++++++++++++++++ datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 16 +- .../core/src/execution/memory_manager/mod.rs | 16 +- datafusion/core/src/execution/runtime_env.rs | 4 + datafusion/expr/Cargo.toml | 4 +- datafusion/jit/Cargo.toml | 6 +- datafusion/optimizer/Cargo.toml | 10 +- datafusion/physical-expr/Cargo.toml | 8 +- datafusion/proto/Cargo.toml | 8 +- datafusion/row/Cargo.toml | 6 +- datafusion/sql/Cargo.toml | 6 +- test-utils/Cargo.toml | 2 +- 18 files changed, 381 insertions(+), 54 deletions(-) diff --git a/.gitignore b/.gitignore index 5942659d19d1d..1c68e313a8640 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,5 @@ dev/dist # CI arrow-ballista + +datafusion/CHANGELOG.md.bak diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 43955ae8cc632..977658f375836 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-benchmarks" description = "DataFusion Benchmarks" -version = "14.0.0" +version = "15.0.0" edition = "2021" authors = ["Apache Arrow "] homepage = "https://github.com/apache/arrow-datafusion" @@ -34,7 +34,7 @@ snmalloc = ["snmalloc-rs"] [dependencies] arrow = "28.0.0" -datafusion = { path = "../datafusion/core", version = "14.0.0", features = ["scheduler"] } +datafusion = { path = "../datafusion/core", version = "15.0.0", features = ["scheduler"] } env_logger = "0.10" futures = "0.3" mimalloc = { version = "0.1", optional = true, default-features = false } @@ -51,4 
+51,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" } tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] } [dev-dependencies] -datafusion-proto = { path = "../datafusion/proto", version = "14.0.0" } +datafusion-proto = { path = "../datafusion/proto", version = "15.0.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 01b2d71065793..dbf9cc88d7842 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -660,7 +660,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "14.0.0" +version = "15.0.0" dependencies = [ "ahash", "arrow", @@ -705,7 +705,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "14.0.0" +version = "15.0.0" dependencies = [ "arrow", "clap 3.2.23", @@ -721,7 +721,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "14.0.0" +version = "15.0.0" dependencies = [ "arrow", "chrono", @@ -732,7 +732,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "14.0.0" +version = "15.0.0" dependencies = [ "ahash", "arrow", @@ -743,7 +743,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "14.0.0" +version = "15.0.0" dependencies = [ "arrow", "async-trait", @@ -757,7 +757,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "14.0.0" +version = "15.0.0" dependencies = [ "ahash", "arrow", @@ -785,7 +785,7 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "14.0.0" +version = "15.0.0" dependencies = [ "arrow", "datafusion-common", @@ -795,7 +795,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "14.0.0" +version = "15.0.0" dependencies = [ "arrow-schema", "datafusion-common", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 3b23b8c751167..012fbc3023873 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line 
Client for DataFusion query engine." -version = "14.0.0" +version = "15.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = [ "arrow", "datafusion", "query", "sql" ] @@ -31,7 +31,7 @@ readme = "README.md" [dependencies] arrow = "28.0.0" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "14.0.0" } +datafusion = { path = "../datafusion/core", version = "15.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index cdb77d152f621..e1403466f843b 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-examples" description = "DataFusion usage examples" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow "] diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index d617a4fb0c3ed..3a16489969627 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,6 +19,323 @@ # Changelog +## [15.0.0](https://github.com/apache/arrow-datafusion/tree/15.0.0) (2022-12-01) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/14.0.0-rc1...15.0.0) + +**Breaking changes:** + +- Expose remaining parquet config options into ConfigOptions \(try 2\) [\#4427](https://github.com/apache/arrow-datafusion/pull/4427) ([alamb](https://github.com/alamb)) +- Config Cleanup: Remove TaskProperties and KV structure, keep key=value serialization [\#4382](https://github.com/apache/arrow-datafusion/pull/4382) ([alamb](https://github.com/alamb)) +- add `{TDigest,ScalarValue,Accumulator}::size` [\#4342](https://github.com/apache/arrow-datafusion/pull/4342) ([crepererum](https://github.com/crepererum)) +- API-break: Support `SubqueryAlias` and remove `Alias in Projection` 
[\#4333](https://github.com/apache/arrow-datafusion/pull/4333) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- split `try_new_with_schema_alias` from original code [\#4284](https://github.com/apache/arrow-datafusion/pull/4284) ([jackwener](https://github.com/jackwener)) +- Collapse statistics in normal explain plan [\#4157](https://github.com/apache/arrow-datafusion/pull/4157) ([alamb](https://github.com/alamb)) +- Linearize binary expressions to reduce proto tree complexity [\#4115](https://github.com/apache/arrow-datafusion/pull/4115) ([isidentical](https://github.com/isidentical)) +- support `SET Timezone` [\#4107](https://github.com/apache/arrow-datafusion/pull/4107) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) + +**Implemented enhancements:** + +- Refactor Built-in, Aggregate window functions to increase code reuse. [\#4440](https://github.com/apache/arrow-datafusion/issues/4440) +- Helper to get "root" error [\#4435](https://github.com/apache/arrow-datafusion/issues/4435) +- Do NOT convert intermediate/source errors to strings. 
[\#4434](https://github.com/apache/arrow-datafusion/issues/4434) +- Estimate the `total_byte_size` of the filter expression's result when selectivity is available [\#4374](https://github.com/apache/arrow-datafusion/issues/4374) +- refactor the code of the `HashJoin` [\#4356](https://github.com/apache/arrow-datafusion/issues/4356) +- `CoalesceBatchesExec` reports no ordering [\#4331](https://github.com/apache/arrow-datafusion/issues/4331) +- Introduce tournament tree to achieve better k-way sort-merging [\#4300](https://github.com/apache/arrow-datafusion/issues/4300) +- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4299](https://github.com/apache/arrow-datafusion/issues/4299) +- Remove the macro rule `unary_scalar_expr` from `expr_fn.rs` [\#4298](https://github.com/apache/arrow-datafusion/issues/4298) +- Remove Alias-in-Projection, replace it with `SubqueryAlias` [\#4291](https://github.com/apache/arrow-datafusion/issues/4291) +- reimplement `reduce_outer_join` [\#4270](https://github.com/apache/arrow-datafusion/issues/4270) +- Reimplement `filter_push_down` [\#4266](https://github.com/apache/arrow-datafusion/issues/4266) +- Reimplement `eliminate_limit` [\#4264](https://github.com/apache/arrow-datafusion/issues/4264) +- Reimplement `limit_push_down` [\#4263](https://github.com/apache/arrow-datafusion/issues/4263) +- Make a data driven SQL testing tool \(so we can reuse duckdb test suite, example\) [\#4248](https://github.com/apache/arrow-datafusion/issues/4248) +- upgrade chrono to 0.4.23 [\#4224](https://github.com/apache/arrow-datafusion/issues/4224) +- support scan non-string columns partitioned parquet files [\#4218](https://github.com/apache/arrow-datafusion/issues/4218) +- Allow optimizer rules to skip optimizing plans [\#4209](https://github.com/apache/arrow-datafusion/issues/4209) +- Supporting specifying schema when create tables [\#4183](https://github.com/apache/arrow-datafusion/issues/4183) +- Improve 
ergonomics of creating `ListingOptions` [\#4178](https://github.com/apache/arrow-datafusion/issues/4178) +- Add ability to specify external sort information for ParquetExec [\#4169](https://github.com/apache/arrow-datafusion/issues/4169) +- Add another method to collect referenced columns from an expression [\#4152](https://github.com/apache/arrow-datafusion/issues/4152) +- Improve `EXPLAIN ANALYZE` output for parquet exec [\#4144](https://github.com/apache/arrow-datafusion/issues/4144) +- `TableProviderFactory::create` should have `Optional` parameter [\#4142](https://github.com/apache/arrow-datafusion/issues/4142) +- Support more expressions in equality join [\#4140](https://github.com/apache/arrow-datafusion/issues/4140) +- JoinSelection Rule to choose physical join implementation: HashJoin\(Partitioned or CollectLeft\) or SortMergeJoin base on Stats [\#4139](https://github.com/apache/arrow-datafusion/issues/4139) +- Allow TPCH tooling to create a combined result for easier processing by outside tools [\#4127](https://github.com/apache/arrow-datafusion/issues/4127) +- Allow additional options when creating an external table [\#4125](https://github.com/apache/arrow-datafusion/issues/4125) +- reuse code utils::optimize\_children instead of redundant implementation [\#4120](https://github.com/apache/arrow-datafusion/issues/4120) +- Add test field to PR template [\#4113](https://github.com/apache/arrow-datafusion/issues/4113) +- Allow for automatic registration of `ListingTables` [\#4111](https://github.com/apache/arrow-datafusion/issues/4111) +- Add CI check that configs.md is up-to-date [\#4108](https://github.com/apache/arrow-datafusion/issues/4108) +- Support `SET` timezone to non-UTC time zone [\#4106](https://github.com/apache/arrow-datafusion/issues/4106) +- Parquet predicates contains `and true` expressions [\#4091](https://github.com/apache/arrow-datafusion/issues/4091) +- Replace RwLock\ and Mutex\ by using DashMap 
[\#4077](https://github.com/apache/arrow-datafusion/issues/4077) +- add support for `.xz` compressed files [\#4074](https://github.com/apache/arrow-datafusion/issues/4074) +- add a feature gate to make support for compressed files optional [\#4073](https://github.com/apache/arrow-datafusion/issues/4073) +- Support serializing more deeply nested AND / OR expressions [\#4066](https://github.com/apache/arrow-datafusion/issues/4066) +- Use f64::total\_cmp instead of OrderedFloat [\#4051](https://github.com/apache/arrow-datafusion/issues/4051) +- Add documentation to make it clear that decimal support is still experimental [\#4036](https://github.com/apache/arrow-datafusion/issues/4036) +- Simplify Pushed Down Predicates [\#4020](https://github.com/apache/arrow-datafusion/issues/4020) +- Improve HashJoinExec metrics [\#4009](https://github.com/apache/arrow-datafusion/issues/4009) +- Move physical plan serde from Ballista to DataFusion [\#3949](https://github.com/apache/arrow-datafusion/issues/3949) +- Support `SubqueryAlias` better in planner [\#3927](https://github.com/apache/arrow-datafusion/issues/3927) +- A framework for expression boundary analysis \(and statistics\) [\#3898](https://github.com/apache/arrow-datafusion/issues/3898) +- Replace `Filter: Boolean(false)` with `EmptyRelation` [\#3864](https://github.com/apache/arrow-datafusion/issues/3864) +- Implement statistics estimation for `FilterExec` [\#3845](https://github.com/apache/arrow-datafusion/issues/3845) +- Support parquet page filtering for more types: String, Binary\(Decimal\), Int96 [\#3833](https://github.com/apache/arrow-datafusion/issues/3833) +- Allow configuring parquet filter pushdown dynamically [\#3821](https://github.com/apache/arrow-datafusion/issues/3821) +- Unable to register tables in non-cloud S3 servers [\#3640](https://github.com/apache/arrow-datafusion/issues/3640) +- support more data type in prune for cast/try\_cast [\#3442](https://github.com/apache/arrow-datafusion/issues/3442) +- 
Disable spill to disk globally [\#3264](https://github.com/apache/arrow-datafusion/issues/3264) +- Consider to categorize Operator [\#3216](https://github.com/apache/arrow-datafusion/issues/3216) +- Replace Projection.alias with SubqueryAlias [\#2212](https://github.com/apache/arrow-datafusion/issues/2212) +- \[Optimizer\] Eliminate the distinct [\#2045](https://github.com/apache/arrow-datafusion/issues/2045) +- beautify datafusion's site: https://arrow.apache.org/datafusion/ [\#1819](https://github.com/apache/arrow-datafusion/issues/1819) +- split datafusion-logical-plan sub-module [\#1755](https://github.com/apache/arrow-datafusion/issues/1755) +- convert `outer join` to `inner join` to improve performance [\#1585](https://github.com/apache/arrow-datafusion/issues/1585) +- Add sqllogictest for datafusion [\#1453](https://github.com/apache/arrow-datafusion/issues/1453) +- Add additional simplification rules [\#1406](https://github.com/apache/arrow-datafusion/issues/1406) +- support more subqueries [\#1209](https://github.com/apache/arrow-datafusion/issues/1209) +- Add baseline metrics for remaining execution plan nodes [\#1019](https://github.com/apache/arrow-datafusion/issues/1019) +- Make `ExecutionPlan` implementations immutable [\#987](https://github.com/apache/arrow-datafusion/issues/987) +- Architecture overview may be insufficient in README [\#980](https://github.com/apache/arrow-datafusion/issues/980) +- Add a separate configuration setting for parallelism of scanning parquet files [\#924](https://github.com/apache/arrow-datafusion/issues/924) +- Support hash repartition elimination [\#41](https://github.com/apache/arrow-datafusion/issues/41) + +**Fixed bugs:** + +- `pyarrow` CI failed [\#4448](https://github.com/apache/arrow-datafusion/issues/4448) +- `UnwrapCastInComparison` exist bug [\#4430](https://github.com/apache/arrow-datafusion/issues/4430) +- The CLI panics when passing an invalid `explain` query 
[\#4378](https://github.com/apache/arrow-datafusion/issues/4378) +- HashJoin should return Err when the right side input stream produce Err [\#4362](https://github.com/apache/arrow-datafusion/issues/4362) +- Optimizer check errors if resulting schema has different metadata [\#4346](https://github.com/apache/arrow-datafusion/issues/4346) +- Panic with function `to_hex` [\#4339](https://github.com/apache/arrow-datafusion/issues/4339) +- `LimitPushDown` pushdown into limit, result is wrong [\#4308](https://github.com/apache/arrow-datafusion/issues/4308) +- DESCRIBE statement issue with qualified table references [\#4303](https://github.com/apache/arrow-datafusion/issues/4303) +- Panic with window function LAST\_VALUE [\#4297](https://github.com/apache/arrow-datafusion/issues/4297) +- CI failed in `Compare to postgres` [\#4294](https://github.com/apache/arrow-datafusion/issues/4294) +- Field alias can't work in where clause [\#4288](https://github.com/apache/arrow-datafusion/issues/4288) +- Some valid filters are not pushed down to parquet scan [\#4282](https://github.com/apache/arrow-datafusion/issues/4282) +- The type renaming `pub type NullColumnarValue = ColumnarValue` makes no sense [\#4271](https://github.com/apache/arrow-datafusion/issues/4271) +- Current `limit_push_down` can't support cross\_join [\#4256](https://github.com/apache/arrow-datafusion/issues/4256) +- Cargo test fail [\#4253](https://github.com/apache/arrow-datafusion/issues/4253) +- RightSemi/RightAnti HashJoin has bug, the left\_indices is never populated, causing failure to apply join filters. 
[\#4247](https://github.com/apache/arrow-datafusion/issues/4247) +- Clippy failures [\#4245](https://github.com/apache/arrow-datafusion/issues/4245) +- Cannot query s3 data from datafusion-cli [\#4239](https://github.com/apache/arrow-datafusion/issues/4239) +- Bug parsing interval with negative values [\#4237](https://github.com/apache/arrow-datafusion/issues/4237) +- `cargo test` reports errors on the master branch. [\#4236](https://github.com/apache/arrow-datafusion/issues/4236) +- Doc of the expression function`log2` is incorrect [\#4231](https://github.com/apache/arrow-datafusion/issues/4231) +- HashJoin with mode PartitionMode:CollectLeft has bug and can produce wrong result [\#4230](https://github.com/apache/arrow-datafusion/issues/4230) +- Add ambiguous check when generate projection plan [\#4210](https://github.com/apache/arrow-datafusion/issues/4210) +- What happened for NDJSON support on CLI? [\#4198](https://github.com/apache/arrow-datafusion/issues/4198) +- Add ambiguous check when generate join plan [\#4197](https://github.com/apache/arrow-datafusion/issues/4197) +- Clippy failing on master : error: use of deprecated associated function `chrono::NaiveDate::from_ymd`: use `from_ymd_opt()` instead [\#4187](https://github.com/apache/arrow-datafusion/issues/4187) +- Reimplement the `eliminate_cross_join` [\#4176](https://github.com/apache/arrow-datafusion/issues/4176) +- Incorrect handling of column names [\#4166](https://github.com/apache/arrow-datafusion/issues/4166) +- Update release scripts to support datafusion-benchmarks [\#4134](https://github.com/apache/arrow-datafusion/issues/4134) +- Bug in interpreting correctly parsed SQL with aliases [\#4123](https://github.com/apache/arrow-datafusion/issues/4123) +- The percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4103](https://github.com/apache/arrow-datafusion/issues/4103) +- Panic when using array\_agg 
[\#4080](https://github.com/apache/arrow-datafusion/issues/4080) +- Wrong result for FIRST\_VALUE AND LAST\_VALUE window functions [\#4076](https://github.com/apache/arrow-datafusion/issues/4076) +- Round error when casting float to decimal [\#4071](https://github.com/apache/arrow-datafusion/issues/4071) +- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) +- Revisit required\_child\_distribution\(\), output\_partitioning\(\), output\_ordering\(\) implementations in ExecutionPlan's implementations [\#3653](https://github.com/apache/arrow-datafusion/issues/3653) +- Can't push down projection after do type coercion [\#3583](https://github.com/apache/arrow-datafusion/issues/3583) +- In some circumstances cast expression is not working [\#3499](https://github.com/apache/arrow-datafusion/issues/3499) +- output\_partitioning\(\) and output\_ordering\(\) implementations are wrong in some physical plan implementations with alias [\#3400](https://github.com/apache/arrow-datafusion/issues/3400) +- Interval Literal doesn't work for timeunit less than millisecond [\#3204](https://github.com/apache/arrow-datafusion/issues/3204) +- `INTERVAL` literal with duplicated interval types should raise error [\#3183](https://github.com/apache/arrow-datafusion/issues/3183) +- Error occurs when only using partition columns in query [\#1999](https://github.com/apache/arrow-datafusion/issues/1999) +- regex\_match does not compile using the `g` flag [\#1429](https://github.com/apache/arrow-datafusion/issues/1429) +- `between` with NULL literals does not work: can't be evaluated because there isn't a common type to coerce the types to [\#1193](https://github.com/apache/arrow-datafusion/issues/1193) +- \[Datafusion\] Error with CAST: Unsupported SQL type Time [\#193](https://github.com/apache/arrow-datafusion/issues/193) + +**Closed issues:** + +- 
SQL level coverage for when memory limit is exceeded [\#4404](https://github.com/apache/arrow-datafusion/issues/4404) +- Throw error \(not `panic`\) if a listing table specifies a missing partition column [\#4350](https://github.com/apache/arrow-datafusion/issues/4350) +- Page index pruning fail on complex\_expr [\#4317](https://github.com/apache/arrow-datafusion/issues/4317) +- optimize `limit-full join` in the limit push down rule [\#4275](https://github.com/apache/arrow-datafusion/issues/4275) +- `infer_schema` function is not working with s3 Urls or http endpoints [\#4269](https://github.com/apache/arrow-datafusion/issues/4269) +- Add support binary boolean operators with nulls [\#4241](https://github.com/apache/arrow-datafusion/issues/4241) +- Add additional testing to parquet predicate pushdown integration tests [\#4087](https://github.com/apache/arrow-datafusion/issues/4087) +- Add metrics for parquet page level skipping [\#4086](https://github.com/apache/arrow-datafusion/issues/4086) +- Add parquet page index pushdown metrics [\#4058](https://github.com/apache/arrow-datafusion/issues/4058) +- Throw a runtime error if the memory allocated to GroupByHash exceeds a limit [\#3940](https://github.com/apache/arrow-datafusion/issues/3940) +- support unsigned numeric data type in UnwrapCastInBinaryComparison rule [\#3702](https://github.com/apache/arrow-datafusion/issues/3702) +- Support type cast in union [\#2125](https://github.com/apache/arrow-datafusion/issues/2125) +- \[EPIC\] Memory Limited Sort \(Externalized / Spill\) [\#1568](https://github.com/apache/arrow-datafusion/issues/1568) +- Maintain partition information in Union [\#189](https://github.com/apache/arrow-datafusion/issues/189) +- Add coercion support for `NULL` literals [\#185](https://github.com/apache/arrow-datafusion/issues/185) + +**Merged pull requests:** + +- Make `datafusion-sql` depend on `arrow-schema` instead of `arrow` [\#4456](https://github.com/apache/arrow-datafusion/pull/4456) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mbrobbel](https://github.com/mbrobbel)) +- replace the comparator for `decimal array op scalar` using arrow kernel [\#4453](https://github.com/apache/arrow-datafusion/pull/4453) ([liukun4515](https://github.com/liukun4515)) +- Fix pyarrow test [\#4450](https://github.com/apache/arrow-datafusion/pull/4450) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Replace `&Option<T>` with `Option<&T>` [\#4446](https://github.com/apache/arrow-datafusion/pull/4446) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([askoa](https://github.com/askoa)) +- Improve error handling for array downcasting [\#4445](https://github.com/apache/arrow-datafusion/pull/4445) ([retikulum](https://github.com/retikulum)) +- Refactor Builtin Window Function Implementation [\#4441](https://github.com/apache/arrow-datafusion/pull/4441) ([mustafasrepo](https://github.com/mustafasrepo)) +- feat: `DataFusionError::find_root` [\#4437](https://github.com/apache/arrow-datafusion/pull/4437) ([crepererum](https://github.com/crepererum)) +- fix: do NOT convert errors to strings but keep the type [\#4436](https://github.com/apache/arrow-datafusion/pull/4436) ([crepererum](https://github.com/crepererum)) +- The CLI panics when passing an invalid explain query [\#4429](https://github.com/apache/arrow-datafusion/pull/4429) ([comphead](https://github.com/comphead)) +- \[minor\] use arrow kernel concat\_batches instead combine\_batches [\#4423](https://github.com/apache/arrow-datafusion/pull/4423) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix panic on to\_hex function for negative numbers [\#4422](https://github.com/apache/arrow-datafusion/pull/4422) ([retikulum](https://github.com/retikulum)) +- Optimize filter executor in pull-based executor [\#4421](https://github.com/apache/arrow-datafusion/pull/4421) ([xudong963](https://github.com/xudong963)) +- optimize limit push for join case 
[\#4411](https://github.com/apache/arrow-datafusion/pull/4411) ([liukun4515](https://github.com/liukun4515)) +- Add integration test for erroring when memory limits are hit [\#4406](https://github.com/apache/arrow-datafusion/pull/4406) ([alamb](https://github.com/alamb)) +- feat: `ResourceExhausted` for memory limit in `AggregateStream` [\#4405](https://github.com/apache/arrow-datafusion/pull/4405) ([crepererum](https://github.com/crepererum)) +- Update to arrow 28 [\#4400](https://github.com/apache/arrow-datafusion/pull/4400) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Update rstest requirement from 0.15.0 to 0.16.0 [\#4399](https://github.com/apache/arrow-datafusion/pull/4399) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add sqllogictests \(v0\) [\#4395](https://github.com/apache/arrow-datafusion/pull/4395) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- improve hashjoin execution metrics [\#4394](https://github.com/apache/arrow-datafusion/pull/4394) ([AssHero](https://github.com/AssHero)) +- Add `with_new_inputs` for LogicalPlan [\#4393](https://github.com/apache/arrow-datafusion/pull/4393) ([jackwener](https://github.com/jackwener)) +- Clean the code in `limit.rs`. 
[\#4391](https://github.com/apache/arrow-datafusion/pull/4391) ([HaoYang670](https://github.com/HaoYang670)) +- Move physical plan serde from Ballista to DataFusion [\#4390](https://github.com/apache/arrow-datafusion/pull/4390) ([Kikkon](https://github.com/Kikkon)) +- Fix page index pruning fail on complex\_expr [\#4387](https://github.com/apache/arrow-datafusion/pull/4387) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add check for nested types in equivalent names and types [\#4380](https://github.com/apache/arrow-datafusion/pull/4380) ([alamb](https://github.com/alamb)) +- refine the code of build schema for ambiguous check, factor this out into a function [\#4379](https://github.com/apache/arrow-datafusion/pull/4379) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Refactor the Hash Join [\#4377](https://github.com/apache/arrow-datafusion/pull/4377) ([liukun4515](https://github.com/liukun4515)) +- Minor: Fix typos in the documentation [\#4376](https://github.com/apache/arrow-datafusion/pull/4376) ([martin-g](https://github.com/martin-g)) +- Include byte size estimates in the filter statistics [\#4375](https://github.com/apache/arrow-datafusion/pull/4375) ([isidentical](https://github.com/isidentical)) +- HashJoin should return Err when the right side input stream produce Err, add more join UTs to cover different join types [\#4373](https://github.com/apache/arrow-datafusion/pull/4373) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mingmwang](https://github.com/mingmwang)) +- feat: `ResourceExhausted` for memory limit in `GroupedHashAggregateStream` [\#4371](https://github.com/apache/arrow-datafusion/pull/4371) ([crepererum](https://github.com/crepererum)) +- Use limit\(\) function instead of show\_limit\(\) in the first example [\#4369](https://github.com/apache/arrow-datafusion/pull/4369) ([martin-g](https://github.com/martin-g)) +- Update env\_logger requirement from 0.9 to 0.10 
[\#4367](https://github.com/apache/arrow-datafusion/pull/4367) ([dependabot[bot]](https://github.com/apps/dependabot)) +- reimplement `push_down_filter` to remove global-state [\#4365](https://github.com/apache/arrow-datafusion/pull/4365) ([jackwener](https://github.com/jackwener)) +- Support to use Scheduler in tpch benchmark [\#4361](https://github.com/apache/arrow-datafusion/pull/4361) ([xudong963](https://github.com/xudong963)) +- Adding more dataframe example to read csv files [\#4360](https://github.com/apache/arrow-datafusion/pull/4360) ([DataPsycho](https://github.com/DataPsycho)) +- minor: correct name and typo [\#4359](https://github.com/apache/arrow-datafusion/pull/4359) ([jackwener](https://github.com/jackwener)) +- Do not log error if page index can not be evaluated [\#4358](https://github.com/apache/arrow-datafusion/pull/4358) ([alamb](https://github.com/alamb)) +- Clean the `expr_fn` - use `scalar_expr` to create unary scalar expr functions, remove macro `unary_scalar_functions` [\#4357](https://github.com/apache/arrow-datafusion/pull/4357) ([HaoYang670](https://github.com/HaoYang670)) +- Throw error \(not `panic`\) if a listing table specifies a missing partition column [\#4354](https://github.com/apache/arrow-datafusion/pull/4354) ([doki23](https://github.com/doki23)) +- Improve error handling and add some more types for proper downcasting [\#4352](https://github.com/apache/arrow-datafusion/pull/4352) ([retikulum](https://github.com/retikulum)) +- Add check to avoid underflow in memory manager [\#4351](https://github.com/apache/arrow-datafusion/pull/4351) ([askoa](https://github.com/askoa)) +- Improve error messages when memory is exhausted while sorting [\#4348](https://github.com/apache/arrow-datafusion/pull/4348) ([alamb](https://github.com/alamb)) +- Do not error in optimizer if resulting schema has different metadata [\#4347](https://github.com/apache/arrow-datafusion/pull/4347) ([alamb](https://github.com/alamb)) +- minor: improve optimizer 
logging and do not repeat rule name [\#4345](https://github.com/apache/arrow-datafusion/pull/4345) ([alamb](https://github.com/alamb)) +- minor: fix typos in test names [\#4344](https://github.com/apache/arrow-datafusion/pull/4344) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Minor: Add docstrings to `EliminateOuterJoins` optimizer pass [\#4343](https://github.com/apache/arrow-datafusion/pull/4343) ([alamb](https://github.com/alamb)) +- Minor: refactor: isolate common memory accounting utils [\#4341](https://github.com/apache/arrow-datafusion/pull/4341) ([crepererum](https://github.com/crepererum)) +- minor: make `plan_from_tables` return one plan instead of `Vec` [\#4336](https://github.com/apache/arrow-datafusion/pull/4336) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- enhancement: when fetch == 0, pushdown limit 0 instead skip+fetch. [\#4334](https://github.com/apache/arrow-datafusion/pull/4334) ([jackwener](https://github.com/jackwener)) +- Teach optimizer that `CoalesceBatchesExec` does not destroy output order [\#4332](https://github.com/apache/arrow-datafusion/pull/4332) ([alamb](https://github.com/alamb)) +- Add ability to disable DiskManager [\#4330](https://github.com/apache/arrow-datafusion/pull/4330) ([tustvold](https://github.com/tustvold)) +- Update cli.md [\#4329](https://github.com/apache/arrow-datafusion/pull/4329) ([psvri](https://github.com/psvri)) +- fix bug: right semi join can't support the filter [\#4327](https://github.com/apache/arrow-datafusion/pull/4327) ([liukun4515](https://github.com/liukun4515)) +- reimplement `eliminate_limit` to remove `global-state`. 
[\#4324](https://github.com/apache/arrow-datafusion/pull/4324) ([jackwener](https://github.com/jackwener)) +- Refine Err propagation and avoid unwrap in transform closures [\#4318](https://github.com/apache/arrow-datafusion/pull/4318) ([mingmwang](https://github.com/mingmwang)) +- Add a checker to confirm physical optimizer rules will keep the physical plan schema immutable [\#4316](https://github.com/apache/arrow-datafusion/pull/4316) ([mingmwang](https://github.com/mingmwang)) +- Refactor downcasting functions with downcastvalue macro and improve error handling of `ListArray` downcasting [\#4313](https://github.com/apache/arrow-datafusion/pull/4313) ([retikulum](https://github.com/retikulum)) +- minor: add another test case to cover join ambiguous check [\#4305](https://github.com/apache/arrow-datafusion/pull/4305) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Fix DESCRIBE statement qualified table issue [\#4304](https://github.com/apache/arrow-datafusion/pull/4304) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([gruuya](https://github.com/gruuya)) +- Use tournament loser tree for k-way sort-merging, increase merge speed by 50% [\#4301](https://github.com/apache/arrow-datafusion/pull/4301) ([richox](https://github.com/richox)) +- Pin Python `setuptools` in the CI to fix integration tests [\#4296](https://github.com/apache/arrow-datafusion/pull/4296) ([isidentical](https://github.com/isidentical)) +- Support `SubqueryAlias` in optimizer, physical planner. 
[\#4293](https://github.com/apache/arrow-datafusion/pull/4293) ([jackwener](https://github.com/jackwener)) +- minor: avoid a clone into string when checking ambiguous [\#4292](https://github.com/apache/arrow-datafusion/pull/4292) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- replace the comparison op for decimal array op using the arrow-rs kernel [\#4290](https://github.com/apache/arrow-datafusion/pull/4290) ([liukun4515](https://github.com/liukun4515)) +- MINOR: replace `{..}` with `(_)`, typo, remove outdated TODO [\#4286](https://github.com/apache/arrow-datafusion/pull/4286) ([jackwener](https://github.com/jackwener)) +- Reduce Expr copies in `ParquetExec` [\#4283](https://github.com/apache/arrow-datafusion/pull/4283) ([alamb](https://github.com/alamb)) +- Fix issue in filter pushdown with overloaded projection index [\#4281](https://github.com/apache/arrow-datafusion/pull/4281) ([thinkharderdev](https://github.com/thinkharderdev)) +- Skip useless pruning predicates in `ParquetExec` [\#4280](https://github.com/apache/arrow-datafusion/pull/4280) ([alamb](https://github.com/alamb)) +- Push down more predicates into `ParquetExec` [\#4279](https://github.com/apache/arrow-datafusion/pull/4279) ([alamb](https://github.com/alamb)) +- Fix EXPLAIN plan for ParquetExec to show pruning\_predicate [\#4278](https://github.com/apache/arrow-datafusion/pull/4278) ([alamb](https://github.com/alamb)) +- reimplement `limit_push_down` to remove global-state, enhance optimize and simplify code. 
[\#4276](https://github.com/apache/arrow-datafusion/pull/4276) ([jackwener](https://github.com/jackwener)) +- Bump actions/labeler from 4.0.2 to 4.1.0 [\#4274](https://github.com/apache/arrow-datafusion/pull/4274) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Remove the type alias `NullColumnarValue` [\#4273](https://github.com/apache/arrow-datafusion/pull/4273) ([HaoYang670](https://github.com/HaoYang670)) +- reimplement `eliminate_outer_join` [\#4272](https://github.com/apache/arrow-datafusion/pull/4272) ([jackwener](https://github.com/jackwener)) +- Fix bugs in parsing `with header row` and `partitioned by` [\#4268](https://github.com/apache/arrow-datafusion/pull/4268) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- improve error messages while downcasting `UInt32Array`, `UInt64Array` and `BooleanArray` [\#4261](https://github.com/apache/arrow-datafusion/pull/4261) ([retikulum](https://github.com/retikulum)) +- add ambiguous check for projection [\#4260](https://github.com/apache/arrow-datafusion/pull/4260) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([AssHero](https://github.com/AssHero)) +- Add ambiguous check for join [\#4258](https://github.com/apache/arrow-datafusion/pull/4258) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- support cross\_join in `limit_push_down` [\#4257](https://github.com/apache/arrow-datafusion/pull/4257) ([jackwener](https://github.com/jackwener)) +- Support parquet page filtering on min\_max for `decimal128` and `string` columns [\#4255](https://github.com/apache/arrow-datafusion/pull/4255) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- fix conflict and UT, cleanup redundant legacy code [\#4252](https://github.com/apache/arrow-datafusion/pull/4252) ([jackwener](https://github.com/jackwener)) +- Minor: remove unecessary clone\(\) in planner 
[\#4249](https://github.com/apache/arrow-datafusion/pull/4249) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Fix nightly clippy failures [\#4246](https://github.com/apache/arrow-datafusion/pull/4246) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Improve Error Handling and Readibility for downcasting `Float32Array`, `Float64Array`, `StringArray` [\#4244](https://github.com/apache/arrow-datafusion/pull/4244) ([retikulum](https://github.com/retikulum)) +- Use defaults for ListingOptions builder [\#4243](https://github.com/apache/arrow-datafusion/pull/4243) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Support binary boolean operators with nulls [\#4242](https://github.com/apache/arrow-datafusion/pull/4242) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fixing doc of the expression [\#4240](https://github.com/apache/arrow-datafusion/pull/4240) ([Creampanda](https://github.com/Creampanda)) +- Fix negative interval parsing bug [\#4238](https://github.com/apache/arrow-datafusion/pull/4238) ([Jefffrey](https://github.com/Jefffrey)) +- remove duplicate or redundant code [\#4235](https://github.com/apache/arrow-datafusion/pull/4235) ([jackwener](https://github.com/jackwener)) +- add a checker to confirm optimizer can keep plan schema immutable. 
[\#4233](https://github.com/apache/arrow-datafusion/pull/4233) ([jackwener](https://github.com/jackwener)) +- Fix the percentile argument for ApproxPercentileCont must be Float64, not Decimal128\(2, 1\) [\#4228](https://github.com/apache/arrow-datafusion/pull/4228) ([comphead](https://github.com/comphead)) +- refactor how we create listing tables [\#4227](https://github.com/apache/arrow-datafusion/pull/4227) ([timvw](https://github.com/timvw)) +- Update sqlparser requirement from 0.26 to 0.27 [\#4226](https://github.com/apache/arrow-datafusion/pull/4226) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- upgrade required chrono version to 0.4.23 [\#4225](https://github.com/apache/arrow-datafusion/pull/4225) ([waitingkuo](https://github.com/waitingkuo)) +- Support types other than String for partition columns on ListingTables [\#4221](https://github.com/apache/arrow-datafusion/pull/4221) ([doki23](https://github.com/doki23)) +- \[CBO\] JoinSelection Rule, select HashJoin Partition Mode based on the Join Type and available statistics, option for SortMergeJoin [\#4219](https://github.com/apache/arrow-datafusion/pull/4219) ([mingmwang](https://github.com/mingmwang)) +- Remove alias in Union [\#4212](https://github.com/apache/arrow-datafusion/pull/4212) ([jackwener](https://github.com/jackwener)) +- Add try\_optimize method [\#4208](https://github.com/apache/arrow-datafusion/pull/4208) ([andygrove](https://github.com/andygrove)) +- Provide a builder for ListingOptions with fixups [\#4207](https://github.com/apache/arrow-datafusion/pull/4207) ([alamb](https://github.com/alamb)) +- Avoid error with empty iterators used for `ScalarValue::iter_to_array` [\#4206](https://github.com/apache/arrow-datafusion/pull/4206) ([GrandChaman](https://github.com/GrandChaman)) +- Improve error message for regexp\_match 'g' flag [\#4203](https://github.com/apache/arrow-datafusion/pull/4203) ([Jefffrey](https://github.com/Jefffrey)) +- 
Return `ResourceExhausted` errors when memory limit is exceed in `GroupedHashAggregateStreamV2` \(Row Hash\) [\#4202](https://github.com/apache/arrow-datafusion/pull/4202) ([crepererum](https://github.com/crepererum)) +- Add additional expr boolean simplification rules [\#4200](https://github.com/apache/arrow-datafusion/pull/4200) ([Jefffrey](https://github.com/Jefffrey)) +- Update to arrow and parquet 27.0.0 [\#4199](https://github.com/apache/arrow-datafusion/pull/4199) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Support `create table` with explicit column definitions [\#4194](https://github.com/apache/arrow-datafusion/pull/4194) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([doki23](https://github.com/doki23)) +- Support all equality predicates in equality join [\#4193](https://github.com/apache/arrow-datafusion/pull/4193) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- add `propagate_empty_relation` optimizer rule [\#4192](https://github.com/apache/arrow-datafusion/pull/4192) ([jackwener](https://github.com/jackwener)) +- fix clippy [\#4190](https://github.com/apache/arrow-datafusion/pull/4190) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Fix clippy by avoiding deprecated functions in chrono [\#4189](https://github.com/apache/arrow-datafusion/pull/4189) ([alamb](https://github.com/alamb)) +- Disallow duplicate interval types during parsing [\#4188](https://github.com/apache/arrow-datafusion/pull/4188) ([Jefffrey](https://github.com/Jefffrey)) +- Parse nanoseconds for intervals [\#4186](https://github.com/apache/arrow-datafusion/pull/4186) ([Jefffrey](https://github.com/Jefffrey)) +- Add rule to reimplement `Eliminate cross join` and remove it in planner [\#4185](https://github.com/apache/arrow-datafusion/pull/4185) 
[[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- \[FOLLOWUP\] Enforcement Rule: resolve review comments, refactor adjust\_input\_keys\_ordering\(\) [\#4184](https://github.com/apache/arrow-datafusion/pull/4184) ([mingmwang](https://github.com/mingmwang)) +- Simplify boolean parquet pushdown predicate [\#4182](https://github.com/apache/arrow-datafusion/pull/4182) ([Jefffrey](https://github.com/Jefffrey)) +- Minor: consolidate parquet `custom_reader` integration test into parquet\_exec [\#4175](https://github.com/apache/arrow-datafusion/pull/4175) ([alamb](https://github.com/alamb)) +- minor: remove redundant println and cleanup [\#4173](https://github.com/apache/arrow-datafusion/pull/4173) ([jackwener](https://github.com/jackwener)) +- Add ability to specify external sort information for ListingTables [\#4170](https://github.com/apache/arrow-datafusion/pull/4170) ([alamb](https://github.com/alamb)) +- Improve Error Handling and Readibility for downcasting `Decimal128Array` [\#4168](https://github.com/apache/arrow-datafusion/pull/4168) ([retikulum](https://github.com/retikulum)) +- Minor: Remove completed comment on parquet row group pruning [\#4167](https://github.com/apache/arrow-datafusion/pull/4167) ([alamb](https://github.com/alamb)) +- Update hashbrown requirement from 0.12 to 0.13 [\#4164](https://github.com/apache/arrow-datafusion/pull/4164) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: enable `dyn_cmp_dict` feature on arrow for physical expr crate [\#4163](https://github.com/apache/arrow-datafusion/pull/4163) ([isidentical](https://github.com/isidentical)) +- Derive filter statistic estimates from the predicate expression [\#4162](https://github.com/apache/arrow-datafusion/pull/4162) ([isidentical](https://github.com/isidentical)) +- Minor: pass `ParquetFileMetrics` to `build_row_filter` in parquet [\#4161](https://github.com/apache/arrow-datafusion/pull/4161) 
([alamb](https://github.com/alamb)) +- Minor: Extract parquet row group pruning code into its own module [\#4160](https://github.com/apache/arrow-datafusion/pull/4160) ([alamb](https://github.com/alamb)) +- Full support for time32 and time64 literal values \(`ScalarValue`\) [\#4156](https://github.com/apache/arrow-datafusion/pull/4156) ([andre-cc-natzka](https://github.com/andre-cc-natzka)) +- Window frame GROUPS mode support [\#4155](https://github.com/apache/arrow-datafusion/pull/4155) ([zembunia](https://github.com/zembunia)) +- Improve error messages while downcasting Int64Array [\#4154](https://github.com/apache/arrow-datafusion/pull/4154) ([retikulum](https://github.com/retikulum)) +- Add another method to collect referenced columns from an expression [\#4153](https://github.com/apache/arrow-datafusion/pull/4153) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Remove BoxedAsyncFileReader [\#4150](https://github.com/apache/arrow-datafusion/pull/4150) ([tustvold](https://github.com/tustvold)) +- Support unsigned integers in `unwrap_cast_in_comparison` Optimizer rule [\#4149](https://github.com/apache/arrow-datafusion/pull/4149) ([alamb](https://github.com/alamb)) +- Add support for `DataType::Timestamp` casts in `unwrap_cast_in_comparison` optimizer pass [\#4148](https://github.com/apache/arrow-datafusion/pull/4148) ([alamb](https://github.com/alamb)) +- Add additional testing for `unwrap_cast_in_comparison` [\#4147](https://github.com/apache/arrow-datafusion/pull/4147) ([alamb](https://github.com/alamb)) +- improve error messages while downcasting Int32Array [\#4146](https://github.com/apache/arrow-datafusion/pull/4146) ([retikulum](https://github.com/retikulum)) +- Minor: Update docstring on unwrap\_cast\_in\_comparison [\#4145](https://github.com/apache/arrow-datafusion/pull/4145) ([alamb](https://github.com/alamb)) +- add schema parameter to table provider factory create method 
[\#4143](https://github.com/apache/arrow-datafusion/pull/4143) ([milenkovicm](https://github.com/milenkovicm)) +- fix: shouldn't pass alias through into subquery. [\#4141](https://github.com/apache/arrow-datafusion/pull/4141) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- Preserve the `Cast` expression in `columnize_expr` [\#4137](https://github.com/apache/arrow-datafusion/pull/4137) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Set versions to dependencies with path in benchmarks Cargo.toml file [\#4136](https://github.com/apache/arrow-datafusion/pull/4136) ([ArkashaJavelin](https://github.com/ArkashaJavelin)) +- Fix links [\#4135](https://github.com/apache/arrow-datafusion/pull/4135) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- Use f64::total\_cmp instead of OrderedFloat [\#4133](https://github.com/apache/arrow-datafusion/pull/4133) ([comphead](https://github.com/comphead)) +- Add parquet integration tests for explicitly smaller page sizes, page pruning [\#4131](https://github.com/apache/arrow-datafusion/pull/4131) ([alamb](https://github.com/alamb)) +- Consolidate `ParquetExec` tests in `parquet_exec` integration test [\#4130](https://github.com/apache/arrow-datafusion/pull/4130) ([alamb](https://github.com/alamb)) +- Minor: Use upstream `BooleanArray::true_count` [\#4129](https://github.com/apache/arrow-datafusion/pull/4129) ([alamb](https://github.com/alamb)) +- Combined TPCH runs & uniformed summaries for benchmarks [\#4128](https://github.com/apache/arrow-datafusion/pull/4128) ([isidentical](https://github.com/isidentical)) +- Enable TableProviderFactories to receive additional options when creating an external table [\#4126](https://github.com/apache/arrow-datafusion/pull/4126) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([timvw](https://github.com/timvw)) +- Add CI check that configs.md is 
up-to-date [\#4124](https://github.com/apache/arrow-datafusion/pull/4124) ([mvanschellebeeck](https://github.com/mvanschellebeeck)) +- \[Part3\] Partition and Sort Enforcement, Enforcement rule implementation [\#4122](https://github.com/apache/arrow-datafusion/pull/4122) ([mingmwang](https://github.com/mingmwang)) +- reuse code `utils::optimize_children` but affect inline. [\#4121](https://github.com/apache/arrow-datafusion/pull/4121) ([jackwener](https://github.com/jackwener)) +- reuse code `utils::optimize_children` instead of redundant implementation [\#4119](https://github.com/apache/arrow-datafusion/pull/4119) ([jackwener](https://github.com/jackwener)) +- Allow listing tables to be created via TableFactories [\#4112](https://github.com/apache/arrow-datafusion/pull/4112) ([avantgardnerio](https://github.com/avantgardnerio)) +- Update SQL reference to state that decimal support is currently experimental [\#4109](https://github.com/apache/arrow-datafusion/pull/4109) ([andygrove](https://github.com/andygrove)) +- Add metrics for parquet page level skipping [\#4105](https://github.com/apache/arrow-datafusion/pull/4105) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add parser option for parsing SQL numeric literals as decimal [\#4102](https://github.com/apache/arrow-datafusion/pull/4102) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Replace RwLock\<HashMap\> and Mutex\<HashMap\> by using DashMap [\#4079](https://github.com/apache/arrow-datafusion/pull/4079) ([yahoNanJing](https://github.com/yahoNanJing)) +- Custom window frame support extended to built-in window functions [\#4078](https://github.com/apache/arrow-datafusion/pull/4078) ([mustafasrepo](https://github.com/mustafasrepo)) +- Enable tests for page index filtering in parquet filter pushdown test [\#4062](https://github.com/apache/arrow-datafusion/pull/4062) ([alamb](https://github.com/alamb)) +- \[Part2\] Partition and Sort Enforcement, ExecutionPlan enhancement
[\#4043](https://github.com/apache/arrow-datafusion/pull/4043) ([mingmwang](https://github.com/mingmwang)) +- add support for xz file compression and `compression` feature [\#3993](https://github.com/apache/arrow-datafusion/pull/3993) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Jimexist](https://github.com/Jimexist)) +- Expression boundary analysis framework [\#3912](https://github.com/apache/arrow-datafusion/pull/3912) ([isidentical](https://github.com/isidentical)) + +## [14.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/14.0.0-rc1) (2022-11-04) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/14.0.0...14.0.0-rc1) + + ## [14.0.0](https://github.com/apache/arrow-datafusion/tree/14.0.0) (2022-11-04) [Full Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0-rc1...14.0.0) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 6b24cba560caf..27f43f93edb20 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-common" description = "Common functionality for DataFusion query engine" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index e5c802ef18d92..f3834c4cd9c56 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "../../README.md" @@ -64,13 +64,13 @@ bytes = "1.1" bzip2 = { version = "0.4.3", optional = true } chrono = { version = "0.4.23", default-features = false } dashmap = "5.4.0" 
-datafusion-common = { path = "../common", version = "14.0.0", features = ["parquet", "object_store"] } -datafusion-expr = { path = "../expr", version = "14.0.0" } -datafusion-jit = { path = "../jit", version = "14.0.0", optional = true } -datafusion-optimizer = { path = "../optimizer", version = "14.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "14.0.0" } -datafusion-row = { path = "../row", version = "14.0.0" } -datafusion-sql = { path = "../sql", version = "14.0.0" } +datafusion-common = { path = "../common", version = "15.0.0", features = ["parquet", "object_store"] } +datafusion-expr = { path = "../expr", version = "15.0.0" } +datafusion-jit = { path = "../jit", version = "15.0.0", optional = true } +datafusion-optimizer = { path = "../optimizer", version = "15.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "15.0.0" } +datafusion-row = { path = "../row", version = "15.0.0" } +datafusion-sql = { path = "../sql", version = "15.0.0" } flate2 = { version = "1.0.24", optional = true } futures = "0.3" glob = "0.3.0" diff --git a/datafusion/core/src/execution/memory_manager/mod.rs b/datafusion/core/src/execution/memory_manager/mod.rs index c3ff444ebc27e..ccc1e81cd999b 100644 --- a/datafusion/core/src/execution/memory_manager/mod.rs +++ b/datafusion/core/src/execution/memory_manager/mod.rs @@ -173,7 +173,7 @@ pub trait MemoryConsumer: Send + Sync { /// reached for this consumer. 
async fn try_grow(&self, required: usize) -> Result<()> { let current = self.mem_used(); - debug!( + log::info!( "trying to acquire {} whiling holding {} from consumer {}", human_readable_size(required), human_readable_size(current), @@ -183,7 +183,7 @@ pub trait MemoryConsumer: Send + Sync { let can_grow_directly = self.memory_manager().can_grow_directly(required, current); if !can_grow_directly { - debug!( + log::info!( "Failed to grow memory of {} directly from consumer {}, spilling first ...", human_readable_size(required), self.id() @@ -276,7 +276,7 @@ impl MemoryManager { match config { MemoryManagerConfig::Existing(manager) => manager, MemoryManagerConfig::New { .. } => { - debug!( + log::info!( "Creating memory manager with initial size {}", human_readable_size(pool_size) ); @@ -297,10 +297,12 @@ impl MemoryManager { } pub(crate) fn grow_tracker_usage(&self, delta: usize) { + log::info!("XXX grow tracker usage: {}", delta); self.trackers_total.fetch_add(delta, Ordering::SeqCst); } pub(crate) fn shrink_tracker_usage(&self, delta: usize) { + log::info!("XXX shrink tracker usage: {}", delta); let update = self.trackers_total .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| { @@ -325,6 +327,7 @@ impl MemoryManager { /// Register a new memory requester pub(crate) fn register_requester(&self, requester_id: &MemoryConsumerId) { + log::info!("XXX register_requester: id={}", requester_id); self.requesters.lock().insert(requester_id.clone()); } @@ -357,7 +360,7 @@ impl MemoryManager { } else if current < min_per_rqt { // if we cannot acquire at lease 1/2n memory, just wait for others // to spill instead spill self frequently with limited total mem - debug!( + log::info!( "Cannot acquire a minimum amount of {} memory from the manager of total {}, waiting for others to spill ...", human_readable_size(min_per_rqt), human_readable_size(self.pool_size)); let now = Instant::now(); @@ -379,7 +382,7 @@ impl MemoryManager { fn record_free_then_acquire(&self, freed: 
usize, acquired: usize) { let mut requesters_total = self.requesters_total.lock(); - debug!( + log::info!( "free_then_acquire: total {}, freed {}, acquired {}", human_readable_size(*requesters_total), human_readable_size(freed), @@ -393,7 +396,7 @@ impl MemoryManager { fn record_free(&self, freed: usize) { let mut requesters_total = self.requesters_total.lock(); - debug!( + log::info!( "free: total {}, freed {}", human_readable_size(*requesters_total), human_readable_size(freed) @@ -405,6 +408,7 @@ impl MemoryManager { /// Drop a memory consumer and reclaim the memory pub(crate) fn drop_consumer(&self, id: &MemoryConsumerId, mem_used: usize) { + log::info!("XXX drop_consumer: id={}, mem_used={}", id, mem_used); // find in requesters first { let mut requesters = self.requesters.lock(); diff --git a/datafusion/core/src/execution/runtime_env.rs b/datafusion/core/src/execution/runtime_env.rs index 64da4a103b16e..4768eea5733a3 100644 --- a/datafusion/core/src/execution/runtime_env.rs +++ b/datafusion/core/src/execution/runtime_env.rs @@ -76,21 +76,25 @@ impl RuntimeEnv { /// Register the consumer to get it tracked pub fn register_requester(&self, id: &MemoryConsumerId) { + log::info!("XXX runtime.register_requester: id={}", id); self.memory_manager.register_requester(id); } /// Drop the consumer from get tracked, reclaim memory pub fn drop_consumer(&self, id: &MemoryConsumerId, mem_used: usize) { + log::info!("XXX runtime.drop_consumer: id={}, mem_used={}", id, mem_used); self.memory_manager.drop_consumer(id, mem_used) } /// Grow tracker memory of `delta` pub fn grow_tracker_usage(&self, delta: usize) { + log::info!("XXX runtime.grow_tracker_usage: {}", delta); self.memory_manager.grow_tracker_usage(delta) } /// Shrink tracker memory of `delta` pub fn shrink_tracker_usage(&self, delta: usize) { + log::info!("XXX runtime.shrink_tracker_usage: {}", delta); self.memory_manager.shrink_tracker_usage(delta) } diff --git a/datafusion/expr/Cargo.toml 
b/datafusion/expr/Cargo.toml index acf9e2a044955..d8e6658735540 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-expr" description = "Logical plan and expression representation for DataFusion query engine" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -37,6 +37,6 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { version = "28.0.0", default-features = false } -datafusion-common = { path = "../common", version = "14.0.0" } +datafusion-common = { path = "../common", version = "15.0.0" } log = "^0.4" sqlparser = "0.27" diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index 8a923e661301d..26cc5609b10af 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-jit" description = "Just In Time (JIT) compilation support for DataFusion query engine" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,7 +41,7 @@ cranelift = "0.89.0" cranelift-jit = "0.89.0" cranelift-module = "0.89.0" cranelift-native = "0.89.0" -datafusion-common = { path = "../common", version = "14.0.0", features = ["jit"] } -datafusion-expr = { path = "../expr", version = "14.0.0" } +datafusion-common = { path = "../common", version = "15.0.0", features = ["jit"] } +datafusion-expr = { path = "../expr", version = "15.0.0" } parking_lot = "0.12" diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index c31abeb59d96e..14970c405a3a2 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-optimizer" description = "DataFusion 
Query Optimizer" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -40,13 +40,13 @@ unicode_expressions = [] arrow = { version = "28.0.0", features = ["prettyprint"] } async-trait = "0.1.41" chrono = { version = "0.4.23", default-features = false } -datafusion-common = { path = "../common", version = "14.0.0" } -datafusion-expr = { path = "../expr", version = "14.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "14.0.0" } +datafusion-common = { path = "../common", version = "15.0.0" } +datafusion-expr = { path = "../expr", version = "15.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "15.0.0" } hashbrown = { version = "0.13", features = ["raw"] } log = "^0.4" [dev-dependencies] ctor = "0.1.22" -datafusion-sql = { path = "../sql", version = "14.0.0" } +datafusion-sql = { path = "../sql", version = "15.0.0" } env_logger = "0.10.0" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 0ba813110612d..6c202d55e4b8c 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -46,9 +46,9 @@ arrow-schema = "28.0.0" blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4.23", default-features = false } -datafusion-common = { path = "../common", version = "14.0.0" } -datafusion-expr = { path = "../expr", version = "14.0.0" } -datafusion-row = { path = "../row", version = "14.0.0" } +datafusion-common = { path = "../common", version = "15.0.0" } 
+datafusion-expr = { path = "../expr", version = "15.0.0" } +datafusion-row = { path = "../row", version = "15.0.0" } half = { version = "2.1", default-features = false } hashbrown = { version = "0.13", features = ["raw"] } itertools = { version = "0.10", features = ["use_std"] } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 7cb5daf33fcb4..6d8ba859f4a9c 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -42,9 +42,9 @@ json = ["pbjson", "serde", "serde_json"] [dependencies] arrow = "28.0.0" chrono = { version = "0.4", default-features = false } -datafusion = { path = "../core", version = "14.0.0" } -datafusion-common = { path = "../common", version = "14.0.0" } -datafusion-expr = { path = "../expr", version = "14.0.0" } +datafusion = { path = "../core", version = "15.0.0" } +datafusion-common = { path = "../common", version = "15.0.0" } +datafusion-expr = { path = "../expr", version = "15.0.0" } object_store = { version = "0.5.0" } parking_lot = { version = "0.12" } pbjson = { version = "0.5", optional = true } diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml index 2ca850aaf12e9..13a2ddb257ea4 100644 --- a/datafusion/row/Cargo.toml +++ b/datafusion/row/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-row" description = "Row backed by raw bytes for DataFusion query engine" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,7 +38,7 @@ jit = ["datafusion-jit"] [dependencies] arrow = "28.0.0" -datafusion-common = { path = "../common", version = "14.0.0" } 
-datafusion-jit = { path = "../jit", version = "14.0.0", optional = true } +datafusion-common = { path = "../common", version = "15.0.0" } +datafusion-jit = { path = "../jit", version = "15.0.0", optional = true } paste = "^1.0" rand = "0.8" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 3a255b2db8802..decec707546c2 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-sql" description = "DataFusion SQL Query Planner" -version = "14.0.0" +version = "15.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,6 +38,6 @@ unicode_expressions = [] [dependencies] arrow-schema = "28.0.0" -datafusion-common = { path = "../common", version = "14.0.0" } -datafusion-expr = { path = "../expr", version = "14.0.0" } +datafusion-common = { path = "../common", version = "15.0.0" } +datafusion-expr = { path = "../expr", version = "15.0.0" } sqlparser = "0.27" diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 2024716bf8c5d..f4067dc48a28f 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -24,6 +24,6 @@ edition = "2021" [dependencies] arrow = { version = "28.0.0", features = ["prettyprint"] } -datafusion-common = { path = "../datafusion/common", version = "14.0.0" } +datafusion-common = { path = "../datafusion/common" } env_logger = "0.10.0" rand = "0.8"