From 6d00bd990ce5644181ad1549a6c70c8406219070 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 4 Nov 2022 15:29:24 -0600 Subject: [PATCH] DataFusion 14.0.0 Release Prep (#4110) * bump version * CHANGELOG --- benchmarks/Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 16 +- datafusion-cli/Cargo.toml | 4 +- datafusion-examples/Cargo.toml | 2 +- datafusion/CHANGELOG.md | 334 ++++++++++++++++++++++++++++ datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 16 +- datafusion/expr/Cargo.toml | 4 +- datafusion/jit/Cargo.toml | 6 +- datafusion/optimizer/Cargo.toml | 10 +- datafusion/physical-expr/Cargo.toml | 8 +- datafusion/proto/Cargo.toml | 8 +- datafusion/row/Cargo.toml | 6 +- datafusion/sql/Cargo.toml | 6 +- 14 files changed, 379 insertions(+), 45 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 1319bfab206a..aa2324b980af 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-benchmarks" description = "DataFusion Benchmarks" -version = "13.0.0" +version = "14.0.0" edition = "2021" authors = ["Apache Arrow "] homepage = "https://github.com/apache/arrow-datafusion" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 4a6ab53ceb9c..e746bf97b89e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -557,7 +557,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "13.0.0" +version = "14.0.0" dependencies = [ "ahash 0.8.1", "arrow", @@ -600,7 +600,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "13.0.0" +version = "14.0.0" dependencies = [ "arrow", "clap", @@ -616,7 +616,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "13.0.0" +version = "14.0.0" dependencies = [ "arrow", "chrono", @@ -628,7 +628,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "13.0.0" +version = "14.0.0" dependencies = [ "ahash 0.8.1", "arrow", @@ -639,7 +639,7 @@ dependencies = [ [[package]] 
name = "datafusion-optimizer" -version = "13.0.0" +version = "14.0.0" dependencies = [ "arrow", "async-trait", @@ -653,7 +653,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "13.0.0" +version = "14.0.0" dependencies = [ "ahash 0.8.1", "arrow", @@ -682,7 +682,7 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "13.0.0" +version = "14.0.0" dependencies = [ "arrow", "datafusion-common", @@ -692,7 +692,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "13.0.0" +version = "14.0.0" dependencies = [ "arrow", "datafusion-common", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 2b9e285b32d0..5a1e4d926264 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "13.0.0" +version = "14.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = [ "arrow", "datafusion", "query", "sql" ] @@ -31,7 +31,7 @@ readme = "README.md" [dependencies] arrow = "26.0.0" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "13.0.0" } +datafusion = { path = "../datafusion/core", version = "14.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 132b7b2389f5..33cf9dd95928 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-examples" description = "DataFusion usage examples" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow "] diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index 6cffc354f117..d617a4fb0c3e 100644 --- a/datafusion/CHANGELOG.md +++ 
b/datafusion/CHANGELOG.md @@ -19,6 +19,340 @@ # Changelog +## [14.0.0](https://github.com/apache/arrow-datafusion/tree/14.0.0) (2022-11-04) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0-rc1...14.0.0) + +**Breaking changes:** + +- Improve FieldNotFound errors [\#4084](https://github.com/apache/arrow-datafusion/pull/4084) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Refactor: move `simplify_expression.rs` and `expr_simplifier.rs` to a new mod `simplify_expressions` [\#3951](https://github.com/apache/arrow-datafusion/pull/3951) ([HaoYang670](https://github.com/HaoYang670)) +- Support for non-u64 types for Window Bound [\#3916](https://github.com/apache/arrow-datafusion/pull/3916) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([mustafasrepo](https://github.com/mustafasrepo)) +- Expose parquet reader settings using normal DataFusion `ConfigOptions` [\#3822](https://github.com/apache/arrow-datafusion/pull/3822) ([alamb](https://github.com/alamb)) +- Add `Filter::try_new` with validation [\#3796](https://github.com/apache/arrow-datafusion/pull/3796) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Change public simplify API and add a public coerce API [\#3758](https://github.com/apache/arrow-datafusion/pull/3758) ([alamb](https://github.com/alamb)) + +**Implemented enhancements:** + +- Automatically register tables if ObjectStore root is configured [\#4094](https://github.com/apache/arrow-datafusion/issues/4094) +- Simplify small `InList` expressions [\#4089](https://github.com/apache/arrow-datafusion/issues/4089) +- Support `SET` command [\#4067](https://github.com/apache/arrow-datafusion/issues/4067) +- add uuid\(\) function to generate unique uuid per row [\#4045](https://github.com/apache/arrow-datafusion/issues/4045) +- Publish benchmark crate so that it can be used as a library in Ballista 
[\#4016](https://github.com/apache/arrow-datafusion/issues/4016) +- Add statistics methods to `TableProvider` trait for use in cost-based optimizations in the logical plan [\#3983](https://github.com/apache/arrow-datafusion/issues/3983) +- Implement `current_time` Function [\#3982](https://github.com/apache/arrow-datafusion/issues/3982) +- Implement `current_date` Function [\#3981](https://github.com/apache/arrow-datafusion/issues/3981) +- Put common code used for testing code into datafusion/test\_utils.rs [\#3960](https://github.com/apache/arrow-datafusion/issues/3960) +- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3952](https://github.com/apache/arrow-datafusion/issues/3952) +- Don't make dependants install protoc [\#3947](https://github.com/apache/arrow-datafusion/issues/3947) +- Implement right anti join and support it in HashBuildProbeOrder [\#3946](https://github.com/apache/arrow-datafusion/issues/3946) +- Implement right semi join and support it in HashBuildProbeOrder [\#3945](https://github.com/apache/arrow-datafusion/issues/3945) +- Refactor `simplify_expressions` and `expr_simplifier` [\#3934](https://github.com/apache/arrow-datafusion/issues/3934) +- Implement serialization for `ScalarValue::FixedSizeBinary` [\#3928](https://github.com/apache/arrow-datafusion/issues/3928) +- Support inlining view / dataframes logical plan [\#3913](https://github.com/apache/arrow-datafusion/issues/3913) +- Plans with tables from `TableProviderFactory`s can't be serialized [\#3906](https://github.com/apache/arrow-datafusion/issues/3906) +- Simplify `a AND a` and `a OR a`. 
[\#3895](https://github.com/apache/arrow-datafusion/issues/3895) +- Allow configuring statistics on TPC-H benchmarks [\#3888](https://github.com/apache/arrow-datafusion/issues/3888) +- CI checks stuck in queued mode [\#3883](https://github.com/apache/arrow-datafusion/issues/3883) +- Multiple optimizer passes [\#3879](https://github.com/apache/arrow-datafusion/issues/3879) +- datafusion-proto does not support view table scan [\#3874](https://github.com/apache/arrow-datafusion/issues/3874) +- TableProviderFactories need to be async and return a Result to be useful [\#3866](https://github.com/apache/arrow-datafusion/issues/3866) +- Factorize common AND factors out of OR predicates to support filterPushDown as possible [\#3858](https://github.com/apache/arrow-datafusion/issues/3858) +- Replace `concat_ws` with `concat` when the delimiter is empty string [\#3857](https://github.com/apache/arrow-datafusion/issues/3857) +- Concatenate contiguous literal arguments of `concat_ws` when doing the expression simplification [\#3856](https://github.com/apache/arrow-datafusion/issues/3856) +- Partition and Sort Enforcement [\#3854](https://github.com/apache/arrow-datafusion/issues/3854) +- Enable mimalloc by default in benchmarks [\#3851](https://github.com/apache/arrow-datafusion/issues/3851) +- Add collect statistics configuration [\#3847](https://github.com/apache/arrow-datafusion/issues/3847) +- \[SQL\] - Support cache/uncache table syntax [\#3842](https://github.com/apache/arrow-datafusion/issues/3842) +- Filter pushdown doesn't seem to apply for filter on TPC-H Q17 [\#3839](https://github.com/apache/arrow-datafusion/issues/3839) +- Support pushdown multi-columns in PageIndex pruning. 
[\#3834](https://github.com/apache/arrow-datafusion/issues/3834) +- Consolidate `Expr` manipulation code so it is more discoverable and make it easier to use [\#3808](https://github.com/apache/arrow-datafusion/issues/3808) +- Leverage input array's null buffer for regex replace to optimize sparse arrays [\#3803](https://github.com/apache/arrow-datafusion/issues/3803) +- Improve join cardinality estimation when there is no overlap in the min/max values [\#3802](https://github.com/apache/arrow-datafusion/issues/3802) +- datafusion-cli up to date check is failing on master [\#3798](https://github.com/apache/arrow-datafusion/issues/3798) +- Optimize benchmark q2 subquery filter [\#3789](https://github.com/apache/arrow-datafusion/issues/3789) +- Benchmark should infer schema when running against Parquet [\#3776](https://github.com/apache/arrow-datafusion/issues/3776) +- Allow specialized physical functions to provide hints for the array adapter [\#3762](https://github.com/apache/arrow-datafusion/issues/3762) +- \[User Guide\] Add `EXPLAIN` to SQL reference [\#3755](https://github.com/apache/arrow-datafusion/issues/3755) +- move `type coercion` for agg/agg udf [\#3752](https://github.com/apache/arrow-datafusion/issues/3752) +- Prevent Cargo.lock for datafusion-cli being out-of-date [\#3744](https://github.com/apache/arrow-datafusion/issues/3744) +- Add example of expr apis including simplification and coercion [\#3740](https://github.com/apache/arrow-datafusion/issues/3740) +- support `type coercion` for ScalarFunction expr in the logical phase [\#3731](https://github.com/apache/arrow-datafusion/issues/3731) +- Add support for DISTINCT projections in `decorrelate_where_exists` [\#3724](https://github.com/apache/arrow-datafusion/issues/3724) +- Add type coercion rule for `CONCAT` and `CONCAT_WS` [\#3720](https://github.com/apache/arrow-datafusion/issues/3720) +- Expose and document a simpler public API for simplify expressions 
[\#3709](https://github.com/apache/arrow-datafusion/issues/3709) +- Expose + document the type coercion API publicly [\#3708](https://github.com/apache/arrow-datafusion/issues/3708) +- Concatenate contiguous literal arguments of `CONCAT` during the expression simplification. [\#3683](https://github.com/apache/arrow-datafusion/issues/3683) +- DataFusion 13.0.0 Release [\#3671](https://github.com/apache/arrow-datafusion/issues/3671) +- Add division by `0` rules in the expression simplification [\#3663](https://github.com/apache/arrow-datafusion/issues/3663) +- Compressed CSV/JSON Read [\#3641](https://github.com/apache/arrow-datafusion/issues/3641) +- remove type coercion for agg [\#3623](https://github.com/apache/arrow-datafusion/issues/3623) +- extract or clause as predicate for join rels [\#3577](https://github.com/apache/arrow-datafusion/issues/3577) +- Improve performance of `regex_replace` [\#3518](https://github.com/apache/arrow-datafusion/issues/3518) +- Add benchmarks for parquet queries with filter pushdown enabled [\#3457](https://github.com/apache/arrow-datafusion/issues/3457) +- Make type coercion rule more robust [\#3390](https://github.com/apache/arrow-datafusion/issues/3390) +- `ViewTable::scan` ignores filters and limits [\#3249](https://github.com/apache/arrow-datafusion/issues/3249) +- Add `CREATE VIEW` documentation to user guide [\#3211](https://github.com/apache/arrow-datafusion/issues/3211) +- Push additional parquet filtering into the parquet scan \[EPIC\] [\#3147](https://github.com/apache/arrow-datafusion/issues/3147) +- Remove `core/logical_plan` module [\#2683](https://github.com/apache/arrow-datafusion/issues/2683) +- Datafusion Optimizer Enhancement [\#2255](https://github.com/apache/arrow-datafusion/issues/2255) +- \[Optimizer\] Eliminate self compare self [\#2252](https://github.com/apache/arrow-datafusion/issues/2252) +- Break datafusion crate into smaller crates [\#1750](https://github.com/apache/arrow-datafusion/issues/1750) +- 
Benchmark `constellation-rs/amadeus`'s parquet implementation [\#1341](https://github.com/apache/arrow-datafusion/issues/1341) +- Use `parquet2` async reader in `physical_plan/parquet` [\#1058](https://github.com/apache/arrow-datafusion/issues/1058) +- Table Scan Enhancement Plan [\#944](https://github.com/apache/arrow-datafusion/issues/944) +- Implement parquet page-level skipping with column index, using min/max stats [\#847](https://github.com/apache/arrow-datafusion/issues/847) +- Support min/max statistics in ParquetTable and ParquetExec [\#537](https://github.com/apache/arrow-datafusion/issues/537) + +**Fixed bugs:** + +- Clippy failing on master [\#4100](https://github.com/apache/arrow-datafusion/issues/4100) +- Panic when the number of partitions of the pipeline that throws the exception is inconsistent with the number of partitions output by the query [\#4096](https://github.com/apache/arrow-datafusion/issues/4096) +- FieldNotFound when field is available [\#4083](https://github.com/apache/arrow-datafusion/issues/4083) +- SingleDistinctToGroupBy being applied too broadly [\#4082](https://github.com/apache/arrow-datafusion/issues/4082) +- single\_distinct\_to\_groupby strips qualifiers from group-by expressions [\#4049](https://github.com/apache/arrow-datafusion/issues/4049) +- Another Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate: [\#4046](https://github.com/apache/arrow-datafusion/issues/4046) +- Decimal multiplied by Float produces incorrect results [\#4035](https://github.com/apache/arrow-datafusion/issues/4035) +- Cannot query external table - TableScan replaced with EmptyExec [\#4027](https://github.com/apache/arrow-datafusion/issues/4027) +- benchmark q17 produces incorrect result [\#4026](https://github.com/apache/arrow-datafusion/issues/4026) +- benchmark q14 produces incorrect result [\#4025](https://github.com/apache/arrow-datafusion/issues/4025) +- benchmark q11 producing incorrect results 
[\#4023](https://github.com/apache/arrow-datafusion/issues/4023) +- Internal error when parquet predicate pushdown is enabled "Error evaluating filter predicate:" [\#4006](https://github.com/apache/arrow-datafusion/issues/4006) +- Incorrect results with parquet filtering pushdown enabled [\#4005](https://github.com/apache/arrow-datafusion/issues/4005) +- Wrong results when parquet page index filtering is enabled [\#4002](https://github.com/apache/arrow-datafusion/issues/4002) +- Output schema of semi join has invalid projection added after HashBuildProbeOrder [\#4001](https://github.com/apache/arrow-datafusion/issues/4001) +- `async` deserialization functions are unintuitive and possibly insecure [\#3977](https://github.com/apache/arrow-datafusion/issues/3977) +- `Expr::to_bytes` can produce output that hits `Expr::from_bytes` recursion limit [\#3968](https://github.com/apache/arrow-datafusion/issues/3968) +- Bug on propagating arrow field metadata [\#3964](https://github.com/apache/arrow-datafusion/issues/3964) +- Predicate still has cast when comparing Timestamp\(Nano, None\) to a timestamp literal, so can't be pushed down or used for pruning [\#3938](https://github.com/apache/arrow-datafusion/issues/3938) +- Error using `IN` list on dictionary encoded data: `InList does not support datatype Dictionary(Int32, Utf8).` [\#3936](https://github.com/apache/arrow-datafusion/issues/3936) +- Internal error in CAST from Timestamp\[us\] [\#3922](https://github.com/apache/arrow-datafusion/issues/3922) +- ScalarValue not implemented for FixedSizeBinary types [\#3910](https://github.com/apache/arrow-datafusion/issues/3910) +- \[DOC\] - There are unsupported DDL in the official documentation [\#3904](https://github.com/apache/arrow-datafusion/issues/3904) +- datafusion-proto deserialize with Substring\(str \[from int\] \[for int\]\) fails [\#3901](https://github.com/apache/arrow-datafusion/issues/3901) +- `count(Literal)` gives wrong column name 
[\#3891](https://github.com/apache/arrow-datafusion/issues/3891) +- `projection_push_down` adds duplicate projections with multiple passes [\#3881](https://github.com/apache/arrow-datafusion/issues/3881) +- Default physical planner generates empty relation for DROP TABLE, CREATE MEMORY TABLE, etc [\#3873](https://github.com/apache/arrow-datafusion/issues/3873) +- Binary expression canonical names are incorrect in some cases [\#3865](https://github.com/apache/arrow-datafusion/issues/3865) +- Using the window function lag causes panic. [\#3830](https://github.com/apache/arrow-datafusion/issues/3830) +- chrono crate : specify 0.4.22 as the minimum version due to spurious build failures [\#3827](https://github.com/apache/arrow-datafusion/issues/3827) +- datafusion-proto deserialize with q16 sql fails [\#3820](https://github.com/apache/arrow-datafusion/issues/3820) +- Filter predicates should not be aliased [\#3795](https://github.com/apache/arrow-datafusion/issues/3795) +- Write csv not save all lines of dataframe [\#3783](https://github.com/apache/arrow-datafusion/issues/3783) +- Regression in simplifying expressions in subqueries [\#3760](https://github.com/apache/arrow-datafusion/issues/3760) +- DataFusionError\(Internal\("The size of the sorted batch is larger than the size of the input batch: 2120 \> 2312"\)\) [\#3747](https://github.com/apache/arrow-datafusion/issues/3747) +- "labeler" PR check is broken [\#3743](https://github.com/apache/arrow-datafusion/issues/3743) +- `DataFrame::select_columns` doesn't work with names containing "." [\#3733](https://github.com/apache/arrow-datafusion/issues/3733) +- TPC-H Query 1 has regressed [\#3729](https://github.com/apache/arrow-datafusion/issues/3729) +- \[RUST\]\[Datafusion\] What causes "Error: Execution\("file size of 4 is less than footer"\)" error? 
[\#3800](https://github.com/apache/arrow-datafusion/issues/3800) +- Field names containing periods such as f.c cannot work [\#3682](https://github.com/apache/arrow-datafusion/issues/3682) +- TableProvider implementation for DataFrame does not support filter pushdown [\#3681](https://github.com/apache/arrow-datafusion/issues/3681) +- using Decimal\(0\) make system panicked [\#3665](https://github.com/apache/arrow-datafusion/issues/3665) +- Cannot query some parquet files in S3, but they work locally [\#3633](https://github.com/apache/arrow-datafusion/issues/3633) +- ` col / col` returns `1` when `col = 0` [\#3615](https://github.com/apache/arrow-datafusion/issues/3615) +- register\_csv allow space in table\_path [\#3589](https://github.com/apache/arrow-datafusion/issues/3589) +- Hardcoded u64 for WindowFrameBound fields [\#3571](https://github.com/apache/arrow-datafusion/issues/3571) +- `docs.rs` cannot build `datafusion-proto` crate [\#3538](https://github.com/apache/arrow-datafusion/issues/3538) +- Row Hash loads whole aggregation state to memory before sending [\#3460](https://github.com/apache/arrow-datafusion/issues/3460) +- approx\_percentile\_cont return wrong result when scan multi parquet files. [\#3140](https://github.com/apache/arrow-datafusion/issues/3140) +- User guide is incorrect regarding using CLI to register CSV files using schema inference [\#3001](https://github.com/apache/arrow-datafusion/issues/3001) +- Exception: Internal error, Exception: Schema error [\#2938](https://github.com/apache/arrow-datafusion/issues/2938) +- Version 0.6.0 Panic error during SQL execution [\#2738](https://github.com/apache/arrow-datafusion/issues/2738) +- wrong result when operation parquet [\#2044](https://github.com/apache/arrow-datafusion/issues/2044) +- Local object store accepts file:/// as base path, but LocalStore returns meta without the prefix. 
[\#1923](https://github.com/apache/arrow-datafusion/issues/1923) +- Reading nested parquet files results in `index out of bounds` [\#1383](https://github.com/apache/arrow-datafusion/issues/1383) +- `-` \(negation\) with NULL literals does not work: can't be evaluated because the expression's type is Utf8, not signed [\#1192](https://github.com/apache/arrow-datafusion/issues/1192) +- Inconsistent cast behavior [\#957](https://github.com/apache/arrow-datafusion/issues/957) +- single\_distinct\_to\_groupby no longer drops qualifiers [\#4050](https://github.com/apache/arrow-datafusion/pull/4050) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- Clarify in docs that Identifiers are made lower-case in SQL query [\#2374](https://github.com/apache/arrow-datafusion/issues/2374) +- Fix broken links in contributor guide [\#3956](https://github.com/apache/arrow-datafusion/pull/3956) ([Jefffrey](https://github.com/Jefffrey)) +- add create view explanation [\#3925](https://github.com/apache/arrow-datafusion/pull/3925) ([retikulum](https://github.com/retikulum)) +- Update `datafusion-examples` README [\#3814](https://github.com/apache/arrow-datafusion/pull/3814) ([alamb](https://github.com/alamb)) +- Add Seafowl to list of projects using DataFusion [\#3792](https://github.com/apache/arrow-datafusion/pull/3792) ([mildbyte](https://github.com/mildbyte)) + +**Closed issues:** + +- \[QUESTION\] How many times should be the function `create_name` called when executing a query? 
[\#3900](https://github.com/apache/arrow-datafusion/issues/3900) +- Improve the `Expr` string format [\#3878](https://github.com/apache/arrow-datafusion/issues/3878) +- Simplify division by zero \(division by one / multiplication by zero / multiplication by one\) for Decimal types as well [\#3643](https://github.com/apache/arrow-datafusion/issues/3643) +- InList: merge check branch [\#2833](https://github.com/apache/arrow-datafusion/issues/2833) +- Optimization InList: compare the float data type using OrderedFloat\ [\#2831](https://github.com/apache/arrow-datafusion/issues/2831) +- Outdated section of the add function of the contribution guide [\#2560](https://github.com/apache/arrow-datafusion/issues/2560) +- Optimize InList implementation with native types rather than ScalarValue [\#2165](https://github.com/apache/arrow-datafusion/issues/2165) +- Improve testing of optimizers using EXPLAIN [\#1118](https://github.com/apache/arrow-datafusion/issues/1118) +- Crash on parsing sql query with Cyrillic letters [\#184](https://github.com/apache/arrow-datafusion/issues/184) +- \[EPIC\] Support all TPC-H queries in benchmark [\#158](https://github.com/apache/arrow-datafusion/issues/158) +- Implement optional second argument to ltrim and rtrim functions [\#144](https://github.com/apache/arrow-datafusion/issues/144) +- Benchmark crate does not have a SIMD feature [\#124](https://github.com/apache/arrow-datafusion/issues/124) +- ColumnarValue::into\_array should not require batch [\#113](https://github.com/apache/arrow-datafusion/issues/113) +- \[Rust\] Parquet data source does not support complex types [\#83](https://github.com/apache/arrow-datafusion/issues/83) + +**Merged pull requests:** + +- Appease new clippy [\#4101](https://github.com/apache/arrow-datafusion/pull/4101) ([alamb](https://github.com/alamb)) +- minor: Split parquet reader up into smaller modules [\#4099](https://github.com/apache/arrow-datafusion/pull/4099) ([alamb](https://github.com/alamb)) +- 
\[MINOR\] Update `SET` in cli.md [\#4098](https://github.com/apache/arrow-datafusion/pull/4098) ([waitingkuo](https://github.com/waitingkuo)) +- fix: Scheduler panic routing errors [\#4097](https://github.com/apache/arrow-datafusion/pull/4097) ([yukkit](https://github.com/yukkit)) +- Automatically register tables if ObjectStore root is configured [\#4095](https://github.com/apache/arrow-datafusion/pull/4095) ([avantgardnerio](https://github.com/avantgardnerio)) +- minor: Use Operator::swap [\#4092](https://github.com/apache/arrow-datafusion/pull/4092) ([alamb](https://github.com/alamb)) +- Simplify small InListExpr [\#4090](https://github.com/apache/arrow-datafusion/pull/4090) ([Dandandan](https://github.com/Dandandan)) +- Minor: Add arrow-rs ticket reference and turn some comments into docstrings [\#4088](https://github.com/apache/arrow-datafusion/pull/4088) ([alamb](https://github.com/alamb)) +- Support Dictionary in InListExpr [\#4070](https://github.com/apache/arrow-datafusion/pull/4070) ([tustvold](https://github.com/tustvold)) +- support `SET` variable [\#4069](https://github.com/apache/arrow-datafusion/pull/4069) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([waitingkuo](https://github.com/waitingkuo)) +- Add in list bench [\#4068](https://github.com/apache/arrow-datafusion/pull/4068) ([tustvold](https://github.com/tustvold)) +- Improve Error Handling and Readibility for downcasting `StructArray` [\#4061](https://github.com/apache/arrow-datafusion/pull/4061) ([retikulum](https://github.com/retikulum)) +- Build tests separately from running [\#4060](https://github.com/apache/arrow-datafusion/pull/4060) ([alamb](https://github.com/alamb)) +- Simplify InListExpr ~20-70% Faster [\#4057](https://github.com/apache/arrow-datafusion/pull/4057) ([tustvold](https://github.com/tustvold)) +- MINOR: Print unoptimized logical plan in execute\_query of tpch benchmark [\#4056](https://github.com/apache/arrow-datafusion/pull/4056) 
([viirya](https://github.com/viirya)) +- Minor: clean the code in `eliminate_filter` [\#4055](https://github.com/apache/arrow-datafusion/pull/4055) ([HaoYang670](https://github.com/HaoYang670)) +- Implement `current_time` scalar function [\#4054](https://github.com/apache/arrow-datafusion/pull/4054) ([naosense](https://github.com/naosense)) +- Cleanup hash\_utils adding support for decimal256 and f16 [\#4053](https://github.com/apache/arrow-datafusion/pull/4053) ([tustvold](https://github.com/tustvold)) +- Fix multicolumn parquet predicate pushdown \(\#4046\) [\#4048](https://github.com/apache/arrow-datafusion/pull/4048) ([tustvold](https://github.com/tustvold)) +- Add CI checks that we can serde all benchmark queries [\#4047](https://github.com/apache/arrow-datafusion/pull/4047) ([andygrove](https://github.com/andygrove)) +- Enable more benchmark verification tests [\#4044](https://github.com/apache/arrow-datafusion/pull/4044) ([andygrove](https://github.com/andygrove)) +- Extract common parquet testing code to `parquet-test-util` crate [\#4042](https://github.com/apache/arrow-datafusion/pull/4042) ([alamb](https://github.com/alamb)) +- add uuid\(\) function [\#4041](https://github.com/apache/arrow-datafusion/pull/4041) ([Jimexist](https://github.com/Jimexist)) +- Update to arrow 26, change timezones [\#4039](https://github.com/apache/arrow-datafusion/pull/4039) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Fix Decimal and Floating type coerce rule [\#4038](https://github.com/apache/arrow-datafusion/pull/4038) ([viirya](https://github.com/viirya)) +- Reserve the literal expression of `Count` function [\#4031](https://github.com/apache/arrow-datafusion/pull/4031) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Implement current\_date scalar function [\#4022](https://github.com/apache/arrow-datafusion/pull/4022) 
([comphead](https://github.com/comphead)) +- Fix predicate pushdown bugs: project columns within DatafusionArrowPredicate \(\#4005\) \(\#4006\) [\#4021](https://github.com/apache/arrow-datafusion/pull/4021) ([tustvold](https://github.com/tustvold)) +- minor: remove redundant code/TODO [\#4019](https://github.com/apache/arrow-datafusion/pull/4019) ([jackwener](https://github.com/jackwener)) +- Add CI check to verify that benchmark queries return the expected results [\#4015](https://github.com/apache/arrow-datafusion/pull/4015) ([andygrove](https://github.com/andygrove)) +- Minor: Add TODO and tracking ticket reference [\#4012](https://github.com/apache/arrow-datafusion/pull/4012) ([alamb](https://github.com/alamb)) +- Add right anti join support and support it in HashBuildProbeOrder [\#4011](https://github.com/apache/arrow-datafusion/pull/4011) ([Dandandan](https://github.com/Dandandan)) +- MINOR: Generate expected benchmark query results [\#4010](https://github.com/apache/arrow-datafusion/pull/4010) ([andygrove](https://github.com/andygrove)) +- Minor: remove unecessary clippy allow [\#4008](https://github.com/apache/arrow-datafusion/pull/4008) ([alamb](https://github.com/alamb)) +- Minor: Do what clippy says and clean up some code [\#4007](https://github.com/apache/arrow-datafusion/pull/4007) ([alamb](https://github.com/alamb)) +- Improve Error Handling and Readibility for downcasting `Date32Array` [\#4004](https://github.com/apache/arrow-datafusion/pull/4004) ([retikulum](https://github.com/retikulum)) +- Don't add projection for semi joins in HashBuildProbeOrder [\#4000](https://github.com/apache/arrow-datafusion/pull/4000) ([Dandandan](https://github.com/Dandandan)) +- Minor: use `DataType::is_nested` [\#3995](https://github.com/apache/arrow-datafusion/pull/3995) ([alamb](https://github.com/alamb)) +- \[minor\] bump prettier version [\#3992](https://github.com/apache/arrow-datafusion/pull/3992) ([Jimexist](https://github.com/Jimexist)) +- Add parquet predicate 
pushdown metrics [\#3989](https://github.com/apache/arrow-datafusion/pull/3989) ([alamb](https://github.com/alamb)) +- Pin datafusion-proto build dependencies [\#3987](https://github.com/apache/arrow-datafusion/pull/3987) ([tustvold](https://github.com/tustvold)) +- Add TableProvider.statistics method [\#3986](https://github.com/apache/arrow-datafusion/pull/3986) ([andygrove](https://github.com/andygrove)) +- Add Pull Request guidelines to contributor guide [\#3985](https://github.com/apache/arrow-datafusion/pull/3985) ([alamb](https://github.com/alamb)) +- Update protos [\#3979](https://github.com/apache/arrow-datafusion/pull/3979) ([tustvold](https://github.com/tustvold)) +- Revert async changes but keep deltalake working [\#3978](https://github.com/apache/arrow-datafusion/pull/3978) ([avantgardnerio](https://github.com/avantgardnerio)) +- Correctness integration test for parquet filter pushdown [\#3976](https://github.com/apache/arrow-datafusion/pull/3976) ([alamb](https://github.com/alamb)) +- MINOR: Stop pretty printing batches in benchmark when there are no results [\#3974](https://github.com/apache/arrow-datafusion/pull/3974) ([andygrove](https://github.com/andygrove)) +- MINOR: Re-export Cast struct [\#3971](https://github.com/apache/arrow-datafusion/pull/3971) ([andygrove](https://github.com/andygrove)) +- fix: check recursion limit in `Expr::to_bytes` [\#3970](https://github.com/apache/arrow-datafusion/pull/3970) ([crepererum](https://github.com/crepererum)) +- \[Part1\] Partition and Sort Enforcement, PhysicalExpr enhancement [\#3969](https://github.com/apache/arrow-datafusion/pull/3969) ([mingmwang](https://github.com/mingmwang)) +- Support pushdown multi-columns in PageIndex pruning. 
[\#3967](https://github.com/apache/arrow-datafusion/pull/3967) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Fix benchmarks README formatting [\#3966](https://github.com/apache/arrow-datafusion/pull/3966) ([Jefffrey](https://github.com/Jefffrey)) +- Bug fix on DFField to Field conversion: preserve metadata [\#3965](https://github.com/apache/arrow-datafusion/pull/3965) ([metesynnada](https://github.com/metesynnada)) +- Informative Error Message for LAG and LEAD functions [\#3963](https://github.com/apache/arrow-datafusion/pull/3963) ([mustafasrepo](https://github.com/mustafasrepo)) +- Minor: Add some docstrings to `FileScanConfig` and `RuntimeEnv` [\#3962](https://github.com/apache/arrow-datafusion/pull/3962) ([alamb](https://github.com/alamb)) +- Move common code used for testing code into datafusion/test\_utils [\#3961](https://github.com/apache/arrow-datafusion/pull/3961) ([alamb](https://github.com/alamb)) +- Update minimum chrono dependency to 0.4.22 [\#3959](https://github.com/apache/arrow-datafusion/pull/3959) ([alamb](https://github.com/alamb)) +- Implement right semi join and support in HashBuildProbeOrder [\#3958](https://github.com/apache/arrow-datafusion/pull/3958) ([Dandandan](https://github.com/Dandandan)) +- Print the configurations of ConfigOptions in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings [\#3953](https://github.com/apache/arrow-datafusion/pull/3953) ([yahoNanJing](https://github.com/yahoNanJing)) +- Vendor Generated Protobuf Code \(\#3947\) [\#3950](https://github.com/apache/arrow-datafusion/pull/3950) ([tustvold](https://github.com/tustvold)) +- Implement serialization for ScalarValue::FixedSizeBinary [\#3943](https://github.com/apache/arrow-datafusion/pull/3943) ([retikulum](https://github.com/retikulum)) +- Consolidate physical join code into `datafusion/core/src/physical_plan/joins` [\#3942](https://github.com/apache/arrow-datafusion/pull/3942) ([alamb](https://github.com/alamb))
+- Add optimizer test for simplifying predicates on timestamps [\#3939](https://github.com/apache/arrow-datafusion/pull/3939) ([alamb](https://github.com/alamb)) +- Add test for querying predicate on dictionary [\#3937](https://github.com/apache/arrow-datafusion/pull/3937) ([alamb](https://github.com/alamb)) +- fix: return error for unsupported SQL [\#3933](https://github.com/apache/arrow-datafusion/pull/3933) ([Kikkon](https://github.com/Kikkon)) +- doc: fix doc about `CREATE TABLE IF NOT EXISTS` [\#3932](https://github.com/apache/arrow-datafusion/pull/3932) ([jackwener](https://github.com/jackwener)) +- Refactor Expr::Cast to use a struct. [\#3931](https://github.com/apache/arrow-datafusion/pull/3931) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jackwener](https://github.com/jackwener)) +- minor: fix some typo. [\#3930](https://github.com/apache/arrow-datafusion/pull/3930) ([jackwener](https://github.com/jackwener)) +- chore: update cranelift-related dependencies [\#3926](https://github.com/apache/arrow-datafusion/pull/3926) ([xudong963](https://github.com/xudong963)) +- Change cast error from Internal to NotImplemented [\#3924](https://github.com/apache/arrow-datafusion/pull/3924) ([alamb](https://github.com/alamb)) +- Support inlining view / dataframes logical plan [\#3923](https://github.com/apache/arrow-datafusion/pull/3923) ([Dandandan](https://github.com/Dandandan)) +- Add test for Simplify redundant predicates [\#3915](https://github.com/apache/arrow-datafusion/pull/3915) ([src255](https://github.com/src255)) +- Implement ScalarValue for FixedSizeBinary [\#3911](https://github.com/apache/arrow-datafusion/pull/3911) ([maxburke](https://github.com/maxburke)) +- Add serde for plans with tables from `TableProviderFactory`s [\#3907](https://github.com/apache/arrow-datafusion/pull/3907) ([avantgardnerio](https://github.com/avantgardnerio)) +- Support filter/limit pushdown for views/dataframes 
[\#3905](https://github.com/apache/arrow-datafusion/pull/3905) ([Dandandan](https://github.com/Dandandan)) +- Factorize common AND factors out of OR predicates to support filterPu… [\#3903](https://github.com/apache/arrow-datafusion/pull/3903) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add `Substring(str [from int] [for int])` support in `datafusion-proto` [\#3902](https://github.com/apache/arrow-datafusion/pull/3902) ([r4ntix](https://github.com/r4ntix)) +- Revert "Factorize common AND factors out of OR predicates to supportfilter Pu… \(\#3859\)" [\#3897](https://github.com/apache/arrow-datafusion/pull/3897) ([alamb](https://github.com/alamb)) +- MINOR: Add notes on Apache Reporter [\#3893](https://github.com/apache/arrow-datafusion/pull/3893) ([andygrove](https://github.com/andygrove)) +- Allow configuring collection of statistics during TPC-H benchmarks [\#3889](https://github.com/apache/arrow-datafusion/pull/3889) ([isidentical](https://github.com/isidentical)) +- Improve formatting of binary expressions [\#3884](https://github.com/apache/arrow-datafusion/pull/3884) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Multiple optimizer passes [\#3880](https://github.com/apache/arrow-datafusion/pull/3880) ([andygrove](https://github.com/andygrove)) +- \[MINOR\] Update docs with newly added configuration values [\#3877](https://github.com/apache/arrow-datafusion/pull/3877) ([alamb](https://github.com/alamb)) +- \[MINOR\] Add a hint about how to resolve the `Cargo.lock` CI check [\#3876](https://github.com/apache/arrow-datafusion/pull/3876) ([alamb](https://github.com/alamb)) +- Add `LogicalPlan::ViewTable` support in `datafusion-proto` [\#3875](https://github.com/apache/arrow-datafusion/pull/3875) ([r4ntix](https://github.com/r4ntix)) +- Optimize the `concat_ws` function [\#3869](https://github.com/apache/arrow-datafusion/pull/3869) ([HaoYang670](https://github.com/HaoYang670)) +- Implement 
foundational filter selectivity analysis [\#3868](https://github.com/apache/arrow-datafusion/pull/3868) ([isidentical](https://github.com/isidentical)) +- Update `TableProviderFactory` trait to support real-world use-cases [\#3867](https://github.com/apache/arrow-datafusion/pull/3867) ([avantgardnerio](https://github.com/avantgardnerio)) +- put subquery's equal clause into join on clauses instead of filter cl… [\#3862](https://github.com/apache/arrow-datafusion/pull/3862) ([AssHero](https://github.com/AssHero)) +- Factorize common AND factors out of OR predicates to support filterPu… [\#3859](https://github.com/apache/arrow-datafusion/pull/3859) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Enable mimalloc by default in benchmark [\#3853](https://github.com/apache/arrow-datafusion/pull/3853) ([Dandandan](https://github.com/Dandandan)) +- Refactor `Expr::Between` to use a struct [\#3850](https://github.com/apache/arrow-datafusion/pull/3850) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Handle cardinality estimation for disjoint inner and outer joins [\#3848](https://github.com/apache/arrow-datafusion/pull/3848) ([isidentical](https://github.com/isidentical)) +- Add setting for statistics collection [\#3846](https://github.com/apache/arrow-datafusion/pull/3846) ([Dandandan](https://github.com/Dandandan)) +- Update to arrow 25.0.0 [\#3844](https://github.com/apache/arrow-datafusion/pull/3844) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([tustvold](https://github.com/tustvold)) +- Tweak list of optimization rules [\#3841](https://github.com/apache/arrow-datafusion/pull/3841) ([Dandandan](https://github.com/Dandandan)) +- Refactor Expr::GetIndexedField to use a struct [\#3838](https://github.com/apache/arrow-datafusion/pull/3838) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([ygf11](https://github.com/ygf11)) +- Infer the count of maximum distinct values from min/max 
[\#3837](https://github.com/apache/arrow-datafusion/pull/3837) ([isidentical](https://github.com/isidentical)) +- Refactor `Expr::Like`, `Expr::ILike`, `Expr::SimilarTo` to use a struct [\#3836](https://github.com/apache/arrow-datafusion/pull/3836) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([b41sh](https://github.com/b41sh)) +- Refactor Expr::BinaryExpr to use a struct [\#3835](https://github.com/apache/arrow-datafusion/pull/3835) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([zhoudongyan](https://github.com/zhoudongyan)) +- update postgres version to 15 in integration test [\#3831](https://github.com/apache/arrow-datafusion/pull/3831) ([Jimexist](https://github.com/Jimexist)) +- Fix the panic when lpad/rpad parameter is negative [\#3829](https://github.com/apache/arrow-datafusion/pull/3829) ([ZuoTiJia](https://github.com/ZuoTiJia)) +- MINOR: Document SHOW ALL in the users guide [\#3826](https://github.com/apache/arrow-datafusion/pull/3826) ([alamb](https://github.com/alamb)) +- MINOR: Add datafusion-cli documentation on showing configuration [\#3825](https://github.com/apache/arrow-datafusion/pull/3825) ([alamb](https://github.com/alamb)) +- Add/Remove Division Rules [\#3824](https://github.com/apache/arrow-datafusion/pull/3824) ([retikulum](https://github.com/retikulum)) +- Minor: Sort the output of SHOW ALL by config name [\#3823](https://github.com/apache/arrow-datafusion/pull/3823) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([alamb](https://github.com/alamb)) +- Add `precision != 0` check when making decimal type [\#3818](https://github.com/apache/arrow-datafusion/pull/3818) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([HaoYang670](https://github.com/HaoYang670)) +- Infer schema when running benchmarks against parquet [\#3817](https://github.com/apache/arrow-datafusion/pull/3817) ([andygrove](https://github.com/andygrove)) +- Finish removing deprecated `datafusion::logical_plan` 
module [\#3816](https://github.com/apache/arrow-datafusion/pull/3816) ([andygrove](https://github.com/andygrove)) +- Clarify initial example with respect to capitalization [\#3815](https://github.com/apache/arrow-datafusion/pull/3815) ([alamb](https://github.com/alamb)) +- Improve expression simplification by running it twice [\#3811](https://github.com/apache/arrow-datafusion/pull/3811) ([alamb](https://github.com/alamb)) +- Make expression manipulation consistent and easier to use: `combine/split filter` `conjunction`, etc [\#3810](https://github.com/apache/arrow-datafusion/pull/3810) ([alamb](https://github.com/alamb)) +- Consolidate expression manipulation functions into `datafusion_optimizer` [\#3809](https://github.com/apache/arrow-datafusion/pull/3809) ([alamb](https://github.com/alamb)) +- Optimize `regexp_replace` when the input is a sparse array [\#3804](https://github.com/apache/arrow-datafusion/pull/3804) ([isidentical](https://github.com/isidentical)) +- Stop ignoring errors when writing DataFrame to csv, parquet, json [\#3801](https://github.com/apache/arrow-datafusion/pull/3801) ([andygrove](https://github.com/andygrove)) +- Update datafusion-cli Cargo.lock to fix CI check on master [\#3799](https://github.com/apache/arrow-datafusion/pull/3799) ([alamb](https://github.com/alamb)) +- MINOR: Benchmark regression tests [\#3790](https://github.com/apache/arrow-datafusion/pull/3790) ([andygrove](https://github.com/andygrove)) +- MINOR: Optimizer example and docs, deprecate `Expr::name` [\#3788](https://github.com/apache/arrow-datafusion/pull/3788) ([andygrove](https://github.com/andygrove)) +- Join cardinality computation for cost-based nested join optimizations [\#3787](https://github.com/apache/arrow-datafusion/pull/3787) ([isidentical](https://github.com/isidentical)) +- Optimizer now simplifies multiplication, division, and modulo when an argument is a literal Decimal zero or one [\#3782](https://github.com/apache/arrow-datafusion/pull/3782)
([drrtuy](https://github.com/drrtuy)) +- Implement parquet page-level skipping with column index, using min/ma… [\#3780](https://github.com/apache/arrow-datafusion/pull/3780) ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Bump actions/labeler from 4.0.1 to 4.0.2 [\#3779](https://github.com/apache/arrow-datafusion/pull/3779) ([dependabot[bot]](https://github.com/apps/dependabot)) +- MINOR: correct `ListingOptions.try_new` docs to include the enabled stat collection [\#3775](https://github.com/apache/arrow-datafusion/pull/3775) ([isidentical](https://github.com/isidentical)) +- Teach a negative NULL expression to return NULL instead of an error [\#3771](https://github.com/apache/arrow-datafusion/pull/3771) ([drrtuy](https://github.com/drrtuy)) +- Add benchmarks for testing row filtering [\#3769](https://github.com/apache/arrow-datafusion/pull/3769) ([thinkharderdev](https://github.com/thinkharderdev)) +- move type coercion of agg and agg\_udaf to logical phase [\#3768](https://github.com/apache/arrow-datafusion/pull/3768) ([liukun4515](https://github.com/liukun4515)) +- User Guide: Add `EXPLAIN` to SQL reference [\#3767](https://github.com/apache/arrow-datafusion/pull/3767) ([unvalley](https://github.com/unvalley)) +- Allow specialized implementations to produce hints for the array adapter [\#3765](https://github.com/apache/arrow-datafusion/pull/3765) ([isidentical](https://github.com/isidentical)) +- Fix optimizer regression with simplifying expressions in subquery filters [\#3764](https://github.com/apache/arrow-datafusion/pull/3764) ([andygrove](https://github.com/andygrove)) +- Run all `datafusion-examples` in CI tests [\#3761](https://github.com/apache/arrow-datafusion/pull/3761) ([alamb](https://github.com/alamb)) +- MINOR: Remove deprecated module `datafusion::logical_plan::plan` [\#3759](https://github.com/apache/arrow-datafusion/pull/3759) ([andygrove](https://github.com/andygrove)) +- Refactor `Expr::Case` to use a struct 
[\#3757](https://github.com/apache/arrow-datafusion/pull/3757) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([andygrove](https://github.com/andygrove)) +- Do not run labeler CI check if it would fail due to permissions [\#3756](https://github.com/apache/arrow-datafusion/pull/3756) ([alamb](https://github.com/alamb)) +- MINOR: Improvements to `scalar_subquery_to_join` error handling [\#3754](https://github.com/apache/arrow-datafusion/pull/3754) ([andygrove](https://github.com/andygrove)) +- Always track the final size of the in-mem sorted arrays [\#3753](https://github.com/apache/arrow-datafusion/pull/3753) ([isidentical](https://github.com/isidentical)) +- Fix DataFrame::select\_columns to handle column names with a period [\#3751](https://github.com/apache/arrow-datafusion/pull/3751) ([zhoudongyan](https://github.com/zhoudongyan)) +- Fix `ListingTableUrl` to decode percent [\#3750](https://github.com/apache/arrow-datafusion/pull/3750) ([unvalley](https://github.com/unvalley)) +- remove `type coercion` for physical ScalarFunction [\#3749](https://github.com/apache/arrow-datafusion/pull/3749) ([liukun4515](https://github.com/liukun4515)) +- CI: Add a new run to check whether `datafusion-cli` lock file is up-to-date [\#3745](https://github.com/apache/arrow-datafusion/pull/3745) ([isidentical](https://github.com/isidentical)) +- Add datafusion example of expression apis [\#3741](https://github.com/apache/arrow-datafusion/pull/3741) ([alamb](https://github.com/alamb)) +- fix subquery where exists distinct [\#3732](https://github.com/apache/arrow-datafusion/pull/3732) ([b41sh](https://github.com/b41sh)) +- Remove some unneeded code in `CommonSubexprEliminate` [\#3730](https://github.com/apache/arrow-datafusion/pull/3730) ([alamb](https://github.com/alamb)) +- Consolidate and better tests for expression re-rewriting / aliasing [\#3727](https://github.com/apache/arrow-datafusion/pull/3727) ([alamb](https://github.com/alamb)) +- Fix output schema generated
by CommonSubExprEliminate [\#3726](https://github.com/apache/arrow-datafusion/pull/3726) ([alex-natzka](https://github.com/alex-natzka)) +- Add type coercion rule for `concat` and `concat_ws` [\#3721](https://github.com/apache/arrow-datafusion/pull/3721) ([HaoYang670](https://github.com/HaoYang670)) +- Expose and document a simpler public API for simplify expressions [\#3719](https://github.com/apache/arrow-datafusion/pull/3719) ([ygf11](https://github.com/ygf11)) +- Remove dead code in `UnwrapCastExprRewriter` that may mask errors [\#3703](https://github.com/apache/arrow-datafusion/pull/3703) ([alamb](https://github.com/alamb)) +- Fix `DataFrame::with_column` to handle creating column names with a period [\#3700](https://github.com/apache/arrow-datafusion/pull/3700) ([alamb](https://github.com/alamb)) +- Add simplification rules for the `CONCAT` function [\#3684](https://github.com/apache/arrow-datafusion/pull/3684) ([HaoYang670](https://github.com/HaoYang670)) +- Compressed CSV/JSON support [\#3642](https://github.com/apache/arrow-datafusion/pull/3642) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([Licht-T](https://github.com/Licht-T)) +- Simplify serialization by removing redundant `PrimitiveScalarValue` [\#3612](https://github.com/apache/arrow-datafusion/pull/3612) ([alamb](https://github.com/alamb)) +- Pushdown single column predicates from ON join clauses [\#3578](https://github.com/apache/arrow-datafusion/pull/3578) ([AssHero](https://github.com/AssHero)) +- Simplify the serialization of `ScalarValue::List` [\#3547](https://github.com/apache/arrow-datafusion/pull/3547) ([alamb](https://github.com/alamb)) +- Generate hash aggregation output in smaller record batches [\#3461](https://github.com/apache/arrow-datafusion/pull/3461) ([milenkovicm](https://github.com/milenkovicm)) +- Improve doc on lowercase treatment of columns on SQL [\#3385](https://github.com/apache/arrow-datafusion/pull/3385) ([nanicpc](https://github.com/nanicpc)) + +## 
[13.0.0-rc1](https://github.com/apache/arrow-datafusion/tree/13.0.0-rc1) (2022-10-07) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/13.0.0...13.0.0-rc1) + + ## [13.0.0](https://github.com/apache/arrow-datafusion/tree/13.0.0) (2022-10-06) [Full Changelog](https://github.com/apache/arrow-datafusion/compare/12.0.0...13.0.0) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index b4f6bea85ba7..d62aa1dd1a08 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-common" description = "Common functionality for DataFusion query engine" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 88f05cca43f2..46a2fb3c47b8 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "../../README.md" @@ -62,13 +62,13 @@ async-trait = "0.1.41" bytes = "1.1" bzip2 = "0.4.3" chrono = { version = "0.4.22", default-features = false } -datafusion-common = { path = "../common", version = "13.0.0", features = ["parquet", "object_store"] } -datafusion-expr = { path = "../expr", version = "13.0.0" } -datafusion-jit = { path = "../jit", version = "13.0.0", optional = true } -datafusion-optimizer = { path = "../optimizer", version = "13.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "13.0.0" } -datafusion-row = { path = "../row", version = "13.0.0" } -datafusion-sql = { path = "../sql", version = "13.0.0" } +datafusion-common = { path = 
"../common", version = "14.0.0", features = ["parquet", "object_store"] } +datafusion-expr = { path = "../expr", version = "14.0.0" } +datafusion-jit = { path = "../jit", version = "14.0.0", optional = true } +datafusion-optimizer = { path = "../optimizer", version = "14.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "14.0.0" } +datafusion-row = { path = "../row", version = "14.0.0" } +datafusion-sql = { path = "../sql", version = "14.0.0" } flate2 = "1.0.24" futures = "0.3" glob = "0.3.0" diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index b3a3ad234a59..db4afdc2cfd1 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-expr" description = "Logical plan and expression representation for DataFusion query engine" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -37,6 +37,6 @@ path = "src/lib.rs" [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { version = "26.0.0", default-features = false } -datafusion-common = { path = "../common", version = "13.0.0" } +datafusion-common = { path = "../common", version = "14.0.0" } log = "^0.4" sqlparser = "0.26" diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml index e9ad22323c5d..31aff38ae8a0 100644 --- a/datafusion/jit/Cargo.toml +++ b/datafusion/jit/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-jit" description = "Just In Time (JIT) compilation support for DataFusion query engine" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,7 +41,7 @@ cranelift = "0.89.0" cranelift-jit = "0.89.0" cranelift-module = "0.89.0" cranelift-native = "0.89.0" -datafusion-common = { path = 
"../common", version = "13.0.0", features = ["jit"] } -datafusion-expr = { path = "../expr", version = "13.0.0" } +datafusion-common = { path = "../common", version = "14.0.0", features = ["jit"] } +datafusion-expr = { path = "../expr", version = "14.0.0" } parking_lot = "0.12" diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 343cf9d7060b..441793d7ee45 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-optimizer" description = "DataFusion Query Optimizer" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -40,13 +40,13 @@ unicode_expressions = [] arrow = { version = "26.0.0", features = ["prettyprint"] } async-trait = "0.1.41" chrono = { version = "0.4.22", default-features = false } -datafusion-common = { path = "../common", version = "13.0.0" } -datafusion-expr = { path = "../expr", version = "13.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "13.0.0" } +datafusion-common = { path = "../common", version = "14.0.0" } +datafusion-expr = { path = "../expr", version = "14.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "14.0.0" } hashbrown = { version = "0.12", features = ["raw"] } log = "^0.4" [dev-dependencies] ctor = "0.1.22" -datafusion-sql = { path = "../sql", version = "13.0.0" } +datafusion-sql = { path = "../sql", version = "14.0.0" } env_logger = "0.9.0" diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index d3e2c7a70b21..072defd679e5 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" -version = "13.0.0" +version = "14.0.0" homepage = 
"https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -46,9 +46,9 @@ arrow-schema = "26.0.0" blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4.22", default-features = false } -datafusion-common = { path = "../common", version = "13.0.0" } -datafusion-expr = { path = "../expr", version = "13.0.0" } -datafusion-row = { path = "../row", version = "13.0.0" } +datafusion-common = { path = "../common", version = "14.0.0" } +datafusion-expr = { path = "../expr", version = "14.0.0" } +datafusion-row = { path = "../row", version = "14.0.0" } half = { version = "2.1", default-features = false } hashbrown = { version = "0.12", features = ["raw"] } itertools = { version = "0.10", features = ["use_std"] } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 823001c20cce..f1d000710064 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -41,9 +41,9 @@ json = ["pbjson", "serde", "serde_json"] [dependencies] arrow = "26.0.0" -datafusion = { path = "../core", version = "13.0.0" } -datafusion-common = { path = "../common", version = "13.0.0" } -datafusion-expr = { path = "../expr", version = "13.0.0" } +datafusion = { path = "../core", version = "14.0.0" } +datafusion-common = { path = "../common", version = "14.0.0" } +datafusion-expr = { path = "../expr", version = "14.0.0" } pbjson = { version = "0.5", optional = true } pbjson-types = { version = "0.5", optional = true } prost = "0.11.0" diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml index 794882025a38..1e3e7b757ff8 
100644 --- a/datafusion/row/Cargo.toml +++ b/datafusion/row/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-row" description = "Row backed by raw bytes for DataFusion query engine" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,7 +38,7 @@ jit = ["datafusion-jit"] [dependencies] arrow = "26.0.0" -datafusion-common = { path = "../common", version = "13.0.0" } -datafusion-jit = { path = "../jit", version = "13.0.0", optional = true } +datafusion-common = { path = "../common", version = "14.0.0" } +datafusion-jit = { path = "../jit", version = "14.0.0", optional = true } paste = "^1.0" rand = "0.8" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index ebffcb8a0b69..8939dc8e8ad5 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-sql" description = "DataFusion SQL Query Planner" -version = "13.0.0" +version = "14.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "README.md" @@ -38,6 +38,6 @@ unicode_expressions = [] [dependencies] arrow = { version = "26.0.0", default-features = false } -datafusion-common = { path = "../common", version = "13.0.0" } -datafusion-expr = { path = "../expr", version = "13.0.0" } +datafusion-common = { path = "../common", version = "14.0.0" } +datafusion-expr = { path = "../expr", version = "14.0.0" } sqlparser = "0.26"