From 2517bbb9f1b4d355235c102eb6b6fd669734c43b Mon Sep 17 00:00:00 2001 From: Qingping Hou Date: Fri, 30 Jul 2021 00:07:24 -0700 Subject: [PATCH] create changelog for datafusion and ballista release Created changelog for the following projects: datafusion 5.0.0 python 0.3.0 ballista 0.5.0 Other changes in automation: * updated dev/release/update_change_log.sh to take subproject as argument * added dev/update_ballista_versions.py to help update ballista crate versions. --- .github_changelog_generator | 6 +- ballista-examples/Cargo.toml | 2 +- ballista/CHANGELOG.md | 168 +++++++++++ ballista/rust/client/Cargo.toml | 4 +- ballista/rust/core/Cargo.toml | 2 +- ballista/rust/executor/Cargo.toml | 2 +- ballista/rust/scheduler/Cargo.toml | 2 +- datafusion/CHANGELOG.md | 303 ++++++++++++++++++++ datafusion/Cargo.toml | 2 +- dev/release/update_change_log-ballista.sh | 28 ++ dev/release/update_change_log-datafusion.sh | 28 ++ dev/release/update_change_log-python.sh | 28 ++ dev/release/update_change_log.sh | 41 ++- dev/update_arrow_deps.py | 2 +- dev/update_ballista_versions.py | 68 +++++ python/CHANGELOG.md | 70 +++++ python/Cargo.toml | 2 +- 17 files changed, 742 insertions(+), 16 deletions(-) create mode 100644 ballista/CHANGELOG.md create mode 100644 datafusion/CHANGELOG.md create mode 100755 dev/release/update_change_log-ballista.sh create mode 100755 dev/release/update_change_log-datafusion.sh create mode 100755 dev/release/update_change_log-python.sh create mode 100755 dev/update_ballista_versions.py create mode 100644 python/CHANGELOG.md diff --git a/.github_changelog_generator b/.github_changelog_generator index 49d20dcd9e5ce..6ee6508b7216f 100644 --- a/.github_changelog_generator +++ b/.github_changelog_generator @@ -21,10 +21,10 @@ # point to the old changelog in apache/arrow front-matter=For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md)\n # some issues are just documentation 
-add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]}} +add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}} # uncomment to not show PRs. TBD if we shown them or not. #pull-requests=false # so that the component is shown associated with the issue -issue-line-labels=ballista,datafusion,python +issue-line-labels=sql exclude-labels=development-process,invalid -breaking_labels=api-change +breaking-labels=api change diff --git a/ballista-examples/Cargo.toml b/ballista-examples/Cargo.toml index b7d40223c4693..6b9ed171ac59b 100644 --- a/ballista-examples/Cargo.toml +++ b/ballista-examples/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "ballista-examples" description = "Ballista usage examples" -version = "0.5.0-SNAPSHOT" +version = "0.5.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow "] diff --git a/ballista/CHANGELOG.md b/ballista/CHANGELOG.md new file mode 100644 index 0000000000000..2da49e0791a9a --- /dev/null +++ b/ballista/CHANGELOG.md @@ -0,0 +1,168 @@ + + +For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) + +# Changelog + +## [ballista-0.5.0](https://github.com/apache/arrow-datafusion/tree/ballista-0.5.0) (2021-07-31) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...ballista-0.5.0) + +**Breaking changes:** + +- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb)) +- Support DataFrame.collect for Ballista DataFrames [\#785](https://github.com/apache/arrow-datafusion/pull/785) ([andygrove](https://github.com/andygrove)) +- JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) 
([seddonm1](https://github.com/seddonm1)) +- UnresolvedShuffleExec should represent a single shuffle [\#727](https://github.com/apache/arrow-datafusion/pull/727) ([andygrove](https://github.com/andygrove)) +- Ballista: Make shuffle partitions configurable in benchmarks [\#702](https://github.com/apache/arrow-datafusion/pull/702) ([andygrove](https://github.com/andygrove)) +- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove)) +- Ballista: Rename QueryStageExec to ShuffleWriterExec [\#633](https://github.com/apache/arrow-datafusion/pull/633) ([andygrove](https://github.com/andygrove)) +- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) +- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) ([houqp](https://github.com/houqp)) +- Move ballista standalone mode to client [\#589](https://github.com/apache/arrow-datafusion/pull/589) ([edrevo](https://github.com/edrevo)) +- Ballista: Implement map-side shuffle [\#543](https://github.com/apache/arrow-datafusion/pull/543) ([andygrove](https://github.com/andygrove)) +- ShuffleReaderExec now supports multiple locations per partition [\#541](https://github.com/apache/arrow-datafusion/pull/541) ([andygrove](https://github.com/andygrove)) +- Make external hostname in executor optional [\#232](https://github.com/apache/arrow-datafusion/pull/232) ([edrevo](https://github.com/edrevo)) +- Remove namespace from executors [\#75](https://github.com/apache/arrow-datafusion/pull/75) ([edrevo](https://github.com/edrevo)) +- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp)) +- Read CSV format text from stdin or memory 
[\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind)) +- Remove Ballista DataFrame [\#48](https://github.com/apache/arrow-datafusion/pull/48) ([andygrove](https://github.com/andygrove)) +- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString)) + +**Implemented enhancements:** + +- Support DataFrame.collect for Ballista DataFrames [\#787](https://github.com/apache/arrow-datafusion/issues/787) +- Ballista: Prep for supporting shuffle correctly, part one [\#736](https://github.com/apache/arrow-datafusion/issues/736) +- Ballista: Implement physical plan serde for ShuffleWriterExec [\#710](https://github.com/apache/arrow-datafusion/issues/710) +- Ballista: Finish implementing shuffle mechanism [\#707](https://github.com/apache/arrow-datafusion/issues/707) +- Rename QueryStageExec to ShuffleWriterExec [\#542](https://github.com/apache/arrow-datafusion/issues/542) +- Ballista ShuffleReaderExec should be able to read from multiple locations per partition [\#540](https://github.com/apache/arrow-datafusion/issues/540) +- \[Ballista\] Use deployments in k8s user guide [\#473](https://github.com/apache/arrow-datafusion/issues/473) +- Ballista refactor QueryStageExec in preparation for map-side shuffle [\#458](https://github.com/apache/arrow-datafusion/issues/458) +- Ballista: Implement map-side of shuffle [\#456](https://github.com/apache/arrow-datafusion/issues/456) +- Refactor Ballista to separate Flight logic from execution logic [\#449](https://github.com/apache/arrow-datafusion/issues/449) +- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393) +- BallistaContext::collect\(\) logging is too noisy [\#352](https://github.com/apache/arrow-datafusion/issues/352) +- Update Ballista to use new physical plan formatter utility 
[\#343](https://github.com/apache/arrow-datafusion/issues/343) +- Add Ballista Getting Started documentation [\#329](https://github.com/apache/arrow-datafusion/issues/329) +- Remove references to ballistacompute Docker Hub repo [\#325](https://github.com/apache/arrow-datafusion/issues/325) +- Implement scalable distributed joins [\#63](https://github.com/apache/arrow-datafusion/issues/63) +- Remove hard-coded Ballista version from scripts [\#32](https://github.com/apache/arrow-datafusion/issues/32) +- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove)) +- Ballista shuffle is finally working as intended, providing scalable distributed joins [\#750](https://github.com/apache/arrow-datafusion/pull/750) ([andygrove](https://github.com/andygrove)) +- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb)) +- Implement serde for ShuffleWriterExec [\#712](https://github.com/apache/arrow-datafusion/pull/712) ([andygrove](https://github.com/andygrove)) +- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp)) +- Implement metrics for shuffle read and write [\#676](https://github.com/apache/arrow-datafusion/pull/676) ([andygrove](https://github.com/andygrove)) +- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove)) +- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove)) +- Add Keda autoscaling for ballista in k8s [\#586](https://github.com/apache/arrow-datafusion/pull/586) ([edrevo](https://github.com/edrevo)) +- Add some resiliency to lost executors [\#568](https://github.com/apache/arrow-datafusion/pull/568) 
([edrevo](https://github.com/edrevo)) +- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist)) +- Support anti join [\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan)) +- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist)) +- Refactor Ballista executor so that FlightService delegates to an Executor struct [\#450](https://github.com/apache/arrow-datafusion/pull/450) ([andygrove](https://github.com/andygrove)) +- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist)) +- Implement fmt\_as for ShuffleReaderExec [\#400](https://github.com/apache/arrow-datafusion/pull/400) ([andygrove](https://github.com/andygrove)) +- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist)) +- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) ([Jimexist](https://github.com/Jimexist)) +- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString)) +- Add query 19 to TPC-H regression tests [\#59](https://github.com/apache/arrow-datafusion/pull/59) ([Dandandan](https://github.com/Dandandan)) +- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan)) +- Add option param for standalone mode [\#42](https://github.com/apache/arrow-datafusion/pull/42) 
([djKooks](https://github.com/djKooks)) +- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan)) +- \[Ballista\] Docker files for ui [\#22](https://github.com/apache/arrow-datafusion/pull/22) ([msathis](https://github.com/msathis)) + +**Fixed bugs:** + +- Ballista: UnresolvedShuffleExec should only have a single stage\_id [\#726](https://github.com/apache/arrow-datafusion/issues/726) +- Ballista integration tests are failing [\#623](https://github.com/apache/arrow-datafusion/issues/623) +- Integration test build failure due to arrow-rs using unstable feature [\#596](https://github.com/apache/arrow-datafusion/issues/596) +- `cargo build` cannot build the project [\#531](https://github.com/apache/arrow-datafusion/issues/531) +- ShuffleReaderExec does not get formatted correctly in displayable physical plan [\#399](https://github.com/apache/arrow-datafusion/issues/399) +- Ballista: Prep for fixing shuffle mechansim, part 1 [\#738](https://github.com/apache/arrow-datafusion/pull/738) ([andygrove](https://github.com/andygrove)) +- Ballista: Shuffle write bug fix [\#714](https://github.com/apache/arrow-datafusion/pull/714) ([andygrove](https://github.com/andygrove)) +- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp)) +- MINOR: Fix integration tests by adding datafusion-cli module to docker image [\#322](https://github.com/apache/arrow-datafusion/pull/322) ([andygrove](https://github.com/andygrove)) + +**Documentation updates:** + +- Add Ballista examples [\#775](https://github.com/apache/arrow-datafusion/pull/775) ([andygrove](https://github.com/andygrove)) +- Update ballista.proto link in architecture doc [\#502](https://github.com/apache/arrow-datafusion/pull/502) ([terrycorley](https://github.com/terrycorley)) +- Update k8s user guide to use 
deployments [\#474](https://github.com/apache/arrow-datafusion/pull/474) ([edrevo](https://github.com/edrevo)) +- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist)) +- Make it easier for developers to find Ballista documentation [\#330](https://github.com/apache/arrow-datafusion/pull/330) ([andygrove](https://github.com/andygrove)) +- Instructions for cross-compiling Ballista to the Raspberry Pi [\#263](https://github.com/apache/arrow-datafusion/pull/263) ([andygrove](https://github.com/andygrove)) +- Add install guide in README [\#236](https://github.com/apache/arrow-datafusion/pull/236) ([djKooks](https://github.com/djKooks)) + +**Performance improvements:** + +- Ballista: Avoid sleeping between polling for tasks [\#698](https://github.com/apache/arrow-datafusion/pull/698) ([Dandandan](https://github.com/Dandandan)) +- Make BallistaContext::collect streaming [\#535](https://github.com/apache/arrow-datafusion/pull/535) ([edrevo](https://github.com/edrevo)) + +**Closed issues:** + +- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) +- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) +- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) +- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) +- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) +- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) +- Make Ballista not depend on arrow directly [\#446](https://github.com/apache/arrow-datafusion/issues/446) +- 
create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) +- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) +- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) +- Add Ballista to default cargo workspace [\#17](https://github.com/apache/arrow-datafusion/issues/17) +- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) +- Consolidate TPC-H benchmarks [\#6](https://github.com/apache/arrow-datafusion/issues/6) +- \[Ballista\] Fix integration test script [\#4](https://github.com/apache/arrow-datafusion/issues/4) +- Ballista should not have separate DataFrame implementation [\#2](https://github.com/apache/arrow-datafusion/issues/2) + +**Merged pull requests:** + +- Fix: Update clippy lints for Rust 1.54 [\#794](https://github.com/apache/arrow-datafusion/pull/794) ([alamb](https://github.com/alamb)) +- MINOR: Remove unused Ballista query execution code path [\#732](https://github.com/apache/arrow-datafusion/pull/732) ([andygrove](https://github.com/andygrove)) +- \[fix\] benchmark run with compose [\#666](https://github.com/apache/arrow-datafusion/pull/666) ([rdettai](https://github.com/rdettai)) +- bring back dev scripts for ballista [\#648](https://github.com/apache/arrow-datafusion/pull/648) ([Jimexist](https://github.com/Jimexist)) +- Remove unnecessary mutex [\#639](https://github.com/apache/arrow-datafusion/pull/639) ([edrevo](https://github.com/edrevo)) +- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp)) +- Fix build [\#627](https://github.com/apache/arrow-datafusion/pull/627) ([andygrove](https://github.com/andygrove)) +- in ballista also check for UI prettier changes [\#578](https://github.com/apache/arrow-datafusion/pull/578) 
([Jimexist](https://github.com/Jimexist)) +- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist)) +- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist)) +- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) +- make `VOLUME` declaration in tpch datagen docker absolute [\#466](https://github.com/apache/arrow-datafusion/pull/466) ([crepererum](https://github.com/crepererum)) +- Refactor QueryStageExec in preparation for implementing map-side shuffle [\#459](https://github.com/apache/arrow-datafusion/pull/459) ([andygrove](https://github.com/andygrove)) +- Simplified usage of `use arrow` in ballista. [\#447](https://github.com/apache/arrow-datafusion/pull/447) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday)) +- \#352: BallistaContext::collect\(\) logging is too noisy [\#394](https://github.com/apache/arrow-datafusion/pull/394) ([jgoday](https://github.com/jgoday)) +- cleanup function return type fn [\#350](https://github.com/apache/arrow-datafusion/pull/350) ([Jimexist](https://github.com/Jimexist)) +- Update Ballista to use new physical plan formatter utility [\#344](https://github.com/apache/arrow-datafusion/pull/344) ([andygrove](https://github.com/andygrove)) +- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb)) +- Remove references to Ballista Docker images published to ballistacompute Docker Hub repo [\#326](https://github.com/apache/arrow-datafusion/pull/326) 
([andygrove](https://github.com/andygrove)) +- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb)) +- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb)) +- Enable redundant\_field\_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan)) +- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) ([alamb](https://github.com/alamb)) +- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml index 5c7eb3802a104..3507a7b22a45d 100644 --- a/ballista/rust/client/Cargo.toml +++ b/ballista/rust/client/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "0.5.0-SNAPSHOT" +version = "0.5.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow "] @@ -37,4 +37,4 @@ datafusion = { path = "../../../datafusion" } [features] default = [] -standalone = ["ballista-executor", "ballista-scheduler"] \ No newline at end of file +standalone = ["ballista-executor", "ballista-scheduler"] diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml index ce72d2fda92d4..4648b589f6469 100644 --- a/ballista/rust/core/Cargo.toml +++ b/ballista/rust/core/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-core" description = "Ballista Distributed Compute" license = "Apache-2.0" -version = "0.5.0-SNAPSHOT" +version = "0.5.0" homepage = 
"https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow <dev@arrow.apache.org>"] diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml index 428a5bb0f01f5..25adabbbce12a 100644 --- a/ballista/rust/executor/Cargo.toml +++ b/ballista/rust/executor/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-executor" description = "Ballista Distributed Compute - Executor" license = "Apache-2.0" -version = "0.5.0-SNAPSHOT" +version = "0.5.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow <dev@arrow.apache.org>"] diff --git a/ballista/rust/scheduler/Cargo.toml b/ballista/rust/scheduler/Cargo.toml index 9bca8d9695714..f0ffb54d378c1 100644 --- a/ballista/rust/scheduler/Cargo.toml +++ b/ballista/rust/scheduler/Cargo.toml @@ -19,7 +19,7 @@ name = "ballista-scheduler" description = "Ballista Distributed Compute - Scheduler" license = "Apache-2.0" -version = "0.5.0-SNAPSHOT" +version = "0.5.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" authors = ["Apache Arrow <dev@arrow.apache.org>"] diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md new file mode 100644 index 0000000000000..088cb2dfa6037 --- /dev/null +++ b/datafusion/CHANGELOG.md @@ -0,0 +1,303 @@ + + +For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) + +# Changelog + +## [5.0.0](https://github.com/apache/arrow-datafusion/tree/5.0.0) (2021-07-31) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...5.0.0) + +**Breaking changes:** + +- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb)) +- JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1)) +- Show the result of all 
optimizer passes in EXPLAIN VERBOSE [\#759](https://github.com/apache/arrow-datafusion/pull/759) ([alamb](https://github.com/alamb)) +- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang)) +- Update API for extension planning to include logical plan [\#643](https://github.com/apache/arrow-datafusion/pull/643) ([alamb](https://github.com/alamb)) +- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove)) +- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist)) +- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) ([houqp](https://github.com/houqp)) +- Rewrite pruning logic in terms of PruningStatistics using Array trait \(option 2\) [\#426](https://github.com/apache/arrow-datafusion/pull/426) ([alamb](https://github.com/alamb)) +- Support reading from NdJson formatted data sources [\#404](https://github.com/apache/arrow-datafusion/pull/404) ([heymind](https://github.com/heymind)) +- Add metrics to RepartitionExec [\#398](https://github.com/apache/arrow-datafusion/pull/398) ([andygrove](https://github.com/andygrove)) +- Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/arrow-datafusion/pull/395) ([alamb](https://github.com/alamb)) +- Return Vec\<bool\> from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/arrow-datafusion/pull/370) ([alamb](https://github.com/alamb)) +- Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder [\#365](https://github.com/apache/arrow-datafusion/pull/365) ([alamb](https://github.com/alamb)) +- \[Datafusion\] NOW\(\) function support 
[\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis)) +- Implement select distinct [\#262](https://github.com/apache/arrow-datafusion/pull/262) ([Dandandan](https://github.com/Dandandan)) +- Refactor datafusion/src/physical\_plan/common.rs build\_file\_list to take less param and reuse code [\#253](https://github.com/apache/arrow-datafusion/pull/253) ([Jimexist](https://github.com/Jimexist)) +- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp)) +- Read CSV format text from stdin or memory [\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind)) +- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString)) + +**Implemented enhancements:** + +- Allow extension nodes to correctly plan physical expressions with relations [\#642](https://github.com/apache/arrow-datafusion/issues/642) +- Filters aren't passed down to table scans in a union [\#557](https://github.com/apache/arrow-datafusion/issues/557) +- Support pruning for `boolean` columns [\#490](https://github.com/apache/arrow-datafusion/issues/490) +- Implement SQLMetrics for RepartitionExec [\#397](https://github.com/apache/arrow-datafusion/issues/397) +- DataFusion benchmarks should show executed plan with metrics after query completes [\#396](https://github.com/apache/arrow-datafusion/issues/396) +- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393) +- Add Compare to GroupByScalar [\#364](https://github.com/apache/arrow-datafusion/issues/364) +- Reusable "row group pruning" logic [\#363](https://github.com/apache/arrow-datafusion/issues/363) +- Add an Order Preserving merge operator [\#362](https://github.com/apache/arrow-datafusion/issues/362) +- Implement 
Postgres compatible `now()` function [\#251](https://github.com/apache/arrow-datafusion/issues/251) +- COUNT DISTINCT does not support dictionary types [\#249](https://github.com/apache/arrow-datafusion/issues/249) +- Use standard make\_null\_array for CASE [\#222](https://github.com/apache/arrow-datafusion/issues/222) +- Implement date\_trunc\(\) function [\#203](https://github.com/apache/arrow-datafusion/issues/203) +- COUNT DISTINCT does not support for `Float64` [\#199](https://github.com/apache/arrow-datafusion/issues/199) +- Update SQLMetric to use atomics rather than a Mutex [\#30](https://github.com/apache/arrow-datafusion/issues/30) +- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove)) +- impl from str for column and scalar [\#762](https://github.com/apache/arrow-datafusion/pull/762) ([Jimexist](https://github.com/Jimexist)) +- impl fmt::Display for PlanType [\#752](https://github.com/apache/arrow-datafusion/pull/752) ([Jimexist](https://github.com/Jimexist)) +- Remove unnecessary projection in logical plan optimization phase [\#747](https://github.com/apache/arrow-datafusion/pull/747) ([waynexia](https://github.com/waynexia)) +- Support table columns alias [\#735](https://github.com/apache/arrow-datafusion/pull/735) ([Dandandan](https://github.com/Dandandan)) +- Derive PartialEq for datasource enums [\#734](https://github.com/apache/arrow-datafusion/pull/734) ([alamb](https://github.com/alamb)) +- Allow filetype to be lowercase, Implement FromStr for FileType [\#728](https://github.com/apache/arrow-datafusion/pull/728) ([Jimexist](https://github.com/Jimexist)) +- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb)) +- \#554: Lead/lag window function with offset and default value arguments [\#687](https://github.com/apache/arrow-datafusion/pull/687) 
([jgoday](https://github.com/jgoday)) +- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp)) +- Implement metrics for HashJoinExec [\#664](https://github.com/apache/arrow-datafusion/pull/664) ([andygrove](https://github.com/andygrove)) +- Show physical plan with metrics in benchmark [\#662](https://github.com/apache/arrow-datafusion/pull/662) ([andygrove](https://github.com/andygrove)) +- Allow non-equijoin filters in join condition [\#660](https://github.com/apache/arrow-datafusion/pull/660) ([Dandandan](https://github.com/Dandandan)) +- Add End-to-end test for parquet pruning + metrics for ParquetExec [\#657](https://github.com/apache/arrow-datafusion/pull/657) ([alamb](https://github.com/alamb)) +- Add support for leading field in interval [\#647](https://github.com/apache/arrow-datafusion/pull/647) ([Dandandan](https://github.com/Dandandan)) +- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove)) +- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove)) +- implement rank and dense\_rank function and refactor built-in window function evaluation [\#631](https://github.com/apache/arrow-datafusion/pull/631) ([Jimexist](https://github.com/Jimexist)) +- Improve "field not found" error messages [\#625](https://github.com/apache/arrow-datafusion/pull/625) ([andygrove](https://github.com/andygrove)) +- Support modulus op [\#577](https://github.com/apache/arrow-datafusion/pull/577) ([gangliao](https://github.com/gangliao)) +- implement `std::default::Default` for execution config [\#570](https://github.com/apache/arrow-datafusion/pull/570) ([Jimexist](https://github.com/Jimexist)) +- `to_timestamp_millis()`, `to_timestamp_micros()`, `to_timestamp_seconds()` 
[\#567](https://github.com/apache/arrow-datafusion/pull/567) ([velvia](https://github.com/velvia)) +- Filter push down for Union [\#559](https://github.com/apache/arrow-datafusion/pull/559) ([Dandandan](https://github.com/Dandandan)) +- Implement window functions with `partition_by` clause [\#558](https://github.com/apache/arrow-datafusion/pull/558) ([Jimexist](https://github.com/Jimexist)) +- support table alias in join clause [\#547](https://github.com/apache/arrow-datafusion/pull/547) ([houqp](https://github.com/houqp)) +- Not equal predicate in physical\_planning pruning [\#544](https://github.com/apache/arrow-datafusion/pull/544) ([jgoday](https://github.com/jgoday)) +- add error handling and boundary checking for window frames [\#530](https://github.com/apache/arrow-datafusion/pull/530) ([Jimexist](https://github.com/Jimexist)) +- Implement window functions with `order_by` clause [\#520](https://github.com/apache/arrow-datafusion/pull/520) ([Jimexist](https://github.com/Jimexist)) +- support group by column positions [\#519](https://github.com/apache/arrow-datafusion/pull/519) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7)) +- Implement constant folding for CAST [\#513](https://github.com/apache/arrow-datafusion/pull/513) ([msathis](https://github.com/msathis)) +- Add window frame constructs - alternative [\#506](https://github.com/apache/arrow-datafusion/pull/506) ([Jimexist](https://github.com/Jimexist)) +- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist)) +- Add support for boolean columns in pruning logic [\#500](https://github.com/apache/arrow-datafusion/pull/500) ([alamb](https://github.com/alamb)) +- \#215 resolve aliases for group by exprs [\#485](https://github.com/apache/arrow-datafusion/pull/485) ([jychen7](https://github.com/jychen7)) +- Support anti join 
[\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan)) +- Support semi join [\#470](https://github.com/apache/arrow-datafusion/pull/470) ([Dandandan](https://github.com/Dandandan)) +- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist)) +- Remove redundant filters \(e.g. c\> 5 AND c\>5 --\> c\>5\) [\#436](https://github.com/apache/arrow-datafusion/pull/436) ([jgoday](https://github.com/jgoday)) +- fix: display the content of debug explain [\#434](https://github.com/apache/arrow-datafusion/pull/434) ([NGA-TRAN](https://github.com/NGA-TRAN)) +- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist)) +- add support for ndjson for datafusion-cli [\#427](https://github.com/apache/arrow-datafusion/pull/427) ([Jimexist](https://github.com/Jimexist)) +- add `first_value`, `last_value`, and `nth_value` built-in window functions [\#403](https://github.com/apache/arrow-datafusion/pull/403) ([Jimexist](https://github.com/Jimexist)) +- export both `now` and `random` functions [\#389](https://github.com/apache/arrow-datafusion/pull/389) ([Jimexist](https://github.com/Jimexist)) +- Function to create `ArrayRef` from an iterator of ScalarValues [\#381](https://github.com/apache/arrow-datafusion/pull/381) ([alamb](https://github.com/alamb)) +- Sort preserving merge \(\#362\) [\#379](https://github.com/apache/arrow-datafusion/pull/379) ([tustvold](https://github.com/tustvold)) +- Add support for multiple partitions with SortExec \(\#362\) [\#378](https://github.com/apache/arrow-datafusion/pull/378) ([tustvold](https://github.com/tustvold)) +- add window expression stream, delegated window aggregation to aggregate functions, and implement `row_number` [\#375](https://github.com/apache/arrow-datafusion/pull/375)
([Jimexist](https://github.com/Jimexist)) +- Add PartialOrd and Ord to GroupByScalar \(\#364\) [\#368](https://github.com/apache/arrow-datafusion/pull/368) ([tustvold](https://github.com/tustvold)) +- Implement readable explain plans for physical plans [\#337](https://github.com/apache/arrow-datafusion/pull/337) ([alamb](https://github.com/alamb)) +- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist)) +- Use NullArray to Pass row count to ScalarFunctions that take 0 arguments [\#328](https://github.com/apache/arrow-datafusion/pull/328) ([Jimexist](https://github.com/Jimexist)) +- add --quiet/-q flag and allow timing info to be turned on/off [\#323](https://github.com/apache/arrow-datafusion/pull/323) ([Jimexist](https://github.com/Jimexist)) +- Implement hash partitioned aggregation [\#320](https://github.com/apache/arrow-datafusion/pull/320) ([Dandandan](https://github.com/Dandandan)) +- Support COUNT\(DISTINCT timestamps\) [\#319](https://github.com/apache/arrow-datafusion/pull/319) ([charlibot](https://github.com/charlibot)) +- add random SQL function [\#303](https://github.com/apache/arrow-datafusion/pull/303) ([Jimexist](https://github.com/Jimexist)) +- allow datafusion cli to take -- comments [\#296](https://github.com/apache/arrow-datafusion/pull/296) ([Jimexist](https://github.com/Jimexist)) +- Add json print format mode to datafusion cli [\#295](https://github.com/apache/arrow-datafusion/pull/295) ([Jimexist](https://github.com/Jimexist)) +- Add print format param with support for tsv print format to datafusion cli [\#292](https://github.com/apache/arrow-datafusion/pull/292) ([Jimexist](https://github.com/Jimexist)) +- Add print format param and support for csv print format to datafusion cli [\#289](https://github.com/apache/arrow-datafusion/pull/289) 
([Jimexist](https://github.com/Jimexist)) +- allow datafusion-cli to take a file param [\#285](https://github.com/apache/arrow-datafusion/pull/285) ([Jimexist](https://github.com/Jimexist)) +- add param validation for datafusion-cli [\#284](https://github.com/apache/arrow-datafusion/pull/284) ([Jimexist](https://github.com/Jimexist)) +- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) ([Jimexist](https://github.com/Jimexist)) +- Implement count distinct for dictionary arrays [\#256](https://github.com/apache/arrow-datafusion/pull/256) ([alamb](https://github.com/alamb)) +- Count distinct floats [\#252](https://github.com/apache/arrow-datafusion/pull/252) ([pjmore](https://github.com/pjmore)) +- Add rule to eliminate `LIMIT 0` and replace it with an `EmptyRelation` [\#213](https://github.com/apache/arrow-datafusion/pull/213) ([Dandandan](https://github.com/Dandandan)) +- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString)) +- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan)) +- Re-export Arrow and Parquet crates from DataFusion [\#39](https://github.com/apache/arrow-datafusion/pull/39) ([returnString](https://github.com/returnString)) +- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan)) +- \[ARROW-12441\] \[DataFusion\] Cross join implementation [\#11](https://github.com/apache/arrow-datafusion/pull/11) ([Dandandan](https://github.com/Dandandan)) + +**Fixed bugs:** + +- Projection pushdown removes unqualified column names even when they are used [\#617](https://github.com/apache/arrow-datafusion/issues/617) +- Panic while running join 
datatypes/schema.rs:165:10 [\#601](https://github.com/apache/arrow-datafusion/issues/601) +- Indentation is incorrect for joins in formatted physical plans [\#345](https://github.com/apache/arrow-datafusion/issues/345) +- Error while running `COUNT DISTINCT (timestamp)`: 'Unexpected DataType for list [\#314](https://github.com/apache/arrow-datafusion/issues/314) +- When joining two tables, get Error: Plan\("Schema contains duplicate unqualified field name \'xxx\'"\) [\#311](https://github.com/apache/arrow-datafusion/issues/311) +- Incorrect answers with SELECT DISTINCT queries [\#250](https://github.com/apache/arrow-datafusion/issues/250) +- Intermittent failure in CI join\_with\_hash\_collision [\#227](https://github.com/apache/arrow-datafusion/issues/227) +- `Concat` from Dataframe API no longer accepts multiple expressions [\#226](https://github.com/apache/arrow-datafusion/issues/226) +- \#723 limit pruning rule to simple expression [\#764](https://github.com/apache/arrow-datafusion/pull/764) ([lvheyang](https://github.com/lvheyang)) +- \#699 fix return type conflict when calling builtin math functions [\#716](https://github.com/apache/arrow-datafusion/pull/716) ([lvheyang](https://github.com/lvheyang)) +- Fix Date32 and Date64 parquet row group pruning [\#690](https://github.com/apache/arrow-datafusion/pull/690) ([alamb](https://github.com/alamb)) +- Remove qualifiers on pushed down predicates / Fix parquet pruning [\#689](https://github.com/apache/arrow-datafusion/pull/689) ([alamb](https://github.com/alamb)) +- use `Weak` ptr to break catalog list \<\> info schema cyclic reference [\#681](https://github.com/apache/arrow-datafusion/pull/681) ([crepererum](https://github.com/crepererum)) +- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp)) +- fix 621, where unnamed window functions shall be differentiated by partition and order by clause
[\#622](https://github.com/apache/arrow-datafusion/pull/622) ([Jimexist](https://github.com/Jimexist)) +- RFC: Do not prune out unnecessary columns with unqualified references [\#619](https://github.com/apache/arrow-datafusion/pull/619) ([alamb](https://github.com/alamb)) +- \[fix\] select \* on empty table [\#613](https://github.com/apache/arrow-datafusion/pull/613) ([rdettai](https://github.com/rdettai)) +- fix 592, support alias in window functions [\#607](https://github.com/apache/arrow-datafusion/pull/607) ([Jimexist](https://github.com/Jimexist)) +- RepartitionExec should not error if output has hung up [\#576](https://github.com/apache/arrow-datafusion/pull/576) ([alamb](https://github.com/alamb)) +- Fix pruning on not equal predicate [\#561](https://github.com/apache/arrow-datafusion/pull/561) ([alamb](https://github.com/alamb)) +- hash float arrays using primitive unsigned integer type [\#556](https://github.com/apache/arrow-datafusion/pull/556) ([houqp](https://github.com/houqp)) +- Return errors properly from RepartitionExec [\#521](https://github.com/apache/arrow-datafusion/pull/521) ([alamb](https://github.com/alamb)) +- refactor sort exec stream and combine batches [\#515](https://github.com/apache/arrow-datafusion/pull/515) ([Jimexist](https://github.com/Jimexist)) +- Fix display of execution time in datafusion-cli [\#514](https://github.com/apache/arrow-datafusion/pull/514) ([Dandandan](https://github.com/Dandandan)) +- Wrong aggregation arguments error. [\#505](https://github.com/apache/arrow-datafusion/pull/505) ([jgoday](https://github.com/jgoday)) +- fix window aggregation with alias and add integration test case [\#454](https://github.com/apache/arrow-datafusion/pull/454) ([Jimexist](https://github.com/Jimexist)) +- fix: don't duplicate existing filters [\#409](https://github.com/apache/arrow-datafusion/pull/409) ([e-dard](https://github.com/e-dard)) +- Fixed incorrect logical type in GroupByScalar.
[\#391](https://github.com/apache/arrow-datafusion/pull/391) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- Fix indented display for multi-child nodes [\#358](https://github.com/apache/arrow-datafusion/pull/358) ([alamb](https://github.com/alamb)) +- Fix SQL planner to support multibyte column names [\#357](https://github.com/apache/arrow-datafusion/pull/357) ([agatan](https://github.com/agatan)) +- Fix wrong projection 'optimization' [\#268](https://github.com/apache/arrow-datafusion/pull/268) ([Dandandan](https://github.com/Dandandan)) +- Fix Left join implementation is incorrect for 0 or multiple batches on the right side [\#238](https://github.com/apache/arrow-datafusion/pull/238) ([Dandandan](https://github.com/Dandandan)) +- Count distinct boolean [\#230](https://github.com/apache/arrow-datafusion/pull/230) ([pjmore](https://github.com/pjmore)) +- Fix Filter / where clause without column names is removed in optimization pass [\#225](https://github.com/apache/arrow-datafusion/pull/225) ([Dandandan](https://github.com/Dandandan)) + +**Documentation updates:** + +- No way to get to the examples from docs.rs [\#186](https://github.com/apache/arrow-datafusion/issues/186) +- Update docs to use vendored version of arrow [\#772](https://github.com/apache/arrow-datafusion/pull/772) ([alamb](https://github.com/alamb)) +- update stale documentations related to window functions [\#598](https://github.com/apache/arrow-datafusion/pull/598) ([Jimexist](https://github.com/Jimexist)) +- update readme to reflect work on window functions [\#471](https://github.com/apache/arrow-datafusion/pull/471) ([Jimexist](https://github.com/Jimexist)) +- Add examples section to datafusion crate doc [\#457](https://github.com/apache/arrow-datafusion/pull/457) ([mluts](https://github.com/mluts)) +- add invariants spec [\#443](https://github.com/apache/arrow-datafusion/pull/443) ([houqp](https://github.com/houqp)) +- add output field name rfc 
[\#422](https://github.com/apache/arrow-datafusion/pull/422) ([houqp](https://github.com/houqp)) +- Update more docs and also the developer.md doc [\#414](https://github.com/apache/arrow-datafusion/pull/414) ([Jimexist](https://github.com/Jimexist)) +- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist)) +- Add new logo svg with white background [\#313](https://github.com/apache/arrow-datafusion/pull/313) ([parthsarthy](https://github.com/parthsarthy)) +- Add projects \(Squirtle and Tensorbase\) to list in readme [\#312](https://github.com/apache/arrow-datafusion/pull/312) ([parthsarthy](https://github.com/parthsarthy)) +- docs - fix the ballista link [\#274](https://github.com/apache/arrow-datafusion/pull/274) ([haoxins](https://github.com/haoxins)) +- misc\(README\): Replace Cube.js with Cube Store [\#248](https://github.com/apache/arrow-datafusion/pull/248) ([ovr](https://github.com/ovr)) +- Initial docs for SQL syntax [\#242](https://github.com/apache/arrow-datafusion/pull/242) ([Dandandan](https://github.com/Dandandan)) +- Deduplicate README.md [\#79](https://github.com/apache/arrow-datafusion/pull/79) ([msathis](https://github.com/msathis)) + +**Performance improvements:** + +- perf: improve performance of `SortPreservingMergeExec` operator [\#722](https://github.com/apache/arrow-datafusion/pull/722) ([e-dard](https://github.com/e-dard)) +- perf: Improve materialisation performance of SortPreservingMergeExec [\#691](https://github.com/apache/arrow-datafusion/pull/691) ([e-dard](https://github.com/e-dard)) +- Optimize count\(\*\) with table statistics [\#620](https://github.com/apache/arrow-datafusion/pull/620) ([Dandandan](https://github.com/Dandandan)) +- optimize window function's `find_ranges_in_range` [\#595](https://github.com/apache/arrow-datafusion/pull/595) ([Jimexist](https://github.com/Jimexist)) +- Collapse sort into window expr and do sort within logical phase 
[\#571](https://github.com/apache/arrow-datafusion/pull/571) ([Jimexist](https://github.com/Jimexist)) +- Use repartition in window functions to speed up [\#569](https://github.com/apache/arrow-datafusion/pull/569) ([Jimexist](https://github.com/Jimexist)) +- Constant fold / optimize `to_timestamp` function during planning [\#387](https://github.com/apache/arrow-datafusion/pull/387) ([msathis](https://github.com/msathis)) +- Speed up `create_batch_from_map` [\#339](https://github.com/apache/arrow-datafusion/pull/339) ([Dandandan](https://github.com/Dandandan)) +- Simplify math expression code \(use unary kernel\) [\#309](https://github.com/apache/arrow-datafusion/pull/309) ([Dandandan](https://github.com/Dandandan)) + +**Closed issues:** + +- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) +- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) +- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) +- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) +- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) +- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) +- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) +- COUNT DISTINCT does not support for `Boolean` [\#202](https://github.com/apache/arrow-datafusion/issues/202) +- Read CSV format text from stdin or memory [\#198](https://github.com/apache/arrow-datafusion/issues/198) +- Fix null handling hash join [\#195](https://github.com/apache/arrow-datafusion/issues/195) 
+- Allow TableProviders to indicate their type for the information schema [\#191](https://github.com/apache/arrow-datafusion/issues/191) +- Make DataFrame extensible [\#190](https://github.com/apache/arrow-datafusion/issues/190) +- TPC-H Query 19 [\#170](https://github.com/apache/arrow-datafusion/issues/170) +- TPC-H Query 7 [\#161](https://github.com/apache/arrow-datafusion/issues/161) +- Upgrade hashbrown to 0.10 [\#151](https://github.com/apache/arrow-datafusion/issues/151) +- Implement vectorized hashing for hash aggregate [\#149](https://github.com/apache/arrow-datafusion/issues/149) +- More efficient LEFT join implementation [\#143](https://github.com/apache/arrow-datafusion/issues/143) +- Implement vectorized hashing [\#142](https://github.com/apache/arrow-datafusion/issues/142) +- RFC Roadmap for 2021 \(DataFusion\) [\#140](https://github.com/apache/arrow-datafusion/issues/140) +- Implement hash partitioning [\#131](https://github.com/apache/arrow-datafusion/issues/131) +- Grouping by column position [\#110](https://github.com/apache/arrow-datafusion/issues/110) +- \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107) +- \[Rust\] Add support for JSON data sources [\#103](https://github.com/apache/arrow-datafusion/issues/103) +- \[Rust\] Implement metrics framework [\#95](https://github.com/apache/arrow-datafusion/issues/95) +- Publically export Arrow crate from datafusion [\#36](https://github.com/apache/arrow-datafusion/issues/36) +- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) +- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) +- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) + +**Merged pull requests:** + +- Remove GroupByScalar and use ScalarValue in preparation for supporting null values in 
GroupBy [\#786](https://github.com/apache/arrow-datafusion/pull/786) ([alamb](https://github.com/alamb)) +- Use consistent version of string\_to\_timestamp\_nanos in DataFusion [\#767](https://github.com/apache/arrow-datafusion/pull/767) ([alamb](https://github.com/alamb)) +- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) +- Test for parquet pruning disabling [\#754](https://github.com/apache/arrow-datafusion/pull/754) ([alamb](https://github.com/alamb)) +- Add explain verbose with limit push down [\#751](https://github.com/apache/arrow-datafusion/pull/751) ([Jimexist](https://github.com/Jimexist)) +- Move assert\_batches\_eq! macros to test\_utils.rs [\#746](https://github.com/apache/arrow-datafusion/pull/746) ([alamb](https://github.com/alamb)) +- Show optimized physical and logical plans in EXPLAIN [\#744](https://github.com/apache/arrow-datafusion/pull/744) ([alamb](https://github.com/alamb)) +- update `python` crate to support latest pyo3 syntax and gil semantics [\#741](https://github.com/apache/arrow-datafusion/pull/741) ([Jimexist](https://github.com/Jimexist)) +- update `python` crate dependencies [\#740](https://github.com/apache/arrow-datafusion/pull/740) ([Jimexist](https://github.com/Jimexist)) +- provide more details on required .parquet file extension error message [\#729](https://github.com/apache/arrow-datafusion/pull/729) ([Jimexist](https://github.com/Jimexist)) +- split up windows functions into a dedicated module with separate files [\#724](https://github.com/apache/arrow-datafusion/pull/724) ([Jimexist](https://github.com/Jimexist)) +- Use pytest in integration test [\#715](https://github.com/apache/arrow-datafusion/pull/715) ([Jimexist](https://github.com/Jimexist)) +- replace once iter chain with array::IntoIter [\#704](https://github.com/apache/arrow-datafusion/pull/704) ([houqp](https://github.com/houqp)) +-
avoid iterator materialization in column index lookup [\#703](https://github.com/apache/arrow-datafusion/pull/703) ([houqp](https://github.com/houqp)) +- Fix build with 1.52.1 [\#696](https://github.com/apache/arrow-datafusion/pull/696) ([alamb](https://github.com/alamb)) +- Fix test output due to logical merge conflict [\#694](https://github.com/apache/arrow-datafusion/pull/694) ([alamb](https://github.com/alamb)) +- Fix typo in DEVELOPERS.md [\#692](https://github.com/apache/arrow-datafusion/pull/692) ([lvheyang](https://github.com/lvheyang)) +- add more integration tests [\#668](https://github.com/apache/arrow-datafusion/pull/668) ([Jimexist](https://github.com/Jimexist)) +- Bump arrow and parquet versions to 4.4 [\#654](https://github.com/apache/arrow-datafusion/pull/654) ([toddtreece](https://github.com/toddtreece)) +- Add query 15 to TPC-H queries [\#645](https://github.com/apache/arrow-datafusion/pull/645) ([Dandandan](https://github.com/Dandandan)) +- Improve error message and comments [\#641](https://github.com/apache/arrow-datafusion/pull/641) ([alamb](https://github.com/alamb)) +- add integration tests for rank, dense\_rank, fix last\_value evaluation with rank [\#638](https://github.com/apache/arrow-datafusion/pull/638) ([Jimexist](https://github.com/Jimexist)) +- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp)) +- use Into\<Column\> as argument type wherever applicable [\#615](https://github.com/apache/arrow-datafusion/pull/615) ([houqp](https://github.com/houqp)) +- reuse alias map in aggregate logical planning and refactor position resolution [\#606](https://github.com/apache/arrow-datafusion/pull/606) ([Jimexist](https://github.com/Jimexist)) +- fix clippy warnings [\#581](https://github.com/apache/arrow-datafusion/pull/581) ([Jimexist](https://github.com/Jimexist)) +- Add benchmarks to window function queries [\#564](https://github.com/apache/arrow-datafusion/pull/564)
([Jimexist](https://github.com/Jimexist)) +- reuse code for now function expr creation [\#548](https://github.com/apache/arrow-datafusion/pull/548) ([houqp](https://github.com/houqp)) +- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist)) +- Refactor hash aggregates's planner building code [\#539](https://github.com/apache/arrow-datafusion/pull/539) ([Jimexist](https://github.com/Jimexist)) +- Cleanup Repartition Exec code [\#538](https://github.com/apache/arrow-datafusion/pull/538) ([alamb](https://github.com/alamb)) +- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist)) +- remove redundant `into_iter()` calls [\#527](https://github.com/apache/arrow-datafusion/pull/527) ([Jimexist](https://github.com/Jimexist)) +- Fix 517 - move `window_frames` module to `logical_plan` [\#518](https://github.com/apache/arrow-datafusion/pull/518) ([Jimexist](https://github.com/Jimexist)) +- Refactor window aggregation, simplify batch processing logic [\#516](https://github.com/apache/arrow-datafusion/pull/516) ([Jimexist](https://github.com/Jimexist)) +- Add datafusion::test\_util, resolve test data paths without env vars [\#498](https://github.com/apache/arrow-datafusion/pull/498) ([mluts](https://github.com/mluts)) +- Avoid warnings in tests when compiling without default features [\#489](https://github.com/apache/arrow-datafusion/pull/489) ([alamb](https://github.com/alamb)) +- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) +- use prettier check in CI [\#453](https://github.com/apache/arrow-datafusion/pull/453) ([Jimexist](https://github.com/Jimexist)) +- Optimize `nth_value`, remove `first_value`, `last_value` structs and use idiomatic rust style 
[\#452](https://github.com/apache/arrow-datafusion/pull/452) ([Jimexist](https://github.com/Jimexist)) +- Fixed typo / logical merge conflict [\#433](https://github.com/apache/arrow-datafusion/pull/433) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- include test data and add aggregation tests in integration test [\#425](https://github.com/apache/arrow-datafusion/pull/425) ([Jimexist](https://github.com/Jimexist)) +- Add some padding around the logo [\#411](https://github.com/apache/arrow-datafusion/pull/411) ([parthsarthy](https://github.com/parthsarthy)) +- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday)) +- refactor datafusion/`scalar_value` to use more macro and avoid dup code [\#392](https://github.com/apache/arrow-datafusion/pull/392) ([Jimexist](https://github.com/Jimexist)) +- Update TPC-H benchmark to show physical plan when debug mode is enabled [\#386](https://github.com/apache/arrow-datafusion/pull/386) ([andygrove](https://github.com/andygrove)) +- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb)) +- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb)) +- Update PR template by commenting out instructions [\#315](https://github.com/apache/arrow-datafusion/pull/315) ([alamb](https://github.com/alamb)) +- fix clippy warning [\#286](https://github.com/apache/arrow-datafusion/pull/286) ([Jimexist](https://github.com/Jimexist)) +- add integration test to compare datafusion-cli against psql [\#281](https://github.com/apache/arrow-datafusion/pull/281) ([Jimexist](https://github.com/Jimexist)) +- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb)) +- Use multi-stage build dockerfile in datafusion-cli and reduce image size from 2.16GB 
to 89.9MB [\#266](https://github.com/apache/arrow-datafusion/pull/266) ([Jimexist](https://github.com/Jimexist)) +- Enable redundant\_field\_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan)) +- fix clippy lint [\#259](https://github.com/apache/arrow-datafusion/pull/259) ([alamb](https://github.com/alamb)) +- Move datafusion-cli to new crate [\#231](https://github.com/apache/arrow-datafusion/pull/231) ([Dandandan](https://github.com/Dandandan)) +- Make test join\_with\_hash\_collision deterministic [\#229](https://github.com/apache/arrow-datafusion/pull/229) ([Dandandan](https://github.com/Dandandan)) +- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) ([alamb](https://github.com/alamb)) +- Use standard make\_null\_array for CASE [\#223](https://github.com/apache/arrow-datafusion/pull/223) ([alamb](https://github.com/alamb)) +- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) +- MINOR: Remove empty rust dir [\#61](https://github.com/apache/arrow-datafusion/pull/61) ([andygrove](https://github.com/andygrove)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index 2f1e997c3596f..1560b398f7733 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "4.0.0-SNAPSHOT" +version = "5.0.0" homepage = "https://github.com/apache/arrow-datafusion" repository = "https://github.com/apache/arrow-datafusion" readme = "../README.md" diff --git a/dev/release/update_change_log-ballista.sh 
b/dev/release/update_change_log-ballista.sh new file mode 100755 index 0000000000000..68193156622a2 --- /dev/null +++ b/dev/release/update_change_log-ballista.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Usage: +# CHANGELOG_GITHUB_TOKEN= ./update_change_log-ballista.sh + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" + +CURRENT_VER=$(grep version "${SOURCE_TOP_DIR}/ballista/rust/client/Cargo.toml" | head -n 1 | awk '{print $3}' | tr -d '"') +${SOURCE_DIR}/update_change_log.sh ballista 4.0.0 "ballista-${CURRENT_VER}" diff --git a/dev/release/update_change_log-datafusion.sh b/dev/release/update_change_log-datafusion.sh new file mode 100755 index 0000000000000..f0f455ad1c9b5 --- /dev/null +++ b/dev/release/update_change_log-datafusion.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Usage: +# CHANGELOG_GITHUB_TOKEN= ./update_change_log-datafusion.sh + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" + +CURRENT_VER=$(grep version "${SOURCE_TOP_DIR}/datafusion/Cargo.toml" | head -n 1 | awk '{print $3}' | tr -d '"') +${SOURCE_DIR}/update_change_log.sh datafusion 4.0.0 "${CURRENT_VER}" diff --git a/dev/release/update_change_log-python.sh b/dev/release/update_change_log-python.sh new file mode 100755 index 0000000000000..a48a5b657c5f3 --- /dev/null +++ b/dev/release/update_change_log-python.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +# Usage: +# CHANGELOG_GITHUB_TOKEN= ./update_change_log-python.sh + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" + +CURRENT_VER=$(grep version "${SOURCE_TOP_DIR}/python/Cargo.toml" | head -n 1 | awk '{print $3}' | tr -d '"') +${SOURCE_DIR}/update_change_log.sh python 4.0.0 "python-${CURRENT_VER}" diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index 4ee9e2eb1e498..0c9c2332ce704 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -27,13 +27,23 @@ # arrow-datafusion/.github_changelog_generator # # Usage: -# CHANGELOG_GITHUB_TOKEN= ./update_change_log.sh +# CHANGELOG_GITHUB_TOKEN= ./update_change_log.sh set -e SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" +if [[ "$#" -ne 3 ]]; then + echo "USAGE: $0 PROJECT FROM_VER TO_VER" + exit 1 +fi + +PROJECT=$1 +FROM_VER=$2 +TO_VER=$3 +OUTPUT_PATH="${PROJECT}/CHANGELOG.md" + pushd ${SOURCE_TOP_DIR} docker run -it --rm \ -e CHANGELOG_GITHUB_TOKEN=$CHANGELOG_GITHUB_TOKEN \ @@ -41,7 +51,30 @@ docker run -it --rm \ githubchangeloggenerator/github-changelog-generator \ --user apache \ --project arrow-datafusion \ - --since-tag 4.0.0 \ - --future-release 5.0.0 + --since-tag "${FROM_VER}" \ + --include-labels "${PROJECT}" \ + --output "${OUTPUT_PATH}" \ + --future-release "${TO_VER}" + +sed -i "s/\\\n/\n\n/" "${OUTPUT_PATH}" + +echo ' +' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp +mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}" diff --git a/dev/update_arrow_deps.py b/dev/update_arrow_deps.py index 44bdf4235d1c6..69fcdc84ab8fd 100755 --- a/dev/update_arrow_deps.py +++ b/dev/update_arrow_deps.py @@ -17,7 +17,7 @@ # limitations under the License. 
# -# Script that updates the arrow dependencies in datafusion and ballista, locall +# Script that updates the arrow dependencies in datafusion and ballista, locally # # installation: # pip install tomlkit requests diff --git a/dev/update_ballista_versions.py b/dev/update_ballista_versions.py new file mode 100755 index 0000000000000..cb75a9a71c479 --- /dev/null +++ b/dev/update_ballista_versions.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Script that updates versions for ballista crates, locally +# +# dependencies: +# pip install tomlkit + +import os +import argparse +from pathlib import Path +import tomlkit + + +def update_cargo_toml(cargo_toml: str, new_version: str): + print(f'updating {cargo_toml}') + with open(cargo_toml) as f: + data = f.read() + + doc = tomlkit.parse(data) + doc.get('package')['version'] = new_version + + with open(cargo_toml, 'w') as f: + f.write(tomlkit.dumps(doc)) + + +def main(): + parser = argparse.ArgumentParser(description='Update ballista crate versions.') + parser.add_argument('new_version', type=str, help='new ballista version') + args = parser.parse_args() + + repo_root = Path(__file__).parent.parent.absolute() + ballista_crates = set([ + os.path.join(repo_root, rel_path, "Cargo.toml") + for rel_path in [ + 'ballista-examples', + 'ballista/rust/core', + 'ballista/rust/scheduler', + 'ballista/rust/executor', + 'ballista/rust/client', + ] + ]) + new_version = args.new_version + + print(f'Updating ballista versions in {repo_root} to {new_version}') + + for cargo_toml in ballista_crates: + update_cargo_toml(cargo_toml, new_version) + + +if __name__ == "__main__": + main() diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md new file mode 100644 index 0000000000000..d8bdd13e8ce0a --- /dev/null +++ b/python/CHANGELOG.md @@ -0,0 +1,70 @@ + + +For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) + +# Changelog + +## [python-0.3.0](https://github.com/apache/arrow-datafusion/tree/python-0.3.0) (2021-07-31) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...python-0.3.0) + +**Implemented enhancements:** + +- add more math functions and unit tests to `python` crate [\#748](https://github.com/apache/arrow-datafusion/pull/748) ([Jimexist](https://github.com/Jimexist)) +- Implement missing join types for Python dataframe [\#503](https://github.com/apache/arrow-datafusion/pull/503) 
([Dandandan](https://github.com/Dandandan)) +- Add missing functions to python [\#388](https://github.com/apache/arrow-datafusion/pull/388) ([jgoday](https://github.com/jgoday)) + +**Fixed bugs:** + +- fix maturin version in pyproject.toml [\#756](https://github.com/apache/arrow-datafusion/pull/756) ([Jimexist](https://github.com/Jimexist)) +- fix pyarrow type id mapping in `python` crate [\#742](https://github.com/apache/arrow-datafusion/pull/742) ([Jimexist](https://github.com/Jimexist)) + +**Closed issues:** + +- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) +- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) +- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) +- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) +- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) +- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) +- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) +- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) +- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) +- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) + +**Merged pull requests:** + +- fix python binding for `concat`, `concat_ws`, and `random` [\#768](https://github.com/apache/arrow-datafusion/pull/768) ([Jimexist](https://github.com/Jimexist)) +- fix 226, 
make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) +- fix python crate with the changes to logical plan builder [\#650](https://github.com/apache/arrow-datafusion/pull/650) ([Jimexist](https://github.com/Jimexist)) +- use nightly nightly-2021-05-10 [\#536](https://github.com/apache/arrow-datafusion/pull/536) ([Jimexist](https://github.com/Jimexist)) +- Define the unittests using pytest [\#493](https://github.com/apache/arrow-datafusion/pull/493) ([kszucs](https://github.com/kszucs)) +- use requirements.txt to formalize python deps [\#484](https://github.com/apache/arrow-datafusion/pull/484) ([Jimexist](https://github.com/Jimexist)) +- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) +- simplify python function definitions [\#477](https://github.com/apache/arrow-datafusion/pull/477) ([Jimexist](https://github.com/Jimexist)) +- Expose DataFrame::sort in the python bindings [\#469](https://github.com/apache/arrow-datafusion/pull/469) ([kszucs](https://github.com/kszucs)) +- Revert "Revert "Add datafusion-python \(\#69\)" \(\#257\)" [\#270](https://github.com/apache/arrow-datafusion/pull/270) ([andygrove](https://github.com/andygrove)) +- Revert "Add datafusion-python \(\#69\)" [\#257](https://github.com/apache/arrow-datafusion/pull/257) ([andygrove](https://github.com/andygrove)) +- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) +- Add datafusion-python [\#69](https://github.com/apache/arrow-datafusion/pull/69) ([jorgecarleitao](https://github.com/jorgecarleitao)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git 
a/python/Cargo.toml b/python/Cargo.toml index fe84e5234c333..60cc74dfc89e0 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion" -version = "0.2.1" +version = "0.3.0" homepage = "https://github.com/apache/arrow" repository = "https://github.com/apache/arrow" authors = ["Apache Arrow <dev@arrow.apache.org>"]