From 2517bbb9f1b4d355235c102eb6b6fd669734c43b Mon Sep 17 00:00:00 2001
From: Qingping Hou <dave2008713@gmail.com>
Date: Fri, 30 Jul 2021 00:07:24 -0700
Subject: [PATCH] create changelog for datafusion and ballista release

Created changelog for the following projects:

datafusion 5.0.0
python 0.3.0
ballista 0.5.0

Other changes in automation:

* updated dev/release/update_change_log.sh to take subproject as argument
* added dev/update_ballista_versions.py to help update ballista crate versions.
---
 .github_changelog_generator                 |   6 +-
 ballista-examples/Cargo.toml                |   2 +-
 ballista/CHANGELOG.md                       | 168 +++++++++++
 ballista/rust/client/Cargo.toml             |   4 +-
 ballista/rust/core/Cargo.toml               |   2 +-
 ballista/rust/executor/Cargo.toml           |   2 +-
 ballista/rust/scheduler/Cargo.toml          |   2 +-
 datafusion/CHANGELOG.md                     | 303 ++++++++++++++++++++
 datafusion/Cargo.toml                       |   2 +-
 dev/release/update_change_log-ballista.sh   |  28 ++
 dev/release/update_change_log-datafusion.sh |  28 ++
 dev/release/update_change_log-python.sh     |  28 ++
 dev/release/update_change_log.sh            |  41 ++-
 dev/update_arrow_deps.py                    |   2 +-
 dev/update_ballista_versions.py             |  68 +++++
 python/CHANGELOG.md                         |  70 +++++
 python/Cargo.toml                           |   2 +-
 17 files changed, 742 insertions(+), 16 deletions(-)
 create mode 100644 ballista/CHANGELOG.md
 create mode 100644 datafusion/CHANGELOG.md
 create mode 100755 dev/release/update_change_log-ballista.sh
 create mode 100755 dev/release/update_change_log-datafusion.sh
 create mode 100755 dev/release/update_change_log-python.sh
 create mode 100755 dev/update_ballista_versions.py
 create mode 100644 python/CHANGELOG.md

diff --git a/.github_changelog_generator b/.github_changelog_generator
index 49d20dcd9e5ce..6ee6508b7216f 100644
--- a/.github_changelog_generator
+++ b/.github_changelog_generator
@@ -21,10 +21,10 @@
 # point to the old changelog in apache/arrow
 front-matter=For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md)\n
 # some issues are just documentation
-add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]}}
+add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}}
 # uncomment to not show PRs. TBD if we shown them or not.
 #pull-requests=false
 # so that the component is shown associated with the issue
-issue-line-labels=ballista,datafusion,python
+issue-line-labels=sql
 exclude-labels=development-process,invalid
-breaking_labels=api-change
+breaking-labels=api change
diff --git a/ballista-examples/Cargo.toml b/ballista-examples/Cargo.toml
index b7d40223c4693..6b9ed171ac59b 100644
--- a/ballista-examples/Cargo.toml
+++ b/ballista-examples/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "ballista-examples"
 description = "Ballista usage examples"
-version = "0.5.0-SNAPSHOT"
+version = "0.5.0"
 homepage = "https://github.com/apache/arrow-datafusion"
 repository = "https://github.com/apache/arrow-datafusion"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
diff --git a/ballista/CHANGELOG.md b/ballista/CHANGELOG.md
new file mode 100644
index 0000000000000..2da49e0791a9a
--- /dev/null
+++ b/ballista/CHANGELOG.md
@@ -0,0 +1,168 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md)
+
+# Changelog
+
+## [ballista-0.5.0](https://github.com/apache/arrow-datafusion/tree/ballista-0.5.0) (2021-07-31)
+
+[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...ballista-0.5.0)
+
+**Breaking changes:**
+
+- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb))
+- Support DataFrame.collect for Ballista DataFrames [\#785](https://github.com/apache/arrow-datafusion/pull/785) ([andygrove](https://github.com/andygrove))
+- JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1))
+- UnresolvedShuffleExec should represent a single shuffle [\#727](https://github.com/apache/arrow-datafusion/pull/727) ([andygrove](https://github.com/andygrove))
+- Ballista: Make shuffle partitions configurable in benchmarks [\#702](https://github.com/apache/arrow-datafusion/pull/702) ([andygrove](https://github.com/andygrove))
+- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove))
+- Ballista: Rename QueryStageExec to ShuffleWriterExec [\#633](https://github.com/apache/arrow-datafusion/pull/633) ([andygrove](https://github.com/andygrove))
+- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist))
+- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) ([houqp](https://github.com/houqp))
+- Move ballista standalone mode to client [\#589](https://github.com/apache/arrow-datafusion/pull/589) ([edrevo](https://github.com/edrevo))
+- Ballista: Implement map-side shuffle [\#543](https://github.com/apache/arrow-datafusion/pull/543) ([andygrove](https://github.com/andygrove))
+- ShuffleReaderExec now supports multiple locations per partition [\#541](https://github.com/apache/arrow-datafusion/pull/541) ([andygrove](https://github.com/andygrove))
+- Make external hostname in executor optional [\#232](https://github.com/apache/arrow-datafusion/pull/232) ([edrevo](https://github.com/edrevo))
+- Remove namespace from executors [\#75](https://github.com/apache/arrow-datafusion/pull/75) ([edrevo](https://github.com/edrevo))
+- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp))
+- Read CSV format text from stdin or memory [\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind))
+- Remove Ballista DataFrame [\#48](https://github.com/apache/arrow-datafusion/pull/48) ([andygrove](https://github.com/andygrove))
+- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString))
+
+**Implemented enhancements:**
+
+- Support DataFrame.collect for Ballista DataFrames [\#787](https://github.com/apache/arrow-datafusion/issues/787)
+- Ballista: Prep for supporting shuffle correctly, part one [\#736](https://github.com/apache/arrow-datafusion/issues/736)
+- Ballista: Implement physical plan serde for ShuffleWriterExec [\#710](https://github.com/apache/arrow-datafusion/issues/710)
+- Ballista: Finish implementing shuffle mechanism [\#707](https://github.com/apache/arrow-datafusion/issues/707)
+- Rename QueryStageExec to ShuffleWriterExec [\#542](https://github.com/apache/arrow-datafusion/issues/542)
+- Ballista ShuffleReaderExec should be able to read from multiple locations per partition [\#540](https://github.com/apache/arrow-datafusion/issues/540)
+- \[Ballista\] Use deployments in k8s user guide [\#473](https://github.com/apache/arrow-datafusion/issues/473)
+- Ballista refactor QueryStageExec in preparation for map-side shuffle [\#458](https://github.com/apache/arrow-datafusion/issues/458)
+- Ballista: Implement map-side of shuffle [\#456](https://github.com/apache/arrow-datafusion/issues/456)
+- Refactor Ballista to separate Flight logic from execution logic [\#449](https://github.com/apache/arrow-datafusion/issues/449)
+- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393)
+- BallistaContext::collect\(\) logging is too noisy [\#352](https://github.com/apache/arrow-datafusion/issues/352)
+- Update Ballista to use new physical plan formatter utility [\#343](https://github.com/apache/arrow-datafusion/issues/343)
+- Add Ballista Getting Started documentation [\#329](https://github.com/apache/arrow-datafusion/issues/329)
+- Remove references to ballistacompute Docker Hub repo [\#325](https://github.com/apache/arrow-datafusion/issues/325)
+- Implement scalable distributed joins [\#63](https://github.com/apache/arrow-datafusion/issues/63)
+- Remove hard-coded Ballista version from scripts [\#32](https://github.com/apache/arrow-datafusion/issues/32)
+- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove))
+- Ballista shuffle is finally working as intended, providing scalable distributed joins [\#750](https://github.com/apache/arrow-datafusion/pull/750) ([andygrove](https://github.com/andygrove))
+- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb))
+- Implement serde for ShuffleWriterExec [\#712](https://github.com/apache/arrow-datafusion/pull/712) ([andygrove](https://github.com/andygrove))
+- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp))
+- Implement metrics for shuffle read and write [\#676](https://github.com/apache/arrow-datafusion/pull/676) ([andygrove](https://github.com/andygrove))
+- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove))
+- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove))
+- Add Keda autoscaling for ballista in k8s [\#586](https://github.com/apache/arrow-datafusion/pull/586) ([edrevo](https://github.com/edrevo))
+- Add some resiliency to lost executors [\#568](https://github.com/apache/arrow-datafusion/pull/568) ([edrevo](https://github.com/edrevo))
+- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist))
+- Support anti join [\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan))
+- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist))
+- Refactor Ballista executor so that FlightService delegates to an Executor struct [\#450](https://github.com/apache/arrow-datafusion/pull/450) ([andygrove](https://github.com/andygrove))
+- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist))
+- Implement fmt\_as for ShuffleReaderExec [\#400](https://github.com/apache/arrow-datafusion/pull/400) ([andygrove](https://github.com/andygrove))
+- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist))
+- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) ([Jimexist](https://github.com/Jimexist))
+- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString))
+- Add query 19 to TPC-H regression tests [\#59](https://github.com/apache/arrow-datafusion/pull/59) ([Dandandan](https://github.com/Dandandan))
+- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan))
+- Add option param for standalone mode [\#42](https://github.com/apache/arrow-datafusion/pull/42) ([djKooks](https://github.com/djKooks))
+- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan))
+- \[Ballista\] Docker files for ui [\#22](https://github.com/apache/arrow-datafusion/pull/22) ([msathis](https://github.com/msathis))
+
+**Fixed bugs:**
+
+- Ballista: UnresolvedShuffleExec should only have a single stage\_id [\#726](https://github.com/apache/arrow-datafusion/issues/726)
+- Ballista integration tests are failing [\#623](https://github.com/apache/arrow-datafusion/issues/623)
+- Integration test build failure due to arrow-rs using unstable feature [\#596](https://github.com/apache/arrow-datafusion/issues/596)
+- `cargo build` cannot build the project [\#531](https://github.com/apache/arrow-datafusion/issues/531)
+- ShuffleReaderExec does not get formatted correctly in displayable physical plan [\#399](https://github.com/apache/arrow-datafusion/issues/399)
+- Ballista: Prep for fixing shuffle mechansim, part 1 [\#738](https://github.com/apache/arrow-datafusion/pull/738) ([andygrove](https://github.com/andygrove))
+- Ballista: Shuffle write bug fix [\#714](https://github.com/apache/arrow-datafusion/pull/714) ([andygrove](https://github.com/andygrove))
+- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp))
+- MINOR: Fix integration tests by adding datafusion-cli module to docker image [\#322](https://github.com/apache/arrow-datafusion/pull/322) ([andygrove](https://github.com/andygrove))
+
+**Documentation updates:**
+
+- Add Ballista examples [\#775](https://github.com/apache/arrow-datafusion/pull/775) ([andygrove](https://github.com/andygrove))
+- Update ballista.proto link in architecture doc [\#502](https://github.com/apache/arrow-datafusion/pull/502) ([terrycorley](https://github.com/terrycorley))
+- Update k8s user guide to use deployments [\#474](https://github.com/apache/arrow-datafusion/pull/474) ([edrevo](https://github.com/edrevo))
+- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist))
+- Make it easier for developers to find Ballista documentation [\#330](https://github.com/apache/arrow-datafusion/pull/330) ([andygrove](https://github.com/andygrove))
+- Instructions for cross-compiling Ballista to the Raspberry Pi [\#263](https://github.com/apache/arrow-datafusion/pull/263) ([andygrove](https://github.com/andygrove))
+- Add install guide in README [\#236](https://github.com/apache/arrow-datafusion/pull/236) ([djKooks](https://github.com/djKooks))
+
+**Performance improvements:**
+
+- Ballista: Avoid sleeping between polling for tasks [\#698](https://github.com/apache/arrow-datafusion/pull/698) ([Dandandan](https://github.com/Dandandan))
+- Make BallistaContext::collect streaming [\#535](https://github.com/apache/arrow-datafusion/pull/535) ([edrevo](https://github.com/edrevo))
+
+**Closed issues:**
+
+- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769)
+- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745)
+- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592)
+- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526)
+- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517)
+- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448)
+- Make Ballista not depend on arrow directly [\#446](https://github.com/apache/arrow-datafusion/issues/446)
+- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435)
+- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27)
+- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18)
+- Add Ballista to default cargo workspace [\#17](https://github.com/apache/arrow-datafusion/issues/17)
+- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16)
+- Consolidate TPC-H benchmarks [\#6](https://github.com/apache/arrow-datafusion/issues/6)
+- \[Ballista\] Fix integration test script [\#4](https://github.com/apache/arrow-datafusion/issues/4)
+- Ballista should not have separate DataFrame implementation [\#2](https://github.com/apache/arrow-datafusion/issues/2)
+
+**Merged pull requests:**
+
+- Fix: Update clippy lints for Rust 1.54 [\#794](https://github.com/apache/arrow-datafusion/pull/794) ([alamb](https://github.com/alamb))
+- MINOR: Remove unused Ballista query execution code path [\#732](https://github.com/apache/arrow-datafusion/pull/732) ([andygrove](https://github.com/andygrove))
+- \[fix\] benchmark run with compose [\#666](https://github.com/apache/arrow-datafusion/pull/666) ([rdettai](https://github.com/rdettai))
+- bring back dev scripts for ballista [\#648](https://github.com/apache/arrow-datafusion/pull/648) ([Jimexist](https://github.com/Jimexist))
+- Remove unnecessary mutex [\#639](https://github.com/apache/arrow-datafusion/pull/639) ([edrevo](https://github.com/edrevo))
+- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp))
+- Fix build [\#627](https://github.com/apache/arrow-datafusion/pull/627) ([andygrove](https://github.com/andygrove))
+- in ballista also check for UI prettier changes [\#578](https://github.com/apache/arrow-datafusion/pull/578) ([Jimexist](https://github.com/Jimexist))
+- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist))
+- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist))
+- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist))
+- make `VOLUME` declaration in tpch datagen docker absolute [\#466](https://github.com/apache/arrow-datafusion/pull/466) ([crepererum](https://github.com/crepererum))
+- Refactor QueryStageExec in preparation for implementing map-side shuffle [\#459](https://github.com/apache/arrow-datafusion/pull/459) ([andygrove](https://github.com/andygrove))
+- Simplified usage of `use arrow` in ballista. [\#447](https://github.com/apache/arrow-datafusion/pull/447) ([jorgecarleitao](https://github.com/jorgecarleitao))
+- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday))
+- \#352: BallistaContext::collect\(\) logging is too noisy [\#394](https://github.com/apache/arrow-datafusion/pull/394) ([jgoday](https://github.com/jgoday))
+- cleanup function return type fn [\#350](https://github.com/apache/arrow-datafusion/pull/350) ([Jimexist](https://github.com/Jimexist))
+- Update Ballista to use new physical plan formatter utility [\#344](https://github.com/apache/arrow-datafusion/pull/344) ([andygrove](https://github.com/andygrove))
+- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb))
+- Remove references to Ballista Docker images published to ballistacompute Docker Hub repo [\#326](https://github.com/apache/arrow-datafusion/pull/326) ([andygrove](https://github.com/andygrove))
+- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb))
+- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb))
+- Enable redundant\_field\_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan))
+- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) ([alamb](https://github.com/alamb))
+- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb))
+
+
+
+\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)*
diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml
index 5c7eb3802a104..3507a7b22a45d 100644
--- a/ballista/rust/client/Cargo.toml
+++ b/ballista/rust/client/Cargo.toml
@@ -19,7 +19,7 @@
 name = "ballista"
 description = "Ballista Distributed Compute"
 license = "Apache-2.0"
-version = "0.5.0-SNAPSHOT"
+version = "0.5.0"
 homepage = "https://github.com/apache/arrow-datafusion"
 repository = "https://github.com/apache/arrow-datafusion"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -37,4 +37,4 @@ datafusion = { path = "../../../datafusion" }
 
 [features]
 default = []
-standalone = ["ballista-executor", "ballista-scheduler"]
\ No newline at end of file
+standalone = ["ballista-executor", "ballista-scheduler"]
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index ce72d2fda92d4..4648b589f6469 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -19,7 +19,7 @@
 name = "ballista-core"
 description = "Ballista Distributed Compute"
 license = "Apache-2.0"
-version = "0.5.0-SNAPSHOT"
+version = "0.5.0"
 homepage = "https://github.com/apache/arrow-datafusion"
 repository = "https://github.com/apache/arrow-datafusion"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml
index 428a5bb0f01f5..25adabbbce12a 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -19,7 +19,7 @@
 name = "ballista-executor"
 description = "Ballista Distributed Compute - Executor"
 license = "Apache-2.0"
-version = "0.5.0-SNAPSHOT"
+version = "0.5.0"
 homepage = "https://github.com/apache/arrow-datafusion"
 repository = "https://github.com/apache/arrow-datafusion"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
diff --git a/ballista/rust/scheduler/Cargo.toml b/ballista/rust/scheduler/Cargo.toml
index 9bca8d9695714..f0ffb54d378c1 100644
--- a/ballista/rust/scheduler/Cargo.toml
+++ b/ballista/rust/scheduler/Cargo.toml
@@ -19,7 +19,7 @@
 name = "ballista-scheduler"
 description = "Ballista Distributed Compute - Scheduler"
 license = "Apache-2.0"
-version = "0.5.0-SNAPSHOT"
+version = "0.5.0"
 homepage = "https://github.com/apache/arrow-datafusion"
 repository = "https://github.com/apache/arrow-datafusion"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md
new file mode 100644
index 0000000000000..088cb2dfa6037
--- /dev/null
+++ b/datafusion/CHANGELOG.md
@@ -0,0 +1,303 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md)
+
+# Changelog
+
+## [5.0.0](https://github.com/apache/arrow-datafusion/tree/5.0.0) (2021-07-31)
+
+[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...5.0.0)
+
+**Breaking changes:**
+
+- Box ScalarValue:Lists, reduce size by half size [\#788](https://github.com/apache/arrow-datafusion/pull/788) ([alamb](https://github.com/alamb))
+- JOIN conditions are order dependent [\#778](https://github.com/apache/arrow-datafusion/pull/778) ([seddonm1](https://github.com/seddonm1))
+- Show the result of all optimizer passes in EXPLAIN VERBOSE [\#759](https://github.com/apache/arrow-datafusion/pull/759) ([alamb](https://github.com/alamb))
+- \#723 Datafusion add option in ExecutionConfig to enable/disable parquet pruning [\#749](https://github.com/apache/arrow-datafusion/pull/749) ([lvheyang](https://github.com/lvheyang))
+- Update API for extension planning to include logical plan [\#643](https://github.com/apache/arrow-datafusion/pull/643) ([alamb](https://github.com/alamb))
+- Rename MergeExec to CoalescePartitionsExec [\#635](https://github.com/apache/arrow-datafusion/pull/635) ([andygrove](https://github.com/andygrove))
+- fix 593, reduce cloning by taking ownership in logical planner's `from` fn [\#610](https://github.com/apache/arrow-datafusion/pull/610) ([Jimexist](https://github.com/Jimexist))
+- fix join column handling logic for `On` and `Using` constraints [\#605](https://github.com/apache/arrow-datafusion/pull/605) ([houqp](https://github.com/houqp))
+- Rewrite pruning logic in terms of PruningStatistics using Array trait \(option 2\) [\#426](https://github.com/apache/arrow-datafusion/pull/426) ([alamb](https://github.com/alamb))
+- Support reading from NdJson formatted data sources [\#404](https://github.com/apache/arrow-datafusion/pull/404) ([heymind](https://github.com/heymind))
+- Add metrics to RepartitionExec [\#398](https://github.com/apache/arrow-datafusion/pull/398) ([andygrove](https://github.com/andygrove))
+- Use 4.x arrow-rs from crates.io rather than git sha [\#395](https://github.com/apache/arrow-datafusion/pull/395) ([alamb](https://github.com/alamb))
+- Return Vec\<bool\> from PredicateBuilder rather than an `Fn` [\#370](https://github.com/apache/arrow-datafusion/pull/370) ([alamb](https://github.com/alamb))
+- Refactor: move RowGroupPredicateBuilder into its own module, rename to PruningPredicateBuilder [\#365](https://github.com/apache/arrow-datafusion/pull/365) ([alamb](https://github.com/alamb))
+- \[Datafusion\] NOW\(\) function support [\#288](https://github.com/apache/arrow-datafusion/pull/288) ([msathis](https://github.com/msathis))
+- Implement select distinct [\#262](https://github.com/apache/arrow-datafusion/pull/262) ([Dandandan](https://github.com/Dandandan))
+- Refactor datafusion/src/physical\_plan/common.rs build\_file\_list to take less param and reuse code [\#253](https://github.com/apache/arrow-datafusion/pull/253) ([Jimexist](https://github.com/Jimexist))
+- Support qualified columns in queries [\#55](https://github.com/apache/arrow-datafusion/pull/55) ([houqp](https://github.com/houqp))
+- Read CSV format text from stdin or memory [\#54](https://github.com/apache/arrow-datafusion/pull/54) ([heymind](https://github.com/heymind))
+- Use atomics for SQLMetric implementation, remove unused name field [\#25](https://github.com/apache/arrow-datafusion/pull/25) ([returnString](https://github.com/returnString))
+
+**Implemented enhancements:**
+
+- Allow extension nodes to correctly plan physical expressions with relations [\#642](https://github.com/apache/arrow-datafusion/issues/642)
+- Filters aren't passed down to table scans in a union [\#557](https://github.com/apache/arrow-datafusion/issues/557)
+- Support pruning for `boolean` columns [\#490](https://github.com/apache/arrow-datafusion/issues/490)
+- Implement SQLMetrics for RepartitionExec [\#397](https://github.com/apache/arrow-datafusion/issues/397)
+- DataFusion benchmarks should show executed plan with metrics after query completes [\#396](https://github.com/apache/arrow-datafusion/issues/396)
+- Use published versions of arrow rather than github shas [\#393](https://github.com/apache/arrow-datafusion/issues/393)
+- Add Compare to GroupByScalar [\#364](https://github.com/apache/arrow-datafusion/issues/364)
+- Reusable "row group pruning" logic [\#363](https://github.com/apache/arrow-datafusion/issues/363)
+- Add an Order Preserving merge operator [\#362](https://github.com/apache/arrow-datafusion/issues/362)
+- Implement Postgres compatible `now()` function [\#251](https://github.com/apache/arrow-datafusion/issues/251)
+- COUNT DISTINCT does not support dictionary types [\#249](https://github.com/apache/arrow-datafusion/issues/249)
+- Use standard make\_null\_array for CASE [\#222](https://github.com/apache/arrow-datafusion/issues/222)
+-  Implement date\_trunc\(\) function [\#203](https://github.com/apache/arrow-datafusion/issues/203)
+-  COUNT DISTINCT does not support for `Float64` [\#199](https://github.com/apache/arrow-datafusion/issues/199)
+- Update SQLMetric to use atomics rather than a Mutex [\#30](https://github.com/apache/arrow-datafusion/issues/30)
+- Implement streaming versions of Dataframe.collect methods [\#789](https://github.com/apache/arrow-datafusion/pull/789) ([andygrove](https://github.com/andygrove))
+- impl from str for column and scalar [\#762](https://github.com/apache/arrow-datafusion/pull/762) ([Jimexist](https://github.com/Jimexist))
+- impl fmt::Display for PlanType [\#752](https://github.com/apache/arrow-datafusion/pull/752) ([Jimexist](https://github.com/Jimexist))
+- Remove unnecessary projection in logical plan optimization phase [\#747](https://github.com/apache/arrow-datafusion/pull/747) ([waynexia](https://github.com/waynexia))
+- Support table columns alias [\#735](https://github.com/apache/arrow-datafusion/pull/735) ([Dandandan](https://github.com/Dandandan))
+- Derive PartialEq for datasource enums [\#734](https://github.com/apache/arrow-datafusion/pull/734) ([alamb](https://github.com/alamb))
+- Allow filetype to be lowercase, Implement FromStr for FileType [\#728](https://github.com/apache/arrow-datafusion/pull/728) ([Jimexist](https://github.com/Jimexist))
+- Update to use arrow 5.0 [\#721](https://github.com/apache/arrow-datafusion/pull/721) ([alamb](https://github.com/alamb))
+- \#554: Lead/lag window function with offset and default value arguments [\#687](https://github.com/apache/arrow-datafusion/pull/687) ([jgoday](https://github.com/jgoday))
+- dedup using join column in wildcard expansion [\#678](https://github.com/apache/arrow-datafusion/pull/678) ([houqp](https://github.com/houqp))
+- Implement metrics for HashJoinExec [\#664](https://github.com/apache/arrow-datafusion/pull/664) ([andygrove](https://github.com/andygrove))
+- Show physical plan with metrics in benchmark [\#662](https://github.com/apache/arrow-datafusion/pull/662) ([andygrove](https://github.com/andygrove))
+- Allow non-equijoin filters in join condition [\#660](https://github.com/apache/arrow-datafusion/pull/660) ([Dandandan](https://github.com/Dandandan))
+- Add End-to-end test for parquet pruning + metrics for ParquetExec [\#657](https://github.com/apache/arrow-datafusion/pull/657) ([alamb](https://github.com/alamb))
+- Add support for leading field in interval [\#647](https://github.com/apache/arrow-datafusion/pull/647) ([Dandandan](https://github.com/Dandandan))
+- Remove hard-coded PartitionMode from Ballista serde [\#637](https://github.com/apache/arrow-datafusion/pull/637) ([andygrove](https://github.com/andygrove))
+- Ballista: Implement scalable distributed joins [\#634](https://github.com/apache/arrow-datafusion/pull/634) ([andygrove](https://github.com/andygrove))
+- implement rank and dense\_rank function and refactor built-in window function evaluation [\#631](https://github.com/apache/arrow-datafusion/pull/631) ([Jimexist](https://github.com/Jimexist))
+- Improve "field not found" error messages [\#625](https://github.com/apache/arrow-datafusion/pull/625) ([andygrove](https://github.com/andygrove))
+- Support modulus op [\#577](https://github.com/apache/arrow-datafusion/pull/577) ([gangliao](https://github.com/gangliao))
+- implement `std::default::Default` for execution config [\#570](https://github.com/apache/arrow-datafusion/pull/570) ([Jimexist](https://github.com/Jimexist))
+- `to_timestamp_millis()`, `to_timestamp_micros()`, `to_timestamp_seconds()` [\#567](https://github.com/apache/arrow-datafusion/pull/567) ([velvia](https://github.com/velvia))
+- Filter push down for Union [\#559](https://github.com/apache/arrow-datafusion/pull/559) ([Dandandan](https://github.com/Dandandan))
+- Implement window functions with `partition_by` clause [\#558](https://github.com/apache/arrow-datafusion/pull/558) ([Jimexist](https://github.com/Jimexist))
+- support table alias in join clause [\#547](https://github.com/apache/arrow-datafusion/pull/547) ([houqp](https://github.com/houqp))
+- Not equal predicate in physical\_planning pruning [\#544](https://github.com/apache/arrow-datafusion/pull/544) ([jgoday](https://github.com/jgoday))
+- add error handling and boundary checking for window frames [\#530](https://github.com/apache/arrow-datafusion/pull/530) ([Jimexist](https://github.com/Jimexist))
+- Implement window functions with `order_by` clause [\#520](https://github.com/apache/arrow-datafusion/pull/520) ([Jimexist](https://github.com/Jimexist))
+- support group by column positions [\#519](https://github.com/apache/arrow-datafusion/pull/519) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([jychen7](https://github.com/jychen7))
+- Implement constant folding for CAST [\#513](https://github.com/apache/arrow-datafusion/pull/513) ([msathis](https://github.com/msathis))
+- Add window frame constructs - alternative [\#506](https://github.com/apache/arrow-datafusion/pull/506) ([Jimexist](https://github.com/Jimexist))
+- Add `partition by` constructs in window functions and modify logical planning [\#501](https://github.com/apache/arrow-datafusion/pull/501) ([Jimexist](https://github.com/Jimexist))
+-  Add support for boolean columns in pruning logic [\#500](https://github.com/apache/arrow-datafusion/pull/500) ([alamb](https://github.com/alamb))
+- \#215 resolve aliases for group by exprs [\#485](https://github.com/apache/arrow-datafusion/pull/485) ([jychen7](https://github.com/jychen7))
+- Support anti join [\#482](https://github.com/apache/arrow-datafusion/pull/482) ([Dandandan](https://github.com/Dandandan))
+- Support semi join [\#470](https://github.com/apache/arrow-datafusion/pull/470) ([Dandandan](https://github.com/Dandandan))
+- add `order by` construct in window function and logical plans [\#463](https://github.com/apache/arrow-datafusion/pull/463) ([Jimexist](https://github.com/Jimexist))
+- Remove reundant filters \(e.g. c\> 5 AND c\>5 --\> c\>5\) [\#436](https://github.com/apache/arrow-datafusion/pull/436) ([jgoday](https://github.com/jgoday))
+- fix: display the content of debug explain [\#434](https://github.com/apache/arrow-datafusion/pull/434) ([NGA-TRAN](https://github.com/NGA-TRAN))
+- implement lead and lag built-in window function [\#429](https://github.com/apache/arrow-datafusion/pull/429) ([Jimexist](https://github.com/Jimexist))
+- add support for ndjson for datafusion-cli [\#427](https://github.com/apache/arrow-datafusion/pull/427) ([Jimexist](https://github.com/Jimexist))
+- add `first_value`, `last_value`, and `nth_value` built-in window functions [\#403](https://github.com/apache/arrow-datafusion/pull/403) ([Jimexist](https://github.com/Jimexist))
+- export both `now` and `random` functions [\#389](https://github.com/apache/arrow-datafusion/pull/389) ([Jimexist](https://github.com/Jimexist))
+- Function to create `ArrayRef` from an iterator of ScalarValues [\#381](https://github.com/apache/arrow-datafusion/pull/381) ([alamb](https://github.com/alamb))
+- Sort preserving merge \(\#362\) [\#379](https://github.com/apache/arrow-datafusion/pull/379) ([tustvold](https://github.com/tustvold))
+- Add support for multiple partitions with SortExec \(\#362\) [\#378](https://github.com/apache/arrow-datafusion/pull/378) ([tustvold](https://github.com/tustvold))
+- add window expression stream, delegated window aggregation to aggregate functions, and implement `row_number` [\#375](https://github.com/apache/arrow-datafusion/pull/375) ([Jimexist](https://github.com/Jimexist))
+- Add PartialOrd and Ord to GroupByScalar \(\#364\) [\#368](https://github.com/apache/arrow-datafusion/pull/368) ([tustvold](https://github.com/tustvold))
+- Implement readable explain plans for physical plans [\#337](https://github.com/apache/arrow-datafusion/pull/337) ([alamb](https://github.com/alamb))
+- Add window expression part 1 - logical and physical planning, structure, to/from proto, and explain, for empty over clause only [\#334](https://github.com/apache/arrow-datafusion/pull/334) ([Jimexist](https://github.com/Jimexist))
+- Use NullArray to Pass row count to ScalarFunctions that take 0 arguments  [\#328](https://github.com/apache/arrow-datafusion/pull/328) ([Jimexist](https://github.com/Jimexist))
+- add --quiet/-q flag and allow timing info to be turned on/off [\#323](https://github.com/apache/arrow-datafusion/pull/323) ([Jimexist](https://github.com/Jimexist))
+- Implement hash partitioned aggregation [\#320](https://github.com/apache/arrow-datafusion/pull/320) ([Dandandan](https://github.com/Dandandan))
+- Support COUNT\(DISTINCT timestamps\)  [\#319](https://github.com/apache/arrow-datafusion/pull/319) ([charlibot](https://github.com/charlibot))
+- add random SQL function [\#303](https://github.com/apache/arrow-datafusion/pull/303) ([Jimexist](https://github.com/Jimexist))
+- allow datafusion cli to take -- comments [\#296](https://github.com/apache/arrow-datafusion/pull/296) ([Jimexist](https://github.com/Jimexist))
+- Add json print format mode to datafusion cli [\#295](https://github.com/apache/arrow-datafusion/pull/295) ([Jimexist](https://github.com/Jimexist))
+- Add print format param with support for tsv print format to datafusion cli [\#292](https://github.com/apache/arrow-datafusion/pull/292) ([Jimexist](https://github.com/Jimexist))
+- Add print format param and support for csv print format to datafusion cli [\#289](https://github.com/apache/arrow-datafusion/pull/289) ([Jimexist](https://github.com/Jimexist))
+- allow datafusion-cli to take a file param [\#285](https://github.com/apache/arrow-datafusion/pull/285) ([Jimexist](https://github.com/Jimexist))
+- add param validation for datafusion-cli [\#284](https://github.com/apache/arrow-datafusion/pull/284) ([Jimexist](https://github.com/Jimexist))
+- \[breaking change\] fix 265, log should be log10, and add ln [\#271](https://github.com/apache/arrow-datafusion/pull/271) ([Jimexist](https://github.com/Jimexist))
+- Implement count distinct for dictionary arrays [\#256](https://github.com/apache/arrow-datafusion/pull/256) ([alamb](https://github.com/alamb))
+- Count distinct floats [\#252](https://github.com/apache/arrow-datafusion/pull/252) ([pjmore](https://github.com/pjmore))
+- Add rule to eliminate `LIMIT 0` and replace it with an `EmptyRelation` [\#213](https://github.com/apache/arrow-datafusion/pull/213) ([Dandandan](https://github.com/Dandandan))
+- Allow table providers to indicate their type for catalog metadata [\#205](https://github.com/apache/arrow-datafusion/pull/205) ([returnString](https://github.com/returnString))
+- Use arrow eq kernels in CaseWhen expression evaluation [\#52](https://github.com/apache/arrow-datafusion/pull/52) ([Dandandan](https://github.com/Dandandan))
+- Re-export Arrow and Parquet crates from DataFusion [\#39](https://github.com/apache/arrow-datafusion/pull/39) ([returnString](https://github.com/returnString))
+- \[DataFusion\] Optimize hash join inner workings, null handling fix [\#24](https://github.com/apache/arrow-datafusion/pull/24) ([Dandandan](https://github.com/Dandandan))
+- \[ARROW-12441\] \[DataFusion\] Cross join implementation [\#11](https://github.com/apache/arrow-datafusion/pull/11) ([Dandandan](https://github.com/Dandandan))
+
+**Fixed bugs:**
+
+- Projection pushdown removes unqualified column names even when they are used [\#617](https://github.com/apache/arrow-datafusion/issues/617)
+- Panic while running join datatypes/schema.rs:165:10 [\#601](https://github.com/apache/arrow-datafusion/issues/601)
+- Indentation is incorrect for joins in formatted physical plans [\#345](https://github.com/apache/arrow-datafusion/issues/345)
+- Error while running `COUNT DISTINCT (timestamp)`: 'Unexpected DataType for list [\#314](https://github.com/apache/arrow-datafusion/issues/314)
+- When joining two tables, get Error: Plan\("Schema contains duplicate unqualified field name \'xxx\'"\) [\#311](https://github.com/apache/arrow-datafusion/issues/311)
+- Incorrect answers with SELECT DISTINCT queries [\#250](https://github.com/apache/arrow-datafusion/issues/250)
+- Intermitent failure in CI join\_with\_hash\_collision [\#227](https://github.com/apache/arrow-datafusion/issues/227)
+- `Concat` from Dataframe API no longer accepts multiple expressions [\#226](https://github.com/apache/arrow-datafusion/issues/226)
+- \#723 limit pruning rule to simple expression [\#764](https://github.com/apache/arrow-datafusion/pull/764) ([lvheyang](https://github.com/lvheyang))
+- \#699 fix return type conflict when calling builtin math fuctions [\#716](https://github.com/apache/arrow-datafusion/pull/716) ([lvheyang](https://github.com/lvheyang))
+- Fix Date32 and Date64 parquet row group pruning [\#690](https://github.com/apache/arrow-datafusion/pull/690) ([alamb](https://github.com/alamb))
+- Remove qualifiers on pushed down predicates / Fix parquet pruning [\#689](https://github.com/apache/arrow-datafusion/pull/689) ([alamb](https://github.com/alamb))
+- use `Weak` ptr to break catalog list \<\> info schema cyclic reference [\#681](https://github.com/apache/arrow-datafusion/pull/681) ([crepererum](https://github.com/crepererum))
+- honor table name for csv/parquet scan in ballista plan serde [\#629](https://github.com/apache/arrow-datafusion/pull/629) ([houqp](https://github.com/houqp))
+- fix 621, where unnamed window functions shall be differentiated by partition and order by clause [\#622](https://github.com/apache/arrow-datafusion/pull/622) ([Jimexist](https://github.com/Jimexist))
+- RFC: Do not prune out unnecessary columns with unqualified references [\#619](https://github.com/apache/arrow-datafusion/pull/619) ([alamb](https://github.com/alamb))
+- \[fix\] select \* on empty table [\#613](https://github.com/apache/arrow-datafusion/pull/613) ([rdettai](https://github.com/rdettai))
+- fix 592, support alias in window functions [\#607](https://github.com/apache/arrow-datafusion/pull/607) ([Jimexist](https://github.com/Jimexist))
+- RepartitionExec should not error if output has hung up [\#576](https://github.com/apache/arrow-datafusion/pull/576) ([alamb](https://github.com/alamb))
+- Fix pruning on not equal predicate [\#561](https://github.com/apache/arrow-datafusion/pull/561) ([alamb](https://github.com/alamb))
+- hash float arrays using primitive usigned integer type [\#556](https://github.com/apache/arrow-datafusion/pull/556) ([houqp](https://github.com/houqp))
+- Return errors properly from RepartitionExec [\#521](https://github.com/apache/arrow-datafusion/pull/521) ([alamb](https://github.com/alamb))
+- refactor sort exec stream and combine batches [\#515](https://github.com/apache/arrow-datafusion/pull/515) ([Jimexist](https://github.com/Jimexist))
+- Fix display of execution time in datafusion-cli [\#514](https://github.com/apache/arrow-datafusion/pull/514) ([Dandandan](https://github.com/Dandandan))
+- Wrong aggregation arguments error. [\#505](https://github.com/apache/arrow-datafusion/pull/505) ([jgoday](https://github.com/jgoday))
+- fix window aggregation with alias and add integration test case [\#454](https://github.com/apache/arrow-datafusion/pull/454) ([Jimexist](https://github.com/Jimexist))
+- fix: don't duplicate existing filters [\#409](https://github.com/apache/arrow-datafusion/pull/409) ([e-dard](https://github.com/e-dard))
+- Fixed incorrect logical type in GroupByScalar. [\#391](https://github.com/apache/arrow-datafusion/pull/391) ([jorgecarleitao](https://github.com/jorgecarleitao))
+- Fix indented display for multi-child nodes [\#358](https://github.com/apache/arrow-datafusion/pull/358) ([alamb](https://github.com/alamb))
+- Fix SQL planner to support multibyte column names [\#357](https://github.com/apache/arrow-datafusion/pull/357) ([agatan](https://github.com/agatan))
+- Fix wrong projection 'optimization' [\#268](https://github.com/apache/arrow-datafusion/pull/268) ([Dandandan](https://github.com/Dandandan))
+- Fix Left join implementation is incorrect for 0 or multiple batches on the right side [\#238](https://github.com/apache/arrow-datafusion/pull/238) ([Dandandan](https://github.com/Dandandan))
+- Count distinct boolean [\#230](https://github.com/apache/arrow-datafusion/pull/230) ([pjmore](https://github.com/pjmore))
+- Fix Filter / where clause without column names is removed in optimization pass [\#225](https://github.com/apache/arrow-datafusion/pull/225) ([Dandandan](https://github.com/Dandandan))
+
+**Documentation updates:**
+
+-  No way to get to the examples from docs.rs [\#186](https://github.com/apache/arrow-datafusion/issues/186)
+- Update docs to use vendored version of arrow [\#772](https://github.com/apache/arrow-datafusion/pull/772) ([alamb](https://github.com/alamb))
+- update stale documentations related to window functions [\#598](https://github.com/apache/arrow-datafusion/pull/598) ([Jimexist](https://github.com/Jimexist))
+- update readme to reflect work on window functions [\#471](https://github.com/apache/arrow-datafusion/pull/471) ([Jimexist](https://github.com/Jimexist))
+- Add examples section to datafusion crate doc [\#457](https://github.com/apache/arrow-datafusion/pull/457) ([mluts](https://github.com/mluts))
+- add invariants spec [\#443](https://github.com/apache/arrow-datafusion/pull/443) ([houqp](https://github.com/houqp))
+- add output field name rfc [\#422](https://github.com/apache/arrow-datafusion/pull/422) ([houqp](https://github.com/houqp))
+- Update more docs and also the developer.md doc [\#414](https://github.com/apache/arrow-datafusion/pull/414) ([Jimexist](https://github.com/Jimexist))
+- use prettier to format md files [\#367](https://github.com/apache/arrow-datafusion/pull/367) ([Jimexist](https://github.com/Jimexist))
+- Add new logo svg with white background [\#313](https://github.com/apache/arrow-datafusion/pull/313) ([parthsarthy](https://github.com/parthsarthy))
+- Add projects \(Squirtle and Tensorbase\) to list in readme [\#312](https://github.com/apache/arrow-datafusion/pull/312) ([parthsarthy](https://github.com/parthsarthy))
+- docs - fix the ballista link [\#274](https://github.com/apache/arrow-datafusion/pull/274) ([haoxins](https://github.com/haoxins))
+- misc\(README\): Replace Cube.js with Cube Store [\#248](https://github.com/apache/arrow-datafusion/pull/248) ([ovr](https://github.com/ovr))
+- Initial docs for SQL syntax [\#242](https://github.com/apache/arrow-datafusion/pull/242) ([Dandandan](https://github.com/Dandandan))
+- Deduplicate README.md [\#79](https://github.com/apache/arrow-datafusion/pull/79) ([msathis](https://github.com/msathis))
+
+**Performance improvements:**
+
+- perf: improve performance of `SortPreservingMergeExec` operator [\#722](https://github.com/apache/arrow-datafusion/pull/722) ([e-dard](https://github.com/e-dard))
+- perf: Improve materialisation performance of SortPreservingMergeExec [\#691](https://github.com/apache/arrow-datafusion/pull/691) ([e-dard](https://github.com/e-dard))
+- Optimize count\(\*\) with table statistics [\#620](https://github.com/apache/arrow-datafusion/pull/620) ([Dandandan](https://github.com/Dandandan))
+- optimize window function's `find_ranges_in_range` [\#595](https://github.com/apache/arrow-datafusion/pull/595) ([Jimexist](https://github.com/Jimexist))
+- Collapse sort into window expr and do sort within logical phase [\#571](https://github.com/apache/arrow-datafusion/pull/571) ([Jimexist](https://github.com/Jimexist))
+- Use repartition in window functions to speed up [\#569](https://github.com/apache/arrow-datafusion/pull/569) ([Jimexist](https://github.com/Jimexist))
+- Constant fold / optimize `to_timestamp` function during planning [\#387](https://github.com/apache/arrow-datafusion/pull/387) ([msathis](https://github.com/msathis))
+- Speed up `create_batch_from_map` [\#339](https://github.com/apache/arrow-datafusion/pull/339) ([Dandandan](https://github.com/Dandandan))
+- Simplify math expression code \(use unary kernel\) [\#309](https://github.com/apache/arrow-datafusion/pull/309) ([Dandandan](https://github.com/Dandandan))
+
+**Closed issues:**
+
+- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769)
+- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745)
+- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592)
+- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526)
+- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517)
+- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448)
+- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435)
+-  COUNT DISTINCT does not support for `Boolean` [\#202](https://github.com/apache/arrow-datafusion/issues/202)
+-  Read CSV format text from stdin or memory [\#198](https://github.com/apache/arrow-datafusion/issues/198)
+-  Fix null handling hash join [\#195](https://github.com/apache/arrow-datafusion/issues/195)
+-   Allow TableProviders to indicate their type for the information schema [\#191](https://github.com/apache/arrow-datafusion/issues/191)
+-   Make DataFrame extensible [\#190](https://github.com/apache/arrow-datafusion/issues/190)
+-  TPC-H Query 19 [\#170](https://github.com/apache/arrow-datafusion/issues/170)
+-  TPC-H Query 7 [\#161](https://github.com/apache/arrow-datafusion/issues/161)
+-  Upgrade hashbrown to 0.10 [\#151](https://github.com/apache/arrow-datafusion/issues/151)
+-  Implement vectorized hashing for hash aggregate [\#149](https://github.com/apache/arrow-datafusion/issues/149)
+-  More efficient LEFT join implementation [\#143](https://github.com/apache/arrow-datafusion/issues/143)
+-  Implement vectorized hashing [\#142](https://github.com/apache/arrow-datafusion/issues/142)
+-   RFC Roadmap for 2021 \(DataFusion\) [\#140](https://github.com/apache/arrow-datafusion/issues/140)
+-   Implement hash partitioning [\#131](https://github.com/apache/arrow-datafusion/issues/131)
+-   Grouping by column position [\#110](https://github.com/apache/arrow-datafusion/issues/110)
+-  \[Datafusion\] GROUP BY with a high cardinality doesn't seem to finish [\#107](https://github.com/apache/arrow-datafusion/issues/107)
+- \[Rust\]  Add support for JSON data sources [\#103](https://github.com/apache/arrow-datafusion/issues/103)
+- \[Rust\]  Implement metrics framework [\#95](https://github.com/apache/arrow-datafusion/issues/95)
+- Publically export Arrow crate from datafusion  [\#36](https://github.com/apache/arrow-datafusion/issues/36)
+- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27)
+- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18)
+- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16)
+
+**Merged pull requests:**
+
+- Remove GroupByScalar and use ScalarValue in preparation for supporting null values in GroupBy [\#786](https://github.com/apache/arrow-datafusion/pull/786) ([alamb](https://github.com/alamb))
+-  Use consistent version of string\_to\_timestamp\_nanos in DataFusion [\#767](https://github.com/apache/arrow-datafusion/pull/767) ([alamb](https://github.com/alamb))
+- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist))
+- Test for parquet pruning disabling [\#754](https://github.com/apache/arrow-datafusion/pull/754) ([alamb](https://github.com/alamb))
+- Add explain verbose with limit push down [\#751](https://github.com/apache/arrow-datafusion/pull/751) ([Jimexist](https://github.com/Jimexist))
+- Move assert\_batches\_eq! macros to test\_utils.rs [\#746](https://github.com/apache/arrow-datafusion/pull/746) ([alamb](https://github.com/alamb))
+- Show optimized physical and logical plans in EXPLAIN [\#744](https://github.com/apache/arrow-datafusion/pull/744) ([alamb](https://github.com/alamb))
+- update `python` crate to support latest pyo3 syntax and gil sematics [\#741](https://github.com/apache/arrow-datafusion/pull/741) ([Jimexist](https://github.com/Jimexist))
+- update `python` crate dependencies [\#740](https://github.com/apache/arrow-datafusion/pull/740) ([Jimexist](https://github.com/Jimexist))
+- provide more details on required .parquet file extension error message [\#729](https://github.com/apache/arrow-datafusion/pull/729) ([Jimexist](https://github.com/Jimexist))
+- split up windows functions into a dedicated module with separate files [\#724](https://github.com/apache/arrow-datafusion/pull/724) ([Jimexist](https://github.com/Jimexist))
+- Use pytest in integration test [\#715](https://github.com/apache/arrow-datafusion/pull/715) ([Jimexist](https://github.com/Jimexist))
+- replace once iter chain with array::IntoIter [\#704](https://github.com/apache/arrow-datafusion/pull/704) ([houqp](https://github.com/houqp))
+- avoid iterator materialization in column index lookup [\#703](https://github.com/apache/arrow-datafusion/pull/703) ([houqp](https://github.com/houqp))
+- Fix build with 1.52.1 [\#696](https://github.com/apache/arrow-datafusion/pull/696) ([alamb](https://github.com/alamb))
+- Fix test output due to logical merge conflict [\#694](https://github.com/apache/arrow-datafusion/pull/694) ([alamb](https://github.com/alamb))
+- Fix typo in DEVELOPERS.md [\#692](https://github.com/apache/arrow-datafusion/pull/692) ([lvheyang](https://github.com/lvheyang))
+- add more integration tests [\#668](https://github.com/apache/arrow-datafusion/pull/668) ([Jimexist](https://github.com/Jimexist))
+- Bump arrow and parquet versions to 4.4 [\#654](https://github.com/apache/arrow-datafusion/pull/654) ([toddtreece](https://github.com/toddtreece))
+- Add query 15 to TPC-H queries [\#645](https://github.com/apache/arrow-datafusion/pull/645) ([Dandandan](https://github.com/Dandandan))
+- Improve error message and comments [\#641](https://github.com/apache/arrow-datafusion/pull/641) ([alamb](https://github.com/alamb))
+- add integration tests for rank, dense\_rank, fix last\_value evaluation with rank [\#638](https://github.com/apache/arrow-datafusion/pull/638) ([Jimexist](https://github.com/Jimexist))
+- round trip TPCH queries in tests [\#630](https://github.com/apache/arrow-datafusion/pull/630) ([houqp](https://github.com/houqp))
+- use Into\<String\> as argument type wherever applicable [\#615](https://github.com/apache/arrow-datafusion/pull/615) ([houqp](https://github.com/houqp))
+- reuse alias map in aggregate logical planning and refactor position resolution [\#606](https://github.com/apache/arrow-datafusion/pull/606) ([Jimexist](https://github.com/Jimexist))
+- fix clippy warnings [\#581](https://github.com/apache/arrow-datafusion/pull/581) ([Jimexist](https://github.com/Jimexist))
+- Add benchmarks to window function queries [\#564](https://github.com/apache/arrow-datafusion/pull/564) ([Jimexist](https://github.com/Jimexist))
+- reuse code for now function expr creation [\#548](https://github.com/apache/arrow-datafusion/pull/548) ([houqp](https://github.com/houqp))
+- turn on clippy rule for needless borrow [\#545](https://github.com/apache/arrow-datafusion/pull/545) ([Jimexist](https://github.com/Jimexist))
+- Refactor hash aggregates's planner building code [\#539](https://github.com/apache/arrow-datafusion/pull/539) ([Jimexist](https://github.com/Jimexist))
+- Cleanup Repartition Exec code [\#538](https://github.com/apache/arrow-datafusion/pull/538) ([alamb](https://github.com/alamb))
+- reuse datafusion physical planner in ballista building from protobuf [\#532](https://github.com/apache/arrow-datafusion/pull/532) ([Jimexist](https://github.com/Jimexist))
+- remove redundant `into_iter()` calls [\#527](https://github.com/apache/arrow-datafusion/pull/527) ([Jimexist](https://github.com/Jimexist))
+- Fix 517 - move `window_frames` module to `logical_plan` [\#518](https://github.com/apache/arrow-datafusion/pull/518) ([Jimexist](https://github.com/Jimexist))
+- Refactor window aggregation, simplify batch processing logic [\#516](https://github.com/apache/arrow-datafusion/pull/516) ([Jimexist](https://github.com/Jimexist))
+- Add datafusion::test\_util, resolve test data paths without env vars [\#498](https://github.com/apache/arrow-datafusion/pull/498) ([mluts](https://github.com/mluts))
+- Avoid warnings in tests when compiling without default features [\#489](https://github.com/apache/arrow-datafusion/pull/489) ([alamb](https://github.com/alamb))
+- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist))
+- use prettier check in CI [\#453](https://github.com/apache/arrow-datafusion/pull/453) ([Jimexist](https://github.com/Jimexist))
+- Optimize `nth_value`, remove `first_value`, `last_value` structs and use idiomatic rust style [\#452](https://github.com/apache/arrow-datafusion/pull/452) ([Jimexist](https://github.com/Jimexist))
+- Fixed typo / logical merge conflict [\#433](https://github.com/apache/arrow-datafusion/pull/433) ([jorgecarleitao](https://github.com/jorgecarleitao))
+- include test data and add aggregation tests in integration test [\#425](https://github.com/apache/arrow-datafusion/pull/425) ([Jimexist](https://github.com/Jimexist))
+- Add some padding around the logo [\#411](https://github.com/apache/arrow-datafusion/pull/411) ([parthsarthy](https://github.com/parthsarthy))
+- Benchmark subcommand to distinguish between DataFusion and Ballista [\#402](https://github.com/apache/arrow-datafusion/pull/402) ([jgoday](https://github.com/jgoday))
+- refactor datafusion/`scalar_value` to use more macro and avoid dup code [\#392](https://github.com/apache/arrow-datafusion/pull/392) ([Jimexist](https://github.com/Jimexist))
+- Update TPC-H benchmark to show physical plan when debug mode is enabled [\#386](https://github.com/apache/arrow-datafusion/pull/386) ([andygrove](https://github.com/andygrove))
+- Update arrow dependencies again [\#341](https://github.com/apache/arrow-datafusion/pull/341) ([alamb](https://github.com/alamb))
+- Update arrow-rs deps [\#317](https://github.com/apache/arrow-datafusion/pull/317) ([alamb](https://github.com/alamb))
+- Update PR template by commenting out instructions [\#315](https://github.com/apache/arrow-datafusion/pull/315) ([alamb](https://github.com/alamb))
+- fix clippy warning [\#286](https://github.com/apache/arrow-datafusion/pull/286) ([Jimexist](https://github.com/Jimexist))
+- add integration test to compare datafusion-cli against psql [\#281](https://github.com/apache/arrow-datafusion/pull/281) ([Jimexist](https://github.com/Jimexist))
+- Update arrow deps [\#269](https://github.com/apache/arrow-datafusion/pull/269) ([alamb](https://github.com/alamb))
+- Use multi-stage build dockerfile in datafusion-cli and reduce image size from 2.16GB to 89.9MB [\#266](https://github.com/apache/arrow-datafusion/pull/266) ([Jimexist](https://github.com/Jimexist))
+- Enable redundant\_field\_names clippy lint [\#261](https://github.com/apache/arrow-datafusion/pull/261) ([Dandandan](https://github.com/Dandandan))
+- fix clippy lint [\#259](https://github.com/apache/arrow-datafusion/pull/259) ([alamb](https://github.com/alamb))
+- Move datafusion-cli to new crate [\#231](https://github.com/apache/arrow-datafusion/pull/231) ([Dandandan](https://github.com/Dandandan))
+- Make test join\_with\_hash\_collision deterministic [\#229](https://github.com/apache/arrow-datafusion/pull/229) ([Dandandan](https://github.com/Dandandan))
+- Update arrow-rs deps \(to fix build due to flatbuffers update\) [\#224](https://github.com/apache/arrow-datafusion/pull/224) ([alamb](https://github.com/alamb))
+- Use standard make\_null\_array for CASE [\#223](https://github.com/apache/arrow-datafusion/pull/223) ([alamb](https://github.com/alamb))
+- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb))
+- MINOR: Remove empty rust dir [\#61](https://github.com/apache/arrow-datafusion/pull/61) ([andygrove](https://github.com/andygrove))
+
+
+
+\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)*
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index 2f1e997c3596f..1560b398f7733 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "datafusion"
 description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0"
 homepage = "https://github.com/apache/arrow-datafusion"
 repository = "https://github.com/apache/arrow-datafusion"
 readme = "../README.md"
diff --git a/dev/release/update_change_log-ballista.sh b/dev/release/update_change_log-ballista.sh
new file mode 100755
index 0000000000000..68193156622a2
--- /dev/null
+++ b/dev/release/update_change_log-ballista.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-ballista.sh
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+CURRENT_VER=$(grep version "${SOURCE_TOP_DIR}/ballista/rust/client/Cargo.toml" | head -n 1 | awk '{print $3}' | tr -d '"')
+${SOURCE_DIR}/update_change_log.sh ballista 4.0.0 "ballista-${CURRENT_VER}"
diff --git a/dev/release/update_change_log-datafusion.sh b/dev/release/update_change_log-datafusion.sh
new file mode 100755
index 0000000000000..f0f455ad1c9b5
--- /dev/null
+++ b/dev/release/update_change_log-datafusion.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion.sh
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+CURRENT_VER=$(grep version "${SOURCE_TOP_DIR}/datafusion/Cargo.toml" | head -n 1 | awk '{print $3}' | tr -d '"')
+${SOURCE_DIR}/update_change_log.sh datafusion 4.0.0 "${CURRENT_VER}"
diff --git a/dev/release/update_change_log-python.sh b/dev/release/update_change_log-python.sh
new file mode 100755
index 0000000000000..a48a5b657c5f3
--- /dev/null
+++ b/dev/release/update_change_log-python.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-python.sh
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+CURRENT_VER=$(grep version "${SOURCE_TOP_DIR}/python/Cargo.toml" | head -n 1 | awk '{print $3}' | tr -d '"')
+${SOURCE_DIR}/update_change_log.sh python 4.0.0 "python-${CURRENT_VER}"
diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh
index 4ee9e2eb1e498..0c9c2332ce704 100755
--- a/dev/release/update_change_log.sh
+++ b/dev/release/update_change_log.sh
@@ -27,13 +27,23 @@
 # arrow-datafusion/.github_changelog_generator
 #
 # Usage:
-# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log.sh
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log.sh <PROJECT> <FROM_VER> <TO_VER>
 
 set -e
 
 SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
 
+if [[ "$#" -ne 3 ]]; then
+    echo "USAGE: $0 PROJECT FROM_VER TO_VER"
+    exit 1
+fi
+
+PROJECT=$1
+FROM_VER=$2
+TO_VER=$3
+OUTPUT_PATH="${PROJECT}/CHANGELOG.md"
+
 pushd ${SOURCE_TOP_DIR}
 docker run -it --rm \
     -e CHANGELOG_GITHUB_TOKEN=$CHANGELOG_GITHUB_TOKEN \
@@ -41,7 +51,30 @@ docker run -it --rm \
     githubchangeloggenerator/github-changelog-generator \
     --user apache \
     --project arrow-datafusion \
-    --since-tag 4.0.0 \
-    --future-release 5.0.0
+    --since-tag "${FROM_VER}" \
+    --include-labels "${PROJECT}" \
+    --output "${OUTPUT_PATH}" \
+    --future-release "${TO_VER}"
+
+sed -i "s/\\\n/\n\n/" "${OUTPUT_PATH}"
+
+echo '<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
 
-sed -i "s/\\\n/\n\n/" CHANGELOG.md
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp
+mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}"
diff --git a/dev/update_arrow_deps.py b/dev/update_arrow_deps.py
index 44bdf4235d1c6..69fcdc84ab8fd 100755
--- a/dev/update_arrow_deps.py
+++ b/dev/update_arrow_deps.py
@@ -17,7 +17,7 @@
 # limitations under the License.
 #
 
-# Script that updates the arrow dependencies in datafusion and ballista, locall
+# Script that updates the arrow dependencies in datafusion and ballista, locally
 #
 # installation:
 # pip install tomlkit requests
diff --git a/dev/update_ballista_versions.py b/dev/update_ballista_versions.py
new file mode 100755
index 0000000000000..cb75a9a71c479
--- /dev/null
+++ b/dev/update_ballista_versions.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Script that updates verions for ballista crates, locally
+#
+# dependencies:
+# pip install tomlkit
+
+import os
+import argparse
+from pathlib import Path
+import tomlkit
+
+
+def update_cargo_toml(cargo_toml: str, new_version: str):
+    print(f'updating {cargo_toml}')
+    with open(cargo_toml) as f:
+        data = f.read()
+
+    doc = tomlkit.parse(data)
+    doc.get('package')['version'] = new_version
+
+    with open(cargo_toml, 'w') as f:
+        f.write(tomlkit.dumps(doc))
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Update ballista crate versions.')
+    parser.add_argument('new_version', type=str, help='new ballista version')
+    args = parser.parse_args()
+
+    repo_root = Path(__file__).parent.parent.absolute()
+    ballista_crates = set([
+        os.path.join(repo_root, rel_path, "Cargo.toml")
+        for rel_path in [
+            'ballista-examples',
+            'ballista/rust/core',
+            'ballista/rust/scheduler',
+            'ballista/rust/executor',
+            'ballista/rust/client',
+        ]
+    ])
+    new_version = args.new_version
+
+    print(f'Updating ballista versions in {repo_root} to {new_version}')
+
+    for cargo_toml in ballista_crates:
+        update_cargo_toml(cargo_toml, new_version)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
new file mode 100644
index 0000000000000..d8bdd13e8ce0a
--- /dev/null
+++ b/python/CHANGELOG.md
@@ -0,0 +1,70 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md)
+
+# Changelog
+
+## [python-0.3.0](https://github.com/apache/arrow-datafusion/tree/python-0.3.0) (2021-07-31)
+
+[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...python-0.3.0)
+
+**Implemented enhancements:**
+
+- add more math functions and unit tests to `python` crate  [\#748](https://github.com/apache/arrow-datafusion/pull/748) ([Jimexist](https://github.com/Jimexist))
+- Implement missing join types for Python dataframe [\#503](https://github.com/apache/arrow-datafusion/pull/503) ([Dandandan](https://github.com/Dandandan))
+- Add missing functions to python [\#388](https://github.com/apache/arrow-datafusion/pull/388) ([jgoday](https://github.com/jgoday))
+
+**Fixed bugs:**
+
+- fix maturin version in pyproject.toml [\#756](https://github.com/apache/arrow-datafusion/pull/756) ([Jimexist](https://github.com/Jimexist))
+- fix pyarrow type id mapping in `python` crate [\#742](https://github.com/apache/arrow-datafusion/pull/742) ([Jimexist](https://github.com/Jimexist))
+
+**Closed issues:**
+
+- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769)
+- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745)
+- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592)
+- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526)
+- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517)
+- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448)
+- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435)
+- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27)
+- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18)
+- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16)
+
+**Merged pull requests:**
+
+- fix python binding for `concat`, `concat_ws`, and `random` [\#768](https://github.com/apache/arrow-datafusion/pull/768) ([Jimexist](https://github.com/Jimexist))
+- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist))
+- fix python crate with the changes to logical plan builder [\#650](https://github.com/apache/arrow-datafusion/pull/650) ([Jimexist](https://github.com/Jimexist))
+- use nightly nightly-2021-05-10 [\#536](https://github.com/apache/arrow-datafusion/pull/536) ([Jimexist](https://github.com/Jimexist))
+- Define the unittests using pytest [\#493](https://github.com/apache/arrow-datafusion/pull/493) ([kszucs](https://github.com/kszucs))
+- use requirements.txt to formalize python deps [\#484](https://github.com/apache/arrow-datafusion/pull/484) ([Jimexist](https://github.com/Jimexist))
+- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist))
+- simplify python function definitions [\#477](https://github.com/apache/arrow-datafusion/pull/477) ([Jimexist](https://github.com/Jimexist))
+- Expose DataFrame::sort in the python bindings [\#469](https://github.com/apache/arrow-datafusion/pull/469) ([kszucs](https://github.com/kszucs))
+- Revert "Revert "Add datafusion-python  \(\#69\)" \(\#257\)" [\#270](https://github.com/apache/arrow-datafusion/pull/270) ([andygrove](https://github.com/andygrove))
+- Revert "Add datafusion-python  \(\#69\)" [\#257](https://github.com/apache/arrow-datafusion/pull/257) ([andygrove](https://github.com/andygrove))
+- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb))
+- Add datafusion-python  [\#69](https://github.com/apache/arrow-datafusion/pull/69) ([jorgecarleitao](https://github.com/jorgecarleitao))
+
+
+
+\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)*
diff --git a/python/Cargo.toml b/python/Cargo.toml
index fe84e5234c333..60cc74dfc89e0 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -17,7 +17,7 @@
 
 [package]
 name = "datafusion"
-version = "0.2.1"
+version = "0.3.0"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]