From fa728736bffb44af2d2e7fd3a890678f3e807cfd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 4 Feb 2022 07:22:22 -0500 Subject: [PATCH] Prepare for 9.0.0 release: Update version + CHANGELOG (#1265) * Update version to 9.0.0 * Update changelog generator script * Initial changelog * Updates * Update again * rat --- CHANGELOG.md | 66 +++++++++++++++++++ arrow-flight/Cargo.toml | 4 +- arrow-pyarrow-integration-testing/Cargo.toml | 4 +- arrow/Cargo.toml | 2 +- arrow/README.md | 2 +- .../dependency/default-features/Cargo.toml | 2 +- .../dependency/no-default-features/Cargo.toml | 2 +- arrow/test/dependency/simd/Cargo.toml | 2 +- dev/release/README.md | 2 +- dev/release/update_change_log.sh | 4 +- integration-testing/Cargo.toml | 2 +- parquet/Cargo.toml | 6 +- parquet_derive/Cargo.toml | 4 +- parquet_derive/README.md | 4 +- .../dependency/default-features/Cargo.toml | 2 +- parquet_derive_test/Cargo.toml | 6 +- 16 files changed, 90 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49ca3aa1fed5..b7bd321004bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,72 @@ # Changelog +## [9.0.0](https://github.com/apache/arrow-rs/tree/9.0.0) (2022-02-04) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/8.0.0...9.0.0) + +**Breaking changes:** + +- Add `Send` + `Sync` to `DataType`, `RowGroupReader`, `FileReader`, `ChunkReader`. [\#1264](https://github.com/apache/arrow-rs/issues/1264) +- Rename the function `Bitmap::len` to `Bitmap::bit_len` to clarify its meaning [\#1242](https://github.com/apache/arrow-rs/pull/1242) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove unused / broken `memory-check` feature [\#1222](https://github.com/apache/arrow-rs/pull/1222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Potentially buffer multiple `RecordBatches` before writing a parquet row group in `ArrowWriter` [\#1214](https://github.com/apache/arrow-rs/pull/1214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Add `async` arrow parquet reader [\#1154](https://github.com/apache/arrow-rs/pull/1154) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Rename `Bitmap::len` to `Bitmap::bit_len` [\#1233](https://github.com/apache/arrow-rs/issues/1233) +- Extend CSV schema inference to allow scientific notation for floating point types [\#1215](https://github.com/apache/arrow-rs/issues/1215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Write Multiple RecordBatch to Parquet Row Group [\#1211](https://github.com/apache/arrow-rs/issues/1211) +- Add doc examples for `eq_dyn` etc. [\#1202](https://github.com/apache/arrow-rs/issues/1202) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add comparison kernels for `BinaryArray` [\#1108](https://github.com/apache/arrow-rs/issues/1108) +- `impl ArrowNativeType for i128` [\#1098](https://github.com/apache/arrow-rs/issues/1098) +- Remove `Copy` trait bound from dyn scalar kernels [\#1243](https://github.com/apache/arrow-rs/pull/1243) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) +- Add `into_inner` for IPC `FileWriter` [\#1236](https://github.com/apache/arrow-rs/pull/1236) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) +- \[Minor\]Re-export `array::builder::make_builder` to make it available for downstream [\#1235](https://github.com/apache/arrow-rs/pull/1235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) + +**Fixed bugs:** + +- Parquet v8.0.0 panics when reading all null column to NullArray [\#1245](https://github.com/apache/arrow-rs/issues/1245) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Get `Unknown configuration option rust-version` when running the rust format command [\#1240](https://github.com/apache/arrow-rs/issues/1240) +- `Bitmap` Length Validation is Incorrect [\#1231](https://github.com/apache/arrow-rs/issues/1231) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Writing sliced `ListArray` or `MapArray` ignore offsets [\#1226](https://github.com/apache/arrow-rs/issues/1226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Remove broken `memory-tracking` crate feature [\#1171](https://github.com/apache/arrow-rs/issues/1171) +- Revert making `parquet::data_type` and `parquet::arrow::schema` experimental [\#1244](https://github.com/apache/arrow-rs/pull/1244) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) + +**Documentation updates:** + +- Update parquet crate documentation and examples [\#1253](https://github.com/apache/arrow-rs/pull/1253) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Refresh parquet readme / contributing guide [\#1252](https://github.com/apache/arrow-rs/pull/1252) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Add docs examples for dynamically compare functions [\#1250](https://github.com/apache/arrow-rs/pull/1250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add Rust Docs examples for UnionArray [\#1241](https://github.com/apache/arrow-rs/pull/1241) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Improve documentation for Bitmap [\#1237](https://github.com/apache/arrow-rs/pull/1237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Performance improvements:** + +- Improve performance for arithmetic kernels with `simd` feature enabled \(except for division/modulo\) [\#1221](https://github.com/apache/arrow-rs/pull/1221) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Do not concatenate identical dictionaries [\#1219](https://github.com/apache/arrow-rs/pull/1219) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Preserve dictionary encoding when decoding parquet into Arrow arrays, 60x perf improvement \(\#171\) [\#1180](https://github.com/apache/arrow-rs/pull/1180) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) + +**Closed issues:** + +- `UnalignedBitChunkIterator` to that iterates through already aligned `u64` blocks [\#1227](https://github.com/apache/arrow-rs/issues/1227) +- Remove unused `ArrowArrayReader` in parquet [\#1197](https://github.com/apache/arrow-rs/issues/1197) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Merged pull requests:** + +- Upgrade clap to 3.0.0 [\#1261](https://github.com/apache/arrow-rs/pull/1261) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jimexist](https://github.com/Jimexist)) +- Update chrono-tz requirement from 0.4 to 0.6 [\#1259](https://github.com/apache/arrow-rs/pull/1259) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update zstd requirement from 0.9 to 0.10 [\#1257](https://github.com/apache/arrow-rs/pull/1257) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix NullArrayReader \(\#1245\) [\#1246](https://github.com/apache/arrow-rs/pull/1246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- dyn compare for binary array [\#1238](https://github.com/apache/arrow-rs/pull/1238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove arrow array reader \(\#1197\) [\#1234](https://github.com/apache/arrow-rs/pull/1234) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Fix null bitmap length validation \(\#1231\) [\#1232](https://github.com/apache/arrow-rs/pull/1232) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Faster bitmask iteration [\#1228](https://github.com/apache/arrow-rs/pull/1228) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add non utf8 values into the test cases of BinaryArray comparison [\#1220](https://github.com/apache/arrow-rs/pull/1220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Update DECIMAL\_RE to allow scientific notation in auto inferred schemas [\#1216](https://github.com/apache/arrow-rs/pull/1216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pjmore](https://github.com/pjmore)) + + ## [8.0.0](https://github.com/apache/arrow-rs/tree/8.0.0) (2022-01-20) [Full Changelog](https://github.com/apache/arrow-rs/compare/7.0.0...8.0.0) diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml index e619c22ac5d9..350e49b8a766 100644 --- a/arrow-flight/Cargo.toml +++ b/arrow-flight/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-flight" description = "Apache Arrow Flight" -version = "8.0.0" +version = "9.0.0" edition = "2021" rust-version = "1.57" authors = ["Apache Arrow "] @@ -27,7 +27,7 @@ repository = "https://github.com/apache/arrow-rs" license = "Apache-2.0" [dependencies] -arrow = { path = "../arrow", version = "8.0.0" } +arrow = { path = "../arrow", version = "9.0.0" } base64 = "0.13" tonic = "0.6" bytes = "1" diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 746684c5d7cc..2ff769f01335 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-pyarrow-integration-testing" description = "" -version = "8.0.0" +version = "9.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] @@ -32,7 +32,7 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] -arrow = { path = "../arrow", version = "8.0.0", features = ["pyarrow"] } +arrow = { path = "../arrow", version = "9.0.0", features = ["pyarrow"] } pyo3 = { version = "0.15", features = ["extension-module"] } [package.metadata.maturin] diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index e5f63d834893..362dacefb0ad 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "arrow" -version = "8.0.0" +version = "9.0.0" description = "Rust implementation of Apache Arrow" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" diff --git a/arrow/README.md b/arrow/README.md index 676e305794bc..256a07edcc41 100644 --- a/arrow/README.md +++ b/arrow/README.md @@ -31,7 +31,7 @@ This crate is tested with the latest stable version of Rust. We do not currently The arrow crate follows the [SemVer standard](https://doc.rust-lang.org/cargo/reference/semver.html) defined by Cargo and works well within the Rust crate ecosystem. -However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `8.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes. +However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `9.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes. ## Features diff --git a/arrow/test/dependency/default-features/Cargo.toml b/arrow/test/dependency/default-features/Cargo.toml index 49f7c8651c15..35ca6c1d70f1 100644 --- a/arrow/test/dependency/default-features/Cargo.toml +++ b/arrow/test/dependency/default-features/Cargo.toml @@ -25,6 +25,6 @@ rust-version = "1.57" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arrow = { path = "../../../../arrow", version = "8.0.0" } +arrow = { path = "../../../../arrow", version = "9.0.0" } [workspace] diff --git a/arrow/test/dependency/no-default-features/Cargo.toml b/arrow/test/dependency/no-default-features/Cargo.toml index 6f2e8eb7e343..a6a142c2319a 100644 --- a/arrow/test/dependency/no-default-features/Cargo.toml +++ b/arrow/test/dependency/no-default-features/Cargo.toml @@ -25,6 +25,6 @@ rust-version = "1.57" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arrow = { path = "../../../../arrow", version = "8.0.0", default-features = false } +arrow = { path = "../../../../arrow", version = "9.0.0", default-features = false } [workspace] diff --git a/arrow/test/dependency/simd/Cargo.toml b/arrow/test/dependency/simd/Cargo.toml index 30981c502ba4..b3fa5f288837 100644 --- a/arrow/test/dependency/simd/Cargo.toml +++ b/arrow/test/dependency/simd/Cargo.toml @@ -25,6 +25,6 @@ rust-version = "1.57" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arrow = { path = "../../../../arrow", version = "8.0.0", features = ["simd"]} +arrow = { path = "../../../../arrow", version = "9.0.0", features = ["simd"]} [workspace] diff --git a/dev/release/README.md b/dev/release/README.md index 6c76db42151e..74edb0fa502a 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -23,7 +23,7 @@ We try to release a new version of Arrow every two weeks. This cadence balances getting new features into arrow without overwhelming downstream projects with too frequent changes. -If any code has been merged to master that has a breaking API change, as defined in [Rust RFC 1105](https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md), the major version number incremented changed (e.g. `8.0.0` to `8.0.0`). Otherwise the new minor version incremented (e.g. `8.0.0` to `7.1.0`). +If any code has been merged to master that has a breaking API change, as defined in [Rust RFC 1105](https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md), the major version number incremented changed (e.g. `9.0.0` to `9.0.0`). Otherwise the new minor version incremented (e.g. `9.0.0` to `7.1.0`). # Release Mechanics diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index d6a4d637f523..cc5f57acfe1f 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -40,5 +40,5 @@ docker run -it --rm -e CHANGELOG_GITHUB_TOKEN=$CHANGELOG_GITHUB_TOKEN -v "$(pwd) --cache-log=.githubchangeloggenerator.cache.log \ --http-cache \ --max-issues=300 \ - --since-tag 7.0.0 \ - --future-release 8.0.0 + --since-tag 8.0.0 \ + --future-release 9.0.0 diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml index 03c2abf0b6e6..0196578e0dcb 100644 --- a/integration-testing/Cargo.toml +++ b/integration-testing/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-integration-testing" description = "Binaries used in the Arrow integration tests" -version = "8.0.0" +version = "9.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index ad68ae2f4997..6b02da186091 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet" -version = "8.0.0" +version = "9.0.0" license = "Apache-2.0" description = "Apache Parquet implementation in Rust" homepage = "https://github.com/apache/arrow-rs" @@ -40,7 +40,7 @@ lz4 = { version = "1.23", optional = true } zstd = { version = "0.10", optional = true } chrono = { version = "0.4", default-features = false } num-bigint = "0.4" -arrow = { path = "../arrow", version = "8.0.0", optional = true, default-features = false, features = ["ipc"] } +arrow = { path = "../arrow", version = "9.0.0", optional = true, default-features = false, features = ["ipc"] } base64 = { version = "0.13", optional = true } clap = { version = "3", optional = true, features = ["derive", "env"] } serde_json = { version = "1.0", features = ["preserve_order"], optional = true } @@ -57,7 +57,7 @@ brotli = "3.3" flate2 = "1.0" lz4 = "1.23" serde_json = { version = "1.0", features = ["preserve_order"] } -arrow = { path = "../arrow", version = "8.0.0", default-features = false, features = ["test_utils", "prettyprint"] } +arrow = { path = "../arrow", version = "9.0.0", default-features = false, features = ["test_utils", "prettyprint"] } [features] default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"] diff --git a/parquet_derive/Cargo.toml b/parquet_derive/Cargo.toml index 92980db453d2..689d56eb05dd 100644 --- a/parquet_derive/Cargo.toml +++ b/parquet_derive/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet_derive" -version = "8.0.0" +version = "9.0.0" license = "Apache-2.0" description = "Derive macros for the Rust implementation of Apache Parquet" homepage = "https://github.com/apache/arrow-rs" @@ -35,4 +35,4 @@ proc-macro = true proc-macro2 = "1.0" quote = "1.0" syn = { version = "1.0", features = ["full", "extra-traits"] } -parquet = { path = "../parquet", version = "8.0.0" } +parquet = { path = "../parquet", version = "9.0.0" } diff --git a/parquet_derive/README.md b/parquet_derive/README.md index 8909f2dc4971..742bcdc7e98f 100644 --- a/parquet_derive/README.md +++ b/parquet_derive/README.md @@ -32,8 +32,8 @@ Add this to your Cargo.toml: ```toml [dependencies] -parquet = "8.0.0" -parquet_derive = "8.0.0" +parquet = "9.0.0" +parquet_derive = "9.0.0" ``` and this to your crate root: diff --git a/parquet_derive/test/dependency/default-features/Cargo.toml b/parquet_derive/test/dependency/default-features/Cargo.toml index e8c2c94517b2..62eb09abdde3 100644 --- a/parquet_derive/test/dependency/default-features/Cargo.toml +++ b/parquet_derive/test/dependency/default-features/Cargo.toml @@ -25,7 +25,7 @@ rust-version = "1.57" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -parquet_derive = { path = "../../../../parquet_derive", version = "8.0.0" } +parquet_derive = { path = "../../../../parquet_derive", version = "9.0.0" } # Keep this out of the default workspace [workspace] diff --git a/parquet_derive_test/Cargo.toml b/parquet_derive_test/Cargo.toml index 8571ee2222b5..e22b674e7cc2 100644 --- a/parquet_derive_test/Cargo.toml +++ b/parquet_derive_test/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet_derive_test" -version = "8.0.0" +version = "9.0.0" license = "Apache-2.0" description = "Integration test package for parquet-derive" homepage = "https://github.com/apache/arrow-rs" @@ -29,6 +29,6 @@ publish = false rust-version = "1.57" [dependencies] -parquet = { path = "../parquet", version = "8.0.0" } -parquet_derive = { path = "../parquet_derive", version = "8.0.0" } +parquet = { path = "../parquet", version = "9.0.0" } +parquet_derive = { path = "../parquet_derive", version = "9.0.0" } chrono = "0.4.19"