From 68dca4f93cdd1133d38c0e137828c4508db826f3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 29 Oct 2024 05:31:29 +0900
Subject: [PATCH 01/59] MINOR: [CI] Bump actions/setup-dotnet from 4.0.1 to
 4.1.0 (#44545)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/setup-dotnet](https://github.com/actions/setup-dotnet) from 4.0.1 to 4.1.0.
Release notes

Sourced from actions/setup-dotnet's releases.

v4.1.0

Full Changelog: https://github.com/actions/setup-dotnet/compare/v4...v4.1.0
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-dotnet&package-manager=github_actions&previous-version=4.0.1&new-version=4.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
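For context, Arrow's workflows reference this action in two pinning styles, and the diff below updates both. A condensed sketch of the updated steps (not an additional change; step names and values are taken from the `csharp.yml` and `dev.yml` hunks in this patch):

```yaml
# Tag-pinned reference (csharp.yml): readable, follows the release tag.
- name: Install C#
  uses: actions/setup-dotnet@v4.1.0
  with:
    dotnet-version: '8.0.x'

# Commit-pinned reference (dev.yml): immutable SHA, release noted in a comment.
- name: Install .NET
  uses: actions/setup-dotnet@3e891b0cb619bf60e2c25674b222b8940e2c1c25 # v4.1.0
  with:
    dotnet-version: '8.0.x'
```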
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/csharp.yml | 6 +++---
 .github/workflows/dev.yml    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml
index a608888c7e35f..95661a816f8e3 100644
--- a/.github/workflows/csharp.yml
+++ b/.github/workflows/csharp.yml
@@ -54,7 +54,7 @@ jobs:
         dotnet: ['8.0.x']
     steps:
       - name: Install C#
-        uses: actions/setup-dotnet@v4.0.1
+        uses: actions/setup-dotnet@v4.1.0
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Setup Python
@@ -86,7 +86,7 @@ jobs:
         dotnet: ['8.0.x']
     steps:
       - name: Install C#
-        uses: actions/setup-dotnet@v4.0.1
+        uses: actions/setup-dotnet@v4.1.0
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Checkout Arrow
@@ -113,7 +113,7 @@ jobs:
         dotnet: ['8.0.x']
     steps:
       - name: Install C#
-        uses: actions/setup-dotnet@v4.0.1
+        uses: actions/setup-dotnet@v4.1.0
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Setup Python
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index a6d403af7470f..0651e893ec0b3 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -117,7 +117,7 @@ jobs:
         with:
           ruby-version: ruby
       - name: Install .NET
-        uses: actions/setup-dotnet@6bd8b7f7774af54e05809fcc5431931b3eb1ddee # v4.0.1
+        uses: actions/setup-dotnet@3e891b0cb619bf60e2c25674b222b8940e2c1c25 # v4.1.0
         with:
           dotnet-version: '8.0.x'
       - name: Install Dependencies

From ed5be7a1c61c7e2876094dcb2af6ba29d331bb8d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 29 Oct 2024 05:32:49 +0900
Subject: [PATCH 02/59] MINOR: [CI] Bump actions/setup-python from 5.2.0 to
 5.3.0 (#44546)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5.2.0 to 5.3.0.
Release notes

Sourced from actions/setup-python's releases.

v5.3.0

Full Changelog: https://github.com/actions/setup-python/compare/v5...v5.3.0
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-python&package-manager=github_actions&previous-version=5.2.0&new-version=5.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
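The workflows below consume the bumped action in steps like the following condensed sketch (not an additional change; the SHA, `cache: 'pip'`, and `python-version` values are taken from the `java_nightly.yml` hunk in this diff — the pip cache line appears only in jobs that install pip dependencies afterwards):

```yaml
- name: Set up Python
  uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
  with:
    cache: 'pip'
    python-version: 3.12
```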
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/archery.yml      | 2 +-
 .github/workflows/comment_bot.yml  | 2 +-
 .github/workflows/cpp.yml          | 6 +++---
 .github/workflows/csharp.yml       | 4 ++--
 .github/workflows/dev.yml          | 4 ++--
 .github/workflows/docs.yml         | 2 +-
 .github/workflows/docs_light.yml   | 2 +-
 .github/workflows/integration.yml  | 2 +-
 .github/workflows/java.yml         | 2 +-
 .github/workflows/java_jni.yml     | 4 ++--
 .github/workflows/java_nightly.yml | 2 +-
 .github/workflows/js.yml           | 2 +-
 .github/workflows/pr_bot.yml       | 2 +-
 .github/workflows/python.yml       | 4 ++--
 .github/workflows/r.yml            | 4 ++--
 .github/workflows/r_nightly.yml    | 2 +-
 .github/workflows/ruby.yml         | 2 +-
 .github/workflows/swift.yml        | 2 +-
 18 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index eaa2249950fb7..6dc4da306a1ea 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -65,7 +65,7 @@ jobs:
         shell: bash
         run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true
       - name: Setup Python
-        uses: actions/setup-python@v5.2.0
+        uses: actions/setup-python@v5.3.0
         with:
           python-version: '3.9'
       - name: Install pygit2 binary wheel
diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index b7af4c5800835..8885171f0ab3f 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -41,7 +41,7 @@ jobs:
           # fetch the tags for version number generation
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Install Archery and Crossbow dependencies
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 634448d0c8f25..996079e36a314 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -149,7 +149,7 @@ jobs:
       - name: Setup Python on hosted runner
         if: |
           matrix.runs-on == 'ubuntu-latest'
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3
       - name: Setup Python on self-hosted runner
@@ -257,7 +257,7 @@ jobs:
           $(brew --prefix bash)/bin/bash \
             ci/scripts/install_minio.sh latest ${ARROW_HOME}
       - name: Set up Python
-        uses: actions/setup-python@v5.2.0
+        uses: actions/setup-python@v5.3.0
         with:
           python-version: 3.12
       - name: Install Google Cloud Storage Testbench
@@ -476,7 +476,7 @@ jobs:
             https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z
           chmod +x /usr/local/bin/minio.exe
       - name: Set up Python
-        uses: actions/setup-python@v5.2.0
+        uses: actions/setup-python@v5.3.0
         id: python-install
         with:
           python-version: 3.9
diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml
index 95661a816f8e3..d4c681d1601cb 100644
--- a/.github/workflows/csharp.yml
+++ b/.github/workflows/csharp.yml
@@ -58,7 +58,7 @@ jobs:
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3
       - name: Checkout Arrow
@@ -117,7 +117,7 @@ jobs:
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Checkout Arrow
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 0651e893ec0b3..e4d3cae96a1e5 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -50,7 +50,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Install pre-commit
@@ -109,7 +109,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Install Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: '3.12'
       - name: Install Ruby
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 1219f7526f9f2..83f835d588af2 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -52,7 +52,7 @@ jobs:
           key: debian-docs-${{ hashFiles('cpp/**') }}
           restore-keys: debian-docs-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml
index 7d540b7cecdc9..0e23394e8a453 100644
--- a/.github/workflows/docs_light.yml
+++ b/.github/workflows/docs_light.yml
@@ -59,7 +59,7 @@ jobs:
           key: conda-docs-${{ hashFiles('cpp/**') }}
           restore-keys: conda-docs-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 763394dacdae7..da0e2004f1f3f 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -101,7 +101,7 @@ jobs:
           key: conda-${{ hashFiles('cpp/**') }}
           restore-keys: conda-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index a27b3ef2854ca..f6b3fa748e6d2 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -83,7 +83,7 @@ jobs:
           key: maven-${{ hashFiles('java/**') }}
           restore-keys: maven-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 59936e1cd9955..5682f8e84167e 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -77,7 +77,7 @@ jobs:
           key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }}
           restore-keys: java-jni-manylinux-2014-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
@@ -119,7 +119,7 @@ jobs:
           key: maven-${{ hashFiles('java/**') }}
           restore-keys: maven-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml
index 0bf0c27288faf..436cc324ddc45 100644
--- a/.github/workflows/java_nightly.yml
+++ b/.github/workflows/java_nightly.yml
@@ -58,7 +58,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index dfad7de0b644f..810c154aa9c27 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -61,7 +61,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml
index bbb1a2d7228d0..1eedacf1abf31 100644
--- a/.github/workflows/pr_bot.yml
+++ b/.github/workflows/pr_bot.yml
@@ -82,7 +82,7 @@ jobs:
           # fetch the tags for version number generation
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Install Archery and Crossbow dependencies
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 91f09f6c661ae..72d6bfeab0404 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -114,7 +114,7 @@ jobs:
           key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }}
           restore-keys: ${{ matrix.cache }}-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
@@ -183,7 +183,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@v5.2.0
+        uses: actions/setup-python@v5.3.0
         with:
           python-version: '3.11'
       - name: Install Dependencies
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 3913ab8f022e8..c64822461f3e0 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -155,7 +155,7 @@ jobs:
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
@@ -214,7 +214,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml
index 9817e41d3b61d..4fcb399c91fc6 100644
--- a/.github/workflows/r_nightly.yml
+++ b/.github/workflows/r_nightly.yml
@@ -60,7 +60,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index d6cc5c9b97cd4..53c76404e2ce4 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -90,7 +90,7 @@ jobs:
           key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }}
           restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby-
       - name: Setup Python
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml
index e241713cf73cd..2fd55d457c208 100644
--- a/.github/workflows/swift.yml
+++ b/.github/workflows/swift.yml
@@ -64,7 +64,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python on hosted runner
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: 3
       - name: Setup Archery

From b88d64871c737fa8cbc81f833fa551079cb2ca13 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 28 Oct 2024 15:24:10 -0700
Subject: [PATCH 03/59] MINOR: [C#] Bump Google.Protobuf from 3.28.2 to 3.28.3
 in /csharp (#44547)

Bumps [Google.Protobuf](https://github.com/protocolbuffers/protobuf) from 3.28.2 to 3.28.3.
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Google.Protobuf&package-manager=nuget&previous-version=3.28.2&new-version=3.28.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher <curt@hagenlocher.org>
---
 csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +-
 .../Apache.Arrow.Flight.TestWeb.csproj                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
index f15e127a30557..7fbba13b2cc0d 100644
--- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
@@ -5,7 +5,7 @@
 
-    <PackageReference Include="Google.Protobuf" Version="3.28.2" />
+    <PackageReference Include="Google.Protobuf" Version="3.28.3" />
 
diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
index 2282c11c1ed39..2db42e40e108a 100644
--- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
+++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
@@ -5,7 +5,7 @@
 
-    <PackageReference Include="Google.Protobuf" Version="3.28.2" />
+    <PackageReference Include="Google.Protobuf" Version="3.28.3" />
 

From 6c95dfa2771ad3462c3a4e7a9e74ab4409a79c92 Mon Sep 17 00:00:00 2001
From: Tyler White <50381805+IndexSeek@users.noreply.github.com>
Date: Mon, 28 Oct 2024 20:44:26 -0400
Subject: [PATCH 04/59] MINOR: [Docs][Python] Document example for pc.index
 (#44537)

### Rationale for this change

In another project, I've been referring to the PyArrow Compute documentation quite a lot, and I believe having more documentation here would be helpful. I added an example for Index, also to highlight the behavior of `start` and the -1 return if the value isn't found.

### What changes are included in this PR?

Adding an example that should appear on the [pyarrow.compute.index](https://arrow.apache.org/docs/python/generated/pyarrow.compute.index.html#) documentation page.

### Are these changes tested?

I ran this code using the latest version of PyArrow and ensured the outputs were as expected. I did not test building the documentation and rendering, although I would like to start doing this and learn more about this process for future documentation purposes.

### Are there any user-facing changes?

A new example would be available on the website.

Authored-by: Tyler White <50381805+IndexSeek@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 python/pyarrow/compute.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 83612f66d21e2..426ecae31c039 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -423,6 +423,18 @@ def index(data, value, start=None, end=None, *, memory_pool=None):
     -------
     index : int
         the index, or -1 if not found
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
+    >>> pc.index(arr, "ipsum")
+    <pyarrow.Int64Scalar: 1>
+    >>> pc.index(arr, "ipsum", start=2)
+    <pyarrow.Int64Scalar: 5>
+    >>> pc.index(arr, "amet")
+    <pyarrow.Int64Scalar: -1>
     """
     if start is not None:
         if end is not None:

From ea9b15ff941e7492e171cffee05af85b99306631 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 29 Oct 2024 11:57:52 -0400
Subject: [PATCH 05/59] MINOR: [R] Trim news for 18.0 release (#44562)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

The referenced change was included in the 17.0.0.1 CRAN submission (https://github.com/apache/arrow/issues/43317#issuecomment-2289034257) so we don't need to announce it here. As far as CRAN releases go, the performance regression was never released.

### What changes are included in this PR?
rm

### Are these changes tested?

🙅

### Are there any user-facing changes?

Words
---
 r/NEWS.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index afae9ae4e388e..9b9300c27062b 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -23,7 +23,6 @@
 
 * Fix bindings to allow filtering a factor column in a Dataset using `%in%` (#43446)
 * Update `str_sub` binding to properly handle negative `end` values (@coussens, #44141)
-* Fix summarize() performance regression (pushdown) (#43649)
 * Fix altrep string columns from readr (#43351)
 * Fix crash in ParquetFileWriter$WriteTable and add WriteBatch (#42241)
 * Fix bindings in Math group generics (@aboyoun, #43162)

From 2a0f06c4a40ec64e1f4f046a7a8aa165329c66d6 Mon Sep 17 00:00:00 2001
From: mroz45
Date: Tue, 29 Oct 2024 22:00:56 +0100
Subject: [PATCH 06/59] GH-41706: [C++][Acero] Enhance asof_join to work in
 multi-threaded execution by sequencing input (#44083)

### Rationale for this change

This is the initial PR. I found that the test fails with specific parameters.

### What changes are included in this PR?

In this PR I provoke a failure of asof_join_node_test.

* GitHub Issue: #41706

Authored-by: kamilt
Signed-off-by: Weston Pace <weston.pace@gmail.com>
---
 cpp/src/arrow/acero/accumulation_queue.h      |  1 +
 cpp/src/arrow/acero/asof_join_node.cc         | 27 +++++++++++++++-----
 cpp/src/arrow/acero/asof_join_node_test.cc    |  8 ++++--
 cpp/src/arrow/acero/options.h                 |  9 +++++--
 cpp/src/arrow/acero/source_node.cc            |  5 ++--
 cpp/src/arrow/acero/test_util_internal.cc     |  4 +++
 cpp/src/arrow/dataset/scanner.cc              | 11 +++++---
 python/pyarrow/_dataset.pyx                   |  9 +++++--
 python/pyarrow/acero.py                       | 15 ++++++++---
 python/pyarrow/includes/libarrow_dataset.pxd  |  2 +-
 10 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/cpp/src/arrow/acero/accumulation_queue.h b/cpp/src/arrow/acero/accumulation_queue.h
index a27b8b399ce47..a173f9840388f 100644
--- a/cpp/src/arrow/acero/accumulation_queue.h
+++ b/cpp/src/arrow/acero/accumulation_queue.h
@@ -128,6 +128,7 @@ class SerialSequencingQueue {
   /// Strategy that describes how to handle items
   class Processor {
    public:
+    virtual ~Processor() = default;
    /// Process the batch
     ///
     /// This method will be called on each batch in order. Calls to this method
diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc
index c4f11d01f3d5c..a5a80c8805fe4 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "arrow/acero/asof_join_node.h"
+#include "arrow/acero/accumulation_queue.h"
 #include "arrow/acero/backpressure_handler.h"
 #include "arrow/acero/concurrent_queue_internal.h"
@@ -471,7 +472,7 @@ class BackpressureController : public BackpressureControl {
   std::atomic<int32_t>& backpressure_counter_;
 };
 
-class InputState {
+class InputState : public util::SerialSequencingQueue::Processor {
   // InputState corresponds to an input
   // Input record batches are queued up in InputState until processed and
   // turned into output record batches.
@@ -482,7 +483,8 @@ class InputState {
              const std::shared_ptr<arrow::Schema>& schema,
              const col_index_t time_col_index,
              const std::vector<col_index_t>& key_col_index)
-      : queue_(std::move(handler)),
+      : sequencer_(util::SerialSequencingQueue::Make(this)),
+        queue_(std::move(handler)),
         schema_(schema),
         time_col_index_(time_col_index),
         key_col_index_(key_col_index),
@@ -699,7 +701,16 @@ class InputState {
                DEBUG_MANIP(std::endl));
     return updated;
   }
+
+  Status InsertBatch(ExecBatch batch) {
+    return sequencer_->InsertBatch(std::move(batch));
+  }
+
+  Status Process(ExecBatch batch) override {
+    auto rb = *batch.ToRecordBatch(schema_);
+    DEBUG_SYNC(node_, "received batch from input ", index_, ":", DEBUG_MANIP(std::endl),
+               rb->ToString(), DEBUG_MANIP(std::endl));
+    return Push(rb);
+  }
 
   void Rehash() {
     DEBUG_SYNC(node_, "rehashing for input ", index_, ":", DEBUG_MANIP(std::endl));
     MemoStore new_memo(DEBUG_ADD(memo_.no_future_, node_, index_));
@@ -760,6 +771,8 @@ class InputState {
   }
 
  private:
+  // ExecBatch Sequencer
+  std::unique_ptr<util::SerialSequencingQueue> sequencer_;
   // Pending record batches. The latest is the front. Batches cannot be empty.
   BackpressureConcurrentQueue<std::shared_ptr<RecordBatch>> queue_;
   // Schema associated with the input
@@ -1399,6 +1412,9 @@ class AsofJoinNode : public ExecNode {
     // InputReceived may be called after execution was finished. Pushing it to the
     // InputState is unnecessary since we're done (and anyway may cause the
     // BackPressureController to pause the input, causing a deadlock), so drop it.
+    if (::arrow::compute::kUnsequencedIndex == batch.index)
+      return Status::Invalid("AsofJoin requires sequenced input");
+
     if (process_task_.is_finished()) {
       DEBUG_SYNC(this, "Input received while done. Short circuiting.",
                  DEBUG_MANIP(std::endl));
@@ -1409,12 +1425,9 @@ class AsofJoinNode : public ExecNode {
     ARROW_DCHECK(std_has(inputs_, input));
     size_t k = std_find(inputs_, input) - inputs_.begin();
 
-    // Put into the queue
-    auto rb = *batch.ToRecordBatch(input->output_schema());
-    DEBUG_SYNC(this, "received batch from input ", k, ":", DEBUG_MANIP(std::endl),
-               rb->ToString(), DEBUG_MANIP(std::endl));
+    // Put into the sequencing queue
+    ARROW_RETURN_NOT_OK(state_.at(k)->InsertBatch(std::move(batch)));
 
-    ARROW_RETURN_NOT_OK(state_.at(k)->Push(rb));
     PushProcess(true);
 
     return Status::OK();
diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc
index 5d3e9fba08bbf..2983888183a38 100644
--- a/cpp/src/arrow/acero/asof_join_node_test.cc
+++ b/cpp/src/arrow/acero/asof_join_node_test.cc
@@ -101,6 +101,7 @@ Result<BatchesWithSchema> MakeBatchesFromNumString(
   BatchesWithSchema batches;
   batches.schema = schema;
   int n_fields = schema->num_fields();
+  size_t batch_index = 0;
   for (auto num_batch : num_batches.batches) {
     Datum two(Int32Scalar(2));
     std::vector<Datum> values;
@@ -128,6 +129,7 @@
       }
     }
     ExecBatch batch(values, num_batch.length);
+    batch.index = batch_index++;
     batches.batches.push_back(batch);
   }
   return batches;
@@ -185,6 +187,7 @@ Result<BatchesWithSchema> MutateByKey(BatchesWithSchema& batches, std::string fr
                           replace_key ? batches.schema->SetField(from_index, new_field)
                                       : batches.schema->AddField(from_index, new_field));
   }
+  size_t batch_index = 0;
   for (const ExecBatch& batch : batches.batches) {
     std::vector<Datum> new_values;
     for (int i = 0; i < n_fields; i++) {
@@ -233,6 +236,7 @@
       new_values.push_back(value);
     }
     new_batches.batches.emplace_back(new_values, batch.length);
+    new_batches.batches.back().index = batch_index++;
   }
   return new_batches;
 }
@@ -1571,7 +1575,7 @@ void TestSequencing(BatchesMaker maker, int num_batches, int batch_size) {
       "asofjoin", {l_src, r_src}, GetRepeatedOptions(2, "time", {"key"}, 1000)};
 
   QueryOptions query_options;
-  query_options.use_threads = false;
+  query_options.use_threads = true;
   ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema batches,
                        DeclarationToExecBatches(asofjoin, query_options));
 
@@ -1579,7 +1583,7 @@
 }
 
 TEST(AsofJoinTest, BatchSequencing) {
-  return TestSequencing(MakeIntegerBatches, /*num_batches=*/32, /*batch_size=*/1);
+  return TestSequencing(MakeIntegerBatches, /*num_batches=*/1000, /*batch_size=*/1);
 }
 
 template
diff --git a/cpp/src/arrow/acero/options.h b/cpp/src/arrow/acero/options.h
index 4447e9c67a199..2beacfe26baa1 100644
--- a/cpp/src/arrow/acero/options.h
+++ b/cpp/src/arrow/acero/options.h
@@ -93,13 +93,18 @@ class ARROW_ACERO_EXPORT SourceNodeOptions : public ExecNodeOptions {
  public:
   /// Create an instance from values
   SourceNodeOptions(std::shared_ptr<Schema> output_schema,
-                    std::function<Future<std::optional<ExecBatch>>()> generator)
-      : output_schema(std::move(output_schema)), generator(std::move(generator)) {}
+                    std::function<Future<std::optional<ExecBatch>>()> generator,
+                    Ordering ordering = Ordering::Unordered())
+      : output_schema(std::move(output_schema)),
+        generator(std::move(generator)),
+        ordering(std::move(ordering)) {}
 
   /// \brief the schema for batches that will be generated by this source
   std::shared_ptr<Schema> output_schema;
   /// \brief an asynchronous stream of batches ending with std::nullopt
   std::function<Future<std::optional<ExecBatch>>()> generator;
+
+  Ordering ordering = Ordering::Unordered();
 };
diff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc
index 8060e01f074f8..ac34e4b6a09fc 100644
--- a/cpp/src/arrow/acero/source_node.cc
+++ b/cpp/src/arrow/acero/source_node.cc
@@ -106,7 +106,8 @@ struct SourceNode : ExecNode, public TracedNode {
     RETURN_NOT_OK(ValidateExecNodeInputs(plan, inputs, 0, "SourceNode"));
     const auto& source_options = checked_cast<const SourceNodeOptions&>(options);
     return plan->EmplaceNode<SourceNode>(plan, source_options.output_schema,
-                                         source_options.generator);
+                                         source_options.generator,
+                                         source_options.ordering);
   }
 
   const char* kind_name() const override { return "SourceNode"; }
@@ -406,7 +407,7 @@ struct SchemaSourceNode : public SourceNode {
 struct RecordBatchReaderSourceNode : public SourceNode {
   RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,
                               arrow::AsyncGenerator<std::optional<ExecBatch>> generator)
-      : SourceNode(plan, schema, generator) {}
+      : SourceNode(plan, schema, generator, Ordering::Implicit()) {}
 
   static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
                                 const ExecNodeOptions& options) {
diff --git a/cpp/src/arrow/acero/test_util_internal.cc b/cpp/src/arrow/acero/test_util_internal.cc
index f50ca92238dc4..107a20354c0e7 100644
--- a/cpp/src/arrow/acero/test_util_internal.cc
+++ b/cpp/src/arrow/acero/test_util_internal.cc
@@ -384,6 +384,7 @@ Result<BatchesWithSchema> MakeIntegerBatches(
   int row = 0;
   for (int i = 0; i < num_batches; i++) {
     ARROW_ASSIGN_OR_RAISE(auto batch, MakeIntegerBatch(gens, schema, row, batch_size));
+    batch.index = i;
     out.batches.push_back(std::move(batch));
     row += batch_size;
   }
@@ -410,6 +411,9 @@ BatchesWithSchema MakeBatchesFromString(const std::shared_ptr<Schema>& schema,
       out_batches.batches.push_back(out_batches.batches[i]);
     }
   }
+  for (size_t batch_index = 0; batch_index < out_batches.batches.size(); ++batch_index) {
+    out_batches.batches[batch_index].index = batch_index;
+  }
 
   return out_batches;
 }
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index a856a792a264f..0df8fd802656c 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -1032,11 +1032,11 @@ Result<acero::ExecNode*> MakeScanNode(acero::ExecPlan* plan,
   } else {
     batch_gen = std::move(merged_batch_gen);
   }
-
+  int64_t index = require_sequenced_output ? 0 : compute::kUnsequencedIndex;
   auto gen = MakeMappedGenerator(
       std::move(batch_gen),
-      [scan_options](const EnumeratedRecordBatch& partial)
-          -> Result<std::optional<ExecBatch>> {
+      [scan_options, index](const EnumeratedRecordBatch& partial) mutable
+          -> Result<std::optional<ExecBatch>> {
        // TODO(ARROW-13263) fragments may be able to attach more guarantees to batches
         // than this, for example parquet's row group stats. Failing to do this leaves
         // perf on the table because row group stats could be used to skip kernel execs in
@@ -1057,9 +1057,12 @@
         batch->values.emplace_back(partial.record_batch.index);
         batch->values.emplace_back(partial.record_batch.last);
         batch->values.emplace_back(partial.fragment.value->ToString());
+        if (index != compute::kUnsequencedIndex) batch->index = index++;
         return batch;
       });
 
+  auto ordering = require_sequenced_output ? Ordering::Implicit() : Ordering::Unordered();
+
  auto fields = scan_options->dataset_schema->fields();
   if (scan_options->add_augmented_fields) {
     for (const auto& aug_field : kAugmentedFields) {
@@ -1069,7 +1072,7 @@
 
   return acero::MakeExecNode(
       "source", plan, {},
-      acero::SourceNodeOptions{schema(std::move(fields)), std::move(gen)});
+      acero::SourceNodeOptions{schema(std::move(fields)), std::move(gen), ordering});
 }
 
 Result<acero::ExecNode*> MakeAugmentedProjectNode(acero::ExecPlan* plan,
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 39e3f4d665d88..3a4fa1ab611a7 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -4067,11 +4067,14 @@ cdef class _ScanNodeOptions(ExecNodeOptions):
     def _set_options(self, Dataset dataset, dict scan_options):
         cdef:
             shared_ptr[CScanOptions] c_scan_options
+            bint require_sequenced_output=False
 
         c_scan_options = Scanner._make_scan_options(dataset, scan_options)
 
+        require_sequenced_output=scan_options.get("require_sequenced_output", False)
+
         self.wrapped.reset(
-            new CScanNodeOptions(dataset.unwrap(), c_scan_options)
+            new CScanNodeOptions(dataset.unwrap(), c_scan_options, require_sequenced_output)
         )
 
@@ -4097,7 +4100,9 @@ class ScanNodeOptions(_ScanNodeOptions):
     dataset : pyarrow.dataset.Dataset
         The table which acts as the data source.
     **kwargs : dict, optional
-        Scan options. See `Scanner.from_dataset` for possible arguments.
+        Scan options. See `Scanner.from_dataset` for possible arguments.
+        require_sequenced_output : bool, default False
+            Assert implicit ordering on data.
""" def __init__(self, Dataset dataset, **kwargs): diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py index 77ba3ab1ce85d..706338bd8cdb8 100644 --- a/python/pyarrow/acero.py +++ b/python/pyarrow/acero.py @@ -56,8 +56,10 @@ class InMemoryDataset: ds = DatasetModuleStub -def _dataset_to_decl(dataset, use_threads=True): - decl = Declaration("scan", ScanNodeOptions(dataset, use_threads=use_threads)) +def _dataset_to_decl(dataset, use_threads=True, require_sequenced_output=False): + decl = Declaration("scan", ScanNodeOptions( + dataset, use_threads=use_threads, + require_sequenced_output=require_sequenced_output)) # Get rid of special dataset columns # "__fragment_index", "__batch_index", "__last_in_fragment", "__filename" @@ -311,13 +313,18 @@ def _perform_join_asof(left_operand, left_on, left_by, # Add the join node to the execplan if isinstance(left_operand, ds.Dataset): - left_source = _dataset_to_decl(left_operand, use_threads=use_threads) + left_source = _dataset_to_decl( + left_operand, + use_threads=use_threads, + require_sequenced_output=True) else: left_source = Declaration( "table_source", TableSourceNodeOptions(left_operand), ) if isinstance(right_operand, ds.Dataset): - right_source = _dataset_to_decl(right_operand, use_threads=use_threads) + right_source = _dataset_to_decl( + right_operand, use_threads=use_threads, + require_sequenced_output=True) else: right_source = Declaration( "table_source", TableSourceNodeOptions(right_operand) diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index ef1238e4154bb..d2fbcd0ee4d3b 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -51,7 +51,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: CExpression filter cdef cppclass CScanNodeOptions "arrow::dataset::ScanNodeOptions"(CExecNodeOptions): - CScanNodeOptions(shared_ptr[CDataset] dataset, shared_ptr[CScanOptions] scan_options) + CScanNodeOptions(shared_ptr[CDataset] dataset, shared_ptr[CScanOptions] scan_options, bint require_sequenced_output) shared_ptr[CScanOptions] scan_options From 2c798858e956cfc21638cd2e6c64e3010dda6872 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 30 Oct 2024 06:17:44 +0900 Subject: [PATCH 07/59] MINOR: [Release] Update versions for 19.0.0-SNAPSHOT --- c_glib/meson.build | 2 +- c_glib/tool/generate-version-header.py | 1 + c_glib/vcpkg.json | 2 +- ci/scripts/PKGBUILD | 2 +- cpp/CMakeLists.txt | 2 +- cpp/vcpkg.json | 2 +- csharp/Directory.Build.props | 2 +- dev/tasks/homebrew-formulae/apache-arrow-glib.rb | 2 +- dev/tasks/homebrew-formulae/apache-arrow.rb | 2 +- docs/source/_static/versions.json | 9 +++++++-- java/adapter/avro/pom.xml | 2 +- java/adapter/jdbc/pom.xml | 2 +- java/adapter/orc/pom.xml | 2 +- java/algorithm/pom.xml | 2 +- java/bom/pom.xml | 2 +- java/c/pom.xml | 2 +- java/compression/pom.xml | 2 +- java/dataset/pom.xml | 2 +- java/flight/flight-core/pom.xml | 2 +- java/flight/flight-integration-tests/pom.xml | 2 +- java/flight/flight-sql-jdbc-core/pom.xml | 2 +- java/flight/flight-sql-jdbc-driver/pom.xml | 2 +- java/flight/flight-sql/pom.xml | 2 +- java/flight/pom.xml | 2 +- java/format/pom.xml | 2 +- java/gandiva/pom.xml | 2 +- java/memory/memory-core/pom.xml | 2 +- java/memory/memory-netty-buffer-patch/pom.xml | 2 +- java/memory/memory-netty/pom.xml | 2 +- java/memory/memory-unsafe/pom.xml | 2 +- java/memory/pom.xml | 2 +- java/performance/pom.xml | 2 +- java/pom.xml | 2 +- java/tools/pom.xml | 2 
+- java/vector/pom.xml | 2 +- js/package.json | 2 +- matlab/CMakeLists.txt | 2 +- python/CMakeLists.txt | 2 +- python/pyproject.toml | 2 +- r/DESCRIPTION | 2 +- r/NEWS.md | 4 +++- r/pkgdown/assets/versions.json | 8 ++++++-- ruby/red-arrow-cuda/lib/arrow-cuda/version.rb | 2 +- ruby/red-arrow-dataset/lib/arrow-dataset/version.rb | 2 +- .../red-arrow-flight-sql/lib/arrow-flight-sql/version.rb | 2 +- ruby/red-arrow-flight/lib/arrow-flight/version.rb | 2 +- ruby/red-arrow/lib/arrow/version.rb | 2 +- ruby/red-gandiva/lib/gandiva/version.rb | 2 +- ruby/red-parquet/lib/parquet/version.rb | 2 +- 49 files changed, 62 insertions(+), 50 deletions(-) diff --git a/c_glib/meson.build b/c_glib/meson.build index 96ca375716bad..bd7843d8bc362 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -35,7 +35,7 @@ project('arrow-glib', 'c', 'cpp', # * 22.04: 0.61.2 meson_version: '>=0.53.2') -version = '18.0.0-SNAPSHOT' +version = '19.0.0-SNAPSHOT' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py index ba8cb03d15a3e..4995ce570aeb0 100755 --- a/c_glib/tool/generate-version-header.py +++ b/c_glib/tool/generate-version-header.py @@ -140,6 +140,7 @@ def generate_availability_macros(library: str) -> str: ALL_VERSIONS = [ + (19, 0), (18, 0), (17, 0), (16, 0), diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index 3941edbfec527..f2717f7e27cf2 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow-glib", - "version-string": "18.0.0-SNAPSHOT", + "version-string": "19.0.0-SNAPSHOT", "dependencies": [ "glib", "gobject-introspection", diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index ed68faae950b1..24e68fdc82c84 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=17.0.0.9000 +pkgver=18.0.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a40afd00c85a0..97cbb74d1ffda 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -79,7 +79,7 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "18.0.0-SNAPSHOT") +set(ARROW_VERSION "19.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 103e678ebb4ac..6192e4efbd21e 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "18.0.0-SNAPSHOT", + "version-string": "19.0.0-SNAPSHOT", "dependencies": [ "abseil", { diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index 43c93238d6a7b..1994aa74bd9f4 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -29,7 +29,7 @@ Apache Arrow library Copyright 2016-2024 The Apache Software Foundation The Apache Software Foundation - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index 6bcae64adb92f..df068812492e8 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url 
"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-18.0.0-SNAPSHOT/apache-arrow-18.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-19.0.0-SNAPSHOT/apache-arrow-19.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index 955dfa0ea9fa4..a3730db719e05 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-18.0.0-SNAPSHOT/apache-arrow-18.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-19.0.0-SNAPSHOT/apache-arrow-19.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index 6a684b56d57b5..7598072f3c2ff 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,15 +1,20 @@ [ { - "name": "18.0 (dev)", + "name": "19.0 (dev)", "version": "dev/", "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "17.0 (stable)", + "name": "18.0 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "17.0", + "version": "17.0/", + "url": "https://arrow.apache.org/docs/17.0/" + }, { "name": "16.1", "version": "16.1/", diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 2c02e72e9c838..827d19f2a2060 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 5ebb4089cf72f..2f621d7a05a80 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index f6e7b024e7d9d..203a31d7fcfe7 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT ../../pom.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 6971b53638e48..898c2605b6281 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-algorithm Arrow Algorithms diff --git a/java/bom/pom.xml b/java/bom/pom.xml index ce05f25bc6c9e..5e118bae183c9 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -29,7 +29,7 @@ under the License. org.apache.arrow arrow-bom - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT pom Arrow Bill of Materials diff --git a/java/c/pom.xml b/java/c/pom.xml index fe57bd2ea0ec5..c90b6dc0efef4 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-c-data diff --git a/java/compression/pom.xml b/java/compression/pom.xml index 3af668cfaa7a8..094e31afa4738 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-compression Arrow Compression diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index ec90e39d522cf..3cc4ca0713974 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-dataset diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 74767fd78f1b2..3127bc0d949f1 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT flight-core diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 7da5156404dba..e43bcd0571102 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT flight-integration-tests diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index eb0ce04ca7fcb..5c893dad62fce 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT flight-sql-jdbc-core diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 148319e5d9d64..ba6fe277d1327 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 021c1e65ab5b3..9cbc8430fedb7 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT flight-sql diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 55511eba82b3a..2fc3e89ef8a22 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-flight diff --git a/java/format/pom.xml b/java/format/pom.xml index f767215b12807..d3578b63d2043 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 70bde084b4216..5367bfdedfdff 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT org.apache.arrow.gandiva diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 9b24cee032023..72ee69d60a998 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-memory - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-memory-core diff --git a/java/memory/memory-netty-buffer-patch/pom.xml b/java/memory/memory-netty-buffer-patch/pom.xml index 2e70279b23b00..07dc7d2403d9e 100644 --- a/java/memory/memory-netty-buffer-patch/pom.xml +++ b/java/memory/memory-netty-buffer-patch/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-memory-netty-buffer-patch diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index 6cf573dd4d381..6d660da117379 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-memory-netty diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 6bba222d89cb3..92dc0c9fe5dc2 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-memory-unsafe diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 0ca357beaa781..bc34c260505f9 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-memory pom diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 9f4df1ff2e70d..3f18188e3a3d2 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-performance jar diff --git a/java/pom.xml b/java/pom.xml index 876ce703f0c16..ff3a3f04ca124 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT pom Apache Arrow Java Root POM diff --git a/java/tools/pom.xml b/java/tools/pom.xml index d261496040b78..f06ded294a763 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-tools Arrow Tools diff --git a/java/vector/pom.xml b/java/vector/pom.xml index eb0e39565332e..7cd25cd43e237 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 18.0.0-SNAPSHOT + 19.0.0-SNAPSHOT arrow-vector Arrow Vectors diff --git a/js/package.json b/js/package.json index 643f049b6bfaf..a879814426ed7 100644 --- a/js/package.json +++ b/js/package.json @@ -120,5 +120,5 @@ "engines": { "node": ">=12.0" }, - "version": "18.0.0-SNAPSHOT" + "version": "19.0.0-SNAPSHOT" } diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 91c186a6765a5..c11decb5a77a2 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -100,7 +100,7 @@ endfunction() set(CMAKE_CXX_STANDARD 17) -set(MLARROW_VERSION "18.0.0-SNAPSHOT") +set(MLARROW_VERSION "19.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 710735adc704d..335efced17d00 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -28,7 +28,7 @@ project(pyarrow) # which in turn meant that Py_GIL_DISABLED was not set. 
set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON) -set(PYARROW_VERSION "18.0.0-SNAPSHOT") +set(PYARROW_VERSION "19.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") # Running from a Python sdist tarball diff --git a/python/pyproject.toml b/python/pyproject.toml index 8748fe4052be1..32b95254f217d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -81,4 +81,4 @@ root = '..' version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' -fallback_version = '18.0.0a0' +fallback_version = '19.0.0a0' diff --git a/r/DESCRIPTION b/r/DESCRIPTION index ff74c566ffeb3..2ae624acf3a0f 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 17.0.0.9000 +Version: 18.0.0.9000 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 9b9300c27062b..08443bc99378a 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 17.0.0.9000 +# arrow 18.0.0.9000 + +# arrow 18.0.0 ## Minor improvements and fixes diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index e8e26f22b05cd..cecbed7f32818 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "17.0.0.9000 (dev)", + "name": "18.0.0.9000 (dev)", "version": "dev/" }, { - "name": "17.0.0 (release)", + "name": "18.0.0 (release)", "version": "" }, + { + "name": "17.0.0", + "version": "17.0/" + }, { "name": "16.1.0", "version": "16.1/" diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index 1fbbe88bcc343..284afd290f655 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowCUDA - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb index a9eae43d59b9f..26cf368019f68 100644 --- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb +++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowDataset - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb index f24688e9cb0f8..a4fabe6737aa8 100644 --- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb +++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlightSQL - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb index 263f35986616b..a497405047e0d 100644 --- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb +++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb @@ -16,7 +16,7 @@ # under the License. 
module ArrowFlight - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb index 42708a65026ee..3033a470dd697 100644 --- a/ruby/red-arrow/lib/arrow/version.rb +++ b/ruby/red-arrow/lib/arrow/version.rb @@ -16,7 +16,7 @@ # under the License. module Arrow - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb index 4a9f27a7ef649..d8e7688f2b454 100644 --- a/ruby/red-gandiva/lib/gandiva/version.rb +++ b/ruby/red-gandiva/lib/gandiva/version.rb @@ -16,7 +16,7 @@ # under the License. module Gandiva - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb index ba66b2cad16f3..836e3ce846c86 100644 --- a/ruby/red-parquet/lib/parquet/version.rb +++ b/ruby/red-parquet/lib/parquet/version.rb @@ -16,7 +16,7 @@ # under the License. module Parquet - VERSION = "18.0.0-SNAPSHOT" + VERSION = "19.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") From 7f28835888c39205dd23c1c7a25acc2d89e121ee Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 30 Oct 2024 06:17:45 +0900 Subject: [PATCH 08/59] MINOR: [Release] Update .deb package names for 19.0.0 --- .../apache-arrow/debian/control.in | 104 +++++++++--------- ...800.install => libarrow-acero1900.install} | 0 ...install => libarrow-cuda-glib1900.install} | 0 ...1800.install => libarrow-cuda1900.install} | 0 ...tall => libarrow-dataset-glib1900.install} | 0 ...0.install => libarrow-dataset1900.install} | 0 ...stall => libarrow-flight-glib1900.install} | 0 ...l => libarrow-flight-sql-glib1900.install} | 0 ...nstall => libarrow-flight-sql1900.install} | 0 ...00.install => libarrow-flight1900.install} | 0 ...1800.install => libarrow-glib1900.install} | 0 ...arrow1800.install => libarrow1900.install} | 0 ...00.install => libgandiva-glib1900.install} | 0 ...iva1800.install => libgandiva1900.install} | 0 ...00.install => libparquet-glib1900.install} | 0 ...uet1800.install => libparquet1900.install} | 0 dev/tasks/tasks.yml | 60 +++++----- 17 files changed, 82 insertions(+), 82 deletions(-) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-acero1800.install => libarrow-acero1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda-glib1800.install => libarrow-cuda-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda1800.install => libarrow-cuda1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset-glib1800.install => libarrow-dataset-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset1800.install => libarrow-dataset1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-glib1800.install => libarrow-flight-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-sql-glib1800.install => libarrow-flight-sql-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-sql1800.install => libarrow-flight-sql1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight1800.install => libarrow-flight1900.install} (100%) rename 
dev/tasks/linux-packages/apache-arrow/debian/{libarrow-glib1800.install => libarrow-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow1800.install => libarrow1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva-glib1800.install => libgandiva-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva1800.install => libgandiva1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet-glib1800.install => libparquet-glib1900.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet1800.install => libparquet1900.install} (100%) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index cf3f488cc17e0..76194318bbba1 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -43,7 +43,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.8 Homepage: https://arrow.apache.org/ -Package: libarrow1800 +Package: libarrow1900 Section: libs Architecture: any Multi-Arch: same @@ -63,12 +63,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1800 (= ${binary:Version}) + libarrow1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides tools. -Package: libarrow-cuda1800 +Package: libarrow-cuda1900 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -76,12 +76,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1800 (= ${binary:Version}) + libarrow1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for CUDA support. -Package: libarrow-acero1800 +Package: libarrow-acero1900 Section: libs Architecture: any Multi-Arch: same @@ -89,12 +89,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1800 (= ${binary:Version}) + libarrow1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Acero module. -Package: libarrow-dataset1800 +Package: libarrow-dataset1900 Section: libs Architecture: any Multi-Arch: same @@ -102,13 +102,13 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-acero1800 (= ${binary:Version}), - libparquet1800 (= ${binary:Version}) + libarrow-acero1900 (= ${binary:Version}), + libparquet1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Dataset module. -Package: libarrow-flight1800 +Package: libarrow-flight1900 Section: libs Architecture: any Multi-Arch: same @@ -116,12 +116,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1800 (= ${binary:Version}) + libarrow1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight RPC system. 
-Package: libarrow-flight-sql1800 +Package: libarrow-flight-sql1900 Section: libs Architecture: any Multi-Arch: same @@ -129,7 +129,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight1800 (= ${binary:Version}) + libarrow-flight1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight SQL system. @@ -140,7 +140,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow1800 (= ${binary:Version}), + libarrow1900 (= ${binary:Version}), @USE_SYSTEM_GRPC@ libabsl-dev, libbrotli-dev, libbz2-dev, @@ -169,7 +169,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-cuda1800 (= ${binary:Version}) + libarrow-cuda1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for CUDA support. @@ -180,7 +180,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow-acero1800 (= ${binary:Version}), + libarrow-acero1900 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -193,7 +193,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-acero-dev (= ${binary:Version}), - libarrow-dataset1800 (= ${binary:Version}), + libarrow-dataset1900 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -206,7 +206,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-flight1800 (= ${binary:Version}), + libarrow-flight1900 (= ${binary:Version}), libc-ares-dev, @USE_SYSTEM_GRPC@ libgrpc++-dev Description: Apache Arrow is a data processing library for analysis @@ -220,12 +220,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), - libarrow-flight-sql1800 (= ${binary:Version}) + libarrow-flight-sql1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight SQL system. -Package: libgandiva1800 +Package: libgandiva1900 Section: libs Architecture: any Multi-Arch: same @@ -233,7 +233,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1800 (= ${binary:Version}) + libarrow1900 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -246,13 +246,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libgandiva1800 (= ${binary:Version}) + libgandiva1900 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . This package provides C++ header files. -Package: libparquet1800 +Package: libparquet1900 Section: libs Architecture: any Multi-Arch: same @@ -272,7 +272,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libparquet1800 (= ${binary:Version}) + libparquet1900 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides tools. @@ -284,13 +284,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet1800 (= ${binary:Version}), + libparquet1900 (= ${binary:Version}), libthrift-dev Description: Apache Parquet is a columnar storage format . This package provides C++ header files. 
-Package: libarrow-glib1800 +Package: libarrow-glib1900 Section: libs Architecture: any Multi-Arch: same @@ -298,7 +298,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1800 (= ${binary:Version}) + libarrow1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files. @@ -322,7 +322,7 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-acero-dev (= ${binary:Version}), - libarrow-glib1800 (= ${binary:Version}), + libarrow-glib1900 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis @@ -340,7 +340,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libarrow-cuda-glib1800 +Package: libarrow-cuda-glib1900 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -348,8 +348,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1800 (= ${binary:Version}), - libarrow-cuda1800 (= ${binary:Version}) + libarrow-glib1900 (= ${binary:Version}), + libarrow-cuda1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for CUDA support. @@ -374,13 +374,13 @@ Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-cuda-glib1800 (= ${binary:Version}), + libarrow-cuda-glib1900 (= ${binary:Version}), gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based header files for CUDA support. -Package: libarrow-dataset-glib1800 +Package: libarrow-dataset-glib1900 Section: libs Architecture: any Multi-Arch: same @@ -388,8 +388,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1800 (= ${binary:Version}), - libarrow-dataset1800 (= ${binary:Version}) + libarrow-glib1900 (= ${binary:Version}), + libarrow-dataset1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for dataset module. @@ -414,7 +414,7 @@ Depends: ${misc:Depends}, libarrow-dataset-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-dataset-glib1800 (= ${binary:Version}), + libarrow-dataset-glib1900 (= ${binary:Version}), gir1.2-arrow-dataset-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -431,7 +431,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for dataset module. -Package: libarrow-flight-glib1800 +Package: libarrow-flight-glib1900 Section: libs Architecture: any Multi-Arch: same @@ -439,8 +439,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1800 (= ${binary:Version}), - libarrow-flight1800 (= ${binary:Version}) + libarrow-glib1900 (= ${binary:Version}), + libarrow-flight1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight. 
@@ -466,7 +466,7 @@ Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-flight-glib1800 (= ${binary:Version}), + libarrow-flight-glib1900 (= ${binary:Version}), gir1.2-arrow-flight-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -483,7 +483,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight. -Package: libarrow-flight-sql-glib1800 +Package: libarrow-flight-sql-glib1900 Section: libs Architecture: any Multi-Arch: same @@ -491,8 +491,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight-glib1800 (= ${binary:Version}), - libarrow-flight-sql1800 (= ${binary:Version}) + libarrow-flight-glib1900 (= ${binary:Version}), + libarrow-flight-sql1900 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight SQL. @@ -518,7 +518,7 @@ Depends: ${misc:Depends}, libarrow-flight-sql-dev (= ${binary:Version}), libarrow-flight-glib-dev (= ${binary:Version}), - libarrow-flight-sql-glib1800 (= ${binary:Version}), + libarrow-flight-sql-glib1900 (= ${binary:Version}), gir1.2-arrow-flight-sql-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -535,7 +535,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight SQL. -Package: libgandiva-glib1800 +Package: libgandiva-glib1900 Section: libs Architecture: any Multi-Arch: same @@ -543,8 +543,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1800 (= ${binary:Version}), - libgandiva1800 (= ${binary:Version}) + libarrow-glib1900 (= ${binary:Version}), + libgandiva1900 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -571,7 +571,7 @@ Depends: ${misc:Depends}, libgandiva-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libgandiva-glib1800 (= ${binary:Version}), + libgandiva-glib1900 (= ${binary:Version}), gir1.2-gandiva-1.0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. @@ -590,7 +590,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions . This package provides documentations. -Package: libparquet-glib1800 +Package: libparquet-glib1900 Section: libs Architecture: any Multi-Arch: same @@ -598,8 +598,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1800 (= ${binary:Version}), - libparquet1800 (= ${binary:Version}) + libarrow-glib1900 (= ${binary:Version}), + libparquet1900 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. 
@@ -624,7 +624,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib1800 (= ${binary:Version}), + libparquet-glib1900 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1800.install rename to 
dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1900.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet1800.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet1900.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet1800.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet1900.install diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9137181466383..30c1daecf7a31 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -512,59 +512,59 @@ tasks: - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-acero-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-acero1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-acero1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-acero1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-acero1900_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset-glib1900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset1900_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - 
libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-glib1900_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql1800_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql-glib1900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql1900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight1900_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-glib1900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow1900_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva1800_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva-glib1900_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva1900_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet1800_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet-glib1900_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet1900_{no_rc_version}-1_[a-z0-9]+.deb - parquet-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - 
libarrow-cuda1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-glib1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda-glib1900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda1900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda1900_{no_rc_version}-1_[a-z0-9]+.deb {% endif %} {% endfor %} {% endfor %} From 510be3629e86f31aab823e54429b41b3aef2b7fe Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 30 Oct 2024 06:17:45 +0900 Subject: [PATCH 09/59] MINOR: [Release] Update .deb/.rpm changelogs for 18.0.0 --- .../linux-packages/apache-arrow-apt-source/debian/changelog | 6 ++++++ .../apache-arrow-release/yum/apache-arrow-release.spec.in | 3 +++ dev/tasks/linux-packages/apache-arrow/debian/changelog | 6 ++++++ dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++ 4 files changed, 18 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 279c3cc14a4c8..7aa4173d1dc5b 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (18.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Sutou Kouhei Wed, 16 Oct 2024 09:20:06 -0000 + apache-arrow-apt-source (17.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index fd8165d748d78..94a52c9aee1e2 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Wed Oct 16 2024 Sutou Kouhei - 18.0.0-1 +- New upstream release. + * Thu Jul 11 2024 Raúl Cumplido - 17.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index f59bc9f66233e..b132f9a2946ff 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (18.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Sutou Kouhei Wed, 16 Oct 2024 09:20:06 -0000 + apache-arrow (17.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 9814843ab7810..fe59597946ea8 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -894,6 +894,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Wed Oct 16 2024 Sutou Kouhei - 18.0.0-1 +- New upstream release. + * Thu Jul 11 2024 Raúl Cumplido - 17.0.0-1 - New upstream release. From 2f9ba7957a56b6aae82cb7f4b584afe2b1987d30 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 29 Oct 2024 17:40:51 -0700 Subject: [PATCH 10/59] MINOR: [Release] Fix issue listing versions in post-10-docs.sh (#44567) ### Rationale for this change While testing the docs script (post-10-docs.sh) during the 18 release process, I noticed that the 16.1 docs got moved underneath the 17.0 docs folder. i.e., moved to `docs/17.0/16.1`. Which isn't right. This is because the previous glob pattern was `*.0` which doesn't match `16.1`. 
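For illustration, a minimal shell sketch of the difference between the two glob patterns (the `docs/` layout below is hypothetical, made up for this example):

```sh
# Hypothetical docs/ tree at release time:
#   docs/16.1/   docs/17.0/
ls -d docs/*.0/    # old pattern: matches docs/17.0/ only, misses docs/16.1/
ls -d docs/*.*/    # new pattern: matches both docs/16.1/ and docs/17.0/
```
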
### What changes are included in this PR? Just a change to post-10-docs.sh updating the glob. ### Are these changes tested? Yes, locally. I used this updated script to generate https://github.com/apache/arrow-site/pull/553. ### Are there any user-facing changes? No. Authored-by: Bryce Mecum Signed-off-by: Sutou Kouhei --- dev/release/post-10-docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/post-10-docs.sh b/dev/release/post-10-docs.sh index 58a462551f199..70c54c555f839 100755 --- a/dev/release/post-10-docs.sh +++ b/dev/release/post-10-docs.sh @@ -56,7 +56,7 @@ git branch -D ${branch_name} || : git checkout -b ${branch_name} # list and remove previous versioned docs versioned_paths=() -for versioned_path in docs/*.0/; do +for versioned_path in docs/*.*/; do versioned_paths+=(${versioned_path}) rm -rf ${versioned_path} done From 2df7b23af717f505c16fedbef6e4223ac163ac5a Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Tue, 29 Oct 2024 17:48:01 -0700 Subject: [PATCH 11/59] MINOR: [Java] Update mockito version to 5.14.2 (#44566) ### What changes are included in this PR? Update Mockito version to latest/current version (5.14.2). Also remove mockito-inline dependency as the code is now part of mockito-core. Fixes Java 23 compatibility ### Are these changes tested? CI/CD (no new tests) ### Are there any user-facing changes? No Authored-by: Laurent Goujon Signed-off-by: David Li --- java/flight/flight-sql-jdbc-core/pom.xml | 7 ------- java/pom.xml | 3 +-- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 5c893dad62fce..3e99e4b77ae3b 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -101,13 +101,6 @@ under the License. test - - org.mockito - mockito-inline - ${mockito.inline.version} - test - - io.netty netty-common diff --git a/java/pom.xml b/java/pom.xml index ff3a3f04ca124..84fb967f4f1f0 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -108,8 +108,7 @@ under the License. 10.18.2 true 2.31.0 - 5.11.0 - 5.2.0 + 5.14.2 3.48.1 1.5.11 none From ba5c86d2c62421b627c609c30ed6b0c98b2882c6 Mon Sep 17 00:00:00 2001 From: Gabriel P Stone Date: Tue, 29 Oct 2024 19:09:12 -0800 Subject: [PATCH 12/59] GH-32206: [C++] GcsFileSystem::Make should return Result (#44503) ### What changes are included in this PR? GcsFileSystem::Make now returns Result, with corresponding header & test changes. ### Are these changes tested? Yes, `arrow-gcsfs-test` passed. ### Are there any user-facing changes? Yes. 
**This PR includes breaking changes to public APIs.** * GitHub Issue: #32206 Lead-authored-by: Gabriel Stone Co-authored-by: Gabriel P Stone Co-authored-by: Gabriel P Stone Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/gcsfs.cc | 7 +- cpp/src/arrow/filesystem/gcsfs.h | 3 +- cpp/src/arrow/filesystem/gcsfs_test.cc | 116 ++++++++++++------------- r/src/filesystem.cpp | 5 +- 4 files changed, 67 insertions(+), 64 deletions(-) diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index 97cd391272611..9869687a8b9d7 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -962,10 +962,11 @@ Result> GcsFileSystem::OpenAppendStream( return Status::NotImplemented("Append is not supported in GCS"); } -std::shared_ptr GcsFileSystem::Make(const GcsOptions& options, - const io::IOContext& context) { +Result> GcsFileSystem::Make( + const GcsOptions& options, const io::IOContext& io_context) { // Cannot use `std::make_shared<>` as the constructor is private. - return std::shared_ptr(new GcsFileSystem(options, context)); + std::shared_ptr ptr(new GcsFileSystem(options, io_context)); + return ptr; } GcsFileSystem::GcsFileSystem(const GcsOptions& options, const io::IOContext& context) diff --git a/cpp/src/arrow/filesystem/gcsfs.h b/cpp/src/arrow/filesystem/gcsfs.h index f1fbc95bf957c..6a1c867abc725 100644 --- a/cpp/src/arrow/filesystem/gcsfs.h +++ b/cpp/src/arrow/filesystem/gcsfs.h @@ -231,8 +231,7 @@ class ARROW_EXPORT GcsFileSystem : public FileSystem { const std::shared_ptr& metadata) override; /// Create a GcsFileSystem instance from the given options. - // TODO(ARROW-16884): make this return Result for consistency - static std::shared_ptr Make( + static Result> Make( const GcsOptions& options, const io::IOContext& = io::default_io_context()); private: diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index d4d5edf4b8993..0e457b2d502f2 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -271,7 +271,7 @@ class TestGCSFSGeneric : public GcsIntegrationTest, public GenericFileSystemTest void SetUp() override { ASSERT_NO_FATAL_FAILURE(GcsIntegrationTest::SetUp()); auto bucket_name = RandomBucketName(); - gcs_fs_ = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(gcs_fs_, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK(gcs_fs_->CreateDir(bucket_name, true)); fs_ = std::make_shared(bucket_name, gcs_fs_); } @@ -487,7 +487,7 @@ TEST(GcsFileSystem, FileSystemCompare) { GcsOptions a_options; a_options.scheme = "http"; a_options.project_id = "test-only-invalid-project-id"; - auto a = GcsFileSystem::Make(a_options); + ASSERT_OK_AND_ASSIGN(auto a, GcsFileSystem::Make(a_options)); EXPECT_THAT(a, NotNull()); EXPECT_TRUE(a->Equals(*a)); @@ -495,7 +495,7 @@ TEST(GcsFileSystem, FileSystemCompare) { b_options.scheme = "http"; b_options.endpoint_override = "localhost:1234"; b_options.project_id = "test-only-invalid-project-id"; - auto b = GcsFileSystem::Make(b_options); + ASSERT_OK_AND_ASSIGN(auto b, GcsFileSystem::Make(b_options)); EXPECT_THAT(b, NotNull()); EXPECT_TRUE(b->Equals(*b)); @@ -622,7 +622,7 @@ TEST(GcsFileSystem, ObjectMetadataRoundtrip) { } TEST_F(GcsIntegrationTest, GetFileInfoBucket) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); arrow::fs::AssertFileInfo(fs.get(), PreexistingBucketName(), FileType::Directory); // URI @@ -632,7 
+632,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoBucket) { TEST_F(GcsIntegrationTest, GetFileInfoObjectWithNestedStructure) { // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; ASSERT_OK_AND_ASSIGN( auto output, @@ -673,7 +673,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoObjectWithNestedStructure) { } TEST_F(GcsIntegrationTest, GetFileInfoObjectNoExplicitObject) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); auto object = GcsClient().GetObjectMetadata(PreexistingBucketName(), PreexistingObjectName()); ASSERT_TRUE(object.ok()) << "status=" << object.status(); @@ -685,7 +685,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoObjectNoExplicitObject) { } TEST_F(GcsIntegrationTest, GetFileInfoSelectorRecursive) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs)); std::vector expected; std::copy_if(hierarchy.contents.begin(), hierarchy.contents.end(), @@ -711,7 +711,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorRecursive) { } TEST_F(GcsIntegrationTest, GetFileInfoSelectorNonRecursive) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs)); std::vector expected; std::copy_if(hierarchy.contents.begin(), hierarchy.contents.end(), @@ -730,7 +730,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorNonRecursive) { } TEST_F(GcsIntegrationTest, GetFileInfoSelectorLimitedRecursion) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs)); for (const auto max_recursion : {0, 1, 2, 3}) { @@ -759,7 +759,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorLimitedRecursion) { } TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundTrue) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); auto selector = FileSelector(); selector.base_dir = NotFoundObjectPath() + "/"; @@ -770,7 +770,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundTrue) { } TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundFalse) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); auto selector = FileSelector(); selector.base_dir = NotFoundObjectPath() + "/"; @@ -780,34 +780,34 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundFalse) { } TEST_F(GcsIntegrationTest, CreateDirSuccessBucketOnly) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); auto bucket_name = RandomBucketName(); ASSERT_OK(fs->CreateDir(bucket_name, false)); arrow::fs::AssertFileInfo(fs.get(), bucket_name, FileType::Directory); } TEST_F(GcsIntegrationTest, CreateDirSuccessBucketAndFolder) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = PreexistingBucketPath() + RandomFolderName(); ASSERT_OK(fs->CreateDir(path, false)); 
arrow::fs::AssertFileInfo(fs.get(), path, FileType::Directory); } TEST_F(GcsIntegrationTest, CreateDirFailureFolderWithMissingBucket) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = std::string("not-a-bucket/new-folder"); ASSERT_RAISES(IOError, fs->CreateDir(path, false)); } TEST_F(GcsIntegrationTest, CreateDirRecursiveBucketOnly) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); auto bucket_name = RandomBucketName(); ASSERT_OK(fs->CreateDir(bucket_name, true)); arrow::fs::AssertFileInfo(fs.get(), bucket_name, FileType::Directory); } TEST_F(GcsIntegrationTest, CreateDirRecursiveFolderOnly) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto parent = PreexistingBucketPath() + RandomFolderName(); const auto path = internal::ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs->CreateDir(path, true)); @@ -816,7 +816,7 @@ TEST_F(GcsIntegrationTest, CreateDirRecursiveFolderOnly) { } TEST_F(GcsIntegrationTest, CreateDirRecursiveBucketAndFolder) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); auto bucket_name = RandomBucketName(); const auto parent = internal::ConcatAbstractPath(bucket_name, RandomFolderName()); const auto path = internal::ConcatAbstractPath(parent, "new-sub"); @@ -827,12 +827,12 @@ TEST_F(GcsIntegrationTest, CreateDirRecursiveBucketAndFolder) { } TEST_F(GcsIntegrationTest, CreateDirUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(Invalid, fs->CreateDir("gs://" + RandomBucketName(), true)); } TEST_F(GcsIntegrationTest, CreateDirExtraneousSlashes) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(Invalid, fs->CreateDir(RandomBucketName() + "//somedir", /*recursive=*/true)); ASSERT_RAISES(Invalid, fs->CreateDir(RandomBucketName() + "/somedir//newdir", @@ -840,14 +840,14 @@ TEST_F(GcsIntegrationTest, CreateDirExtraneousSlashes) { } TEST_F(GcsIntegrationTest, DeleteBucketDirSuccess) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK(fs->CreateDir("pyarrow-filesystem/", /*recursive=*/true)); ASSERT_RAISES(Invalid, fs->CreateDir("/", false)); ASSERT_OK(fs->DeleteDir("pyarrow-filesystem/")); } TEST_F(GcsIntegrationTest, DeleteDirSuccess) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs)); ASSERT_OK(fs->DeleteDir(hierarchy.base_dir)); @@ -862,18 +862,18 @@ TEST_F(GcsIntegrationTest, DeleteDirSuccess) { } TEST_F(GcsIntegrationTest, DeleteDirUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(Invalid, fs->DeleteDir("gs://" + PreexistingBucketPath())); } TEST_F(GcsIntegrationTest, DeleteDirExtraneousSlashes) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(Invalid, fs->DeleteDir(PreexistingBucketPath() + "/somedir")); ASSERT_RAISES(Invalid, fs->DeleteDir(PreexistingBucketPath() + 
"somedir//newdir")); } TEST_F(GcsIntegrationTest, DeleteDirContentsSuccess) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs)); ASSERT_OK(fs->DeleteDirContents(hierarchy.base_dir)); @@ -891,35 +891,35 @@ TEST_F(GcsIntegrationTest, DeleteDirContentsSuccess) { } TEST_F(GcsIntegrationTest, DeleteRootDirContents) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, HasSubstr("too dangerous"), fs->DeleteRootDirContents()); } TEST_F(GcsIntegrationTest, DeleteFileSuccess) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK(fs->DeleteFile(PreexistingObjectPath())); arrow::fs::AssertFileInfo(fs.get(), PreexistingObjectPath(), FileType::NotFound); } TEST_F(GcsIntegrationTest, DeleteFileFailure) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(IOError, fs->DeleteFile(NotFoundObjectPath())); } TEST_F(GcsIntegrationTest, DeleteFileDirectoryFails) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = PreexistingBucketPath() + "DeleteFileDirectoryFails/"; ASSERT_RAISES(IOError, fs->DeleteFile(path)); } TEST_F(GcsIntegrationTest, DeleteFileUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(Invalid, fs->DeleteFile("gs://" + PreexistingObjectPath())); } TEST_F(GcsIntegrationTest, MoveFileSuccess) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto destination_path = PreexistingBucketPath() + "move-destination"; ASSERT_OK(fs->Move(PreexistingObjectPath(), destination_path)); arrow::fs::AssertFileInfo(fs.get(), destination_path, FileType::File); @@ -927,45 +927,45 @@ TEST_F(GcsIntegrationTest, MoveFileSuccess) { } TEST_F(GcsIntegrationTest, MoveFileCannotRenameBuckets) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(IOError, fs->Move(PreexistingBucketPath(), "another-bucket/")); } TEST_F(GcsIntegrationTest, MoveFileCannotRenameDirectories) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(IOError, fs->Move(PreexistingBucketPath() + "folder/", PreexistingBucketPath() + "new-name")); } TEST_F(GcsIntegrationTest, MoveFileCannotRenameToDirectory) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK(fs->CreateDir(PreexistingBucketPath() + "destination", false)); ASSERT_RAISES(IOError, fs->Move(PreexistingObjectPath(), PreexistingBucketPath() + "destination")); } TEST_F(GcsIntegrationTest, MoveFileUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto destination_path = PreexistingBucketPath() + "move-destination"; ASSERT_RAISES(Invalid, fs->Move("gs://" + PreexistingObjectPath(), destination_path)); ASSERT_RAISES(Invalid, fs->Move(PreexistingObjectPath(), "gs://" + destination_path)); } 
TEST_F(GcsIntegrationTest, CopyFileSuccess) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto destination_path = PreexistingBucketPath() + "copy-destination"; ASSERT_OK(fs->CopyFile(PreexistingObjectPath(), destination_path)); arrow::fs::AssertFileInfo(fs.get(), destination_path, FileType::File); } TEST_F(GcsIntegrationTest, CopyFileNotFound) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto destination_path = PreexistingBucketPath() + "copy-destination"; ASSERT_RAISES(IOError, fs->CopyFile(NotFoundObjectPath(), destination_path)); } TEST_F(GcsIntegrationTest, CopyFileUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto destination_path = PreexistingBucketPath() + "copy-destination"; ASSERT_RAISES(Invalid, fs->CopyFile("gs://" + PreexistingObjectPath(), destination_path)); @@ -974,7 +974,7 @@ TEST_F(GcsIntegrationTest, CopyFileUri) { } TEST_F(GcsIntegrationTest, OpenInputStreamString) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs->OpenInputStream(PreexistingObjectPath())); @@ -987,7 +987,7 @@ TEST_F(GcsIntegrationTest, OpenInputStreamString) { } TEST_F(GcsIntegrationTest, OpenInputStreamStringBuffers) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs->OpenInputStream(PreexistingObjectPath())); @@ -1003,7 +1003,7 @@ TEST_F(GcsIntegrationTest, OpenInputStreamStringBuffers) { } TEST_F(GcsIntegrationTest, OpenInputStreamInfo) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); arrow::fs::FileInfo info; ASSERT_OK_AND_ASSIGN(info, fs->GetFileInfo(PreexistingObjectPath())); @@ -1019,7 +1019,7 @@ TEST_F(GcsIntegrationTest, OpenInputStreamInfo) { } TEST_F(GcsIntegrationTest, OpenInputStreamEmpty) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto object_path = internal::ConcatAbstractPath(PreexistingBucketName(), "empty-object.txt"); @@ -1033,13 +1033,13 @@ TEST_F(GcsIntegrationTest, OpenInputStreamEmpty) { } TEST_F(GcsIntegrationTest, OpenInputStreamNotFound) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(IOError, fs->OpenInputStream(NotFoundObjectPath())); } TEST_F(GcsIntegrationTest, OpenInputStreamInfoInvalid) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); arrow::fs::FileInfo info; ASSERT_OK_AND_ASSIGN(info, fs->GetFileInfo(PreexistingBucketPath())); @@ -1050,7 +1050,7 @@ TEST_F(GcsIntegrationTest, OpenInputStreamInfoInvalid) { } TEST_F(GcsIntegrationTest, OpenInputStreamUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(Invalid, fs->OpenInputStream("gs://" + PreexistingObjectPath())); } @@ -1069,7 +1069,7 @@ TEST_F(GcsIntegrationTest, OpenInputStreamReadMetadata) { .upsert_metadata("key0", "value0"))) .value(); - auto fs = GcsFileSystem::Make(TestGcsOptions()); + 
ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs->OpenInputStream(PreexistingBucketPath() + object_name)); @@ -1117,7 +1117,7 @@ TEST_F(GcsIntegrationTest, OpenInputStreamReadMetadata) { } TEST_F(GcsIntegrationTest, OpenInputStreamClosed) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto stream, fs->OpenInputStream(PreexistingObjectPath())); ASSERT_OK(stream->Close()); @@ -1131,7 +1131,7 @@ TEST_F(GcsIntegrationTest, TestWriteWithDefaults) { auto options = TestGcsOptions(); options.default_bucket_location = "utopia"; options.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); - auto fs = GcsFileSystem::Make(options); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(options)); std::string bucket = "new_bucket_with_default_location"; auto file_name = "object_with_defaults"; ASSERT_OK(fs->CreateDir(bucket, /*recursive=*/false)); @@ -1172,7 +1172,7 @@ TEST_F(GcsIntegrationTest, TestWriteWithDefaults) { } TEST_F(GcsIntegrationTest, OpenOutputStreamSmall) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = PreexistingBucketPath() + "test-write-object"; std::shared_ptr output; @@ -1193,7 +1193,7 @@ TEST_F(GcsIntegrationTest, OpenOutputStreamSmall) { } TEST_F(GcsIntegrationTest, OpenOutputStreamLarge) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = PreexistingBucketPath() + "test-write-object"; std::shared_ptr output; @@ -1229,7 +1229,7 @@ TEST_F(GcsIntegrationTest, OpenOutputStreamLarge) { } TEST_F(GcsIntegrationTest, OpenOutputStreamClosed) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = internal::ConcatAbstractPath(PreexistingBucketName(), "open-output-stream-closed.txt"); @@ -1242,7 +1242,7 @@ TEST_F(GcsIntegrationTest, OpenOutputStreamClosed) { } TEST_F(GcsIntegrationTest, OpenOutputStreamUri) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); const auto path = internal::ConcatAbstractPath(PreexistingBucketName(), "open-output-stream-uri.txt"); @@ -1250,7 +1250,7 @@ TEST_F(GcsIntegrationTest, OpenOutputStreamUri) { } TEST_F(GcsIntegrationTest, OpenInputFileMixedReadVsReadAt) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; @@ -1302,7 +1302,7 @@ TEST_F(GcsIntegrationTest, OpenInputFileMixedReadVsReadAt) { } TEST_F(GcsIntegrationTest, OpenInputFileRandomSeek) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; @@ -1334,7 +1334,7 @@ TEST_F(GcsIntegrationTest, OpenInputFileRandomSeek) { } TEST_F(GcsIntegrationTest, OpenInputFileIoContext) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); // Create a test file. 
const auto path = PreexistingBucketPath() + "OpenInputFileIoContext/object-name"; @@ -1350,7 +1350,7 @@ TEST_F(GcsIntegrationTest, OpenInputFileIoContext) { } TEST_F(GcsIntegrationTest, OpenInputFileInfo) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); arrow::fs::FileInfo info; ASSERT_OK_AND_ASSIGN(info, fs->GetFileInfo(PreexistingObjectPath())); @@ -1368,13 +1368,13 @@ TEST_F(GcsIntegrationTest, OpenInputFileInfo) { } TEST_F(GcsIntegrationTest, OpenInputFileNotFound) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_RAISES(IOError, fs->OpenInputFile(NotFoundObjectPath())); } TEST_F(GcsIntegrationTest, OpenInputFileInfoInvalid) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); arrow::fs::FileInfo info; ASSERT_OK_AND_ASSIGN(info, fs->GetFileInfo(PreexistingBucketPath())); @@ -1385,7 +1385,7 @@ TEST_F(GcsIntegrationTest, OpenInputFileInfoInvalid) { } TEST_F(GcsIntegrationTest, OpenInputFileClosed) { - auto fs = GcsFileSystem::Make(TestGcsOptions()); + ASSERT_OK_AND_ASSIGN(auto fs, GcsFileSystem::Make(TestGcsOptions())); ASSERT_OK_AND_ASSIGN(auto stream, fs->OpenInputFile(PreexistingObjectPath())); ASSERT_OK(stream->Close()); diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp index 2274a3d7ff7a2..95f19e4f2f256 100644 --- a/r/src/filesystem.cpp +++ b/r/src/filesystem.cpp @@ -429,8 +429,11 @@ std::shared_ptr fs___GcsFileSystem__Make(bool anonymous, } auto io_context = MainRThread::GetInstance().CancellableIOContext(); - // TODO(ARROW-16884): update when this returns Result +#if ARROW_VERSION_MAJOR >= 18 + return ValueOrStop(fs::GcsFileSystem::Make(gcs_opts, io_context)); +#else return fs::GcsFileSystem::Make(gcs_opts, io_context); +#endif } // [[gcs::export]] From 5e60823aaa3403026811b936e902dec21456d793 Mon Sep 17 00:00:00 2001 From: George Vanburgh <1670176+georgevanburgh@users.noreply.github.com> Date: Wed, 30 Oct 2024 00:08:53 -0400 Subject: [PATCH 13/59] GH-44575: [C#] Replace LINQ expression with for loop (#44576) For code which repeatedly access columns by name, this LINQ expression can form part of the hot path. This PR replaces the LINQ with the equivalent for loop, and should preserve all existing behaviour ([return -1 in the event of no match](https://learn.microsoft.com/en-us/dotnet/api/system.collections.generic.list-1.indexof?view=net-8.0#system-collections-generic-list-1-indexof(-0))). 
I ran a quick benchmark to validate the speedup ```cs [MemoryDiagnoser] public class ColumnIndexerBenchmark { private readonly RecordBatch _batch; public ColumnIndexerBenchmark() { var builder = new Schema.Builder(); builder .Field(new Field("A", Int32Type.Default, true)) .Field(new Field("B", Int32Type.Default, true)) .Field(new Field("C", Int32Type.Default, true)) .Field(new Field("D", Int32Type.Default, true)) .Field(new Field("E", Int32Type.Default, true)) .Field(new Field("F", Int32Type.Default, true)) .Field(new Field("G", Int32Type.Default, true)) .Field(new Field("H", Int32Type.Default, true)) .Field(new Field("I", Int32Type.Default, true)) .Field(new Field("J", Int32Type.Default, true)); var schema = builder.Build(); _batch = new RecordBatch(schema, new IArrowArray[schema.FieldsList.Count], 0); } [Benchmark] public void GetColumnByIndex() { _batch.Column("H", StringComparer.Ordinal); } } ``` Some numbers from my machine ``` BenchmarkDotNet v0.14.0, Windows 10 (10.0.19045.5011/22H2/2022Update) 13th Gen Intel Core i7-13800H, 1 CPU, 20 logical and 14 physical cores .NET SDK 8.0.306 [Host] : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2 DefaultJob : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2 ``` | Method | Mean | Error | StdDev | Gen0 | Allocated | |------------------------ |---------:|----------:|----------:|-------:|----------:| | GetColumnByIndexLinq | 67.84 ns | 1.178 ns | 1.102 ns | 0.0107 | 136 B | | GetColumnByIndexForLoop | 9.428 ns | 0.1334 ns | 0.1114 ns | - | - | In theory, we could achieve a greater speedup by maintaining a lookup of column names to ordinals. We already have several lookup structures inside `Schema`, but none of them provides access to ordinal values. However, the speedup from adding another mapping might not warrant adding yet another lookup structure to `Schema`. If merged, will close #44575. * GitHub Issue: #44575 Authored-by: George Vanburgh Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow/Schema.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Schema.cs b/csharp/src/Apache.Arrow/Schema.cs index 4357e8b2ddd44..32615e5d67bd8 100644 --- a/csharp/src/Apache.Arrow/Schema.cs +++ b/csharp/src/Apache.Arrow/Schema.cs @@ -82,7 +82,13 @@ public int GetFieldIndex(string name, IEqualityComparer comparer = defau { comparer ??= StringComparer.CurrentCulture; - return _fieldsList.IndexOf(_fieldsList.First(x => comparer.Equals(x.Name, name))); + for (int i = 0; i < _fieldsList.Count; i++) + { + if (comparer.Equals(_fieldsList[i].Name, name)) + return i; + } + + return -1; } public Schema RemoveField(int fieldIndex) From 3217fc2625f80042661172fd589a70c270ba8f8b Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Tue, 29 Oct 2024 21:15:50 -0700 Subject: [PATCH 14/59] GH-44564: [Java][FlightSQL] Fix native libraries relocation (#44565) ### Rationale for this change Prefix used by native libraries shipped in the JDBC driver do not match the prefix used by Netty `NativeLibraryLoader` class, preventing them to be detected and loaded. ### What changes are included in this PR? Change the prefix of the libraries and add a integration test to verify the libraries are loaded Also exclude several group of data which are not properly relocated and may cause conflict with existing classpath. ### Are these changes tested? Yes, new test added for checking the native library loader ### Are there any user-facing changes? 
No * GitHub Issue: #44564 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/flight/flight-sql-jdbc-driver/pom.xml | 13 ++- .../driver/jdbc/ITDriverJarValidation.java | 96 ++++++++++++++----- 2 files changed, 79 insertions(+), 30 deletions(-) diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index ba6fe277d1327..ae8c543fbfe3b 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -127,14 +127,14 @@ under the License. mozilla. org.apache.arrow.driver.jdbc.shaded.mozilla. - + META-INF.native.libnetty_ - META-INF.native.liboaadj_netty_ + META-INF.native.liborg_apache_arrow_driver_jdbc_shaded_netty_ META-INF.native.netty_ - META-INF.native.oaadj_netty_ + META-INF.native.org_apache_arrow_driver_jdbc_shaded_netty_ @@ -159,8 +159,11 @@ under the License. **/*.SF **/*.RSA **/*.DSA - META-INF/native/libio_grpc_netty* - META-INF/native/io_grpc_netty_shaded* + + META-INF/native-image/ + META-INF/proguard/ + + META-INF/versions/ **/*.proto **/module-info.class diff --git a/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java index b45845485a041..a0e108d6a067b 100644 --- a/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java @@ -18,20 +18,26 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.fail; import com.google.common.collect.ImmutableSet; import java.io.File; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.JarURLConnection; +import java.net.URISyntaxException; import java.net.URL; -import java.util.Enumeration; +import java.net.URLClassLoader; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.jar.JarEntry; import java.util.jar.JarFile; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.function.Executable; /** * Check the content of the JDBC driver jar @@ -48,52 +54,92 @@ public class ITDriverJarValidation { /** List of allowed prefixes a jar entry may match. */ public static final Set ALLOWED_PREFIXES = - ImmutableSet.of("org/apache/arrow/driver/jdbc/", "META-INF/"); + ImmutableSet.of( + "org/apache/arrow/driver/jdbc/", // Driver code + "META-INF/maven/", // Maven metadata (useful for security scanner + "META-INF/services/", // ServiceLoader implementations + "META-INF/license/", + "META-INF/licenses/", + // Prefixes for native libraries + "META-INF/native/liborg_apache_arrow_driver_jdbc_shaded_", + "META-INF/native/org_apache_arrow_driver_jdbc_shaded_"); /** List of allowed files a jar entry may match. 
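 * (These are matched as exact entry names, in addition to the ALLOWED_PREFIXES
 * above; any other entry is expected to make validateShadedJar fail, which is
 * the apparent contract of checkEntryAllowed.)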
*/ public static final Set ALLOWED_FILES = - ImmutableSet.of("arrow-git.properties", "properties/flight.properties"); + ImmutableSet.of( + "arrow-git.properties", + "properties/flight.properties", + "META-INF/io.netty.versions.properties", + "META-INF/MANIFEST.MF", + "META-INF/DEPENDENCIES", + "META-INF/FastDoubleParser-LICENSE", + "META-INF/FastDoubleParser-NOTICE", + "META-INF/LICENSE", + "META-INF/LICENSE.txt", + "META-INF/NOTICE", + "META-INF/NOTICE.txt", + "META-INF/thirdparty-LICENSE", + "META-INF/bigint-LICENSE"); // This method is designed to work with Maven failsafe plugin and expects the // JDBC driver jar to be present in the test classpath (instead of the individual classes) - private static JarFile getJdbcJarFile() throws IOException { + private static File getJdbcJarFile() throws IOException { // Check if an override has been set if (JDBC_DRIVER_PATH_OVERRIDE != null) { - return new JarFile(new File(JDBC_DRIVER_PATH_OVERRIDE)); + return new File(JDBC_DRIVER_PATH_OVERRIDE); } - // Check classpath to find the driver jar + // Check classpath to find the driver jar (without loading the class) URL driverClassURL = ITDriverJarValidation.class .getClassLoader() .getResource("org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.class"); - assertNotNull(driverClassURL, "Driver jar was not detected in the classpath"); + assertNotNull(driverClassURL, "Driver class was not detected in the classpath"); assertEquals( - "jar", driverClassURL.getProtocol(), "Driver jar was not detected in the classpath"); + "jar", driverClassURL.getProtocol(), "Driver class was not found inside a jar file"); + // Return the enclosing jar file JarURLConnection connection = (JarURLConnection) driverClassURL.openConnection(); - return connection.getJarFile(); + try { + return new File(connection.getJarFileURL().toURI()); + } catch (URISyntaxException e) { + throw new IOException(e); + } } + /** Validate the content of the jar to enforce all 3rd party dependencies have been shaded. */ @Test @Timeout(value = 2, unit = TimeUnit.MINUTES) public void validateShadedJar() throws IOException { - // Validate the content of the jar to enforce all 3rd party dependencies have - // been shaded - try (JarFile jar = getJdbcJarFile()) { - for (Enumeration entries = jar.entries(); entries.hasMoreElements(); ) { - final JarEntry entry = entries.nextElement(); - if (entry.isDirectory()) { - // Directories are ignored - continue; - } - - try { - checkEntryAllowed(entry.getName()); - } catch (AssertionError e) { - fail(e.getMessage()); - } + + try (JarFile jar = new JarFile(getJdbcJarFile())) { + Stream executables = + jar.stream() + .filter(Predicate.not(JarEntry::isDirectory)) + .map( + entry -> { + return () -> checkEntryAllowed(entry.getName()); + }); + + Assertions.assertAll(executables); + } + } + + /** Check that relocated netty code can also load matching native library. 
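 * (Sketch of the mechanism as implemented below: the driver jar is loaded in an
 * isolated URLClassLoader, the relocated class
 * org.apache.arrow.driver.jdbc.shaded.io.netty.handler.ssl.OpenSsl is resolved
 * from it, and its ensureAvailability() method is invoked reflectively, forcing
 * Netty's NativeLibraryLoader to look up the renamed META-INF/native/
 * libraries.)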
*/ + @Test + @Timeout(value = 2, unit = TimeUnit.MINUTES) + public void checkNettyOpenSslNativeLoader() throws Throwable { + try (URLClassLoader driverClassLoader = + new URLClassLoader(new URL[] {getJdbcJarFile().toURI().toURL()}, null)) { + Class openSslClass = + driverClassLoader.loadClass( + "org.apache.arrow.driver.jdbc.shaded.io.netty.handler.ssl.OpenSsl"); + Method method = openSslClass.getDeclaredMethod("ensureAvailability"); + try { + method.invoke(null); + } catch (InvocationTargetException e) { + throw e.getCause(); } } } From 4c36f12524752779e682959c7a000a6554fcb784 Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Wed, 30 Oct 2024 13:44:58 +0900 Subject: [PATCH 15/59] GH-44569: [GLib] Add GArrowDecimal64DataType (#44571) ### Rationale for this change `arrow::Decimal64Type` data type has been introduced. It is also necessary to support the same data type in GLib. ### What changes are included in this PR? This PR implements `GArrowDecimal64DataType`. ### Are these changes tested? YES ### Are there any user-facing changes? Before this change: `Arrow::DecimalDataType.new(8, 2)` returned `Decimal128DataType`. After this change: `Arrow::DecimalDataType.new(8, 2)` returns `Decimal64DataType` * GitHub Issue: #44569 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/basic-data-type.cpp | 61 +++++++++++++++++++++++- c_glib/arrow-glib/basic-data-type.h | 20 ++++++++ c_glib/arrow-glib/type.cpp | 2 + c_glib/arrow-glib/type.h | 4 ++ c_glib/test/test-decimal128-data-type.rb | 4 +- c_glib/test/test-decimal64-data-type.rb | 54 +++++++++++++++++++++ 6 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 c_glib/test/test-decimal64-data-type.rb diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index dff972515022f..b2bd99e0f228d 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -114,6 +114,8 @@ G_BEGIN_DECLS * * #GArrowDecimalDataType is a base class for the decimal data types. * + * #GArrowDecimal64DataType is a class for the 64-bit decimal data type. + * * #GArrowDecimal128DataType is a class for the 128-bit decimal data type. * * #GArrowDecimal256DataType is a class for the 256-bit decimal data type. @@ -1500,7 +1502,10 @@ garrow_decimal_data_type_class_init(GArrowDecimalDataTypeClass *klass) GArrowDecimalDataType * garrow_decimal_data_type_new(gint32 precision, gint32 scale, GError **error) { - if (precision <= garrow_decimal128_data_type_max_precision()) { + if (precision <= garrow_decimal64_data_type_max_precision()) { + return GARROW_DECIMAL_DATA_TYPE( + garrow_decimal64_data_type_new(precision, scale, error)); + } else if (precision <= garrow_decimal128_data_type_max_precision()) { return GARROW_DECIMAL_DATA_TYPE( garrow_decimal128_data_type_new(precision, scale, error)); } else { @@ -1545,6 +1550,57 @@ garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type) return arrow_decimal_type->scale(); } +G_DEFINE_TYPE(GArrowDecimal64DataType, + garrow_decimal64_data_type, + GARROW_TYPE_DECIMAL_DATA_TYPE) + +static void +garrow_decimal64_data_type_init(GArrowDecimal64DataType *object) +{ +} + +static void +garrow_decimal64_data_type_class_init(GArrowDecimal64DataTypeClass *klass) +{ +} + +/** + * garrow_decimal64_data_type_max_precision: + * + * Returns: The max precision of 64-bit decimal data type. 
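+ * (Concretely 18, i.e. arrow::Decimal64Type::kMaxPrecision; the Ruby tests
+ * below reject precision 19.)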
+ * + * Since: 19.0.0 + */ +gint32 +garrow_decimal64_data_type_max_precision() +{ + return arrow::Decimal64Type::kMaxPrecision; +} + +/** + * garrow_decimal64_data_type_new: + * @precision: The precision of decimal data. + * @scale: The scale of decimal data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * The newly created 64-bit decimal data type on success, %NULL on error. + * + * Since: 19.0.0 + */ +GArrowDecimal64DataType * +garrow_decimal64_data_type_new(gint32 precision, gint32 scale, GError **error) +{ + auto arrow_data_type_result = arrow::Decimal64Type::Make(precision, scale); + if (garrow::check(error, arrow_data_type_result, "[decimal64-data-type][new]")) { + auto arrow_data_type = *arrow_data_type_result; + return GARROW_DECIMAL64_DATA_TYPE( + g_object_new(GARROW_TYPE_DECIMAL64_DATA_TYPE, "data-type", &arrow_data_type, NULL)); + } else { + return NULL; + } +} + G_DEFINE_TYPE(GArrowDecimal128DataType, garrow_decimal128_data_type, GARROW_TYPE_DECIMAL_DATA_TYPE) @@ -2193,6 +2249,9 @@ garrow_data_type_new_raw(std::shared_ptr *arrow_data_type) case arrow::Type::type::MAP: type = GARROW_TYPE_MAP_DATA_TYPE; break; + case arrow::Type::type::DECIMAL64: + type = GARROW_TYPE_DECIMAL64_DATA_TYPE; + break; case arrow::Type::type::DECIMAL128: type = GARROW_TYPE_DECIMAL128_DATA_TYPE; break; diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index 77180018c9be8..1a677a6e45ff4 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -608,6 +608,26 @@ GARROW_AVAILABLE_IN_ALL gint32 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type); +#define GARROW_TYPE_DECIMAL64_DATA_TYPE (garrow_decimal64_data_type_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64DataType, + garrow_decimal64_data_type, + GARROW, + DECIMAL64_DATA_TYPE, + GArrowDecimalDataType) +struct _GArrowDecimal64DataTypeClass +{ + GArrowDecimalDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +gint32 +garrow_decimal64_data_type_max_precision(); + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64DataType * +garrow_decimal64_data_type_new(gint32 precision, gint32 scale, GError **error); + #define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType, diff --git a/c_glib/arrow-glib/type.cpp b/c_glib/arrow-glib/type.cpp index d9fc36b8861c4..c4c0748357673 100644 --- a/c_glib/arrow-glib/type.cpp +++ b/c_glib/arrow-glib/type.cpp @@ -78,6 +78,8 @@ garrow_type_from_raw(arrow::Type::type type) return GARROW_TYPE_MONTH_INTERVAL; case arrow::Type::type::INTERVAL_DAY_TIME: return GARROW_TYPE_DAY_TIME_INTERVAL; + case arrow::Type::type::DECIMAL64: + return GARROW_TYPE_DECIMAL64; case arrow::Type::type::DECIMAL128: return GARROW_TYPE_DECIMAL128; case arrow::Type::type::DECIMAL256: diff --git a/c_glib/arrow-glib/type.h b/c_glib/arrow-glib/type.h index 6f33ad64ef55c..3c7c2d5aa4c2b 100644 --- a/c_glib/arrow-glib/type.h +++ b/c_glib/arrow-glib/type.h @@ -70,6 +70,8 @@ G_BEGIN_DECLS * @GARROW_TYPE_LARGE_LIST: A list of some logical data type with 64-bit offsets. * @GARROW_TYPE_MONTH_DAY_NANO_INTERVAL: MONTH_DAY_NANO interval in SQL style. * @GARROW_TYPE_RUN_END_ENCODED: Run-end encoded data. + * @GARROW_TYPE_DECIMAL64: Precision- and scale-based decimal + * type with 64-bit. Storage type depends on the parameters. * * They are corresponding to `arrow::Type::type` values. 
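 * (The numeric values mirror arrow::Type::type; that is why
 * GARROW_TYPE_DECIMAL64 below is pinned to 44, the value of
 * arrow::Type::DECIMAL64, until the intervening type IDs are added.)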
*/ @@ -113,6 +115,8 @@ typedef enum { GARROW_TYPE_LARGE_LIST, GARROW_TYPE_MONTH_DAY_NANO_INTERVAL, GARROW_TYPE_RUN_END_ENCODED, + /* TODO: Remove = 44 when we add STRING_VIEW..DECIMAL32. */ + GARROW_TYPE_DECIMAL64 = 44, } GArrowType; /** diff --git a/c_glib/test/test-decimal128-data-type.rb b/c_glib/test/test-decimal128-data-type.rb index 8cf97e38d47b5..74f39f6ef3eb2 100644 --- a/c_glib/test/test-decimal128-data-type.rb +++ b/c_glib/test/test-decimal128-data-type.rb @@ -42,8 +42,8 @@ def test_scale end def test_decimal_data_type_new - assert_equal(Arrow::Decimal128DataType.new(8, 2), - Arrow::DecimalDataType.new(8, 2)) + assert_equal(Arrow::Decimal128DataType.new(19, 2), + Arrow::DecimalDataType.new(19, 2)) end def test_invalid_precision diff --git a/c_glib/test/test-decimal64-data-type.rb b/c_glib/test/test-decimal64-data-type.rb new file mode 100644 index 0000000000000..5a0c5d08401e2 --- /dev/null +++ b/c_glib/test/test-decimal64-data-type.rb @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal64DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Decimal64DataType.new(2, 0) + assert_equal(Arrow::Type::DECIMAL64, data_type.id) + end + + def test_name + data_type = Arrow::Decimal64DataType.new(2, 0) + assert_equal("decimal64", data_type.name) + end + + def test_to_s + data_type = Arrow::Decimal64DataType.new(2, 0) + assert_equal("decimal64(2, 0)", data_type.to_s) + end + + def test_precision + data_type = Arrow::Decimal64DataType.new(8, 2) + assert_equal(8, data_type.precision) + end + + def test_scale + data_type = Arrow::Decimal64DataType.new(8, 2) + assert_equal(2, data_type.scale) + end + + def test_decimal_data_type_new + assert_equal(Arrow::Decimal64DataType.new(18, 2), + Arrow::DecimalDataType.new(18, 2)) + end + + def test_invalid_precision + assert_raise(Arrow::Error::Invalid) do + Arrow::Decimal64DataType.new(19, 1) + end + end +end From fa7e0b435a32c04de38103a371b891952ef90775 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 30 Oct 2024 18:39:40 +0900 Subject: [PATCH 16/59] GH-44558: [Release][Website] Remove needless "Apache Arrow ${VERSION}" section (#44559) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change `dev/release/post-04-website.sh` generates wrong section levels: ```markdown ... ## Changelog ## Apache Arrow 18.0.0 (2024-10-28 07:00:00+00:00) ... ### Bug Fixes ... ### New Features and Improvements ... ``` ### What changes are included in this PR? Remove the `## Apache Arrow ${VERSION}` section: ```markdown ... ## Changelog ... ### Bug Fixes ... ### New Features and Improvements ... ``` ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
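As a quick illustration of the two sed expressions added in the diff below:

```
$ printf '%s\n' '# Apache Arrow 18.0.0 (2024-10-28)' '## Bug Fixes' '## New Features and Improvements' \
    | sed -e '/^# /d' -e 's/^#/##/g'
### Bug Fixes
### New Features and Improvements
```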
* GitHub Issue: #44558 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- dev/release/post-04-website.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/release/post-04-website.sh b/dev/release/post-04-website.sh index 83dc157b346e5..a3c4dbbe07e6a 100755 --- a/dev/release/post-04-website.sh +++ b/dev/release/post-04-website.sh @@ -157,8 +157,11 @@ cat <> "${announce_file}" ANNOUNCE +# Remove the "# Apache Arrow ..." line and increment section level +# of "## Bug Fixes"/"## New Features and Improvements" to "### ...". archery release changelog generate ${version} | \ - sed -e 's/^#/##/g' >> "${announce_file}" + sed -e '/^# /d' \ + -e 's/^#/##/g' >> "${announce_file}" cat <> "${announce_file}" [1]: https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/ From 916be635edd76e9d8bc653ceb2dbd8810710c341 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 30 Oct 2024 19:00:17 +0900 Subject: [PATCH 17/59] GH-44556: [Release][MSYS2] Update python-pyarrow too (#44557) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change MSYS2 has `mingw-w64-python-pyarrow`. So we need to update it too. ### What changes are included in this PR? Update both of `mingw-w64-arrow` and `mingw-w64-python-pyarrow`. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #44556 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- dev/release/post-13-msys2.sh | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/dev/release/post-13-msys2.sh b/dev/release/post-13-msys2.sh index 154887cf38e78..8f8d035048a3b 100755 --- a/dev/release/post-13-msys2.sh +++ b/dev/release/post-13-msys2.sh @@ -55,25 +55,30 @@ echo "Creating branch: ${branch}" git branch -D ${branch} || : git checkout -b ${branch} -pkgbuild=mingw-w64-arrow/PKGBUILD -echo "Updating PKGBUILD: ${pkgbuild}" -sha256sum=$(curl \ - --location \ - "https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha256" | \ - cut -d' ' -f1) -sed \ - -i.bak \ - -e "s/^pkgver=.*\$/pkgver=${version}/" \ - -e "s/^pkgrel=.*\$/pkgrel=1/" \ - -e "s/^sha256sums=.*\$/sha256sums=('${sha256sum}'/" \ - ${pkgbuild} -rm ${pkgbuild}.bak -git add ${pkgbuild} -git commit -m "arrow: Update to ${version}" +for package in arrow python-pyarrow; do + pkgbuild=mingw-w64-${package}/PKGBUILD + echo "Updating PKGBUILD: ${pkgbuild}" + sha256sum=$(curl \ + --location \ + "https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha256" | \ + cut -d' ' -f1) + sed \ + -i.bak \ + -e "s/^pkgver=.*\$/pkgver=${version}/" \ + -e "s/^pkgrel=.*\$/pkgrel=1/" \ + -e "s/^sha256sums=.*\$/sha256sums=('${sha256sum}'/" \ + ${pkgbuild} + rm ${pkgbuild}.bak + git add ${pkgbuild} + git commit -m "${package}: Update to ${version}" +done for pkgbuild in $(grep -l -r '${MINGW_PACKAGE_PREFIX}-arrow' ./); do dir=${pkgbuild%/PKGBUILD} name=${dir#./mingw-w64-} + if [ ${name} = "python-pyarrow" ]; then + continue + fi echo "Incrementing ${name}'s pkgrel: ${pkgbuild}" pkgrel=$(grep -o '^pkgrel=.*' ${pkgbuild} | cut -d= -f2) sed \ From feafddda79cb4ddd5193ac2170ab43dc825fcb18 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 30 Oct 2024 19:26:09 +0900 Subject: [PATCH 18/59] GH-44574: [Release] Ensure using the release tag to build binaries (#44577) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit ### Rationale for this change We may use wrong commit with the current `dev/release/03-binary-submit.sh` because it uses the current commit on the checkout-ed branch. ### What changes are included in this PR? Specify `--arrow-sha` explicitly to ensure using the release tag. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #44574 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- dev/release/03-binary-submit.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/release/03-binary-submit.sh b/dev/release/03-binary-submit.sh index 0dc63aa7089d1..9281597df5ecc 100755 --- a/dev/release/03-binary-submit.sh +++ b/dev/release/03-binary-submit.sh @@ -32,7 +32,8 @@ release_tag="apache-arrow-${version}-rc${rc}" rc_branch="release-${version_with_rc}" : ${ARROW_REPOSITORY:="apache/arrow"} -: ${ARROW_BRANCH:=$release_tag} +: ${ARROW_BRANCH:=${release_tag}} +: ${ARROW_SHA:=${release_tag}} # archery will submit a job with id: "${crossbow_job_prefix}-0" unless there # are jobs submitted with the same prefix (the integer at the end is auto @@ -43,6 +44,7 @@ archery crossbow submit \ --arrow-version ${version_with_rc} \ --arrow-remote "https://github.com/${ARROW_REPOSITORY}" \ --arrow-branch ${ARROW_BRANCH} \ + --arrow-sha ${ARROW_SHA} \ --group packaging # archery will add a comment to the automatically generated PR to track From 5b68eca6cb7ec167cb55d7bdfd6be1850ece963c Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Thu, 31 Oct 2024 07:05:38 +0900 Subject: [PATCH 19/59] GH-44392: [GLib] Add GArrowDecimal32DataType (#44580) ### Rationale for this change `arrow::Decimal32Type` data type has been introduced. It is also necessary to support the same data type in GLib. ### What changes are included in this PR? This PR implements `GArrowDecimal32DataType`. ### Are these changes tested? YES ### Are there any user-facing changes? Before this change `garrow_decimal_data_type_new()` returns `GArrowDecimal64DataType` if the precision is less than `garrow_decimal64_data_type_max_precision()`. After this change, it returns `GArrowDecimal32DataType` if the precision is less than garrow_decimal32_data_type_max_precision(). * GitHub Issue: #44392 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/basic-data-type.cpp | 68 +++++++++++++++++++++++-- c_glib/arrow-glib/basic-data-type.h | 20 ++++++++ c_glib/arrow-glib/type.cpp | 2 + c_glib/arrow-glib/type.h | 6 ++- c_glib/test/test-decimal32-data-type.rb | 54 ++++++++++++++++++++ 5 files changed, 144 insertions(+), 6 deletions(-) create mode 100644 c_glib/test/test-decimal32-data-type.rb diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index b2bd99e0f228d..ecb537aa1f905 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -114,6 +114,8 @@ G_BEGIN_DECLS * * #GArrowDecimalDataType is a base class for the decimal data types. * + * #GArrowDecimal32DataType is a class for the 32-bit decimal data type. + * * #GArrowDecimal64DataType is a class for the 64-bit decimal data type. * * #GArrowDecimal128DataType is a class for the 128-bit decimal data type. @@ -1493,16 +1495,20 @@ garrow_decimal_data_type_class_init(GArrowDecimalDataTypeClass *klass) * Returns: (nullable): * The newly created decimal data type on success, %NULL on error. 
* - * #GArrowDecimal256DataType is used if @precision is larger than - * garrow_decimal128_data_type_max_precision(), - * #GArrowDecimal128DataType is used otherwise. + * * #GArrowDecimal32DataType is used if @precision up to 9 + * * #GArrowDecimal64DataType is used if @precision up to 19 + * * #GArrowDecimal128DataType is used if @precision up to 38 + * * #GArrowDecimal256DataType is used otherwise * * Since: 0.10.0 */ GArrowDecimalDataType * garrow_decimal_data_type_new(gint32 precision, gint32 scale, GError **error) { - if (precision <= garrow_decimal64_data_type_max_precision()) { + if (precision <= garrow_decimal32_data_type_max_precision()) { + return GARROW_DECIMAL_DATA_TYPE( + garrow_decimal32_data_type_new(precision, scale, error)); + } else if (precision <= garrow_decimal64_data_type_max_precision()) { return GARROW_DECIMAL_DATA_TYPE( garrow_decimal64_data_type_new(precision, scale, error)); } else if (precision <= garrow_decimal128_data_type_max_precision()) { @@ -1550,6 +1556,57 @@ garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type) return arrow_decimal_type->scale(); } +G_DEFINE_TYPE(GArrowDecimal32DataType, + garrow_decimal32_data_type, + GARROW_TYPE_DECIMAL_DATA_TYPE) + +static void +garrow_decimal32_data_type_init(GArrowDecimal32DataType *object) +{ +} + +static void +garrow_decimal32_data_type_class_init(GArrowDecimal32DataTypeClass *klass) +{ +} + +/** + * garrow_decimal32_data_type_max_precision: + * + * Returns: The max precision of 32-bit decimal data type. + * + * Since: 19.0.0 + */ +gint32 +garrow_decimal32_data_type_max_precision() +{ + return arrow::Decimal32Type::kMaxPrecision; +} + +/** + * garrow_decimal32_data_type_new: + * @precision: The precision of decimal data. + * @scale: The scale of decimal data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * The newly created 32-bit decimal data type on success, %NULL on error. 
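+ * (@precision must be at most 9, i.e. arrow::Decimal32Type::kMaxPrecision;
+ * the Ruby tests below reject precision 10.)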
+ * + * Since: 19.0.0 + */ +GArrowDecimal32DataType * +garrow_decimal32_data_type_new(gint32 precision, gint32 scale, GError **error) +{ + auto arrow_data_type_result = arrow::Decimal32Type::Make(precision, scale); + if (garrow::check(error, arrow_data_type_result, "[decimal32-data-type][new]")) { + auto arrow_data_type = *arrow_data_type_result; + return GARROW_DECIMAL32_DATA_TYPE( + g_object_new(GARROW_TYPE_DECIMAL32_DATA_TYPE, "data-type", &arrow_data_type, NULL)); + } else { + return NULL; + } +} + G_DEFINE_TYPE(GArrowDecimal64DataType, garrow_decimal64_data_type, GARROW_TYPE_DECIMAL_DATA_TYPE) @@ -2249,6 +2306,9 @@ garrow_data_type_new_raw(std::shared_ptr *arrow_data_type) case arrow::Type::type::MAP: type = GARROW_TYPE_MAP_DATA_TYPE; break; + case arrow::Type::type::DECIMAL32: + type = GARROW_TYPE_DECIMAL32_DATA_TYPE; + break; case arrow::Type::type::DECIMAL64: type = GARROW_TYPE_DECIMAL64_DATA_TYPE; break; diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index 1a677a6e45ff4..edbe15e2df521 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -608,6 +608,26 @@ GARROW_AVAILABLE_IN_ALL gint32 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type); +#define GARROW_TYPE_DECIMAL32_DATA_TYPE (garrow_decimal32_data_type_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal32DataType, + garrow_decimal32_data_type, + GARROW, + DECIMAL32_DATA_TYPE, + GArrowDecimalDataType) +struct _GArrowDecimal32DataTypeClass +{ + GArrowDecimalDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +gint32 +garrow_decimal32_data_type_max_precision(); + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32DataType * +garrow_decimal32_data_type_new(gint32 precision, gint32 scale, GError **error); + #define GARROW_TYPE_DECIMAL64_DATA_TYPE (garrow_decimal64_data_type_get_type()) GARROW_AVAILABLE_IN_19_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64DataType, diff --git a/c_glib/arrow-glib/type.cpp b/c_glib/arrow-glib/type.cpp index c4c0748357673..26d21f6d82587 100644 --- a/c_glib/arrow-glib/type.cpp +++ b/c_glib/arrow-glib/type.cpp @@ -78,6 +78,8 @@ garrow_type_from_raw(arrow::Type::type type) return GARROW_TYPE_MONTH_INTERVAL; case arrow::Type::type::INTERVAL_DAY_TIME: return GARROW_TYPE_DAY_TIME_INTERVAL; + case arrow::Type::type::DECIMAL32: + return GARROW_TYPE_DECIMAL32; case arrow::Type::type::DECIMAL64: return GARROW_TYPE_DECIMAL64; case arrow::Type::type::DECIMAL128: diff --git a/c_glib/arrow-glib/type.h b/c_glib/arrow-glib/type.h index 3c7c2d5aa4c2b..a817da4b9413e 100644 --- a/c_glib/arrow-glib/type.h +++ b/c_glib/arrow-glib/type.h @@ -70,6 +70,7 @@ G_BEGIN_DECLS * @GARROW_TYPE_LARGE_LIST: A list of some logical data type with 64-bit offsets. * @GARROW_TYPE_MONTH_DAY_NANO_INTERVAL: MONTH_DAY_NANO interval in SQL style. * @GARROW_TYPE_RUN_END_ENCODED: Run-end encoded data. + * @GARROW_TYPE_DECIMAL32: Precision- and scale-based decimal * @GARROW_TYPE_DECIMAL64: Precision- and scale-based decimal * type with 64-bit. Storage type depends on the parameters. * @@ -115,8 +116,9 @@ typedef enum { GARROW_TYPE_LARGE_LIST, GARROW_TYPE_MONTH_DAY_NANO_INTERVAL, GARROW_TYPE_RUN_END_ENCODED, - /* TODO: Remove = 44 when we add STRING_VIEW..DECIMAL32. */ - GARROW_TYPE_DECIMAL64 = 44, + /* TODO: Remove = 43 when we add STRING_VIEW(39)..LARGE_LIST_VIEW(42). 
*/ + GARROW_TYPE_DECIMAL32 = 43, + GARROW_TYPE_DECIMAL64, } GArrowType; /** diff --git a/c_glib/test/test-decimal32-data-type.rb b/c_glib/test/test-decimal32-data-type.rb new file mode 100644 index 0000000000000..f0f3778326b2b --- /dev/null +++ b/c_glib/test/test-decimal32-data-type.rb @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal32DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Decimal32DataType.new(2, 0) + assert_equal(Arrow::Type::DECIMAL32, data_type.id) + end + + def test_name + data_type = Arrow::Decimal32DataType.new(2, 0) + assert_equal("decimal32", data_type.name) + end + + def test_to_s + data_type = Arrow::Decimal32DataType.new(2, 0) + assert_equal("decimal32(2, 0)", data_type.to_s) + end + + def test_precision + data_type = Arrow::Decimal32DataType.new(8, 2) + assert_equal(8, data_type.precision) + end + + def test_scale + data_type = Arrow::Decimal32DataType.new(8, 2) + assert_equal(2, data_type.scale) + end + + def test_decimal_data_type_new + assert_equal(Arrow::Decimal32DataType.new(3, 2), + Arrow::DecimalDataType.new(3, 2)) + end + + def test_invalid_precision + assert_raise(Arrow::Error::Invalid) do + Arrow::Decimal32DataType.new(10, 1) + end + end +end From 3917b605ef1851ad036fb62856efcc83e32f8580 Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 31 Oct 2024 11:50:09 +0800 Subject: [PATCH 20/59] GH-44581: [C++] Minor: ArrayData ctor can assign null_count directly (#44582) ### Rationale for this change See https://godbolt.org/z/37reoKTfK The `ArrayData` ctor can assign `null_count` directly, rather than `SetNullCount`. ### What changes are included in this PR? Change `null_count` assigned directly ### Are these changes tested? Covered by existing ### Are there any user-facing changes? 
No * GitHub Issue: #44581 Authored-by: mwish Signed-off-by: mwish --- cpp/src/arrow/array/data.h | 14 ++++++-------- cpp/src/arrow/extension_type.h | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 1e6ee9a1d32ff..eed7860a9f703 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -150,25 +150,23 @@ struct ARROW_EXPORT ArrayData { ArrayData(ArrayData&& other) noexcept : type(std::move(other.type)), length(other.length), + null_count(other.null_count.load()), offset(other.offset), buffers(std::move(other.buffers)), child_data(std::move(other.child_data)), dictionary(std::move(other.dictionary)), - statistics(std::move(other.statistics)) { - SetNullCount(other.null_count); - } + statistics(std::move(other.statistics)) {} // Copy constructor ArrayData(const ArrayData& other) noexcept : type(other.type), length(other.length), + null_count(other.null_count.load()), offset(other.offset), buffers(other.buffers), child_data(other.child_data), dictionary(other.dictionary), - statistics(other.statistics) { - SetNullCount(other.null_count); - } + statistics(other.statistics) {} // Move assignment ArrayData& operator=(ArrayData&& other) { @@ -324,7 +322,7 @@ struct ARROW_EXPORT ArrayData { /// \brief Return true if the validity bitmap may have 0's in it, or if the /// child arrays (in the case of types without a validity bitmap) may have - /// nulls, or if the dictionary of dictionay array may have nulls. + /// nulls, or if the dictionary of dictionary array may have nulls. /// /// This is not a drop-in replacement for MayHaveNulls, as historically /// MayHaveNulls() has been used to check for the presence of a validity @@ -639,7 +637,7 @@ struct ARROW_EXPORT ArraySpan { bool HasVariadicBuffers() const; private: - ARROW_FRIEND_EXPORT friend bool internal::IsNullRunEndEncoded(const ArrayData& span, + ARROW_FRIEND_EXPORT friend bool internal::IsNullRunEndEncoded(const ArrayData& data, int64_t i); bool IsNullSparseUnion(int64_t i) const; diff --git a/cpp/src/arrow/extension_type.h b/cpp/src/arrow/extension_type.h index b3f085198be69..38200f42c62e8 100644 --- a/cpp/src/arrow/extension_type.h +++ b/cpp/src/arrow/extension_type.h @@ -98,7 +98,7 @@ class ARROW_EXPORT ExtensionType : public DataType { protected: explicit ExtensionType(std::shared_ptr storage_type) - : DataType(Type::EXTENSION), storage_type_(storage_type) {} + : DataType(Type::EXTENSION), storage_type_(std::move(storage_type)) {} std::shared_ptr storage_type_; }; From f5691d467c5638c6f6a07948de82369cc0ea282e Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 31 Oct 2024 14:15:20 +0800 Subject: [PATCH 21/59] GH-44101: [C++][Parquet] Tools: Debug Print for Json should be valid JSON (#44532) ### Rationale for this change The printJson is not a valid json now. This is ok for human-read, but when I want to analysis it with json tools or ai, it will prevent from using it. ### What changes are included in this PR? Change the output to be a valid json. Style: previously, the `\"` trailing would be added in start of object, but this patch put it to end of object Before: ``` stream << "\", \"number\":\"" << number; stream << "\"..."; ``` After: ``` stream << ", \"number\":\"" << number << "\""; ``` ### Are these changes tested? Yes ### Are there any user-facing changes? 
Minor format change * GitHub Issue: #44101 Authored-by: mwish Signed-off-by: mwish --- cpp/src/parquet/CMakeLists.txt | 2 +- cpp/src/parquet/printer.cc | 56 ++++++++++++++++++---------------- cpp/src/parquet/printer.h | 2 +- cpp/src/parquet/reader_test.cc | 32 +++++++++++++++++++ 4 files changed, 64 insertions(+), 28 deletions(-) diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index b984ef77adbe0..e43a254fb616a 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -320,7 +320,7 @@ if(ARROW_TESTING) # "link" our dependencies so that include paths are configured # correctly target_link_libraries(parquet_testing PUBLIC ${ARROW_GTEST_GMOCK}) - list(APPEND PARQUET_TEST_LINK_LIBS parquet_testing) + list(APPEND PARQUET_TEST_LINK_LIBS parquet_testing RapidJSON) endif() if(NOT ARROW_BUILD_SHARED) diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc index 60adfc697f95c..3ce3e1da4bb09 100644 --- a/cpp/src/parquet/printer.cc +++ b/cpp/src/parquet/printer.cc @@ -220,7 +220,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list selecte bool hasRow; do { hasRow = false; - for (auto scanner : scanners) { + for (const auto& scanner : scanners) { if (scanner->HasNext()) { hasRow = true; scanner->PrintNext(stream, COL_WIDTH); @@ -246,7 +246,7 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected << file_metadata->schema()->group_node()->field_count() << "\",\n"; stream << " \"NumberOfColumns\": \"" << file_metadata->num_columns() << "\",\n"; - if (selected_columns.size() == 0) { + if (selected_columns.empty()) { for (int i = 0; i < file_metadata->num_columns(); i++) { selected_columns.push_back(i); } @@ -299,29 +299,30 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected << column_chunk->num_values() << "\", " << "\"StatsSet\": "; if (column_chunk->is_stats_set()) { - stream << "\"True\", \"Stats\": {"; + stream << R"("True", "Stats": {)"; if (stats->HasNullCount()) { - stream << "\"NumNulls\": \"" << stats->null_count(); + stream << R"("NumNulls": ")" << stats->null_count() << "\""; } if (stats->HasDistinctCount()) { - stream << "\", " - << "\"DistinctValues\": \"" << stats->distinct_count(); + stream << ", " + << R"("DistinctValues": ")" << stats->distinct_count() << "\""; } if (stats->HasMinMax()) { std::string min = stats->EncodeMin(), max = stats->EncodeMax(); - stream << "\", " - << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) + stream << ", " + << R"("Max": ")" << FormatStatValue(descr->physical_type(), max) << "\", " - << "\"Min\": \"" << FormatStatValue(descr->physical_type(), min); + << R"("Min": ")" << FormatStatValue(descr->physical_type(), min) << "\""; } - stream << "\" },"; + stream << " },"; } else { stream << "\"False\","; } stream << "\n \"Compression\": \"" << ::arrow::internal::AsciiToUpper( Codec::GetCodecAsString(column_chunk->compression())) - << "\", \"Encodings\": \""; + << R"(", "Encodings": )"; + stream << "\""; if (column_chunk->encoding_stats().empty()) { for (auto encoding : column_chunk->encodings()) { stream << EncodingToString(encoding) << " "; @@ -329,40 +330,43 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected } else { PrintPageEncodingStats(stream, column_chunk->encoding_stats()); } - stream << "\", " - << "\"UncompressedSize\": \"" << column_chunk->total_uncompressed_size() - << "\", \"CompressedSize\": \"" << column_chunk->total_compressed_size(); + stream << "\""; + stream << ", " + 
<< R"("UncompressedSize": ")" << column_chunk->total_uncompressed_size() + << R"(", "CompressedSize": ")" << column_chunk->total_compressed_size() + << "\""; if (column_chunk->bloom_filter_offset()) { // Output BloomFilter {offset, length} - stream << "\", BloomFilter {" - << "\"offset\": \"" << column_chunk->bloom_filter_offset().value(); + stream << ", \"BloomFilter\": {" + << R"("offset": ")" << column_chunk->bloom_filter_offset().value() << "\""; if (column_chunk->bloom_filter_length()) { - stream << "\", \"length\": \"" << column_chunk->bloom_filter_length().value(); + stream << R"(, "length": ")" << column_chunk->bloom_filter_length().value() + << "\""; } - stream << "\"}"; + stream << "}"; } if (column_chunk->GetColumnIndexLocation()) { auto location = column_chunk->GetColumnIndexLocation().value(); // Output ColumnIndex {offset, length} - stream << "\", ColumnIndex {" - << "\"offset\": \"" << location.offset; - stream << "\", \"length\": \"" << location.length; + stream << ", \"ColumnIndex\": {" + << R"("offset": ")" << location.offset; + stream << R"(", "length": ")" << location.length; stream << "\"}"; } if (column_chunk->GetOffsetIndexLocation()) { auto location = column_chunk->GetOffsetIndexLocation().value(); // Output OffsetIndex {offset, length} - stream << "\", OffsetIndex {" - << "\"offset\": \"" << location.offset; - stream << "\", \"length\": \"" << location.length; - stream << "\"}"; + stream << ", \"OffsetIndex\": {" + << R"("offset": ")" << location.offset << "\""; + stream << R"(, "length": ")" << location.length << "\""; + stream << "}"; } // end of a ColumnChunk - stream << "\" }"; + stream << " }"; c1++; if (c1 != static_cast(selected_columns.size())) { stream << ",\n"; diff --git a/cpp/src/parquet/printer.h b/cpp/src/parquet/printer.h index 6bdf5b456fa6b..bb86b107f9f9b 100644 --- a/cpp/src/parquet/printer.h +++ b/cpp/src/parquet/printer.h @@ -32,7 +32,7 @@ class PARQUET_EXPORT ParquetFilePrinter { public: explicit ParquetFilePrinter(ParquetFileReader* reader) : fileReader(reader) {} - ~ParquetFilePrinter() {} + ~ParquetFilePrinter() = default; void DebugPrint(std::ostream& stream, std::list selected_columns, bool print_values = false, bool format_dump = false, diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index fb77ba6cbc178..688c875b9ec0f 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -27,6 +27,12 @@ #include #include +#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep + +#include +#include +#include + #include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/io/file.h" @@ -47,6 +53,8 @@ #include "parquet/printer.h" #include "parquet/test_util.h" +namespace rj = arrow::rapidjson; + using arrow::internal::checked_pointer_cast; using arrow::internal::Zip; @@ -1172,6 +1180,30 @@ TEST_F(TestJSONWithLocalFile, JSONOutputFLBA) { EXPECT_THAT(json_content, testing::HasSubstr(json_contains)); } +// GH-44101: Test that JSON output is valid JSON +TEST_F(TestJSONWithLocalFile, ValidJsonOutput) { + auto check_json_valid = [](std::string_view json_string) -> ::arrow::Status { + rj::Document json_doc; + constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag; + json_doc.Parse(json_string.data(), json_string.length()); + if (json_doc.HasParseError()) { + return ::arrow::Status::Invalid("JSON parse error at offset ", + json_doc.GetErrorOffset(), ": ", + rj::GetParseError_En(json_doc.GetParseError())); + } + return ::arrow::Status::OK(); + }; + std::vector 
<std::string> check_file_lists = { "data_index_bloom_encoding_with_length.parquet", "data_index_bloom_encoding_stats.parquet", "alltypes_tiny_pages_plain.parquet", "concatenated_gzip_members.parquet", "nulls.snappy.parquet"}; for (const auto& file : check_file_lists) { std::string json_content = ReadFromLocalFile(file); ASSERT_OK(check_json_valid(json_content)) << "Invalid JSON output for file: " << file << ", content:" << json_content; } } TEST(TestFileReader, BufferedReadsWithDictionary) { const int num_rows = 1000; From 567f9c596ed8e5b9c4435172442997874a4764e0 Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Thu, 31 Oct 2024 15:53:39 +0900 Subject: [PATCH 22/59] GH-44588: [GLib] Add GArrowDecimal64 Class (#44591) ### Rationale for this change `Decimal64` class has been introduced. It is necessary to support it in GLib. ### What changes are included in this PR? Implement `GArrowDecimal64` class. ### Are these changes tested? YES ### Are there any user-facing changes? NO * GitHub Issue: #44588 Lead-authored-by: Hiroyuki Sato Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/decimal.cpp | 435 +++++++++++++++++++++++++++++++++- c_glib/arrow-glib/decimal.h | 79 ++++++ c_glib/arrow-glib/decimal.hpp | 5 + c_glib/test/test-decimal64.rb | 222 +++++++++++++++++ 4 files changed, 740 insertions(+), 1 deletion(-) create mode 100644 c_glib/test/test-decimal64.rb diff --git a/c_glib/arrow-glib/decimal.cpp b/c_glib/arrow-glib/decimal.cpp index 5947868edd3bc..edc2af7a7e051 100644 --- a/c_glib/arrow-glib/decimal.cpp +++ b/c_glib/arrow-glib/decimal.cpp @@ -24,6 +24,24 @@ template struct DecimalConverter { }; +template <> struct DecimalConverter<arrow::Decimal64, GArrowDecimal64> +{ + using ArrowType = arrow::Decimal64; + using GArrowType = GArrowDecimal64; + + GArrowType * + new_raw(std::shared_ptr<arrow::Decimal64> *arrow_decimal64) + { + return garrow_decimal64_new_raw(arrow_decimal64); + } + + std::shared_ptr<arrow::Decimal64> + get_raw(GArrowType *decimal64) + { + return garrow_decimal64_get_raw(decimal64); + } +}; + template <> struct DecimalConverter<arrow::Decimal128, GArrowDecimal128> { using ArrowType = arrow::Decimal128; @@ -301,9 +319,11 @@ G_BEGIN_DECLS /** * SECTION: decimal * @section_id: decimal - * @title: 128-bit and 256-bit decimal classes + * @title: 64-bit, 128-bit and 256-bit decimal classes * @include: arrow-glib/arrow-glib.h * + * #GArrowDecimal64 is a 64-bit decimal class. + * * #GArrowDecimal128 is a 128-bit decimal class. * * #GArrowDecimal256 is a 256-bit decimal class.
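/* Illustrative usage of the class added below (not part of the patch; the
 * values come from the Ruby tests at the end of this change):
 *
 *   GError *error = NULL;
 *   GArrowDecimal64 *decimal = garrow_decimal64_new_string("234.23445", &error);
 *   gchar *formatted = garrow_decimal64_to_string_scale(decimal, 5);
 *   g_print("%s\n", formatted); // -> 234.23445
 *   g_free(formatted);
 *   g_object_unref(decimal);
 */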
@@ -311,6 +331,404 @@ G_BEGIN_DECLS * Since: 0.10.0 */ +typedef struct GArrowDecimal64Private_ +{ + std::shared_ptr decimal64; +} GArrowDecimal64Private; + +enum { + PROP_DECIMAL64 = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal64, garrow_decimal64, G_TYPE_OBJECT) + +#define GARROW_DECIMAL64_GET_PRIVATE(obj) \ + static_cast( \ + garrow_decimal64_get_instance_private(GARROW_DECIMAL64(obj))) + +static void +garrow_decimal64_finalize(GObject *object) +{ + auto priv = GARROW_DECIMAL64_GET_PRIVATE(object); + + priv->decimal64.~shared_ptr(); + + G_OBJECT_CLASS(garrow_decimal64_parent_class)->finalize(object); +} + +static void +garrow_decimal64_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL64_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DECIMAL64: + priv->decimal64 = + *static_cast *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal64_init(GArrowDecimal64 *object) +{ + auto priv = GARROW_DECIMAL64_GET_PRIVATE(object); + new (&priv->decimal64) std::shared_ptr; +} + +static void +garrow_decimal64_class_init(GArrowDecimal64Class *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_decimal64_finalize; + gobject_class->set_property = garrow_decimal64_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "decimal64", + "Decimal64", + "The raw std::shared *", + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DECIMAL64, spec); +} + +/** + * garrow_decimal64_new_string: + * @data: The data of the decimal. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * A newly created #GArrowDecimal64 on success, %NULL on error. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_new_string(const gchar *data, GError **error) +{ + return garrow_decimal_new_string(data, + error, + "[decimal64][new][string]"); +} + +/** + * garrow_decimal64_new_integer: + * @data: The data of the decimal. + * + * Returns: A newly created #GArrowDecimal64. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_new_integer(const gint64 data) +{ + return garrow_decimal_new_integer(data); +} + +/** + * garrow_decimal64_copy: + * @decimal: The decimal to be copied. + * + * Returns: (transfer full): A copied #GArrowDecimal64. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_copy(GArrowDecimal64 *decimal) +{ + return garrow_decimal_copy(decimal); +} + +/** + * garrow_decimal64_equal: + * @decimal: A #GArrowDecimal64. + * @other_decimal: A #GArrowDecimal64 to be compared. + * + * Returns: %TRUE if the decimal is equal to the other decimal, %FALSE + * otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_equal(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal) +{ + return garrow_decimal_equal(decimal, other_decimal); +} + +/** + * garrow_decimal64_not_equal: + * @decimal: A #GArrowDecimal64. + * @other_decimal: A #GArrowDecimal64 to be compared. + * + * Returns: %TRUE if the decimal isn't equal to the other decimal, + * %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_not_equal(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal) +{ + return garrow_decimal_not_equal(decimal, other_decimal); +} + +/** + * garrow_decimal64_less_than: + * @decimal: A #GArrowDecimal64. 
+ * @other_decimal: A #GArrowDecimal64 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal, + * %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_less_than(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal) +{ + return garrow_decimal_less_than(decimal, other_decimal); +} + +/** + * garrow_decimal64_less_than_or_equal: + * @decimal: A #GArrowDecimal64. + * @other_decimal: A #GArrowDecimal64 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_less_than_or_equal(GArrowDecimal64 *decimal, + GArrowDecimal64 *other_decimal) +{ + return garrow_decimal_less_than_or_equal(decimal, other_decimal); +} + +/** + * garrow_decimal64_greater_than: + * @decimal: A #GArrowDecimal64. + * @other_decimal: A #GArrowDecimal64 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal, + * %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_greater_than(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal) +{ + return garrow_decimal_greater_than(decimal, other_decimal); +} + +/** + * garrow_decimal64_greater_than_or_equal: + * @decimal: A #GArrowDecimal64. + * @other_decimal: A #GArrowDecimal64 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_greater_than_or_equal(GArrowDecimal64 *decimal, + GArrowDecimal64 *other_decimal) +{ + return garrow_decimal_greater_than_or_equal(decimal, other_decimal); +} + +/** + * garrow_decimal64_to_string_scale: + * @decimal: A #GArrowDecimal64. + * @scale: The scale of the decimal. + * + * Returns: The string representation of the decimal. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 19.0.0 + */ +gchar * +garrow_decimal64_to_string_scale(GArrowDecimal64 *decimal, gint32 scale) +{ + return garrow_decimal_to_string_scale(decimal, scale); +} + +/** + * garrow_decimal64_to_string: + * @decimal: A #GArrowDecimal64. + * + * Returns: The string representation of the decimal. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 19.0.0 + */ +gchar * +garrow_decimal64_to_string(GArrowDecimal64 *decimal) +{ + return garrow_decimal_to_string(decimal); +} + +/** + * garrow_decimal64_to_bytes: + * @decimal: A #GArrowDecimal64. + * + * Returns: (transfer full): The binary representation of the decimal. + * + * Since: 19.0.0 + */ +GBytes * +garrow_decimal64_to_bytes(GArrowDecimal64 *decimal) +{ + return garrow_decimal_to_bytes(decimal); +} + +/** + * garrow_decimal64_abs: + * @decimal: A #GArrowDecimal64. + * + * Computes the absolute value of the @decimal destructively. + * + * Since: 19.0.0 + */ +void +garrow_decimal64_abs(GArrowDecimal64 *decimal) +{ + garrow_decimal_abs(decimal); +} + +/** + * garrow_decimal64_negate: + * @decimal: A #GArrowDecimal64. + * + * Negate the current value of the @decimal destructively. + * + * Since: 19.0.0 + */ +void +garrow_decimal64_negate(GArrowDecimal64 *decimal) +{ + garrow_decimal_negate(decimal); +} + +/** + * garrow_decimal64_to_integer: + * @decimal: A #GArrowDecimal64. + * + * Returns: The 64-bit integer representation of the decimal. 
+ * + * Since: 19.0.0 + */ +gint64 +garrow_decimal64_to_integer(GArrowDecimal64 *decimal) +{ + auto arrow_decimal = garrow_decimal64_get_raw(decimal); + return static_cast(*arrow_decimal); +} + +/** + * garrow_decimal64_plus: + * @left: A #GArrowDecimal64. + * @right: A #GArrowDecimal64. + * + * Returns: (transfer full): The added value of these decimals. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_plus(GArrowDecimal64 *left, GArrowDecimal64 *right) +{ + return garrow_decimal_plus(left, right); +} + +/** + * garrow_decimal64_minus: + * @left: A #GArrowDecimal64. + * @right: A #GArrowDecimal64. + * + * Returns: (transfer full): The subtracted value of these decimals. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_minus(GArrowDecimal64 *left, GArrowDecimal64 *right) +{ + return garrow_decimal_minus(left, right); +} + +/** + * garrow_decimal64_multiply: + * @left: A #GArrowDecimal64. + * @right: A #GArrowDecimal64. + * + * Returns: (transfer full): The multiplied value of these decimals. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_multiply(GArrowDecimal64 *left, GArrowDecimal64 *right) +{ + return garrow_decimal_multiply(left, right); +} + +/** + * garrow_decimal64_divide: + * @left: A #GArrowDecimal64. + * @right: A #GArrowDecimal64. + * @remainder: (out) (nullable): A return location for the remainder + * value of these decimals. The returned #GArrowDecimal64 be + * unreferred with g_object_unref() when no longer needed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The divided value of + * these decimals or %NULL on error. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_divide(GArrowDecimal64 *left, + GArrowDecimal64 *right, + GArrowDecimal64 **remainder, + GError **error) +{ + return garrow_decimal_divide(left, + right, + remainder, + error, + "[decimal64][divide]"); +} + +/** + * garrow_decimal64_rescale: + * @decimal: A #GArrowDecimal64. + * @original_scale: A scale to be converted from. + * @new_scale: A scale to be converted to. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The rescaled decimal or %NULL on error. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_rescale(GArrowDecimal64 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error) +{ + return garrow_decimal_rescale(decimal, + original_scale, + new_scale, + error, + "[decimal64][rescale]"); +} + typedef struct GArrowDecimal128Private_ { std::shared_ptr decimal128; @@ -1081,6 +1499,21 @@ garrow_decimal256_rescale(GArrowDecimal256 *decimal, G_END_DECLS +GArrowDecimal64 * +garrow_decimal64_new_raw(std::shared_ptr *arrow_decimal64) +{ + auto decimal64 = + g_object_new(garrow_decimal64_get_type(), "decimal64", arrow_decimal64, NULL); + return GARROW_DECIMAL64(decimal64); +} + +std::shared_ptr +garrow_decimal64_get_raw(GArrowDecimal64 *decimal64) +{ + auto priv = GARROW_DECIMAL64_GET_PRIVATE(decimal64); + return priv->decimal64; +} + GArrowDecimal128 * garrow_decimal128_new_raw(std::shared_ptr *arrow_decimal128) { diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal.h index f64afa800a19b..bb266424b4cc0 100644 --- a/c_glib/arrow-glib/decimal.h +++ b/c_glib/arrow-glib/decimal.h @@ -25,6 +25,85 @@ G_BEGIN_DECLS +/* Disabled because it conflicts with GARROW_TYPE_DECIMAL64 in GArrowType. 
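   (that is, the GArrowType enum member and the commented-out type macro would
   both be named GARROW_TYPE_DECIMAL64)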
*/ +/* #define GARROW_TYPE_DECIMAL64 (garrow_decimal64_get_type()) */ +GARROW_AVAILABLE_IN_ALL +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64, garrow_decimal64, GARROW, DECIMAL64, GObject) + +struct _GArrowDecimal64Class +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_new_string(const gchar *data, GError **error); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_new_integer(const gint64 data); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_copy(GArrowDecimal64 *decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_equal(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_not_equal(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_less_than(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_less_than_or_equal(GArrowDecimal64 *decimal, + GArrowDecimal64 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_greater_than(GArrowDecimal64 *decimal, GArrowDecimal64 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_greater_than_or_equal(GArrowDecimal64 *decimal, + GArrowDecimal64 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gchar * +garrow_decimal64_to_string_scale(GArrowDecimal64 *decimal, gint32 scale); +GARROW_AVAILABLE_IN_19_0 +gchar * +garrow_decimal64_to_string(GArrowDecimal64 *decimal); +GARROW_AVAILABLE_IN_19_0 +GBytes * +garrow_decimal64_to_bytes(GArrowDecimal64 *decimal); +GARROW_AVAILABLE_IN_19_0 +void +garrow_decimal64_abs(GArrowDecimal64 *decimal); +GARROW_AVAILABLE_IN_19_0 +void +garrow_decimal64_negate(GArrowDecimal64 *decimal); +GARROW_AVAILABLE_IN_19_0 +gint64 +garrow_decimal64_to_integer(GArrowDecimal64 *decimal); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_plus(GArrowDecimal64 *left, GArrowDecimal64 *right); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_minus(GArrowDecimal64 *left, GArrowDecimal64 *right); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_multiply(GArrowDecimal64 *left, GArrowDecimal64 *right); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_divide(GArrowDecimal64 *left, + GArrowDecimal64 *right, + GArrowDecimal64 **remainder, + GError **error); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_rescale(GArrowDecimal64 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error); + /* Disabled because it conflicts with GARROW_TYPE_DECIMAL128 in GArrowType. */ /* #define GARROW_TYPE_DECIMAL128 (garrow_decimal128_get_type()) */ GARROW_AVAILABLE_IN_ALL diff --git a/c_glib/arrow-glib/decimal.hpp b/c_glib/arrow-glib/decimal.hpp index 054a91b836054..dbfb7f30c60e0 100644 --- a/c_glib/arrow-glib/decimal.hpp +++ b/c_glib/arrow-glib/decimal.hpp @@ -25,6 +25,11 @@ #include +GArrowDecimal64 * +garrow_decimal64_new_raw(std::shared_ptr *arrow_decimal64); +std::shared_ptr +garrow_decimal64_get_raw(GArrowDecimal64 *decimal); + GArrowDecimal128 * garrow_decimal128_new_raw(std::shared_ptr *arrow_decimal128); std::shared_ptr diff --git a/c_glib/test/test-decimal64.rb b/c_glib/test/test-decimal64.rb new file mode 100644 index 0000000000000..add4f3e0b4909 --- /dev/null +++ b/c_glib/test/test-decimal64.rb @@ -0,0 +1,222 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal64 < Test::Unit::TestCase + def test_new_string_invalid + message = + "[decimal64][new][string]: Invalid: " + + "The string '1,1' is not a valid decimal64 number" + error = assert_raise(Arrow::Error::Invalid) do + Arrow::Decimal64.new("1,1") + end + assert_equal(message, + error.message.lines.first.chomp) + end + + def test_copy + decimal = Arrow::Decimal64.new("234.23445") + assert_equal(decimal, decimal.copy) + end + + def test_to_string_scale + integer_data = 23423445 + string_data = "234.23445" + decimal = Arrow::Decimal64.new(integer_data) + assert_equal(string_data, decimal.to_string_scale(5)) + end + + def test_to_string + string_data = "999999999999999999" + decimal = Arrow::Decimal64.new(string_data) + assert_equal(string_data, decimal.to_s) + end + + def test_to_bytes + decimal = Arrow::Decimal64.new("12.3") + assert_equal([123].pack("q*"), + decimal.to_bytes.to_s) + end + + def test_abs + absolute_value = "230492239423435324" + negative_value = "-230492239423435324" + decimal = Arrow::Decimal64.new(negative_value) + decimal.abs + assert_equal(absolute_value, decimal.to_s) + end + + def test_negate + positive_value = "230492239423435324" + negative_value = "-230492239423435324" + decimal = Arrow::Decimal64.new(positive_value) + decimal.negate + assert_equal(negative_value, decimal.to_s) + decimal.negate + assert_equal(positive_value, decimal.to_s) + end + + def test_plus + integer_data1 = 23423445 + integer_data2 = 5443 + decimal1 = Arrow::Decimal64.new(integer_data1) + decimal2 = Arrow::Decimal64.new(integer_data2) + decimal3 = decimal1.plus(decimal2) + assert_equal((integer_data1 + integer_data2).to_s, + decimal3.to_s) + end + + def test_multiply + integer_data1 = 23423445 + integer_data2 = 5443 + decimal1 = Arrow::Decimal64.new(integer_data1) + decimal2 = Arrow::Decimal64.new(integer_data2) + decimal3 = decimal1.multiply(decimal2) + assert_equal((integer_data1 * integer_data2).to_s, + decimal3.to_s) + end + + def test_divide + integer_data1 = 23423445 + integer_data2 = -5443 + decimal1 = Arrow::Decimal64.new(integer_data1) + decimal2 = Arrow::Decimal64.new(integer_data2) + result, remainder = decimal1.divide(decimal2) + assert_equal([ + integer_data1.quo(integer_data2).truncate.to_s, + integer_data1.remainder(integer_data2).to_s, + ], + [result.to_s, remainder.to_s]) + end + + def test_divide_zero + decimal1 = Arrow::Decimal64.new(23423445) + decimal2 = Arrow::Decimal64.new(0) + message = + "[decimal64][divide]: Invalid: Division by 0 in Decimal64" + assert_raise(Arrow::Error::Invalid.new(message)) do + decimal1.divide(decimal2) + end + end + + def test_equal + decimal = Arrow::Decimal64.new(10) + other_decimal1 = Arrow::Decimal64.new(10) + other_decimal2 = Arrow::Decimal64.new(11) + assert_equal([ + true, + false, + ], + [ + decimal == other_decimal1, + decimal == other_decimal2, 
+ ]) + end + + def test_not_equal + decimal = Arrow::Decimal64.new(10) + other_decimal1 = Arrow::Decimal64.new(10) + other_decimal2 = Arrow::Decimal64.new(11) + assert_equal([ + false, + true, + ], + [ + decimal != other_decimal1, + decimal != other_decimal2, + ]) + end + + def test_less_than + decimal = Arrow::Decimal64.new(10) + other_decimal1 = Arrow::Decimal64.new(11) + other_decimal2 = Arrow::Decimal64.new(9) + assert_equal([ + true, + false, + false + ], + [ + decimal < other_decimal1, + decimal < other_decimal2, + decimal < decimal, + ]) + end + + def test_less_than_or_equal + decimal = Arrow::Decimal64.new(10) + other_decimal1 = Arrow::Decimal64.new(11) + other_decimal2 = Arrow::Decimal64.new(9) + assert_equal([ + true, + false, + true + ], + [ + decimal <= other_decimal1, + decimal <= other_decimal2, + decimal <= decimal + ]) + end + + def test_greater_than + decimal = Arrow::Decimal64.new(10) + other_decimal1 = Arrow::Decimal64.new(11) + other_decimal2 = Arrow::Decimal64.new(9) + assert_equal([ + false, + true, + false + ], + [ + decimal > other_decimal1, + decimal > other_decimal2, + decimal > decimal + ]) + end + + def test_greater_than_or_equal + decimal = Arrow::Decimal64.new(10) + other_decimal1 = Arrow::Decimal64.new(11) + other_decimal2 = Arrow::Decimal64.new(9) + assert_equal([ + false, + true, + true + ], + [ + decimal >= other_decimal1, + decimal >= other_decimal2, + decimal >= decimal + ]) + end + + def test_rescale + decimal = Arrow::Decimal64.new(10) + assert_equal(Arrow::Decimal64.new(1000), + decimal.rescale(1, 3)) + end + + def test_rescale_fail + decimal = Arrow::Decimal64.new(10) + message = + "[decimal64][rescale]: Invalid: " + + "Rescaling Decimal64 value would cause data loss" + assert_raise(Arrow::Error::Invalid.new(message)) do + decimal.rescale(1, -1) + end + end +end From c23ee7342c0c822f1458f892c60edfdd7ce857cc Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Fri, 1 Nov 2024 06:12:20 +0900 Subject: [PATCH 23/59] GH-44589: [GLib] Add GArrowDecimal32 class (#44597) ### Rationale for this change The `Decimal32` class has been introduced. It is necessary to support it in GLib. ### What changes are included in this PR? Implement `GArrowDecimal32` class ### Are these changes tested? YES ### Are there any user-facing changes? 
NO * GitHub Issue: #44589 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/decimal.cpp | 434 +++++++++++++++++++++++++++++++++- c_glib/arrow-glib/decimal.h | 79 +++++++ c_glib/arrow-glib/decimal.hpp | 5 + c_glib/test/test-decimal32.rb | 222 +++++++++++++++++ 4 files changed, 739 insertions(+), 1 deletion(-) create mode 100644 c_glib/test/test-decimal32.rb diff --git a/c_glib/arrow-glib/decimal.cpp b/c_glib/arrow-glib/decimal.cpp index edc2af7a7e051..30b596f7a33c0 100644 --- a/c_glib/arrow-glib/decimal.cpp +++ b/c_glib/arrow-glib/decimal.cpp @@ -24,6 +24,24 @@ template struct DecimalConverter { }; +template <> struct DecimalConverter +{ + using ArrowType = arrow::Decimal32; + using GArrowType = GArrowDecimal32; + + GArrowType * + new_raw(std::shared_ptr *arrow_decimal32) + { + return garrow_decimal32_new_raw(arrow_decimal32); + } + + std::shared_ptr + get_raw(GArrowType *decimal32) + { + return garrow_decimal32_get_raw(decimal32); + } +}; + template <> struct DecimalConverter { using ArrowType = arrow::Decimal64; @@ -319,9 +337,11 @@ G_BEGIN_DECLS /** * SECTION: decimal * @section_id: decimal - * @title: 64-bit, 128-bit and 256-bit decimal classes + * @title: 32-bit, 64-bit, 128-bit and 256-bit decimal classes * @include: arrow-glib/arrow-glib.h * + * #GArrowDecimal32 is a 32-bit decimal class. + * * #GArrowDecimal64 is a 64-bit decimal class. * * #GArrowDecimal128 is a 128-bit decimal class. @@ -331,6 +351,403 @@ G_BEGIN_DECLS * Since: 0.10.0 */ +typedef struct GArrowDecimal32Private_ +{ + std::shared_ptr decimal32; +} GArrowDecimal32Private; + +enum { + PROP_DECIMAL32 = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal32, garrow_decimal32, G_TYPE_OBJECT) + +#define GARROW_DECIMAL32_GET_PRIVATE(obj) \ + static_cast( \ + garrow_decimal32_get_instance_private(GARROW_DECIMAL32(obj))) + +static void +garrow_decimal32_finalize(GObject *object) +{ + auto priv = GARROW_DECIMAL32_GET_PRIVATE(object); + + priv->decimal32.~shared_ptr(); + + G_OBJECT_CLASS(garrow_decimal32_parent_class)->finalize(object); +} + +static void +garrow_decimal32_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL32_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DECIMAL32: + priv->decimal32 = + *static_cast *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal32_init(GArrowDecimal32 *object) +{ + auto priv = GARROW_DECIMAL32_GET_PRIVATE(object); + new (&priv->decimal32) std::shared_ptr; +} + +static void +garrow_decimal32_class_init(GArrowDecimal32Class *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_decimal32_finalize; + gobject_class->set_property = garrow_decimal32_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "decimal32", + "Decimal32", + "The raw std::shared *", + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DECIMAL32, spec); +} + +/** + * garrow_decimal32_new_string: + * @data: The data of the decimal. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * A newly created #GArrowDecimal32 on success, %NULL on error. 
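+ *
+ * A minimal sketch of the intended call pattern (the input string is
+ * only an example; a string Arrow cannot parse yields %NULL and sets
+ * @error):
+ *
+ * |[<!-- language="C" -->
+ * GError *error = NULL;
+ * GArrowDecimal32 *decimal = garrow_decimal32_new_string("234.23445", &error);
+ * if (!decimal) {
+ *   g_print("parse failed: %s\n", error->message);
+ *   g_clear_error(&error);
+ * }
+ * ]|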
+ * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_new_string(const gchar *data, GError **error) +{ + return garrow_decimal_new_string(data, + error, + "[decimal32][new][string]"); +} + +/** + * garrow_decimal32_new_integer: + * @data: The data of the decimal. + * + * Returns: A newly created #GArrowDecimal32. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_new_integer(const gint64 data) +{ + return garrow_decimal_new_integer(data); +} + +/** + * garrow_decimal32_copy: + * @decimal: The decimal to be copied. + * + * Returns: (transfer full): A copied #GArrowDecimal32. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_copy(GArrowDecimal32 *decimal) +{ + return garrow_decimal_copy(decimal); +} + +/** + * garrow_decimal32_equal: + * @decimal: A #GArrowDecimal32. + * @other_decimal: A #GArrowDecimal32 to be compared. + * + * Returns: %TRUE if the decimal is equal to the other decimal, %FALSE + * otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_equal(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal) +{ + return garrow_decimal_equal(decimal, other_decimal); +} + +/** + * garrow_decimal32_not_equal: + * @decimal: A #GArrowDecimal32. + * @other_decimal: A #GArrowDecimal32 to be compared. + * + * Returns: %TRUE if the decimal isn't equal to the other decimal, + * %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_not_equal(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal) +{ + return garrow_decimal_not_equal(decimal, other_decimal); +} + +/** + * garrow_decimal32_less_than: + * @decimal: A #GArrowDecimal32. + * @other_decimal: A #GArrowDecimal32 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal, + * %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_less_than(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal) +{ + return garrow_decimal_less_than(decimal, other_decimal); +} + +/** + * garrow_decimal32_less_than_or_equal: + * @decimal: A #GArrowDecimal32. + * @other_decimal: A #GArrowDecimal32 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_less_than_or_equal(GArrowDecimal32 *decimal, + GArrowDecimal32 *other_decimal) +{ + return garrow_decimal_less_than_or_equal(decimal, other_decimal); +} + +/** + * garrow_decimal32_greater_than: + * @decimal: A #GArrowDecimal32. + * @other_decimal: A #GArrowDecimal32 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal, + * %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_greater_than(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal) +{ + return garrow_decimal_greater_than(decimal, other_decimal); +} + +/** + * garrow_decimal32_greater_than_or_equal: + * @decimal: A #GArrowDecimal32. + * @other_decimal: A #GArrowDecimal32 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_greater_than_or_equal(GArrowDecimal32 *decimal, + GArrowDecimal32 *other_decimal) +{ + return garrow_decimal_greater_than_or_equal(decimal, other_decimal); +} + +/** + * garrow_decimal32_to_string_scale: + * @decimal: A #GArrowDecimal32. + * @scale: The scale of the decimal. + * + * Returns: The string representation of the decimal. 
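+ *
+ * For example, assuming @decimal was created from the integer
+ * 23423445 (the value used in the bundled test), a sketch:
+ *
+ * |[<!-- language="C" -->
+ * gchar *formatted = garrow_decimal32_to_string_scale(decimal, 5);
+ * /* formatted is "234.23445" */
+ * g_free(formatted);
+ * ]|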
+ * + * It should be freed with g_free() when no longer needed. + * + * Since: 19.0.0 + */ +gchar * +garrow_decimal32_to_string_scale(GArrowDecimal32 *decimal, gint32 scale) +{ + return garrow_decimal_to_string_scale(decimal, scale); +} + +/** + * garrow_decimal32_to_string: + * @decimal: A #GArrowDecimal32. + * + * Returns: The string representation of the decimal. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 19.0.0 + */ +gchar * +garrow_decimal32_to_string(GArrowDecimal32 *decimal) +{ + return garrow_decimal_to_string(decimal); +} + +/** + * garrow_decimal32_to_bytes: + * @decimal: A #GArrowDecimal32. + * + * Returns: (transfer full): The binary representation of the decimal. + * + * Since: 19.0.0 + */ +GBytes * +garrow_decimal32_to_bytes(GArrowDecimal32 *decimal) +{ + return garrow_decimal_to_bytes(decimal); +} + +/** + * garrow_decimal32_abs: + * @decimal: A #GArrowDecimal32. + * + * Computes the absolute value of the @decimal destructively. + * + * Since: 19.0.0 + */ +void +garrow_decimal32_abs(GArrowDecimal32 *decimal) +{ + garrow_decimal_abs(decimal); +} + +/** + * garrow_decimal32_negate: + * @decimal: A #GArrowDecimal32. + * + * Negate the current value of the @decimal destructively. + * + * Since: 19.0.0 + */ +void +garrow_decimal32_negate(GArrowDecimal32 *decimal) +{ + garrow_decimal_negate(decimal); +} + +/** + * garrow_decimal32_to_integer: + * @decimal: A #GArrowDecimal32. + * + * Returns: The 64-bit integer representation of the decimal. + * + * Since: 19.0.0 + */ +gint64 +garrow_decimal32_to_integer(GArrowDecimal32 *decimal) +{ + auto arrow_decimal = garrow_decimal32_get_raw(decimal); + return static_cast(*arrow_decimal); +} + +/** + * garrow_decimal32_plus: + * @left: A #GArrowDecimal32. + * @right: A #GArrowDecimal32. + * + * Returns: (transfer full): The added value of these decimals. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_plus(GArrowDecimal32 *left, GArrowDecimal32 *right) +{ + return garrow_decimal_plus(left, right); +} + +/** + * garrow_decimal32_minus: + * @left: A #GArrowDecimal32. + * @right: A #GArrowDecimal32. + * + * Returns: (transfer full): The subtracted value of these decimals. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_minus(GArrowDecimal32 *left, GArrowDecimal32 *right) +{ + return garrow_decimal_minus(left, right); +} + +/** + * garrow_decimal32_multiply: + * @left: A #GArrowDecimal32. + * @right: A #GArrowDecimal32. + * + * Returns: (transfer full): The multiplied value of these decimals. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_multiply(GArrowDecimal32 *left, GArrowDecimal32 *right) +{ + return garrow_decimal_multiply(left, right); +} + +/** + * garrow_decimal32_divide: + * @left: A #GArrowDecimal32. + * @right: A #GArrowDecimal32. + * @remainder: (out) (nullable): A return location for the remainder + * value of these decimals. The returned #GArrowDecimal32 be + * unreferred with g_object_unref() when no longer needed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The divided value of + * these decimals or %NULL on error. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_divide(GArrowDecimal32 *left, + GArrowDecimal32 *right, + GArrowDecimal32 **remainder, + GError **error) +{ + return garrow_decimal_divide(left, + right, + remainder, + error, + "[decimal32][divide]"); +} + +/** + * garrow_decimal32_rescale: + * @decimal: A #GArrowDecimal32. 
+ * @original_scale: A scale to be converted from. + * @new_scale: A scale to be converted to. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The rescaled decimal or %NULL on error. + * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_rescale(GArrowDecimal32 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error) +{ + return garrow_decimal_rescale(decimal, + original_scale, + new_scale, + error, + "[decimal32][rescale]"); +} typedef struct GArrowDecimal64Private_ { std::shared_ptr decimal64; @@ -1499,6 +1916,21 @@ garrow_decimal256_rescale(GArrowDecimal256 *decimal, G_END_DECLS +GArrowDecimal32 * +garrow_decimal32_new_raw(std::shared_ptr *arrow_decimal32) +{ + auto decimal32 = + g_object_new(garrow_decimal32_get_type(), "decimal32", arrow_decimal32, NULL); + return GARROW_DECIMAL32(decimal32); +} + +std::shared_ptr +garrow_decimal32_get_raw(GArrowDecimal32 *decimal32) +{ + auto priv = GARROW_DECIMAL32_GET_PRIVATE(decimal32); + return priv->decimal32; +} + GArrowDecimal64 * garrow_decimal64_new_raw(std::shared_ptr *arrow_decimal64) { diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal.h index bb266424b4cc0..6f839a67d9b3b 100644 --- a/c_glib/arrow-glib/decimal.h +++ b/c_glib/arrow-glib/decimal.h @@ -25,6 +25,85 @@ G_BEGIN_DECLS +/* Disabled because it conflicts with GARROW_TYPE_DECIMAL32 in GArrowType. */ +/* #define GARROW_TYPE_DECIMAL32 (garrow_decimal32_get_type()) */ +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal32, garrow_decimal32, GARROW, DECIMAL32, GObject) + +struct _GArrowDecimal32Class +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_new_string(const gchar *data, GError **error); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_new_integer(const gint64 data); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_copy(GArrowDecimal32 *decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_equal(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_not_equal(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_less_than(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_less_than_or_equal(GArrowDecimal32 *decimal, + GArrowDecimal32 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_greater_than(GArrowDecimal32 *decimal, GArrowDecimal32 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_greater_than_or_equal(GArrowDecimal32 *decimal, + GArrowDecimal32 *other_decimal); +GARROW_AVAILABLE_IN_19_0 +gchar * +garrow_decimal32_to_string_scale(GArrowDecimal32 *decimal, gint32 scale); +GARROW_AVAILABLE_IN_19_0 +gchar * +garrow_decimal32_to_string(GArrowDecimal32 *decimal); +GARROW_AVAILABLE_IN_19_0 +GBytes * +garrow_decimal32_to_bytes(GArrowDecimal32 *decimal); +GARROW_AVAILABLE_IN_19_0 +void +garrow_decimal32_abs(GArrowDecimal32 *decimal); +GARROW_AVAILABLE_IN_19_0 +void +garrow_decimal32_negate(GArrowDecimal32 *decimal); +GARROW_AVAILABLE_IN_19_0 +gint64 +garrow_decimal32_to_integer(GArrowDecimal32 *decimal); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_plus(GArrowDecimal32 *left, GArrowDecimal32 *right); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_minus(GArrowDecimal32 *left, GArrowDecimal32 *right); +GARROW_AVAILABLE_IN_19_0 
+GArrowDecimal32 * +garrow_decimal32_multiply(GArrowDecimal32 *left, GArrowDecimal32 *right); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_divide(GArrowDecimal32 *left, + GArrowDecimal32 *right, + GArrowDecimal32 **remainder, + GError **error); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_rescale(GArrowDecimal32 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error); + /* Disabled because it conflicts with GARROW_TYPE_DECIMAL64 in GArrowType. */ /* #define GARROW_TYPE_DECIMAL64 (garrow_decimal64_get_type()) */ GARROW_AVAILABLE_IN_ALL diff --git a/c_glib/arrow-glib/decimal.hpp b/c_glib/arrow-glib/decimal.hpp index dbfb7f30c60e0..09ac40a51297e 100644 --- a/c_glib/arrow-glib/decimal.hpp +++ b/c_glib/arrow-glib/decimal.hpp @@ -25,6 +25,11 @@ #include +GArrowDecimal32 * +garrow_decimal32_new_raw(std::shared_ptr *arrow_decimal32); +std::shared_ptr +garrow_decimal32_get_raw(GArrowDecimal32 *decimal); + GArrowDecimal64 * garrow_decimal64_new_raw(std::shared_ptr *arrow_decimal64); std::shared_ptr diff --git a/c_glib/test/test-decimal32.rb b/c_glib/test/test-decimal32.rb new file mode 100644 index 0000000000000..33b84ccc6b531 --- /dev/null +++ b/c_glib/test/test-decimal32.rb @@ -0,0 +1,222 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
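+
+# These tests drive the new C API through Ruby's GObject Introspection
+# bindings: Arrow::Decimal32 below corresponds to GArrowDecimal32, so
+# each assertion exercises one of the garrow_decimal32_*() functions
+# added in this patch.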
+ +class TestDecimal32 < Test::Unit::TestCase + def test_new_string_invalid + message = + "[decimal32][new][string]: Invalid: " + + "The string '1,1' is not a valid decimal32 number" + error = assert_raise(Arrow::Error::Invalid) do + Arrow::Decimal32.new("1,1") + end + assert_equal(message, + error.message.lines.first.chomp) + end + + def test_copy + decimal = Arrow::Decimal32.new("234.23445") + assert_equal(decimal, decimal.copy) + end + + def test_to_string_scale + integer_data = 23423445 + string_data = "234.23445" + decimal = Arrow::Decimal32.new(integer_data) + assert_equal(string_data, decimal.to_string_scale(5)) + end + + def test_to_string + string_data = "999999999" + decimal = Arrow::Decimal32.new(string_data) + assert_equal(string_data, decimal.to_s) + end + + def test_to_bytes + decimal = Arrow::Decimal32.new("12.3") + assert_equal("\x7B\x00\x00\x00", + decimal.to_bytes.to_s) + end + + def test_abs + absolute_value = "230492239" + negative_value = "-230492239" + decimal = Arrow::Decimal32.new(negative_value) + decimal.abs + assert_equal(absolute_value, decimal.to_s) + end + + def test_negate + positive_value = "230492239" + negative_value = "-230492239" + decimal = Arrow::Decimal32.new(positive_value) + decimal.negate + assert_equal(negative_value, decimal.to_s) + decimal.negate + assert_equal(positive_value, decimal.to_s) + end + + def test_plus + integer_data1 = 23423445 + integer_data2 = 5443 + decimal1 = Arrow::Decimal32.new(integer_data1) + decimal2 = Arrow::Decimal32.new(integer_data2) + decimal3 = decimal1.plus(decimal2) + assert_equal((integer_data1 + integer_data2).to_s, + decimal3.to_s) + end + + def test_multiply + integer_data1 = 23423 + integer_data2 = 5443 + decimal1 = Arrow::Decimal32.new(integer_data1) + decimal2 = Arrow::Decimal32.new(integer_data2) + decimal3 = decimal1.multiply(decimal2) + assert_equal((integer_data1 * integer_data2).to_s, + decimal3.to_s) + end + + def test_divide + integer_data1 = 23423 + integer_data2 = -5443 + decimal1 = Arrow::Decimal32.new(integer_data1) + decimal2 = Arrow::Decimal32.new(integer_data2) + result, remainder = decimal1.divide(decimal2) + assert_equal([ + integer_data1.quo(integer_data2).truncate.to_s, + integer_data1.remainder(integer_data2).to_s, + ], + [result.to_s, remainder.to_s]) + end + + def test_divide_zero + decimal1 = Arrow::Decimal32.new(23423445) + decimal2 = Arrow::Decimal32.new(0) + message = + "[decimal32][divide]: Invalid: Division by 0 in Decimal32" + assert_raise(Arrow::Error::Invalid.new(message)) do + decimal1.divide(decimal2) + end + end + + def test_equal + decimal = Arrow::Decimal32.new(10) + other_decimal1 = Arrow::Decimal32.new(10) + other_decimal2 = Arrow::Decimal32.new(11) + assert_equal([ + true, + false, + ], + [ + decimal == other_decimal1, + decimal == other_decimal2, + ]) + end + + def test_not_equal + decimal = Arrow::Decimal32.new(10) + other_decimal1 = Arrow::Decimal32.new(10) + other_decimal2 = Arrow::Decimal32.new(11) + assert_equal([ + false, + true, + ], + [ + decimal != other_decimal1, + decimal != other_decimal2, + ]) + end + + def test_less_than + decimal = Arrow::Decimal32.new(10) + other_decimal1 = Arrow::Decimal32.new(11) + other_decimal2 = Arrow::Decimal32.new(9) + assert_equal([ + true, + false, + false + ], + [ + decimal < other_decimal1, + decimal < other_decimal2, + decimal < decimal, + ]) + end + + def test_less_than_or_equal + decimal = Arrow::Decimal32.new(10) + other_decimal1 = Arrow::Decimal32.new(11) + other_decimal2 = Arrow::Decimal32.new(9) + assert_equal([ + true, 
+ false, + true + ], + [ + decimal <= other_decimal1, + decimal <= other_decimal2, + decimal <= decimal + ]) + end + + def test_greater_than + decimal = Arrow::Decimal32.new(10) + other_decimal1 = Arrow::Decimal32.new(11) + other_decimal2 = Arrow::Decimal32.new(9) + assert_equal([ + false, + true, + false + ], + [ + decimal > other_decimal1, + decimal > other_decimal2, + decimal > decimal + ]) + end + + def test_greater_than_or_equal + decimal = Arrow::Decimal32.new(10) + other_decimal1 = Arrow::Decimal32.new(11) + other_decimal2 = Arrow::Decimal32.new(9) + assert_equal([ + false, + true, + true + ], + [ + decimal >= other_decimal1, + decimal >= other_decimal2, + decimal >= decimal + ]) + end + + def test_rescale + decimal = Arrow::Decimal32.new(10) + assert_equal(Arrow::Decimal32.new(1000), + decimal.rescale(1, 3)) + end + + def test_rescale_fail + decimal = Arrow::Decimal32.new(10) + message = + "[decimal32][rescale]: Invalid: " + + "Rescaling Decimal32 value would cause data loss" + assert_raise(Arrow::Error::Invalid.new(message)) do + decimal.rescale(1, -1) + end + end +end From 191472d41714afbd9a2b0b3eb5e25be79ee6fdda Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 1 Nov 2024 06:16:56 +0900 Subject: [PATCH 24/59] GH-44570: [Release][R][Docs] Update `r/pkgdown/assets/versions.html` (#44572) ### Rationale for this change It's still used at https://arrow.apache.org/docs/r/versions.html . So we should update it too. ### What changes are included in this PR? Commit it. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #44570 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/01-prepare-test.rb | 23 +++++++++++++++++++++++ dev/release/post-12-bump-versions-test.rb | 23 +++++++++++++++++++++++ dev/release/utils-prepare.sh | 1 + r/pkgdown/assets/versions.html | 13 ++++++++++--- r/pkgdown/assets/versions.json | 4 ++++ 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index ca53b7f8fdee5..27f9f5f869ea3 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -263,6 +263,18 @@ def test_version_pre_tag ] if next_release_type == :major expected_changes += [ + { + path: "r/pkgdown/assets/versions.html", + hunks: [ + [ + "-

<p><a href=\"…\">#{@previous_version}.9000 (dev)</a></p>",
+ "-<p><a href=\"…\">#{@previous_r_version} (release)</a></p>",
+ "+<p><a href=\"…\">#{@release_version}.9000 (dev)</a></p>",
+ "+<p><a href=\"…\">#{@release_version} (release)</a></p>",
+ "+<p><a href=\"…\">#{@previous_r_version}</a></p>",
+ ] + ], + }, { path: "r/pkgdown/assets/versions.json", hunks: [ @@ -281,6 +293,17 @@ ] else expected_changes += [ + { + path: "r/pkgdown/assets/versions.html", + hunks: [ + [
+ "-<p><a href=\"…\">#{@previous_version}.9000 (dev)</a></p>",
+ "-<p><a href=\"…\">#{@previous_r_version} (release)</a></p>",
+ "+<p><a href=\"…\">#{@release_version}.9000 (dev)</a></p>",
+ "+<p><a href=\"…\">#{@release_version} (release)</a></p>",
+ ] + ], + }, { path: "r/pkgdown/assets/versions.json", hunks: [ diff --git a/dev/release/post-12-bump-versions-test.rb index 9af334c496fe6..9c4026584aeca 100644 --- a/dev/release/post-12-bump-versions-test.rb +++ b/dev/release/post-12-bump-versions-test.rb @@ -210,6 +210,18 @@ def test_version_post_tag ["+ (#{@next_major_version}, 0),"], ], }, + { + path: "r/pkgdown/assets/versions.html", + hunks: [ + [
+ "-<p><a href=\"…\">#{@previous_version}.9000 (dev)</a></p>",
+ "-<p><a href=\"…\">#{@previous_r_version} (release)</a></p>",
+ "+<p><a href=\"…\">#{@release_version}.9000 (dev)</a></p>",
+ "+<p><a href=\"…\">#{@release_version} (release)</a></p>",
+ "+<p><a href=\"…\">#{@previous_r_version}</a></p>",
+ ], + ], + }, { path: "r/pkgdown/assets/versions.json", hunks: [ @@ -228,6 +240,17 @@ ] else expected_changes += [ + { + path: "r/pkgdown/assets/versions.html", + hunks: [ + [
+ "-<p><a href=\"…\">#{@previous_version}.9000 (dev)</a></p>",
+ "-<p><a href=\"…\">#{@previous_r_version} (release)</a></p>",
+ "+<p><a href=\"…\">#{@release_version}.9000 (dev)</a></p>",
+ "+<p><a href=\"…\">#{@release_version} (release)</a></p>",
+ ], + ], + }, { path: "r/pkgdown/assets/versions.json", hunks: [ diff --git a/dev/release/utils-prepare.sh index ecdd0a26dcb7a..19ffda578b4b0 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -194,6 +194,7 @@ update_versions() { "${base_version}" \ "${next_version}" git add docs/source/_static/versions.json + git add r/pkgdown/assets/versions.html git add r/pkgdown/assets/versions.json popd } diff --git a/r/pkgdown/assets/versions.html index 8ba513a98c85b..e2f56772cf339 100644 --- a/r/pkgdown/assets/versions.html +++ b/r/pkgdown/assets/versions.html @@ -1,9 +1,16 @@
-<p><a href="…">13.0.0.9000 (dev)</a></p>
-<p><a href="…">13.0.0.1 (release)</a></p>
+<p><a href="…">18.0.0.9000 (dev)</a></p>
+<p><a href="…">18.0.0 (release)</a></p>
+<p><a href="…">17.0.0</a></p>
+<p><a href="…">16.1.0</a></p>
+<p><a href="…">16.0.0</a></p>
+<p><a href="…">15.0.2</a></p>
+<p><a href="…">14.0.2</a></p>
+<p><a href="…">13.0.0.1</a></p>
<p><a href="…">12.0.1.1</a></p>
-<p><a href="…">11.0.0.3</a></p>
+<p><a href="…">12.0.0</a></p>
+<p><a href="…">11.0.0</a></p>
<p><a href="…">10.0.1</a></p>
<p><a href="…">9.0.0</a></p>
<p><a href="…">8.0.0</a></p>
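For reference, the two pkgdown assets are meant to stay in lockstep: every release line in versions.html is a paragraph link whose target directory matches the "version" field that versions.json records under the same name. Going by the versions.json hunk below, the new 16.0.0 entry presumably resolves to a 16.0/ directory, and by the same convention the development and current-release lines point at dev/ and ./ respectively.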
diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index cecbed7f32818..41c8469772f9d 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -15,6 +15,10 @@ "name": "16.1.0", "version": "16.1/" }, + { + "name": "16.0.0", + "version": "16.0/" + }, { "name": "15.0.2", "version": "15.0/" From e1fa7e5fb257fa9f83ffcc531efda8a4489b2961 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 1 Nov 2024 08:47:17 +0900 Subject: [PATCH 25/59] GH-44590: [C++] Add `const` and `&` to `arrow::Array::statistics()` return type (#44592) ### Rationale for this change It must be immutable. ### What changes are included in this PR? Add missing `const` and `&`. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. **This PR includes breaking changes to public APIs.** * GitHub Issue: #44590 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/array/array_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index e4af67d7e5f0b..21faa3f4279ea 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -237,8 +237,8 @@ class ARROW_EXPORT Array { /// This just delegates to calling statistics on the underlying ArrayData /// object which backs this Array. /// - /// \return const ArrayStatistics& - std::shared_ptr statistics() const { return data_->statistics; } + /// \return const std::shared_ptr& + const std::shared_ptr& statistics() const { return data_->statistics; } protected: Array() = default; From f3abc6802a94a1a4202c710c236a24a137e0a0d7 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 1 Nov 2024 07:58:33 -0700 Subject: [PATCH 26/59] GH-43547: [R][CI] Add recheck workflow for checking reverse dependencies on GHA (#43784) ### Rationale for this change See https://github.com/apache/arrow/issues/43547. ### What changes are included in this PR? Adds two new new crossbow tasks for performing reverse dependency checking using https://github.com/r-devel/recheck: - `r-recheck-most` - `r-recheck-strong` ### Are these changes tested? Yes. https://github.com/apache/arrow/pull/44523#issuecomment-2434122461. ### Are there any user-facing changes? No. * GitHub Issue: #43547 Fixes https://github.com/apache/arrow/issues/43547. Authored-by: Bryce Mecum Signed-off-by: Bryce Mecum --- dev/tasks/r/github.recheck.yml | 30 ++++++++++++++++++++++++++++++ dev/tasks/tasks.yml | 15 +++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 dev/tasks/r/github.recheck.yml diff --git a/dev/tasks/r/github.recheck.yml b/dev/tasks/r/github.recheck.yml new file mode 100644 index 0000000000000..5f0095fa22126 --- /dev/null +++ b/dev/tasks/r/github.recheck.yml @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + recheck: + name: Reverse check {{ which }} dependents + uses: r-devel/recheck/.github/workflows/recheck.yml@9fe04de60ebeadd505b8d76223a346617ccca836 + with: + which: {{ which }} + subdirectory: r + repository: {{ arrow.github_repo }} + ref: {{ arrow.branch }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 30c1daecf7a31..8f542265fd02d 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -94,6 +94,7 @@ groups: r: - test*-r-* - r-binary-packages + - r-recheck-most ruby: - test-*ruby* @@ -901,6 +902,20 @@ tasks: - r-pkg__bin__macosx__big-sur-arm64__contrib__4.3__arrow_{no_rc_r_version}\.tgz - r-pkg__src__contrib__arrow_{no_rc_r_version}\.tar\.gz +{% for which in ["strong", "most"] %} + # strong and most used here are defined by ?tools::package_dependencies as: + # + # strong: Depends, Imports, LinkingTo + # most: Depends, Imports, LinkingTo, Suggests + # + # So the key difference between strong and most is whether you want to expand + # the reverse dependency checking to Suggests (most) or not. + r-recheck-{{which}}: + ci: github + template: r/github.recheck.yml + params: + which: {{which}} +{% endfor %} ########################### Release verification ############################ From 277df0ec4bdc0e9bc4557ce40d38fcf921396683 Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Sat, 2 Nov 2024 07:04:15 +0900 Subject: [PATCH 27/59] GH-44601: [GLib] Fix the wrong GARROW_AVAILABLE_IN declaration (#44602) ### Rationale for this change GArrowDecimal64 will be introduced in the 19.0.0 release. This part should be `GARROW_AVAILABLE_IN_19_0` instead of `GARROW_AVAILABLE_IN_ALL` ### What changes are included in this PR? Change the `GARROW_AVAILABLE_IN` declaration. ### Are these changes tested? YES ### Are there any user-facing changes? NO * GitHub Issue: #44601 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/decimal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal.h index 6f839a67d9b3b..e034f1bbdc84c 100644 --- a/c_glib/arrow-glib/decimal.h +++ b/c_glib/arrow-glib/decimal.h @@ -106,7 +106,7 @@ garrow_decimal32_rescale(GArrowDecimal32 *decimal, /* Disabled because it conflicts with GARROW_TYPE_DECIMAL64 in GArrowType. */ /* #define GARROW_TYPE_DECIMAL64 (garrow_decimal64_get_type()) */ -GARROW_AVAILABLE_IN_ALL +GARROW_AVAILABLE_IN_19_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64, garrow_decimal64, GARROW, DECIMAL64, GObject) struct _GArrowDecimal64Class From 11c11a48234a7f49e0585f5762b3a6332ac7622a Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Sat, 2 Nov 2024 07:07:47 +0900 Subject: [PATCH 28/59] GH-44603: [GLib] Add GArrowDecimal64Array and GArrowDecimal64ArrayBuilder (#44605) ### Rationale for this change The `arrow::Decimal64Array` has been released. The `GArrowDecimal64Array` class must be implemented in the GLib. ### What changes are included in this PR? * Implement `GArrowDecimal64Array` * Implement `GArrowDecimal64ArrayBuilder` ### Are these changes tested? YES ### Are there any user-facing changes? 
NO * GitHub Issue: #44603 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/array-builder.cpp | 105 ++++++++++++++++++++++++++++ c_glib/arrow-glib/array-builder.h | 30 ++++++++ c_glib/arrow-glib/basic-array.cpp | 62 ++++++++++++++++ c_glib/arrow-glib/basic-array.h | 20 ++++++ c_glib/test/test-decimal64-array.rb | 37 ++++++++++ 5 files changed, 254 insertions(+) create mode 100644 c_glib/test/test-decimal64-array.rb diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index 1897562e13286..4f82ee2983b34 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -457,6 +457,9 @@ G_BEGIN_DECLS * #GArrowMapArrayBuilder is the class to create a new * #GArrowMapArray. * + * #GArrowDecimal64ArrayBuilder is the class to create a new + * #GArrowDecimal64Array. + * * #GArrowDecimal128ArrayBuilder is the class to create a new * #GArrowDecimal128Array. * @@ -6062,6 +6065,105 @@ garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder) return priv->value_builder; } +G_DEFINE_TYPE(GArrowDecimal64ArrayBuilder, + garrow_decimal64_array_builder, + GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER) + +static void +garrow_decimal64_array_builder_init(GArrowDecimal64ArrayBuilder *builder) +{ +} + +static void +garrow_decimal64_array_builder_class_init(GArrowDecimal64ArrayBuilderClass *klass) +{ +} + +/** + * garrow_decimal64_array_builder_new: + * @data_type: #GArrowDecimal64DataType for the decimal. + * + * Returns: A newly created #GArrowDecimal64ArrayBuilder. + * + * Since: 19.0.0 + */ +GArrowDecimal64ArrayBuilder * +garrow_decimal64_array_builder_new(GArrowDecimal64DataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = + garrow_array_builder_new(arrow_data_type, NULL, "[decimal64-array-builder][new]"); + return GARROW_DECIMAL64_ARRAY_BUILDER(builder); +} + +/** + * garrow_decimal64_array_builder_append_value: + * @builder: A #GArrowDecimal64ArrayBuilder. + * @value: (nullable): A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_array_builder_append_value(GArrowDecimal64ArrayBuilder *builder, + GArrowDecimal64 *value, + GError **error) +{ + if (value) { + auto arrow_decimal = garrow_decimal64_get_raw(value); + return garrow_array_builder_append_value( + GARROW_ARRAY_BUILDER(builder), + *arrow_decimal, + error, + "[decimal64-array-builder][append-value]"); + } else { + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); + } +} + +/** + * garrow_decimal64_array_builder_append_values: + * @builder: A #GArrowDecimal64ArrayBuilder. + * @values: (array length=values_length): The array of #GArrowDecimal64. + * @values_length: The length of @values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
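+ *
+ * A sketch of a batched append, assuming @builder came from
+ * garrow_decimal64_array_builder_new() and decimal1/decimal2 are
+ * existing #GArrowDecimal64 values (the second slot is appended as
+ * null because its flag is %FALSE):
+ *
+ * |[<!-- language="C" -->
+ * GArrowDecimal64 *values[] = {decimal1, decimal2};
+ * gboolean is_valids[] = {TRUE, FALSE};
+ * GError *error = NULL;
+ * garrow_decimal64_array_builder_append_values(builder,
+ *                                              values, 2,
+ *                                              is_valids, 2,
+ *                                              &error);
+ * ]|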
+ * + * Since: 19.0.0 + */ +gboolean +garrow_decimal64_array_builder_append_values(GArrowDecimal64ArrayBuilder *builder, + GArrowDecimal64 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values( + GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[decimal64-array-builder][append-values]", + [](guint8 *output, GArrowDecimal64 *value, gsize size) { + auto arrow_decimal = garrow_decimal64_get_raw(value); + arrow_decimal->ToBytes(output); + }); +} + G_DEFINE_TYPE(GArrowDecimal128ArrayBuilder, garrow_decimal128_array_builder, GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER) @@ -6581,6 +6683,9 @@ garrow_array_builder_new_raw(std::shared_ptr *arrow_builder case arrow::Type::type::MAP: type = GARROW_TYPE_MAP_ARRAY_BUILDER; break; + case arrow::Type::type::DECIMAL64: + type = GARROW_TYPE_DECIMAL64_ARRAY_BUILDER; + break; case arrow::Type::type::DECIMAL128: type = GARROW_TYPE_DECIMAL128_ARRAY_BUILDER; break; diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index da9e8748ee387..f653ddb7781a3 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -1729,6 +1729,36 @@ GARROW_AVAILABLE_IN_0_17 GArrowArrayBuilder * garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder); +#define GARROW_TYPE_DECIMAL64_ARRAY_BUILDER (garrow_decimal64_array_builder_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64ArrayBuilder, + garrow_decimal64_array_builder, + GARROW, + DECIMAL64_ARRAY_BUILDER, + GArrowFixedSizeBinaryArrayBuilder) +struct _GArrowDecimal64ArrayBuilderClass +{ + GArrowFixedSizeBinaryArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64ArrayBuilder * +garrow_decimal64_array_builder_new(GArrowDecimal64DataType *data_type); + +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_array_builder_append_value(GArrowDecimal64ArrayBuilder *builder, + GArrowDecimal64 *value, + GError **error); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal64_array_builder_append_values(GArrowDecimal64ArrayBuilder *builder, + GArrowDecimal64 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); + #define GARROW_TYPE_DECIMAL128_ARRAY_BUILDER (garrow_decimal128_array_builder_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128ArrayBuilder, diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index f102a252467a3..8c39715c384f0 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -171,6 +171,11 @@ G_BEGIN_DECLS * have Arrow format data, you need to use #GArrowMonthDayNanoIntervalArray * to create a new array. * + * #GArrowDecimal64Array is a class for 64-bit decimal array. It can + * store zero or more 64-bit decimal data. If you don't have Arrow + * format data, you need to use #GArrowDecimal64ArrayBuilder to + * create a new array. + * * #GArrowDecimal128Array is a class for 128-bit decimal array. It can * store zero or more 128-bit decimal data. 
If you don't have Arrow * format data, you need to use #GArrowDecimal128ArrayBuilder to @@ -3090,6 +3095,60 @@ garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *arra arrow_binary_array->byte_width() * arrow_array->length()); } +G_DEFINE_TYPE(GArrowDecimal64Array, + garrow_decimal64_array, + GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY) +static void +garrow_decimal64_array_init(GArrowDecimal64Array *object) +{ +} + +static void +garrow_decimal64_array_class_init(GArrowDecimal64ArrayClass *klass) +{ +} + +/** + * garrow_decimal64_array_format_value: + * @array: A #GArrowDecimal64Array. + * @i: The index of the target value. + * + * Returns: (transfer full): The formatted @i-th value. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 19.0.0 + */ +gchar * +garrow_decimal64_array_format_value(GArrowDecimal64Array *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_decimal64_array = + std::static_pointer_cast(arrow_array); + auto value = arrow_decimal64_array->FormatValue(i); + return g_strndup(value.data(), value.size()); +} + +/** + * garrow_decimal64_array_get_value: + * @array: A #GArrowDecimal64Array. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th value. + * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_array_get_value(GArrowDecimal64Array *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_decimal64_array = + std::static_pointer_cast(arrow_array); + auto arrow_decimal64 = + std::make_shared(arrow_decimal64_array->GetValue(i)); + return garrow_decimal64_new_raw(&arrow_decimal64); +} + G_DEFINE_TYPE(GArrowDecimal128Array, garrow_decimal128_array, GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY) @@ -3443,6 +3502,9 @@ garrow_array_new_raw_valist(std::shared_ptr *arrow_array, case arrow::Type::type::DICTIONARY: type = GARROW_TYPE_DICTIONARY_ARRAY; break; + case arrow::Type::type::DECIMAL64: + type = GARROW_TYPE_DECIMAL64_ARRAY; + break; case arrow::Type::type::DECIMAL128: type = GARROW_TYPE_DECIMAL128_ARRAY; break; diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index 95679aa37c57a..f70cf114a4a96 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -810,6 +810,26 @@ GARROW_AVAILABLE_IN_3_0 GBytes * garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array); +#define GARROW_TYPE_DECIMAL64_ARRAY (garrow_decimal64_array_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64Array, + garrow_decimal64_array, + GARROW, + DECIMAL64_ARRAY, + GArrowFixedSizeBinaryArray) +struct _GArrowDecimal64ArrayClass +{ + GArrowFixedSizeBinaryArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +gchar * +garrow_decimal64_array_format_value(GArrowDecimal64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_array_get_value(GArrowDecimal64Array *array, gint64 i); + #define GARROW_TYPE_DECIMAL128_ARRAY (garrow_decimal128_array_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Array, diff --git a/c_glib/test/test-decimal64-array.rb b/c_glib/test/test-decimal64-array.rb new file mode 100644 index 0000000000000..ab7b9e2523481 --- /dev/null +++ b/c_glib/test/test-decimal64-array.rb @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDecimal64Array < Test::Unit::TestCase
+  def test_format_value
+    data_type = Arrow::Decimal64DataType.new(8, 2)
+    builder = Arrow::Decimal64ArrayBuilder.new(data_type)
+    decimal = Arrow::Decimal64.new("23423445")
+    builder.append_value(decimal)
+    array = builder.finish
+    assert_equal("234234.45", array.format_value(0))
+  end
+
+  def test_value
+    data_type = Arrow::Decimal64DataType.new(8, 2)
+    builder = Arrow::Decimal64ArrayBuilder.new(data_type)
+    decimal = Arrow::Decimal64.new("23423445")
+    builder.append_value(decimal)
+    array = builder.finish
+    assert_equal("234234.45",
+                 array.get_value(0).to_string_scale(array.value_data_type.scale))
+  end
+end
From ab0c857695420f94cc83259494941a68b2762dee Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Sun, 3 Nov 2024 00:31:06 +0900
Subject: [PATCH 29/59] GH-44578: [Release][Packaging] Verify wheel version (#44593)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

We want to detect binaries built from the wrong source.

### What changes are included in this PR?

Add a version check. If we use the wrong source, the binary's version is
`X.Y.Z-SNAPSHOT`, not `X.Y.Z`, so the added check fails.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #44578 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- ci/scripts/python_wheel_unix_test.sh | 7 +++++++ dev/release/verify-release-candidate.sh | 27 ++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index 1487581eaef51..3ce86b16116b0 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -88,6 +88,13 @@ import pyarrow.parquet fi fi +if [ "${CHECK_VERSION}" == "ON" ]; then + pyarrow_version=$(python -c "import pyarrow; print(pyarrow.__version__)") + [ "${pyarrow_version}" = "${ARROW_VERSION}" ] + arrow_cpp_version=$(python -c "import pyarrow; print(pyarrow.cpp_build_info.version)") + [ "${arrow_cpp_version}" = "${ARROW_VERSION}" ] +fi + if [ "${CHECK_WHEEL_CONTENT}" == "ON" ]; then python ${source_dir}/ci/scripts/python_wheel_validate_contents.py \ --path ${source_dir}/python/repaired_wheels diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index d9f973562aa78..17d10601d11d7 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -1052,6 +1052,12 @@ test_linux_wheels() { local wheel_content="OFF" fi + if [ "${SOURCE_KIND}" = "tarball" ]; then + local check_version="ON" + else + local check_version="OFF" + fi + for python in ${python_versions}; do local pyver=${python/m} for platform in ${platform_tags}; do @@ -1061,7 +1067,11 @@ test_linux_wheels() { continue fi pip install pyarrow-${TEST_PYARROW_VERSION:-${VERSION}}-cp${pyver/.}-cp${python/.}-${platform}.whl - CHECK_WHEEL_CONTENT=${wheel_content:-"ON"} INSTALL_PYARROW=OFF ARROW_GCS=${check_gcs} \ + ARROW_GCS=${check_gcs} \ + ARROW_VERSION=${VERSION} \ + CHECK_VERSION=${check_version} \ + CHECK_WHEEL_CONTENT=${wheel_content:-"ON"} \ + INSTALL_PYARROW=OFF \ ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_SOURCE_DIR} done done @@ -1086,6 +1096,12 @@ test_macos_wheels() { local wheel_content="OFF" fi + if [ "${SOURCE_KIND}" = "tarball" ]; then + local check_version="ON" + else + local check_version="OFF" + fi + # verify arch-native wheels inside an arch-native conda environment for python in ${python_versions}; do local pyver=${python/m} @@ -1102,8 +1118,13 @@ test_macos_wheels() { fi pip install pyarrow-${VERSION}-cp${pyver/.}-cp${python/.}-${platform}.whl - CHECK_WHEEL_CONTENT=${wheel_content:-"ON"} INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} \ - ARROW_GCS=${check_gcs} ARROW_S3=${check_s3} \ + ARROW_FLIGHT=${check_flight} \ + ARROW_GCS=${check_gcs} \ + ARROW_S3=${check_s3} \ + ARROW_VERSION=${VERSION} \ + CHECK_WHEEL_CONTENT=${wheel_content:-"ON"} \ + CHECK_VERSION=${check_version} \ + INSTALL_PYARROW=OFF \ ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_SOURCE_DIR} done done From e76082dc9b5d704b4f5f438c8e8b3bf68a361894 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 3 Nov 2024 06:26:28 +0900 Subject: [PATCH 30/59] MINOR: [JS] Bump memfs from 4.9.2 to 4.14.0 in /js (#44609) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [memfs](https://github.com/streamich/memfs) from 4.9.2 to 4.14.0.
Release notes

Sourced from memfs's releases.

v4.14.0

4.14.0 (2024-10-13)

Features

  • support stream as source in promises version of writeFile (#1069) (11f8a36)

v4.13.0

4.13.0 (2024-10-07)

Features

v4.12.0

4.12.0 (2024-09-19)

Features

v4.11.2

4.11.2 (2024-09-17)

Bug Fixes

v4.11.1

4.11.1 (2024-08-01)

Bug Fixes

v4.11.0

4.11.0 (2024-07-27)

Features

  • volume implementation of .opendir() method (7072fb7)

v4.10.0

4.10.0 (2024-07-27)

... (truncated)

Changelog

Sourced from memfs's changelog.

4.14.0 (2024-10-13)

Features

  • support stream as source in promises version of writeFile (#1069) (11f8a36)

4.13.0 (2024-10-07)

Features

4.12.0 (2024-09-19)

Features

4.11.2 (2024-09-17)

Bug Fixes

4.11.1 (2024-08-01)

Bug Fixes

4.11.0 (2024-07-27)

Features

  • volume implementation of .opendir() method (7072fb7)

4.10.0 (2024-07-27)

Features

  • 🎸 add IReadableWebStreamOptions type (99ebd64)
  • 🎸 implement FileHandle.readableWebStream() (c3ddc6c)

... (truncated)

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 35 ++++++++++++++--------------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/js/package.json b/js/package.json index a879814426ed7..2a0db0067bd5b 100644 --- a/js/package.json +++ b/js/package.json @@ -98,7 +98,7 @@ "ix": "7.0.0", "jest": "29.7.0", "jest-silent-reporter": "0.6.0", - "memfs": "4.9.2", + "memfs": "4.14.0", "mkdirp": "3.0.1", "multistream": "4.1.0", "regenerator-runtime": "0.14.1", diff --git a/js/yarn.lock b/js/yarn.lock index e237d09469f4d..2941ccf3c569d 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -948,10 +948,10 @@ hyperdyperid "^1.2.0" thingies "^1.20.0" -"@jsonjoy.com/util@^1.1.2": - version "1.1.2" - resolved "https://registry.yarnpkg.com/@jsonjoy.com/util/-/util-1.1.2.tgz#5072c27ecdb16d1ed7a2d125a1d0ed8aba01d652" - integrity sha512-HOGa9wtE6LEz2I5mMQ2pMSjth85PmD71kPbsecs02nEUq3/Kw0wRK3gmZn5BCEB8mFLXByqPxjHgApoMwIPMKQ== +"@jsonjoy.com/util@^1.1.2", "@jsonjoy.com/util@^1.3.0": + version "1.5.0" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/util/-/util-1.5.0.tgz#6008e35b9d9d8ee27bc4bfaa70c8cbf33a537b4c" + integrity sha512-ojoNsrIuPI9g6o8UxhraZQSyF2ByJanAY4cTFbc8Mf2AXEF4aQRGY1dJxyJpuyav8r9FGflEt/Ff3u5Nt6YMPA== "@nodelib/fs.scandir@2.1.5": version "2.1.5" @@ -5168,14 +5168,14 @@ mdurl@^2.0.0: resolved "https://registry.yarnpkg.com/mdurl/-/mdurl-2.0.0.tgz#80676ec0433025dd3e17ee983d0fe8de5a2237e0" integrity sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w== -memfs@4.9.2: - version "4.9.2" - resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.9.2.tgz#42e7b48207268dad8c9c48ea5d4952c5d3840433" - integrity sha512-f16coDZlTG1jskq3mxarwB+fGRrd0uXWt+o1WIhRfOwbXQZqUDsTVxQBFK9JjRQHblg8eAG2JSbprDXKjc7ijQ== +memfs@4.14.0: + version "4.14.0" + resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.14.0.tgz#48d5e85a03ea0b428280003212fbca3063531be3" + integrity sha512-JUeY0F/fQZgIod31Ja1eJgiSxLn7BfQlCnqhwXFBzFHEw63OdLK7VJUJ7bnzNsWgCyoUP5tEp1VRY8rDaYzqOA== dependencies: "@jsonjoy.com/json-pack" "^1.0.3" - "@jsonjoy.com/util" "^1.1.2" - sonic-forest "^1.0.0" + "@jsonjoy.com/util" "^1.3.0" + tree-dump "^1.0.1" tslib "^2.0.0" memoizee@0.4.X: @@ -6397,13 +6397,6 @@ snapdragon@^0.8.1: source-map-resolve "^0.5.0" use "^3.1.0" -sonic-forest@^1.0.0: - version "1.0.2" - resolved "https://registry.yarnpkg.com/sonic-forest/-/sonic-forest-1.0.2.tgz#d80aa621d1cffe75a606ca44789ccff30f5b9ce6" - integrity sha512-2rICdwIJi5kVlehMUVtJeHn3ohh5YZV4pDv0P0c1M11cRz/gXNViItpM94HQwfvnXuzybpqK0LZJgTa3lEwtAw== - dependencies: - tree-dump "^1.0.0" - source-map-resolve@^0.5.0: version "0.5.3" resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a" @@ -6879,10 +6872,10 @@ totalist@^3.0.0: resolved "https://registry.yarnpkg.com/totalist/-/totalist-3.0.1.tgz#ba3a3d600c915b1a97872348f79c127475f6acf8" integrity sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ== -tree-dump@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/tree-dump/-/tree-dump-1.0.1.tgz#b448758da7495580e6b7830d6b7834fca4c45b96" - integrity sha512-WCkcRBVPSlHHq1dc/px9iOfqklvzCbdRwvlNfxGZsrHqf6aZttfPrd7DJTt6oR10dwUfpFFQeVTkPbBIZxX/YA== +tree-dump@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/tree-dump/-/tree-dump-1.0.2.tgz#c460d5921caeb197bde71d0e9a7b479848c5b8ac" + 
integrity sha512-dpev9ABuLWdEubk+cIaI9cHwRNNDjkBBLXTwI4UCUFdQ5xXKqNXoK4FEciw/vxf+NQ7Cb7sGUyeUtORvHIdRXQ== trim-newlines@^4.0.2: version "4.1.1" From 2312eff1acc8818abcf057ec1fea54205b8c8d2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 3 Nov 2024 06:26:54 +0900 Subject: [PATCH 31/59] MINOR: [JS] Bump rollup from 4.22.4 to 4.24.3 in /js (#44610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [rollup](https://github.com/rollup/rollup) from 4.22.4 to 4.24.3.

Changelog

Sourced from rollup's changelog.

4.24.3

2024-10-29

Bug Fixes

  • Slightly reduce memory consumption by specifying fixed array sizes where possible (#5703)

Pull Requests

4.24.2

2024-10-27

Bug Fixes

  • Add missing build dependency (#5705)

Pull Requests

  • #5705: Fix "Couldn't find package" error when installing rollup using yarn (@tagattie)

4.24.1

2024-10-27

Bug Fixes

  • Support running Rollup natively on FreeBSD (#5698)

Pull Requests

4.24.0

2024-10-02

Features

  • Support preserving and transpiling JSX syntax (#5668)

... (truncated)

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 220 +++++++++++++++++++++++++----------------------- 2 files changed, 117 insertions(+), 105 deletions(-) diff --git a/js/package.json b/js/package.json index 2a0db0067bd5b..27132ff812b7e 100644 --- a/js/package.json +++ b/js/package.json @@ -102,7 +102,7 @@ "mkdirp": "3.0.1", "multistream": "4.1.0", "regenerator-runtime": "0.14.1", - "rollup": "4.22.4", + "rollup": "4.24.3", "rxjs": "7.8.1", "ts-jest": "29.1.4", "ts-node": "10.9.2", diff --git a/js/yarn.lock b/js/yarn.lock index 2941ccf3c569d..e7906b9aaad2d 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1017,85 +1017,95 @@ estree-walker "^2.0.2" picomatch "^2.3.1" -"@rollup/rollup-android-arm-eabi@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.22.4.tgz#8b613b9725e8f9479d142970b106b6ae878610d5" - integrity sha512-Fxamp4aEZnfPOcGA8KSNEohV8hX7zVHOemC8jVBoBUHu5zpJK/Eu3uJwt6BMgy9fkvzxDaurgj96F/NiLukF2w== - -"@rollup/rollup-android-arm64@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.22.4.tgz#654ca1049189132ff602bfcf8df14c18da1f15fb" - integrity sha512-VXoK5UMrgECLYaMuGuVTOx5kcuap1Jm8g/M83RnCHBKOqvPPmROFJGQaZhGccnsFtfXQ3XYa4/jMCJvZnbJBdA== - -"@rollup/rollup-darwin-arm64@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.22.4.tgz#6d241d099d1518ef0c2205d96b3fa52e0fe1954b" - integrity sha512-xMM9ORBqu81jyMKCDP+SZDhnX2QEVQzTcC6G18KlTQEzWK8r/oNZtKuZaCcHhnsa6fEeOBionoyl5JsAbE/36Q== - -"@rollup/rollup-darwin-x64@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.22.4.tgz#42bd19d292a57ee11734c980c4650de26b457791" - integrity sha512-aJJyYKQwbHuhTUrjWjxEvGnNNBCnmpHDvrb8JFDbeSH3m2XdHcxDd3jthAzvmoI8w/kSjd2y0udT+4okADsZIw== - -"@rollup/rollup-linux-arm-gnueabihf@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.22.4.tgz#f23555ee3d8fe941c5c5fd458cd22b65eb1c2232" - integrity sha512-j63YtCIRAzbO+gC2L9dWXRh5BFetsv0j0va0Wi9epXDgU/XUi5dJKo4USTttVyK7fGw2nPWK0PbAvyliz50SCQ== - -"@rollup/rollup-linux-arm-musleabihf@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.22.4.tgz#f3bbd1ae2420f5539d40ac1fde2b38da67779baa" - integrity sha512-dJnWUgwWBX1YBRsuKKMOlXCzh2Wu1mlHzv20TpqEsfdZLb3WoJW2kIEsGwLkroYf24IrPAvOT/ZQ2OYMV6vlrg== - -"@rollup/rollup-linux-arm64-gnu@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.22.4.tgz#7abe900120113e08a1f90afb84c7c28774054d15" - integrity sha512-AdPRoNi3NKVLolCN/Sp4F4N1d98c4SBnHMKoLuiG6RXgoZ4sllseuGioszumnPGmPM2O7qaAX/IJdeDU8f26Aw== - -"@rollup/rollup-linux-arm64-musl@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.22.4.tgz#9e655285c8175cd44f57d6a1e8e5dedfbba1d820" - integrity sha512-Gl0AxBtDg8uoAn5CCqQDMqAx22Wx22pjDOjBdmG0VIWX3qUBHzYmOKh8KXHL4UpogfJ14G4wk16EQogF+v8hmA== - -"@rollup/rollup-linux-powerpc64le-gnu@4.22.4": - version "4.22.4" - resolved 
"https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.22.4.tgz#9a79ae6c9e9d8fe83d49e2712ecf4302db5bef5e" - integrity sha512-3aVCK9xfWW1oGQpTsYJJPF6bfpWfhbRnhdlyhak2ZiyFLDaayz0EP5j9V1RVLAAxlmWKTDfS9wyRyY3hvhPoOg== - -"@rollup/rollup-linux-riscv64-gnu@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.22.4.tgz#67ac70eca4ace8e2942fabca95164e8874ab8128" - integrity sha512-ePYIir6VYnhgv2C5Xe9u+ico4t8sZWXschR6fMgoPUK31yQu7hTEJb7bCqivHECwIClJfKgE7zYsh1qTP3WHUA== - -"@rollup/rollup-linux-s390x-gnu@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.22.4.tgz#9f883a7440f51a22ed7f99e1d070bd84ea5005fc" - integrity sha512-GqFJ9wLlbB9daxhVlrTe61vJtEY99/xB3C8e4ULVsVfflcpmR6c8UZXjtkMA6FhNONhj2eA5Tk9uAVw5orEs4Q== - -"@rollup/rollup-linux-x64-gnu@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.22.4.tgz#70116ae6c577fe367f58559e2cffb5641a1dd9d0" - integrity sha512-87v0ol2sH9GE3cLQLNEy0K/R0pz1nvg76o8M5nhMR0+Q+BBGLnb35P0fVz4CQxHYXaAOhE8HhlkaZfsdUOlHwg== - -"@rollup/rollup-linux-x64-musl@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.22.4.tgz#f473f88219feb07b0b98b53a7923be716d1d182f" - integrity sha512-UV6FZMUgePDZrFjrNGIWzDo/vABebuXBhJEqrHxrGiU6HikPy0Z3LfdtciIttEUQfuDdCn8fqh7wiFJjCNwO+g== - -"@rollup/rollup-win32-arm64-msvc@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.22.4.tgz#4349482d17f5d1c58604d1c8900540d676f420e0" - integrity sha512-BjI+NVVEGAXjGWYHz/vv0pBqfGoUH0IGZ0cICTn7kB9PyjrATSkX+8WkguNjWoj2qSr1im/+tTGRaY+4/PdcQw== - -"@rollup/rollup-win32-ia32-msvc@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.22.4.tgz#a6fc39a15db618040ec3c2a24c1e26cb5f4d7422" - integrity sha512-SiWG/1TuUdPvYmzmYnmd3IEifzR61Tragkbx9D3+R8mzQqDBz8v+BvZNDlkiTtI9T15KYZhP0ehn3Dld4n9J5g== - -"@rollup/rollup-win32-x64-msvc@4.22.4": - version "4.22.4" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.22.4.tgz#3dd5d53e900df2a40841882c02e56f866c04d202" - integrity sha512-j8pPKp53/lq9lMXN57S8cFz0MynJk8OWNuUnXct/9KCpKU7DgU3bYMJhwWmcqC0UU29p8Lr0/7KEVcaM6bf47Q== +"@rollup/rollup-android-arm-eabi@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.24.3.tgz#49a2a9808074f2683667992aa94b288e0b54fc82" + integrity sha512-ufb2CH2KfBWPJok95frEZZ82LtDl0A6QKTa8MoM+cWwDZvVGl5/jNb79pIhRvAalUu+7LD91VYR0nwRD799HkQ== + +"@rollup/rollup-android-arm64@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.24.3.tgz#197e3bc01c228d3c23591e0fcedca91f8f398ec1" + integrity sha512-iAHpft/eQk9vkWIV5t22V77d90CRofgR2006UiCjHcHJFVI1E0oBkQIAbz+pLtthFw3hWEmVB4ilxGyBf48i2Q== + +"@rollup/rollup-darwin-arm64@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.24.3.tgz#16772c0309d0dc3cca716580cdac7a1c560ddf46" + integrity sha512-QPW2YmkWLlvqmOa2OwrfqLJqkHm7kJCIMq9kOz40Zo9Ipi40kf9ONG5Sz76zszrmIZZ4hgRIkez69YnTHgEz1w== + +"@rollup/rollup-darwin-x64@4.24.3": + version "4.24.3" + resolved 
"https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.24.3.tgz#4e98120a1c4cda7d4043ccce72347cee53784140" + integrity sha512-KO0pN5x3+uZm1ZXeIfDqwcvnQ9UEGN8JX5ufhmgH5Lz4ujjZMAnxQygZAVGemFWn+ZZC0FQopruV4lqmGMshow== + +"@rollup/rollup-freebsd-arm64@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.24.3.tgz#27145e414986e216e0d9b9a8d488028f33c39566" + integrity sha512-CsC+ZdIiZCZbBI+aRlWpYJMSWvVssPuWqrDy/zi9YfnatKKSLFCe6fjna1grHuo/nVaHG+kiglpRhyBQYRTK4A== + +"@rollup/rollup-freebsd-x64@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.24.3.tgz#67e75fd87a903090f038b212273c492e5ca6b32f" + integrity sha512-F0nqiLThcfKvRQhZEzMIXOQG4EeX61im61VYL1jo4eBxv4aZRmpin6crnBJQ/nWnCsjH5F6J3W6Stdm0mBNqBg== + +"@rollup/rollup-linux-arm-gnueabihf@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.24.3.tgz#bb45ebadbb9496298ab5461373bde357e8f33e88" + integrity sha512-KRSFHyE/RdxQ1CSeOIBVIAxStFC/hnBgVcaiCkQaVC+EYDtTe4X7z5tBkFyRoBgUGtB6Xg6t9t2kulnX6wJc6A== + +"@rollup/rollup-linux-arm-musleabihf@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.24.3.tgz#384276c23feb0a4d6ffa603a9a760decce8b4118" + integrity sha512-h6Q8MT+e05zP5BxEKz0vi0DhthLdrNEnspdLzkoFqGwnmOzakEHSlXfVyA4HJ322QtFy7biUAVFPvIDEDQa6rw== + +"@rollup/rollup-linux-arm64-gnu@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.24.3.tgz#89e5a4570ddd9eca908324a6de60bd64f904e3f0" + integrity sha512-fKElSyXhXIJ9pqiYRqisfirIo2Z5pTTve5K438URf08fsypXrEkVmShkSfM8GJ1aUyvjakT+fn2W7Czlpd/0FQ== + +"@rollup/rollup-linux-arm64-musl@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.24.3.tgz#9ffd7cd6c6c6670d8c039056d6a49ad9f1f66949" + integrity sha512-YlddZSUk8G0px9/+V9PVilVDC6ydMz7WquxozToozSnfFK6wa6ne1ATUjUvjin09jp34p84milxlY5ikueoenw== + +"@rollup/rollup-linux-powerpc64le-gnu@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.24.3.tgz#4d32ce982e2d25e3b8116336ad5ce6e270b5a024" + integrity sha512-yNaWw+GAO8JjVx3s3cMeG5Esz1cKVzz8PkTJSfYzE5u7A+NvGmbVFEHP+BikTIyYWuz0+DX9kaA3pH9Sqxp69g== + +"@rollup/rollup-linux-riscv64-gnu@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.24.3.tgz#f43d4e0572397e3d3acd82d77d79ce021dea3310" + integrity sha512-lWKNQfsbpv14ZCtM/HkjCTm4oWTKTfxPmr7iPfp3AHSqyoTz5AgLemYkWLwOBWc+XxBbrU9SCokZP0WlBZM9lA== + +"@rollup/rollup-linux-s390x-gnu@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.24.3.tgz#264f8a4c206173945bdab2a676d638b7945106a9" + integrity sha512-HoojGXTC2CgCcq0Woc/dn12wQUlkNyfH0I1ABK4Ni9YXyFQa86Fkt2Q0nqgLfbhkyfQ6003i3qQk9pLh/SpAYw== + +"@rollup/rollup-linux-x64-gnu@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.24.3.tgz#e86172a407b2edd41540ec2ae636e497fadccff6" + integrity sha512-mnEOh4iE4USSccBOtcrjF5nj+5/zm6NcNhbSEfR3Ot0pxBwvEn5QVUXcuOwwPkapDtGZ6pT02xLoPaNv06w7KQ== + +"@rollup/rollup-linux-x64-musl@4.24.3": + version "4.24.3" + resolved 
"https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.24.3.tgz#8ae9bf78986d1b16ccbc89ab6f2dfa96807d3178" + integrity sha512-rMTzawBPimBQkG9NKpNHvquIUTQPzrnPxPbCY1Xt+mFkW7pshvyIS5kYgcf74goxXOQk0CP3EoOC1zcEezKXhw== + +"@rollup/rollup-win32-arm64-msvc@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.24.3.tgz#11d6a59f651a3c2a9e5eaab0a99367b77a29c319" + integrity sha512-2lg1CE305xNvnH3SyiKwPVsTVLCg4TmNCF1z7PSHX2uZY2VbUpdkgAllVoISD7JO7zu+YynpWNSKAtOrX3AiuA== + +"@rollup/rollup-win32-ia32-msvc@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.24.3.tgz#7ff146e53dc6e388b60329b7ec3335501d2b0f98" + integrity sha512-9SjYp1sPyxJsPWuhOCX6F4jUMXGbVVd5obVpoVEi8ClZqo52ViZewA6eFz85y8ezuOA+uJMP5A5zo6Oz4S5rVQ== + +"@rollup/rollup-win32-x64-msvc@4.24.3": + version "4.24.3" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.24.3.tgz#7687335781efe6bee14d6ed8eff9746a9f24c9cd" + integrity sha512-HGZgRFFYrMrP3TJlq58nR1xy8zHKId25vhmm5S9jETEfDf6xybPxsavFTJaufe2zgOGYJBskGlj49CwtEuFhWQ== "@rollup/stream@3.0.1": version "3.0.1" @@ -1298,10 +1308,10 @@ "@types/estree" "*" "@types/json-schema" "*" -"@types/estree@*", "@types/estree@1.0.5", "@types/estree@^1.0.0", "@types/estree@^1.0.5": - version "1.0.5" - resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4" - integrity sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw== +"@types/estree@*", "@types/estree@1.0.6", "@types/estree@^1.0.0", "@types/estree@^1.0.5": + version "1.0.6" + resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.6.tgz#628effeeae2064a1b4e79f78e81d87b7e5fc7b50" + integrity sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw== "@types/expect@^1.20.4": version "1.20.4" @@ -6179,29 +6189,31 @@ rimraf@^3.0.2: dependencies: glob "^7.1.3" -rollup@4.22.4: - version "4.22.4" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.22.4.tgz#4135a6446671cd2a2453e1ad42a45d5973ec3a0f" - integrity sha512-vD8HJ5raRcWOyymsR6Z3o6+RzfEPCnVLMFJ6vRslO1jt4LO6dUo5Qnpg7y4RkZFM2DMe3WUirkI5c16onjrc6A== +rollup@4.24.3: + version "4.24.3" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.24.3.tgz#8b259063740af60b0030315f88665ba2041789b8" + integrity sha512-HBW896xR5HGmoksbi3JBDtmVzWiPAYqp7wip50hjQ67JbDz61nyoMPdqu1DvVW9asYb2M65Z20ZHsyJCMqMyDg== dependencies: - "@types/estree" "1.0.5" + "@types/estree" "1.0.6" optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.22.4" - "@rollup/rollup-android-arm64" "4.22.4" - "@rollup/rollup-darwin-arm64" "4.22.4" - "@rollup/rollup-darwin-x64" "4.22.4" - "@rollup/rollup-linux-arm-gnueabihf" "4.22.4" - "@rollup/rollup-linux-arm-musleabihf" "4.22.4" - "@rollup/rollup-linux-arm64-gnu" "4.22.4" - "@rollup/rollup-linux-arm64-musl" "4.22.4" - "@rollup/rollup-linux-powerpc64le-gnu" "4.22.4" - "@rollup/rollup-linux-riscv64-gnu" "4.22.4" - "@rollup/rollup-linux-s390x-gnu" "4.22.4" - "@rollup/rollup-linux-x64-gnu" "4.22.4" - "@rollup/rollup-linux-x64-musl" "4.22.4" - "@rollup/rollup-win32-arm64-msvc" "4.22.4" - "@rollup/rollup-win32-ia32-msvc" "4.22.4" - "@rollup/rollup-win32-x64-msvc" "4.22.4" + "@rollup/rollup-android-arm-eabi" "4.24.3" + "@rollup/rollup-android-arm64" "4.24.3" + "@rollup/rollup-darwin-arm64" "4.24.3" + "@rollup/rollup-darwin-x64" 
"4.24.3" + "@rollup/rollup-freebsd-arm64" "4.24.3" + "@rollup/rollup-freebsd-x64" "4.24.3" + "@rollup/rollup-linux-arm-gnueabihf" "4.24.3" + "@rollup/rollup-linux-arm-musleabihf" "4.24.3" + "@rollup/rollup-linux-arm64-gnu" "4.24.3" + "@rollup/rollup-linux-arm64-musl" "4.24.3" + "@rollup/rollup-linux-powerpc64le-gnu" "4.24.3" + "@rollup/rollup-linux-riscv64-gnu" "4.24.3" + "@rollup/rollup-linux-s390x-gnu" "4.24.3" + "@rollup/rollup-linux-x64-gnu" "4.24.3" + "@rollup/rollup-linux-x64-musl" "4.24.3" + "@rollup/rollup-win32-arm64-msvc" "4.24.3" + "@rollup/rollup-win32-ia32-msvc" "4.24.3" + "@rollup/rollup-win32-x64-msvc" "4.24.3" fsevents "~2.3.2" run-parallel@^1.1.9: From abda57c42123ab03d97fec7a10c91f231ba7ad64 Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Sun, 3 Nov 2024 06:53:24 +0900 Subject: [PATCH 32/59] GH-44604: [GLib] Add Decimal32Array (#44617) ### Rationale for this change The `arrow::Decimal32Array` has been released. The `GArrowDecimal32Array` class must be implemented in the GLib. ### What changes are included in this PR? * Implement `GArrowDecimal32Array` * Implement `GArrowDecimal32ArrayBuilder` ### Are these changes tested? YES ### Are there any user-facing changes? NO * GitHub Issue: #44604 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/array-builder.cpp | 105 ++++++++++++++++++++++++++++ c_glib/arrow-glib/array-builder.h | 30 ++++++++ c_glib/arrow-glib/basic-array.cpp | 62 ++++++++++++++++ c_glib/arrow-glib/basic-array.h | 20 ++++++ c_glib/test/test-decimal32-array.rb | 37 ++++++++++ 5 files changed, 254 insertions(+) create mode 100644 c_glib/test/test-decimal32-array.rb diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index 4f82ee2983b34..87e22c7435209 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -457,6 +457,9 @@ G_BEGIN_DECLS * #GArrowMapArrayBuilder is the class to create a new * #GArrowMapArray. * + * #GArrowDecimal32ArrayBuilder is the class to create a new + * #GArrowDecimal32Array. + * * #GArrowDecimal64ArrayBuilder is the class to create a new * #GArrowDecimal64Array. * @@ -6065,6 +6068,105 @@ garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder) return priv->value_builder; } +G_DEFINE_TYPE(GArrowDecimal32ArrayBuilder, + garrow_decimal32_array_builder, + GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER) + +static void +garrow_decimal32_array_builder_init(GArrowDecimal32ArrayBuilder *builder) +{ +} + +static void +garrow_decimal32_array_builder_class_init(GArrowDecimal32ArrayBuilderClass *klass) +{ +} + +/** + * garrow_decimal32_array_builder_new: + * @data_type: #GArrowDecimal32DataType for the decimal. + * + * Returns: A newly created #GArrowDecimal32ArrayBuilder. + * + * Since: 19.0.0 + */ +GArrowDecimal32ArrayBuilder * +garrow_decimal32_array_builder_new(GArrowDecimal32DataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = + garrow_array_builder_new(arrow_data_type, NULL, "[decimal32-array-builder][new]"); + return GARROW_DECIMAL32_ARRAY_BUILDER(builder); +} + +/** + * garrow_decimal32_array_builder_append_value: + * @builder: A #GArrowDecimal32ArrayBuilder. + * @value: (nullable): A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_array_builder_append_value(GArrowDecimal32ArrayBuilder *builder, + GArrowDecimal32 *value, + GError **error) +{ + if (value) { + auto arrow_decimal = garrow_decimal32_get_raw(value); + return garrow_array_builder_append_value( + GARROW_ARRAY_BUILDER(builder), + *arrow_decimal, + error, + "[decimal32-array-builder][append-value]"); + } else { + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); + } +} + +/** + * garrow_decimal32_array_builder_append_values: + * @builder: A #GArrowDecimal32ArrayBuilder. + * @values: (array length=values_length): The array of #GArrowDecimal32. + * @values_length: The length of @values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 19.0.0 + */ +gboolean +garrow_decimal32_array_builder_append_values(GArrowDecimal32ArrayBuilder *builder, + GArrowDecimal32 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values( + GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[decimal32-array-builder][append-values]", + [](guint8 *output, GArrowDecimal32 *value, gsize size) { + auto arrow_decimal = garrow_decimal32_get_raw(value); + arrow_decimal->ToBytes(output); + }); +} + G_DEFINE_TYPE(GArrowDecimal64ArrayBuilder, garrow_decimal64_array_builder, GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER) @@ -6683,6 +6785,9 @@ garrow_array_builder_new_raw(std::shared_ptr *arrow_builder case arrow::Type::type::MAP: type = GARROW_TYPE_MAP_ARRAY_BUILDER; break; + case arrow::Type::type::DECIMAL32: + type = GARROW_TYPE_DECIMAL32_ARRAY_BUILDER; + break; case arrow::Type::type::DECIMAL64: type = GARROW_TYPE_DECIMAL64_ARRAY_BUILDER; break; diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index f653ddb7781a3..c15c411503114 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -1729,6 +1729,36 @@ GARROW_AVAILABLE_IN_0_17 GArrowArrayBuilder * garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder); +#define GARROW_TYPE_DECIMAL32_ARRAY_BUILDER (garrow_decimal32_array_builder_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal32ArrayBuilder, + garrow_decimal32_array_builder, + GARROW, + DECIMAL32_ARRAY_BUILDER, + GArrowFixedSizeBinaryArrayBuilder) +struct _GArrowDecimal32ArrayBuilderClass +{ + GArrowFixedSizeBinaryArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32ArrayBuilder * +garrow_decimal32_array_builder_new(GArrowDecimal32DataType *data_type); + +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_array_builder_append_value(GArrowDecimal32ArrayBuilder *builder, + GArrowDecimal32 *value, + GError **error); +GARROW_AVAILABLE_IN_19_0 +gboolean +garrow_decimal32_array_builder_append_values(GArrowDecimal32ArrayBuilder *builder, + GArrowDecimal32 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 
is_valids_length, + GError **error); + #define GARROW_TYPE_DECIMAL64_ARRAY_BUILDER (garrow_decimal64_array_builder_get_type()) GARROW_AVAILABLE_IN_19_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64ArrayBuilder, diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index 8c39715c384f0..2169f8a05c77b 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -171,6 +171,11 @@ G_BEGIN_DECLS * have Arrow format data, you need to use #GArrowMonthDayNanoIntervalArray * to create a new array. * + * #GArrowDecimal32Array is a class for 32-bit decimal array. It can + * store zero or more 32-bit decimal data. If you don't have Arrow + * format data, you need to use #GArrowDecimal32ArrayBuilder to + * create a new array. + * * #GArrowDecimal64Array is a class for 64-bit decimal array. It can * store zero or more 64-bit decimal data. If you don't have Arrow * format data, you need to use #GArrowDecimal64ArrayBuilder to @@ -3095,6 +3100,60 @@ garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *arra arrow_binary_array->byte_width() * arrow_array->length()); } +G_DEFINE_TYPE(GArrowDecimal32Array, + garrow_decimal32_array, + GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY) +static void +garrow_decimal32_array_init(GArrowDecimal32Array *object) +{ +} + +static void +garrow_decimal32_array_class_init(GArrowDecimal32ArrayClass *klass) +{ +} + +/** + * garrow_decimal32_array_format_value: + * @array: A #GArrowDecimal32Array. + * @i: The index of the target value. + * + * Returns: (transfer full): The formatted @i-th value. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 19.0.0 + */ +gchar * +garrow_decimal32_array_format_value(GArrowDecimal32Array *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_decimal32_array = + std::static_pointer_cast(arrow_array); + auto value = arrow_decimal32_array->FormatValue(i); + return g_strndup(value.data(), value.size()); +} + +/** + * garrow_decimal32_array_get_value: + * @array: A #GArrowDecimal32Array. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th value. 
+ * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_array_get_value(GArrowDecimal32Array *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_decimal32_array = + std::static_pointer_cast(arrow_array); + auto arrow_decimal32 = + std::make_shared(arrow_decimal32_array->GetValue(i)); + return garrow_decimal32_new_raw(&arrow_decimal32); +} + G_DEFINE_TYPE(GArrowDecimal64Array, garrow_decimal64_array, GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY) @@ -3502,6 +3561,9 @@ garrow_array_new_raw_valist(std::shared_ptr *arrow_array, case arrow::Type::type::DICTIONARY: type = GARROW_TYPE_DICTIONARY_ARRAY; break; + case arrow::Type::type::DECIMAL32: + type = GARROW_TYPE_DECIMAL32_ARRAY; + break; case arrow::Type::type::DECIMAL64: type = GARROW_TYPE_DECIMAL64_ARRAY; break; diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index f70cf114a4a96..dbffedde28164 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -810,6 +810,26 @@ GARROW_AVAILABLE_IN_3_0 GBytes * garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array); +#define GARROW_TYPE_DECIMAL32_ARRAY (garrow_decimal32_array_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal32Array, + garrow_decimal32_array, + GARROW, + DECIMAL32_ARRAY, + GArrowFixedSizeBinaryArray) +struct _GArrowDecimal32ArrayClass +{ + GArrowFixedSizeBinaryArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +gchar * +garrow_decimal32_array_format_value(GArrowDecimal32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_array_get_value(GArrowDecimal32Array *array, gint64 i); + #define GARROW_TYPE_DECIMAL64_ARRAY (garrow_decimal64_array_get_type()) GARROW_AVAILABLE_IN_19_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal64Array, diff --git a/c_glib/test/test-decimal32-array.rb b/c_glib/test/test-decimal32-array.rb new file mode 100644 index 0000000000000..ee40f27e81179 --- /dev/null +++ b/c_glib/test/test-decimal32-array.rb @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDecimal32Array < Test::Unit::TestCase + def test_format_value + data_type = Arrow::Decimal32DataType.new(8, 2) + builder = Arrow::Decimal32ArrayBuilder.new(data_type) + decimal = Arrow::Decimal32.new("23423445") + builder.append_value(decimal) + array = builder.finish + assert_equal("234234.45", array.format_value(0)) + end + + def test_value + data_type = Arrow::Decimal32DataType.new(8, 2) + builder = Arrow::Decimal32ArrayBuilder.new(data_type) + decimal = Arrow::Decimal32.new("23423445") + builder.append_value(decimal) + array = builder.finish + assert_equal("234234.45", + array.get_value(0).to_string_scale(array.value_data_type.scale)) + end +end From 405ed6fa84f79471bb57d58bb279584245106d19 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 3 Nov 2024 08:35:51 +0900 Subject: [PATCH 33/59] MINOR: [JS] Bump tslib from 2.6.3 to 2.8.1 in /js (#44611) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [tslib](https://github.com/Microsoft/tslib) from 2.6.3 to 2.8.1.
Release notes

Sourced from tslib's releases.

v2.8.1

What's Changed

Full Changelog: https://github.com/microsoft/tslib/compare/v2.8.0...v2.8.1

v2.8.0

What's Changed

Full Changelog: https://github.com/microsoft/tslib/compare/v2.7.0...v2.8.0

v2.7.0

What's Changed

Full Changelog: https://github.com/microsoft/tslib/compare/v2.6.3...v2.7.0

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/yarn.lock b/js/yarn.lock index e7906b9aaad2d..2babda15b0ef1 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -6933,9 +6933,9 @@ ts-node@10.9.2: yn "3.1.1" tslib@^2.0.0, tslib@^2.1.0, tslib@^2.4.0, tslib@^2.6.2: - version "2.6.3" - resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.3.tgz#0438f810ad7a9edcde7a241c3d80db693c8cbfe0" - integrity sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ== + version "2.8.1" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.8.1.tgz#612efe4ed235d567e8aba5f2a5fab70280ade83f" + integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w== type-check@^0.4.0, type-check@~0.4.0: version "0.4.0" From 5ff8cda475a7799d865a3741f32ace68f622a345 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 3 Nov 2024 08:36:45 +0900 Subject: [PATCH 34/59] MINOR: [JS] Bump webpack from 5.91.0 to 5.96.1 in /js (#44612) Bumps [webpack](https://github.com/webpack/webpack) from 5.91.0 to 5.96.1.
Release notes

Sourced from webpack's releases.

v5.96.1

Bug Fixes

  • [Types] Add @types/eslint-scope to dependencies
  • [Types] Fixed regression in validate

v5.96.0

Bug Fixes

  • Fixed Module Federation should track all referenced chunks
  • Handle Data URI without base64 word
  • HotUpdateChunk have correct runtime when modified with new runtime
  • Order of chunks ids in generated chunk code
  • No extra Javascript chunks when using asset module as an entrypoint
  • Use optimistic logic for output.environment.dynamicImport to determine chunk format when no browserslist or target
  • Collision with global variables for optimization.avoidEntryIife
  • Avoid through variables in inlined module
  • Allow chunk template strings in output.devtoolNamespace
  • No extra runtime for get javascript/css chunk filename
  • No extra runtime for prefetch and preload in JS runtime when it was unused in CSS
  • Avoid cache invalidation using ProgressPlugin
  • Increase parallelism when using importModule on the execution stage
  • Correctly parsing string in export and import
  • Typescript types
  • [CSS] css/auto considers a module depending on its filename as css (pure CSS) or css/local, before it was css/global and css/local
  • [CSS] Always interpolate classes even if they are not involved in export
  • [CSS] No extra runtime in Javascript runtime chunks for asset modules used in CSS
  • [CSS] No extra runtime in Javascript runtime chunks for external asset modules used in CSS
  • [CSS] No extra runtime for the node target
  • [CSS] Fixed url()s and @import parsing
  • [CSS] Fixed - emit a warning on broken :local and :global

New Features

  • Export CSS and ESM runtime modules
  • Single Runtime Chunk and Federation eager module hoisting
  • [CSS] Support /* webpackIgnore: true */ for CSS files
  • [CSS] Support src()
  • [CSS] CSS nesting in CSS modules

v5.95.0

Bug Fixes

  • Fixed hanging when attempting to read a symlink-like file that it can't read
  • Handle default for import context element dependency
  • Merge duplicate chunks call after split chunks
  • Generate correctly code for dynamically importing the same file twice and destructuring
  • Use content hash as [base] and [name] for extracted DataURI's
  • Distinguish module and import in module-import for externals imports

... (truncated)

Commits
  • d4ced73 chore(release): 5.96.1
  • 7d6dbea fix: types regression in validate
  • 5c556e3 fix: types regression in validate
  • 2420eae fix: add @types/eslint-scope to dependencies due to types regression
  • ec45d2d fix: add @types/eslint-scope to dependencies
  • aff0c3e chore(release): 5.96.0
  • 6f11ec1 refactor: module source types code
  • b07142f refactor: module source types code
  • 7d98b3c fix: Module Federation should track all referenced chunks
  • 6d09769 chore: linting
  • Additional commits viewable in compare view

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 106 +++++++++++++++++++++++++----------------------- 2 files changed, 56 insertions(+), 52 deletions(-) diff --git a/js/package.json b/js/package.json index 27132ff812b7e..399a5c3b94486 100644 --- a/js/package.json +++ b/js/package.json @@ -112,7 +112,7 @@ "vinyl-named": "1.1.0", "vinyl-source-stream": "2.0.0", "web-streams-polyfill": "3.2.1", - "webpack": "5.91.0", + "webpack": "5.96.1", "webpack-bundle-analyzer": "4.10.2", "webpack-stream": "7.0.0", "xml2js": "0.6.2" diff --git a/js/yarn.lock b/js/yarn.lock index 2babda15b0ef1..1345eb97cf9fb 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1292,7 +1292,7 @@ resolved "https://registry.yarnpkg.com/@types/command-line-usage/-/command-line-usage-5.0.4.tgz#374e4c62d78fbc5a670a0f36da10235af879a0d5" integrity sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg== -"@types/eslint-scope@^3.7.3": +"@types/eslint-scope@^3.7.7": version "3.7.7" resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.7.tgz#3108bd5f18b0cdb277c867b3dd449c9ed7079ac5" integrity sha512-MzMFlSLBqNF2gcHWO0G1vP/YQyfvrxZ0bF+u7mzUdZ1/xK4A4sru+nraZz5i3iEIk1l1uyicaDVTB4QbbEkAYg== @@ -1308,7 +1308,7 @@ "@types/estree" "*" "@types/json-schema" "*" -"@types/estree@*", "@types/estree@1.0.6", "@types/estree@^1.0.0", "@types/estree@^1.0.5": +"@types/estree@*", "@types/estree@1.0.6", "@types/estree@^1.0.0", "@types/estree@^1.0.6": version "1.0.6" resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.6.tgz#628effeeae2064a1b4e79f78e81d87b7e5fc7b50" integrity sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw== @@ -1683,11 +1683,6 @@ resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d" integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ== -acorn-import-assertions@^1.9.0: - version "1.9.0" - resolved "https://registry.yarnpkg.com/acorn-import-assertions/-/acorn-import-assertions-1.9.0.tgz#507276249d684797c84e0734ef84860334cfb1ac" - integrity sha512-cmMwop9x+8KFhxvKrKfPYmN6/pKTYYHBqLa0DfvVZcKMJWNyWLnaqND7dx/qn66R7ewM1UX5XMaDVP5wlVTaVA== - acorn-jsx@^5.3.2: version "5.3.2" resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz#7ed5bb55908b3b2f1bc55c6af1653bada7f07937" @@ -1703,10 +1698,10 @@ acorn@^6.4.1: resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6" integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ== -acorn@^8.0.4, acorn@^8.4.1, acorn@^8.7.1, acorn@^8.8.2, acorn@^8.9.0: - version "8.11.3" - resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a" - integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg== +acorn@^8.0.4, acorn@^8.14.0, acorn@^8.4.1, acorn@^8.8.2, acorn@^8.9.0: + version "8.14.0" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.14.0.tgz#063e2c70cac5fb4f6467f0b11152e04c682795b0" + integrity sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA== aggregate-error@^4.0.0: version "4.0.1" @@ -2164,15 +2159,15 @@ braces@^3.0.2: dependencies: fill-range "^7.0.1" -browserslist@^4.21.10, browserslist@^4.22.2, browserslist@^4.23.0: - version 
"4.23.0" - resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.23.0.tgz#8f3acc2bbe73af7213399430890f86c63a5674ab" - integrity sha512-QW8HiM1shhT2GuzkvklfjcKDiWFXHOeFCIA/huJPwHsslwcydgk7X+z2zXpEijP98UCY7HbubZt5J2Zgvf0CaQ== +browserslist@^4.22.2, browserslist@^4.23.0, browserslist@^4.24.0: + version "4.24.2" + resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.24.2.tgz#f5845bc91069dbd55ee89faf9822e1d885d16580" + integrity sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg== dependencies: - caniuse-lite "^1.0.30001587" - electron-to-chromium "^1.4.668" - node-releases "^2.0.14" - update-browserslist-db "^1.0.13" + caniuse-lite "^1.0.30001669" + electron-to-chromium "^1.5.41" + node-releases "^2.0.18" + update-browserslist-db "^1.1.1" bs-logger@0.x: version "0.2.6" @@ -2259,10 +2254,10 @@ camelcase@^6.2.0, camelcase@^6.3.0: resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.3.0.tgz#5685b95eb209ac9c0c177467778c9c84df58ba9a" integrity sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA== -caniuse-lite@^1.0.30001587: - version "1.0.30001605" - resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001605.tgz#ca12d7330dd8bcb784557eb9aa64f0037870d9d6" - integrity sha512-nXwGlFWo34uliI9z3n6Qc0wZaf7zaZWA1CPZ169La5mV3I/gem7bst0vr5XQH5TJXZIMfDeZyOrZnSlVzKxxHQ== +caniuse-lite@^1.0.30001669: + version "1.0.30001677" + resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001677.tgz#27c2e2c637e007cfa864a16f7dfe7cde66b38b5f" + integrity sha512-fmfjsOlJUpMWu+mAAtZZZHz7UEwsUxIIvu1TJfO1HqFQvB/B+ii0xr9B5HpbZY/mC4XZ8SvjHJqtAY6pDPQEog== chalk-template@^0.4.0: version "0.4.0" @@ -2831,10 +2826,10 @@ eastasianwidth@^0.2.0: resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb" integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA== -electron-to-chromium@^1.4.668: - version "1.4.724" - resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.4.724.tgz#e0a86fe4d3d0e05a4d7b032549d79608078f830d" - integrity sha512-RTRvkmRkGhNBPPpdrgtDKvmOEYTrPlXDfc0J/Nfq5s29tEahAwhiX4mmhNzj6febWMleulxVYPh7QwCSL/EldA== +electron-to-chromium@^1.5.41: + version "1.5.50" + resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.5.50.tgz#d9ba818da7b2b5ef1f3dd32bce7046feb7e93234" + integrity sha512-eMVObiUQ2LdgeO1F/ySTXsvqvxb6ZH2zPGaMYsWzRDdOddUa77tdmI0ltg+L16UpbWdhPmuF3wIQYyQq65WfZw== emittery@^0.13.1: version "0.13.1" @@ -2858,10 +2853,10 @@ end-of-stream@^1.0.0, end-of-stream@^1.1.0, end-of-stream@^1.4.4: dependencies: once "^1.4.0" -enhanced-resolve@^5.16.0: - version "5.16.0" - resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.16.0.tgz#65ec88778083056cb32487faa9aef82ed0864787" - integrity sha512-O+QWCviPNSSLAD9Ucn8Awv+poAkqn3T1XY5/N7kR7rQO9yfSGWkYZDwpJ+iKF7B8rxaQKWngSqACpgzeapSyoA== +enhanced-resolve@^5.17.1: + version "5.17.1" + resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz#67bfbbcc2f81d511be77d686a90267ef7f898a15" + integrity sha512-LMHl3dXhTcfv8gM4kEzIUeTQ+7fpdA0l2tUf34BddXPkz2A5xJ5L/Pchd5BL6rdccM9QGvu0sWZzK1Z1t4wwyg== dependencies: graceful-fs "^4.2.4" tapable "^2.2.0" @@ -3008,6 +3003,11 @@ escalade@^3.1.1: resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.2.tgz#54076e9ab29ea5bf3d8f1ed62acffbb88272df27" integrity 
sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA== +escalade@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.2.0.tgz#011a3f69856ba189dffa7dc8fcce99d2a87903e5" + integrity sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA== + escape-string-regexp@5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz#4683126b500b61762f2dbebace1806e8be31b1c8" @@ -5413,10 +5413,10 @@ node-int64@^0.4.0: resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b" integrity sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw== -node-releases@^2.0.14: - version "2.0.14" - resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.14.tgz#2ffb053bceb8b2be8495ece1ab6ce600c4461b0b" - integrity sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw== +node-releases@^2.0.18: + version "2.0.18" + resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.18.tgz#f010e8d35e2fe8d6b2944f03f70213ecedc4ca3f" + integrity sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g== normalize-package-data@^2.3.2, normalize-package-data@^2.5.0: version "2.5.0" @@ -5747,6 +5747,11 @@ picocolors@^1.0.0: resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== +picocolors@^1.1.0: + version "1.1.1" + resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.1.1.tgz#3d321af3eab939b083c8f929a1d12cda81c26b6b" + integrity sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA== + picomatch@^2.0.4, picomatch@^2.2.3, picomatch@^2.3.1: version "2.3.1" resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" @@ -7082,13 +7087,13 @@ upath@^1.1.1: resolved "https://registry.yarnpkg.com/upath/-/upath-1.2.0.tgz#8f66dbcd55a883acdae4408af8b035a5044c1894" integrity sha512-aZwGpamFO61g3OlfT7OQCHqhGnW43ieH9WZeP7QxN/G/jS4jfqUkZxoryvJgVPEcrl5NL/ggHsSmLMHuH64Lhg== -update-browserslist-db@^1.0.13: - version "1.0.13" - resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz#3c5e4f5c083661bd38ef64b6328c26ed6c8248c4" - integrity sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg== +update-browserslist-db@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.1.1.tgz#80846fba1d79e82547fb661f8d141e0945755fe5" + integrity sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A== dependencies: - escalade "^3.1.1" - picocolors "^1.0.0" + escalade "^3.2.0" + picocolors "^1.1.0" uri-js@^4.2.2: version "4.4.1" @@ -7292,21 +7297,20 @@ webpack-stream@7.0.0: through "^2.3.8" vinyl "^2.2.1" -webpack@5.91.0: - version "5.91.0" - resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.91.0.tgz#ffa92c1c618d18c878f06892bbdc3373c71a01d9" - integrity sha512-rzVwlLeBWHJbmgTC/8TvAcu5vpJNII+MelQpylD4jNERPwpBJOE2lEcko1zJX3QJeLjTTAnQxn/OJ8bjDzVQaw== +webpack@5.96.1: + version "5.96.1" + resolved 
"https://registry.yarnpkg.com/webpack/-/webpack-5.96.1.tgz#3676d1626d8312b6b10d0c18cc049fba7ac01f0c" + integrity sha512-l2LlBSvVZGhL4ZrPwyr8+37AunkcYj5qh8o6u2/2rzoPc8gxFJkLj1WxNgooi9pnoc06jh0BjuXnamM4qlujZA== dependencies: - "@types/eslint-scope" "^3.7.3" - "@types/estree" "^1.0.5" + "@types/eslint-scope" "^3.7.7" + "@types/estree" "^1.0.6" "@webassemblyjs/ast" "^1.12.1" "@webassemblyjs/wasm-edit" "^1.12.1" "@webassemblyjs/wasm-parser" "^1.12.1" - acorn "^8.7.1" - acorn-import-assertions "^1.9.0" - browserslist "^4.21.10" + acorn "^8.14.0" + browserslist "^4.24.0" chrome-trace-event "^1.0.2" - enhanced-resolve "^5.16.0" + enhanced-resolve "^5.17.1" es-module-lexer "^1.2.1" eslint-scope "5.1.1" events "^3.2.0" From a08037f33f2fe00763032623e18ba049d19a024f Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Sun, 3 Nov 2024 11:01:49 +0900 Subject: [PATCH 35/59] GH-44618: [GLib] Add GArrowDecimal64Scalar (#44620) ### Rationale for this change The `arrow::Decimal64Scalar` has been released. GLib needs to implement `GArrowDecimal64Scalar`. ### What changes are included in this PR? Implement `GArrowDecimal64Scalar`. ### Are these changes tested? YES ### Are there any user-facing changes? NO * GitHub Issue: #44618 Authored-by: Hiroyuki Sato Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/scalar.cpp | 126 +++++++++++++++++++++++++++ c_glib/arrow-glib/scalar.h | 16 ++++ c_glib/test/test-decimal64-scalar.rb | 48 ++++++++++ 3 files changed, 190 insertions(+) create mode 100644 c_glib/test/test-decimal64-scalar.rb diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp index f965b4970304b..57085a00c4b10 100644 --- a/c_glib/arrow-glib/scalar.cpp +++ b/c_glib/arrow-glib/scalar.cpp @@ -104,6 +104,8 @@ G_BEGIN_DECLS * #GArrowMonthDayNanoIntervalScalar is a class for the month day nano * intarval scalar. * + * #GArrowDecimal64Scalar is a class for a 64-bit decimal scalar. + * * #GArrowDecimal128Scalar is a class for a 128-bit decimal scalar. * * #GArrowDecimal256Scalar is a class for a 256-bit decimal scalar. 
@@ -1631,6 +1633,127 @@ garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar return priv->value; } +typedef struct GArrowDecimal64ScalarPrivate_ +{ + GArrowDecimal64 *value; +} GArrowDecimal64ScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal64Scalar, + garrow_decimal64_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_DECIMAL64_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_decimal64_scalar_get_instance_private(GARROW_DECIMAL64_SCALAR(obj))) + +static void +garrow_decimal64_scalar_dispose(GObject *object) +{ + auto priv = GARROW_DECIMAL64_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_decimal64_scalar_parent_class)->dispose(object); +} + +static void +garrow_decimal64_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL64_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_DECIMAL64(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal64_scalar_init(GArrowDecimal64Scalar *object) +{ +} + +static void +garrow_decimal64_scalar_class_init(GArrowDecimal64ScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_decimal64_scalar_dispose; + gobject_class->set_property = garrow_decimal64_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowDecimal64Scalar:value: + * + * The value of the scalar. + * + * Since: 19.0.0 + */ + spec = g_param_spec_object( + "value", + "Value", + "The value of the scalar", + garrow_decimal64_get_type(), + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_decimal64_scalar_new: + * @data_type: A #GArrowDecimal64DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDecimal64Scalar. + * + * Since: 19.0.0 + */ +GArrowDecimal64Scalar * +garrow_decimal64_scalar_new(GArrowDecimal64DataType *data_type, GArrowDecimal64 *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_decimal64_get_raw(value); + auto arrow_scalar = std::static_pointer_cast( + std::make_shared(*arrow_value, arrow_data_type)); + return GARROW_DECIMAL64_SCALAR(garrow_scalar_new_raw(&arrow_scalar, + "scalar", + &arrow_scalar, + "data-type", + data_type, + "value", + value, + NULL)); +} + +/** + * garrow_decimal64_scalar_get_value: + * @scalar: A #GArrowDecimal64Scalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 19.0.0 + */ +GArrowDecimal64 * +garrow_decimal64_scalar_get_value(GArrowDecimal64Scalar *scalar) +{ + auto priv = GARROW_DECIMAL64_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + auto arrow_scalar = std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + auto arrow_value = std::make_shared(arrow_scalar->value); + priv->value = garrow_decimal64_new_raw(&arrow_value); + } + return priv->value; +} + typedef struct GArrowDecimal128ScalarPrivate_ { GArrowDecimal128 *value; @@ -2508,6 +2631,9 @@ garrow_scalar_new_raw_valist(std::shared_ptr *arrow_scalar, case arrow::Type::type::INTERVAL_MONTH_DAY_NANO: type = GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_SCALAR; break; + case arrow::Type::type::DECIMAL64: + type = GARROW_TYPE_DECIMAL64_SCALAR; + break; case arrow::Type::type::DECIMAL128: type = GARROW_TYPE_DECIMAL128_SCALAR; break; diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h index 5f9015d29c61c..c9de9958ad402 100644 --- a/c_glib/arrow-glib/scalar.h +++ b/c_glib/arrow-glib/scalar.h @@ -501,6 +501,22 @@ GARROW_AVAILABLE_IN_8_0 GArrowMonthDayNano * garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar *scalar); +#define GARROW_TYPE_DECIMAL64_SCALAR (garrow_decimal64_scalar_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowDecimal64Scalar, garrow_decimal64_scalar, GARROW, DECIMAL64_SCALAR, GArrowScalar) +struct _GArrowDecimal64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64Scalar * +garrow_decimal64_scalar_new(GArrowDecimal64DataType *data_type, GArrowDecimal64 *value); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal64 * +garrow_decimal64_scalar_get_value(GArrowDecimal64Scalar *scalar); + #define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type()) GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar, diff --git a/c_glib/test/test-decimal64-scalar.rb b/c_glib/test/test-decimal64-scalar.rb new file mode 100644 index 0000000000000..fb6a308b6d19f --- /dev/null +++ b/c_glib/test/test-decimal64-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal64Scalar < Test::Unit::TestCase + def setup + @data_type = Arrow::Decimal64DataType.new(8, 2) + @value = Arrow::Decimal64.new("23423445") + @scalar = Arrow::Decimal64Scalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Decimal64Scalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("234234.45", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end

From 00e7c65e17f7d59f7c9954473b15b8ffae8dfd1a Mon Sep 17 00:00:00 2001
From: Hang Zheng <49890011+ripplehang@users.noreply.github.com>
Date: Mon, 4 Nov 2024 02:21:50 +0800
Subject: [PATCH 36/59] GH-43535: [C++] Support the AWS S3 SSE-C encryption
 (#43601)

### Rationale for this change

[server-side encryption with customer-provided keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html) (SSE-C) is an important security feature for AWS S3. It is useful when users want to manage the encryption keys themselves, say, because they don't want the data to be exposed to the AWS system admin, and it keeps an object safe even if the ACCESS_KEY and SECRET_KEY are somehow leaked. A comparison of S3 encryption options: https://www.linkedin.com/pulse/delusion-s3-encryption-benefits-ravi-ivaturi/

### What changes are included in this PR?

1. Add the **sse_customer_key** member to S3Options to support [server-side encryption with customer-provided keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html) (SSE-C keys).
   - The sse_customer_key is expected to be 256 bits (32 bytes) according to the [aws doc](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption).
   - The sse_customer_key is expected to be the raw key rather than a base64-encoded value; Arrow calculates the base64 and MD5 on the fly.
   - By default the sse_customer_key is empty, and when it is empty there is no impact on the existing workflow. When the sse_customer_key is configured, an AWS SDK version newer than 1.9.201 is required.
2. Add the **tls_ca_file_path**, **tls_ca_dir_path** and **tls_verify_certificates** members to S3Options.
   - The tls_ca_file_path and tls_ca_dir_path members of S3Options override the values configured by arrow::fs::FileSystemGlobalOptions.
   - For S3, according to the [aws sdk doc](https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/client-config.html), tls_ca_file_path and tls_ca_dir_path only take effect on Linux; to support connecting to storage servers such as MinIO with self-signed certificates on non-Linux platforms, we expose tls_verify_certificates.
3. Refine the unit tests to start the MinIO server with a self-signed certificate on Linux, so that they cover the HTTPS case on Linux and the HTTP case on non-Linux platforms.

### Are these changes tested?

Yes

### Are there any user-facing changes?

Only additional members to S3Options.
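To illustrate the new options, a minimal configuration sketch (not part of this patch; the key bytes and CA path below are placeholders, and error handling is elided):

```cpp
#include "arrow/filesystem/s3fs.h"

arrow::Result<std::shared_ptr<arrow::fs::S3FileSystem>> MakeEncryptedS3() {
  ARROW_RETURN_NOT_OK(arrow::fs::EnsureS3Initialized());
  auto options = arrow::fs::S3Options::Defaults();
  // Raw (not base64-encoded) 256-bit key; Arrow derives the base64 and
  // MD5 request headers on the fly.
  options.sse_customer_key = "0123456789abcdef0123456789abcdef";  // 32 bytes
  // Per-filesystem TLS settings, overriding FileSystemGlobalOptions.
  options.tls_ca_file_path = "/etc/ssl/certs/minio-public.crt";
  options.tls_verify_certificates = true;
  return arrow::fs::S3FileSystem::Make(options);
}
```

The same TLS settings can also be supplied as `tls_ca_file_path`, `tls_ca_dir_path` and `tls_verify_certificates` query parameters in an S3 URI, as exercised by the `S3Options::FromUri` test in this patch.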
* GitHub Issue: #43535 Lead-authored-by: Hang Zheng Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/filesystem/s3_internal.h | 87 ++++++++++ .../arrow/filesystem/s3_test_cert_internal.h | 77 +++++++++ cpp/src/arrow/filesystem/s3_test_util.cc | 84 ++++++++-- cpp/src/arrow/filesystem/s3_test_util.h | 11 +- cpp/src/arrow/filesystem/s3fs.cc | 69 ++++++-- cpp/src/arrow/filesystem/s3fs.h | 31 ++++ cpp/src/arrow/filesystem/s3fs_benchmark.cc | 4 +- cpp/src/arrow/filesystem/s3fs_test.cc | 150 +++++++++++++++++- cpp/src/arrow/testing/util.cc | 6 + cpp/src/arrow/testing/util.h | 4 + 10 files changed, 486 insertions(+), 37 deletions(-) create mode 100644 cpp/src/arrow/filesystem/s3_test_cert_internal.h diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h index 54da3d5987e8a..772387e5fb66e 100644 --- a/cpp/src/arrow/filesystem/s3_internal.h +++ b/cpp/src/arrow/filesystem/s3_internal.h @@ -29,15 +29,38 @@ #include #include #include +#include #include #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/s3fs.h" #include "arrow/status.h" +#include "arrow/util/base64.h" #include "arrow/util/logging.h" #include "arrow/util/print.h" #include "arrow/util/string.h" +#ifndef ARROW_AWS_SDK_VERSION_CHECK +// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7. +# if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \ + defined(AWS_SDK_VERSION_PATCH) +// Redundant "(...)" are for suppressing "Weird number of spaces at +// line-start. Are you using a 2-space indent? [whitespace/indent] +// [3]" errors... +# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) \ + ((AWS_SDK_VERSION_MAJOR > (major) || \ + (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || \ + ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \ + AWS_SDK_VERSION_PATCH >= (patch))))) +# else +# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0 +# endif +#endif // !ARROW_AWS_SDK_VERSION_CHECK + +#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 201) +# define ARROW_S3_HAS_SSE_CUSTOMER_KEY +#endif + namespace arrow { namespace fs { namespace internal { @@ -291,6 +314,70 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy { int32_t max_retry_duration_; }; +/// \brief calculate the MD5 of the input SSE-C key (raw key, not base64 encoded) +/// \param sse_customer_key is the input SSE-C key +/// \return the base64 encoded MD5 for the input key +inline Result CalculateSSECustomerKeyMD5( + const std::string& sse_customer_key) { + // The key needs to be 256 bits (32 bytes) according to + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption + if (sse_customer_key.length() != 32) { + return Status::Invalid("32 bytes SSE-C key is expected"); + } + + // Convert the raw binary key to an Aws::String + Aws::String sse_customer_key_aws_string(sse_customer_key.data(), + sse_customer_key.length()); + + // Compute the MD5 hash of the raw binary key + Aws::Utils::ByteBuffer sse_customer_key_md5 = + Aws::Utils::HashingUtils::CalculateMD5(sse_customer_key_aws_string); + + // Base64-encode the MD5 hash + return arrow::util::base64_encode(std::string_view( + reinterpret_cast(sse_customer_key_md5.GetUnderlyingData()), + sse_customer_key_md5.GetLength())); +} + +struct SSECustomerKeyHeaders { + std::string sse_customer_key; + std::string sse_customer_key_md5; + std::string sse_customer_algorithm; +}; + +inline Result> 
GetSSECustomerKeyHeaders( + const std::string& sse_customer_key) { + if (sse_customer_key.empty()) { + return std::nullopt; + } +#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY + ARROW_ASSIGN_OR_RAISE(auto md5, internal::CalculateSSECustomerKeyMD5(sse_customer_key)); + return SSECustomerKeyHeaders{arrow::util::base64_encode(sse_customer_key), md5, + "AES256"}; +#else + return Status::NotImplemented( + "SSE customer key not supported by this version of the AWS SDK"); +#endif +} + +template +Status SetSSECustomerKey(S3RequestType* request, const std::string& sse_customer_key) { + ARROW_ASSIGN_OR_RAISE(auto maybe_headers, GetSSECustomerKeyHeaders(sse_customer_key)); + if (!maybe_headers.has_value()) { + return Status::OK(); + } +#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY + auto headers = std::move(maybe_headers).value(); + request->SetSSECustomerKey(headers.sse_customer_key); + request->SetSSECustomerKeyMD5(headers.sse_customer_key_md5); + request->SetSSECustomerAlgorithm(headers.sse_customer_algorithm); + return Status::OK(); +#else + return Status::NotImplemented( + "SSE customer key not supported by this version of the AWS SDK"); +#endif +} + } // namespace internal } // namespace fs } // namespace arrow diff --git a/cpp/src/arrow/filesystem/s3_test_cert_internal.h b/cpp/src/arrow/filesystem/s3_test_cert_internal.h new file mode 100644 index 0000000000000..0a69ade7d0e5c --- /dev/null +++ b/cpp/src/arrow/filesystem/s3_test_cert_internal.h @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +namespace arrow::fs { +// The below two static strings are generated according to +// https://github.com/minio/minio/tree/RELEASE.2024-09-22T00-33-43Z/docs/tls#323-generate-a-self-signed-certificate +// `openssl req -new -x509 -nodes -days 36500 -keyout private.key -out public.crt -config +// openssl.conf` +static constexpr const char* kMinioPrivateKey = R"(-----BEGIN PRIVATE KEY----- +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCqwKYHsTSciGqP +uU3qkTWpnXIi3iC0eeW7JSzJHGFs880WdR5JdK4WufPK+1xzgiYjMEPfAcuSWz3b +qYyCI61q+a9Iu2nj7cFTW9bfZrmWlnI0YOLJc+q0AAdAjF1lvRKenH8tbjz/2jyl +i/cYQ+I5Tg4nngrX8OmOfluNzwD/nwGLq6/DVbzDUdPI9q1XtVT/0Vf7qwbDG1HD +NkIzKT5B+YdSLaOCRYNK3x7RPsfazKIBrTmRy1v454wKe8TjTmTB7+m5wKqfCJcq +lI253WHcK0lsw6zCNtX/kahPAvm/8mniPolW4qxoD6xwebgMVkrNTs3ztcPIG9O4 +pmCbATijAgMBAAECggEACL5swiAU7Z8etdVrZAOjl9f0LEzrp9JGLVst++50Hrwt +WGUO8/wBnjBPh6lvhoq3oT2rfBP/dLMva7w28cMZ8kxu6W6PcZiPOdGOI0qDXm69 +0mjTtDU3Y5hMxsVpUvhnp6+j45Otk/x89o1ATgHL59tTZjv1mjFABIf78DsVdgF9 +CMi2q6Lv7NLftieyWmz1K3p109z9+xkDNSOkVrv1JFChviKqWgIS0rdFjySvTgoy +rHYT+TweDliKJrZCeoUJmNB0uVW/dM9lXhcvkvkJZKPPurylx1oH5a7K/sWFPf7A +Ed1vjvZQFlaXu/bOUUSOZtkErAir/oCxrUDsHxGsAQKBgQDZghyy7jNGNdjZe1Xs +On1ZVgIS3Nt+OLGCVH7tTsfZsCOb+SkrhB1RQva3YzPMfgoZScI9+bN/pRVf49Pj +qGEHkW/wozutUve7UMzeTOm1aWxUuaKSrmYST7muvAnlYEtO7agd0wrcusYXlMoG +KQwghkufO9I7wXcrudMKXZalIwKBgQDI+FaUwhgfThkgq6bRbdMEeosgohrCM9Wm +E5JMePQq4VaGcgGveWUoNOgT8kvJa0qQwQOqLZj7kUIdj+SCRt0u+Wu3p5IMqdOq +6tMnLNQ3wzUC2KGFLSfISR3L/bo5Bo6Jqz4hVtjMk3PV9bu50MNTNaofYb2xlf/f +/WgiEG0WgQKBgAr8RVLMMQ7EvXUOg6Jwuc//Rg+J1BQl7OE2P0rhBbr66HGCPhAS +liB6j1dnzT/wxbXNQeA7clNqFRBIw3TmFjB5qfuvYt44KIbvZ8l6fPtKncwRrCJY +aJNYL3qhyKYrHOKZojoPZKcNT9/1BdcVz6T842jhbpbSCKDOu9f0Lh2dAoGATZeM +Hh0eISAPFY0QeDV1znnds3jC6g4HQ/q0dnAQnWmo9XmY6v3sr2xV2jWnSxnwjRjo +aFD4itBXfYBr0ly30wYbr6mz+s2q2oeVhL+LJAhrNDEdk4SOooaQSY0p1BCTAdYq +w8Z7J+kaRRZ+J0zRzROgHkOncKQgSYPWK6i55YECgYAC+ECrHhUlPsfusjKpFsEe +stW1HCt3wXtKQn6SJ6IAesbxwALZS6Da/ZC2x1mdBHS3GwWvtGLc0BPnPVfJjr9V +m82qkgJ+p5d7qp7pRA7SFD+5809yVqRnEF3rSLafgGet9ah0ZjZvQ3fwnYZNnNH9 +t9pJcv2E5xY7/nFNIorpKg== +-----END PRIVATE KEY----- +)"; + +static constexpr const char* kMinioCert = R"(-----BEGIN CERTIFICATE----- +MIIDiTCCAnGgAwIBAgIUXbHZ6FAhKSXg4WSGUQySlSyE4U0wDQYJKoZIhvcNAQEL +BQAwXzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlZBMQ4wDAYDVQQHDAVBcnJvdzEO +MAwGA1UECgwFQXJyb3cxDjAMBgNVBAsMBUFycm93MRMwEQYDVQQDDApBcnJyb3dU +ZXN0MB4XDTI0MDkyNDA5MzUxNloXDTM0MDkyMjA5MzUxNlowXzELMAkGA1UEBhMC +VVMxCzAJBgNVBAgMAlZBMQ4wDAYDVQQHDAVBcnJvdzEOMAwGA1UECgwFQXJyb3cx +DjAMBgNVBAsMBUFycm93MRMwEQYDVQQDDApBcnJyb3dUZXN0MIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqsCmB7E0nIhqj7lN6pE1qZ1yIt4gtHnluyUs +yRxhbPPNFnUeSXSuFrnzyvtcc4ImIzBD3wHLkls926mMgiOtavmvSLtp4+3BU1vW +32a5lpZyNGDiyXPqtAAHQIxdZb0Snpx/LW48/9o8pYv3GEPiOU4OJ54K1/Dpjn5b +jc8A/58Bi6uvw1W8w1HTyPatV7VU/9FX+6sGwxtRwzZCMyk+QfmHUi2jgkWDSt8e +0T7H2syiAa05kctb+OeMCnvE405kwe/pucCqnwiXKpSNud1h3CtJbMOswjbV/5Go +TwL5v/Jp4j6JVuKsaA+scHm4DFZKzU7N87XDyBvTuKZgmwE4owIDAQABoz0wOzAa +BgNVHREEEzARhwR/AAABgglsb2NhbGhvc3QwHQYDVR0OBBYEFOUNqUSfROf1dz3o +hAVBhgd3UIvKMA0GCSqGSIb3DQEBCwUAA4IBAQBSwWJ2dSw3jlHU0l2V3ozqthTt +XFo07AyWGw8AWNCM6mQ+GKBf0JJ1d7e4lyTf2lCobknS94EgGPORWeiucKYAoCjS +dh1eKGsSevz1rNbp7wsO7DoiRPciK+S95DbsPowloGI6fvOeE12Cf1udeNIpEYWs +OBFwN0HxfYqdPALCtw7l0icpTrJ2Us06UfL9kbkdZwQhXvOscG7JDRtNjBxl9XNm +TFeMNKROmrEPCWaYr6MJ+ItHtb5Cawapea4THz9GCjR9eLq2CbMqLezZ8xBHPzc4 +ixI2l0uCfg7ZUSA+90yaScc7bhEQ8CMiPtJgNKaKIqB58DpY7028xJpW7Ma2 +-----END CERTIFICATE----- +)"; +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc 
b/cpp/src/arrow/filesystem/s3_test_util.cc index db0c60f2e80f2..0cfe038599cfe 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.cc +++ b/cpp/src/arrow/filesystem/s3_test_util.cc @@ -19,6 +19,7 @@ # include #endif +#include "arrow/filesystem/s3_test_cert_internal.h" #include "arrow/filesystem/s3_test_util.h" #include "arrow/filesystem/s3fs.h" #include "arrow/testing/process.h" @@ -31,6 +32,11 @@ namespace arrow { namespace fs { +using ::arrow::internal::FileClose; +using ::arrow::internal::FileDescriptor; +using ::arrow::internal::FileOpenWritable; +using ::arrow::internal::FileWrite; +using ::arrow::internal::PlatformFilename; using ::arrow::internal::TemporaryDir; namespace { @@ -44,16 +50,16 @@ const char* kEnvConnectString = "ARROW_TEST_S3_CONNECT_STRING"; const char* kEnvAccessKey = "ARROW_TEST_S3_ACCESS_KEY"; const char* kEnvSecretKey = "ARROW_TEST_S3_SECRET_KEY"; -std::string GenerateConnectString() { return GetListenAddress(); } - } // namespace struct MinioTestServer::Impl { std::unique_ptr temp_dir_; + std::unique_ptr temp_dir_ca_; std::string connect_string_; std::string access_key_ = kMinioAccessKey; std::string secret_key_ = kMinioSecretKey; std::unique_ptr server_process_; + std::string scheme_ = "http"; }; MinioTestServer::MinioTestServer() : impl_(new Impl) {} @@ -69,7 +75,41 @@ std::string MinioTestServer::access_key() const { return impl_->access_key_; } std::string MinioTestServer::secret_key() const { return impl_->secret_key_; } -Status MinioTestServer::Start() { +std::string MinioTestServer::ca_dir_path() const { + return impl_->temp_dir_ca_->path().ToString(); +} + +std::string MinioTestServer::ca_file_path() const { + return impl_->temp_dir_ca_->path().ToString() + "/public.crt"; +} + +std::string MinioTestServer::scheme() const { return impl_->scheme_; } + +Status MinioTestServer::GenerateCertificateFile() { + // create the dedicated folder for certificate file, rather than reuse the data + // folder, since there is test case to check whether the folder is empty. 
+ ARROW_ASSIGN_OR_RAISE(impl_->temp_dir_ca_, TemporaryDir::Make("s3fs-test-ca-")); + + ARROW_ASSIGN_OR_RAISE(auto public_crt_file, + PlatformFilename::FromString(ca_dir_path() + "/public.crt")); + ARROW_ASSIGN_OR_RAISE(auto public_cert_fd, FileOpenWritable(public_crt_file)); + ARROW_RETURN_NOT_OK(FileWrite(public_cert_fd.fd(), + reinterpret_cast(kMinioCert), + strlen(kMinioCert))); + ARROW_RETURN_NOT_OK(public_cert_fd.Close()); + + ARROW_ASSIGN_OR_RAISE(auto private_key_file, + PlatformFilename::FromString(ca_dir_path() + "/private.key")); + ARROW_ASSIGN_OR_RAISE(auto private_key_fd, FileOpenWritable(private_key_file)); + ARROW_RETURN_NOT_OK(FileWrite(private_key_fd.fd(), + reinterpret_cast(kMinioPrivateKey), + strlen(kMinioPrivateKey))); + ARROW_RETURN_NOT_OK(private_key_fd.Close()); + + return Status::OK(); +} + +Status MinioTestServer::Start(bool enable_tls) { const char* connect_str = std::getenv(kEnvConnectString); const char* access_key = std::getenv(kEnvAccessKey); const char* secret_key = std::getenv(kEnvSecretKey); @@ -88,12 +128,27 @@ Status MinioTestServer::Start() { impl_->server_process_->SetEnv("MINIO_SECRET_KEY", kMinioSecretKey); // Disable the embedded console (one less listening address to care about) impl_->server_process_->SetEnv("MINIO_BROWSER", "off"); - impl_->connect_string_ = GenerateConnectString(); - ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName)); // NOTE: --quiet makes startup faster by suppressing remote version check - impl_->server_process_->SetArgs({"server", "--quiet", "--compat", "--address", - impl_->connect_string_, - impl_->temp_dir_->path().ToString()}); + std::vector minio_args({"server", "--quiet", "--compat"}); + if (enable_tls) { + ARROW_RETURN_NOT_OK(GenerateCertificateFile()); + minio_args.emplace_back("--certs-dir"); + minio_args.emplace_back(ca_dir_path()); + impl_->scheme_ = "https"; + // With TLS enabled, we need the connection hostname to match the certificate's + // subject name. This also constrains the actual listening IP address. + impl_->connect_string_ = GetListenAddress("localhost"); + } else { + // Without TLS enabled, we want to minimize the likelihood of address collisions + // by varying the listening IP address (note that most tests don't enable TLS). 
+ impl_->connect_string_ = GetListenAddress(); + } + minio_args.emplace_back("--address"); + minio_args.emplace_back(impl_->connect_string_); + minio_args.emplace_back(impl_->temp_dir_->path().ToString()); + + ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName)); + impl_->server_process_->SetArgs(minio_args); ARROW_RETURN_NOT_OK(impl_->server_process_->Execute()); return Status::OK(); } @@ -105,24 +160,29 @@ Status MinioTestServer::Stop() { struct MinioTestEnvironment::Impl { std::function>()> server_generator_; + bool enable_tls_; + + explicit Impl(bool enable_tls) : enable_tls_(enable_tls) {} Result> LaunchOneServer() { auto server = std::make_shared(); - RETURN_NOT_OK(server->Start()); + RETURN_NOT_OK(server->Start(enable_tls_)); return server; } }; -MinioTestEnvironment::MinioTestEnvironment() : impl_(new Impl) {} +MinioTestEnvironment::MinioTestEnvironment(bool enable_tls) + : impl_(new Impl(enable_tls)) {} MinioTestEnvironment::~MinioTestEnvironment() = default; void MinioTestEnvironment::SetUp() { auto pool = ::arrow::internal::GetCpuThreadPool(); - auto launch_one_server = []() -> Result> { + auto launch_one_server = + [enable_tls = impl_->enable_tls_]() -> Result> { auto server = std::make_shared(); - RETURN_NOT_OK(server->Start()); + RETURN_NOT_OK(server->Start(enable_tls)); return server; }; impl_->server_generator_ = [pool, launch_one_server]() { diff --git a/cpp/src/arrow/filesystem/s3_test_util.h b/cpp/src/arrow/filesystem/s3_test_util.h index e270a6e1c469a..0a89a7a9d5a15 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.h +++ b/cpp/src/arrow/filesystem/s3_test_util.h @@ -40,7 +40,7 @@ class MinioTestServer { MinioTestServer(); ~MinioTestServer(); - Status Start(); + Status Start(bool enable_tls = false); Status Stop(); @@ -50,7 +50,14 @@ class MinioTestServer { std::string secret_key() const; + std::string ca_dir_path() const; + + std::string ca_file_path() const; + + std::string scheme() const; + private: + Status GenerateCertificateFile(); struct Impl; std::unique_ptr impl_; }; @@ -60,7 +67,7 @@ class MinioTestServer { class MinioTestEnvironment : public ::testing::Environment { public: - MinioTestEnvironment(); + explicit MinioTestEnvironment(bool enable_tls = false); ~MinioTestEnvironment(); void SetUp() override; diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 13d6ead6ef686..ee47e1c702073 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -160,6 +160,7 @@ using internal::IsNotFound; using internal::OutcomeToResult; using internal::OutcomeToStatus; using internal::S3Backend; +using internal::SetSSECustomerKey; using internal::ToAwsString; using internal::ToURLEncodedAwsString; @@ -403,6 +404,13 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { } else if (kv.first == "allow_bucket_deletion") { ARROW_ASSIGN_OR_RAISE(options.allow_bucket_deletion, ::arrow::internal::ParseBoolean(kv.second)); + } else if (kv.first == "tls_ca_file_path") { + options.tls_ca_file_path = kv.second; + } else if (kv.first == "tls_ca_dir_path") { + options.tls_ca_dir_path = kv.second; + } else if (kv.first == "tls_verify_certificates") { + ARROW_ASSIGN_OR_RAISE(options.tls_verify_certificates, + ::arrow::internal::ParseBoolean(kv.second)); } else { return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'"); } @@ -439,7 +447,11 @@ bool S3Options::Equals(const S3Options& other) const { background_writes == other.background_writes && allow_bucket_creation 
== other.allow_bucket_creation && allow_bucket_deletion == other.allow_bucket_deletion && - default_metadata_equals && GetAccessKey() == other.GetAccessKey() && + tls_ca_file_path == other.tls_ca_file_path && + tls_ca_dir_path == other.tls_ca_dir_path && + tls_verify_certificates == other.tls_verify_certificates && + sse_customer_key == other.sse_customer_key && default_metadata_equals && + GetAccessKey() == other.GetAccessKey() && GetSecretKey() == other.GetSecretKey() && GetSessionToken() == other.GetSessionToken()); } @@ -1125,12 +1137,17 @@ class ClientBuilder { } else { client_config_.retryStrategy = std::make_shared(); } - if (!internal::global_options.tls_ca_file_path.empty()) { + if (!options_.tls_ca_file_path.empty()) { + client_config_.caFile = ToAwsString(options_.tls_ca_file_path); + } else if (!internal::global_options.tls_ca_file_path.empty()) { client_config_.caFile = ToAwsString(internal::global_options.tls_ca_file_path); } - if (!internal::global_options.tls_ca_dir_path.empty()) { + if (!options_.tls_ca_dir_path.empty()) { + client_config_.caPath = ToAwsString(options_.tls_ca_dir_path); + } else if (!internal::global_options.tls_ca_dir_path.empty()) { client_config_.caPath = ToAwsString(internal::global_options.tls_ca_dir_path); } + client_config_.verifySSL = options_.tls_verify_certificates; // Set proxy options if provided if (!options_.proxy_options.scheme.empty()) { @@ -1292,11 +1309,14 @@ Aws::IOStreamFactory AwsWriteableStreamFactory(void* data, int64_t nbytes) { } Result GetObjectRange(Aws::S3::S3Client* client, - const S3Path& path, int64_t start, - int64_t length, void* out) { + const S3Path& path, + const std::string& sse_customer_key, + int64_t start, int64_t length, + void* out) { S3Model::GetObjectRequest req; req.SetBucket(ToAwsString(path.bucket)); req.SetKey(ToAwsString(path.key)); + RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key)); req.SetRange(ToAwsString(FormatRange(start, length))); req.SetResponseStreamFactory(AwsWriteableStreamFactory(out, length)); return OutcomeToResult("GetObject", client->GetObject(req)); @@ -1433,11 +1453,13 @@ bool IsDirectory(std::string_view key, const S3Model::HeadObjectResult& result) class ObjectInputFile final : public io::RandomAccessFile { public: ObjectInputFile(std::shared_ptr holder, const io::IOContext& io_context, - const S3Path& path, int64_t size = kNoSize) + const S3Path& path, int64_t size = kNoSize, + const std::string& sse_customer_key = "") : holder_(std::move(holder)), io_context_(io_context), path_(path), - content_length_(size) {} + content_length_(size), + sse_customer_key_(sse_customer_key) {} Status Init() { // Issue a HEAD Object to get the content-length and ensure any @@ -1450,6 +1472,7 @@ class ObjectInputFile final : public io::RandomAccessFile { S3Model::HeadObjectRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); + RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_)); ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); auto outcome = client_lock.Move()->HeadObject(req); @@ -1534,9 +1557,9 @@ class ObjectInputFile final : public io::RandomAccessFile { // Read the desired range of bytes ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); - ARROW_ASSIGN_OR_RAISE( - S3Model::GetObjectResult result, - GetObjectRange(client_lock.get(), path_, position, nbytes, out)); + ARROW_ASSIGN_OR_RAISE(S3Model::GetObjectResult result, + GetObjectRange(client_lock.get(), path_, sse_customer_key_, + position, nbytes, out)); auto& stream = 
result.GetBody(); stream.ignore(nbytes); @@ -1584,6 +1607,7 @@ class ObjectInputFile final : public io::RandomAccessFile { int64_t pos_ = 0; int64_t content_length_ = kNoSize; std::shared_ptr metadata_; + std::string sse_customer_key_; }; // Upload size per part. While AWS and Minio support different sizes for each @@ -1620,7 +1644,8 @@ class ObjectOutputStream final : public io::OutputStream { metadata_(metadata), default_metadata_(options.default_metadata), background_writes_(options.background_writes), - allow_delayed_open_(options.allow_delayed_open) {} + allow_delayed_open_(options.allow_delayed_open), + sse_customer_key_(options.sse_customer_key) {} ~ObjectOutputStream() override { // For compliance with the rest of the IO stack, Close rather than Abort, @@ -1668,6 +1693,7 @@ class ObjectOutputStream final : public io::OutputStream { S3Model::CreateMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); + RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_)); RETURN_NOT_OK(SetMetadataInRequest(&req)); auto outcome = client_lock.Move()->CreateMultipartUpload(req); @@ -1771,6 +1797,7 @@ class ObjectOutputStream final : public io::OutputStream { req.SetKey(ToAwsString(path_.key)); req.SetUploadId(multipart_upload_id_); req.SetMultipartUpload(std::move(completed_upload)); + RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_)); auto outcome = client_lock.Move()->CompleteMultipartUploadWithErrorFixup(std::move(req)); @@ -1958,6 +1985,7 @@ class ObjectOutputStream final : public io::OutputStream { req.SetKey(ToAwsString(path_.key)); req.SetBody(std::make_shared(data, nbytes)); req.SetContentLength(nbytes); + RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_)); if (!background_writes_) { req.SetBody(std::make_shared(data, nbytes)); @@ -2171,6 +2199,7 @@ class ObjectOutputStream final : public io::OutputStream { Future<> pending_uploads_completed = Future<>::MakeFinished(Status::OK()); }; std::shared_ptr upload_state_; + std::string sse_customer_key_; }; // This function assumes info->path() is already set @@ -2339,6 +2368,17 @@ class S3FileSystem::Impl : public std::enable_shared_from_this(holder_, fs->io_context(), path); + auto ptr = std::make_shared(holder_, fs->io_context(), path, kNoSize, + fs->options().sse_customer_key); RETURN_NOT_OK(ptr->Init()); return ptr; } @@ -3002,8 +3043,8 @@ class S3FileSystem::Impl : public std::enable_shared_from_this(holder_, fs->io_context(), path, info.size()); + auto ptr = std::make_shared( + holder_, fs->io_context(), path, info.size(), fs->options().sse_customer_key); RETURN_NOT_OK(ptr->Init()); return ptr; } diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 85d5ff8fed553..ac6342f26a304 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -196,6 +196,37 @@ struct ARROW_EXPORT S3Options { /// delay between retries. std::shared_ptr retry_strategy; + /// Optional customer-provided key for server-side encryption (SSE-C). + /// + /// This should be the 32-byte AES-256 key, unencoded. + std::string sse_customer_key; + + /// Optional path to a single PEM file holding all TLS CA certificates + /// + /// If empty, global filesystem options will be used (see FileSystemGlobalOptions); + /// if the corresponding global filesystem option is also empty, the underlying + /// TLS library's defaults will be used. + /// + /// Note this option may be ignored on some systems (Windows, macOS). 
+ std::string tls_ca_file_path; + + /// Optional path to a directory holding TLS CA + /// + /// The given directory should contain CA certificates as individual PEM files + /// named along the OpenSSL "hashed" format. + /// + /// If empty, global filesystem options will be used (see FileSystemGlobalOptions); + /// if the corresponding global filesystem option is also empty, the underlying + /// TLS library's defaults will be used. + /// + /// Note this option may be ignored on some systems (Windows, macOS). + std::string tls_ca_dir_path; + + /// Whether to verify the S3 endpoint's TLS certificate + /// + /// This option applies if the scheme is "https". + bool tls_verify_certificates = true; + S3Options(); /// Configure with the default AWS credentials provider chain. diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc b/cpp/src/arrow/filesystem/s3fs_benchmark.cc index 212164296398b..b7b6dda64192a 100644 --- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc +++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc @@ -61,7 +61,7 @@ class MinioFixture : public benchmark::Fixture { public: void SetUp(const ::benchmark::State& state) override { minio_.reset(new MinioTestServer()); - ASSERT_OK(minio_->Start()); + ASSERT_OK(minio_->Start(/*enable_tls=*/false)); const char* region_str = std::getenv(kEnvAwsRegion); if (region_str) { @@ -110,7 +110,7 @@ class MinioFixture : public benchmark::Fixture { void MakeFileSystem() { options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); - options_.scheme = "http"; + options_.scheme = minio_->scheme(); if (!region_.empty()) { options_.region = region_; } diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 43091aaa986d9..3082ecb7843b8 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -71,6 +71,12 @@ #include "arrow/util/range.h" #include "arrow/util/string.h" +// TLS tests require the ability to set a custom CA file when initiating S3 client +// connections, which the AWS SDK currently only supports on Linux. 
+#if defined(__linux__) +# define ENABLE_TLS_TESTS +#endif // Linux + namespace arrow { namespace fs { @@ -80,6 +86,7 @@ using ::arrow::internal::ToChars; using ::arrow::internal::Zip; using ::arrow::util::UriEscape; +using ::arrow::fs::internal::CalculateSSECustomerKeyMD5; using ::arrow::fs::internal::ConnectRetryStrategy; using ::arrow::fs::internal::ErrorToStatus; using ::arrow::fs::internal::OutcomeToStatus; @@ -94,8 +101,15 @@ ::testing::Environment* s3_env = ::testing::AddGlobalTestEnvironment(new S3Envir ::testing::Environment* minio_env = ::testing::AddGlobalTestEnvironment(new MinioTestEnvironment); -MinioTestEnvironment* GetMinioEnv() { - return ::arrow::internal::checked_cast(minio_env); +::testing::Environment* minio_env_https = + ::testing::AddGlobalTestEnvironment(new MinioTestEnvironment(/*enable_tls=*/true)); + +MinioTestEnvironment* GetMinioEnv(bool enable_tls) { + if (enable_tls) { + return ::arrow::internal::checked_cast(minio_env_https); + } else { + return ::arrow::internal::checked_cast(minio_env); + } } class ShortRetryStrategy : public S3RetryStrategy { @@ -202,10 +216,15 @@ class S3TestMixin : public AwsTestMixin { protected: Status InitServerAndClient() { - ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv()->GetOneServer()); + ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv(enable_tls_)->GetOneServer()); client_config_.reset(new Aws::Client::ClientConfiguration()); client_config_->endpointOverride = ToAwsString(minio_->connect_string()); - client_config_->scheme = Aws::Http::Scheme::HTTP; + if (minio_->scheme() == "https") { + client_config_->scheme = Aws::Http::Scheme::HTTPS; + client_config_->caFile = ToAwsString(minio_->ca_file_path()); + } else { + client_config_->scheme = Aws::Http::Scheme::HTTP; + } client_config_->retryStrategy = std::make_shared(kRetryInterval, kMaxRetryDuration); credentials_ = {ToAwsString(minio_->access_key()), ToAwsString(minio_->secret_key())}; @@ -224,6 +243,11 @@ class S3TestMixin : public AwsTestMixin { std::unique_ptr client_config_; Aws::Auth::AWSCredentials credentials_; std::unique_ptr client_; + // Use plain HTTP by default, as this allows us to listen on different loopback + // addresses and thus minimize the risk of address reuse (HTTPS requires the + // hostname to match the certificate's subject name, constraining us to a + // single loopback address). 
+ bool enable_tls_ = false; }; void AssertGetObject(Aws::S3::Model::GetObjectResult& result, @@ -249,6 +273,27 @@ void AssertObjectContents(Aws::S3::S3Client* client, const std::string& bucket, AssertGetObject(result, expected); } +//////////////////////////////////////////////////////////////////////////// +// Misc tests + +class InternalsTest : public AwsTestMixin {}; + +TEST_F(InternalsTest, CalculateSSECustomerKeyMD5) { + ASSERT_RAISES(Invalid, CalculateSSECustomerKeyMD5("")); // invalid length + ASSERT_RAISES(Invalid, + CalculateSSECustomerKeyMD5( + "1234567890123456789012345678901234567890")); // invalid length + // valid case, with some non-ASCII character and a null byte in the sse_customer_key + char sse_customer_key[32] = {}; + sse_customer_key[0] = '\x40'; // '@' character + sse_customer_key[1] = '\0'; // null byte + sse_customer_key[2] = '\xFF'; // non-ASCII + sse_customer_key[31] = '\xFA'; // non-ASCII + std::string sse_customer_key_string(sse_customer_key, sizeof(sse_customer_key)); + ASSERT_OK_AND_ASSIGN(auto md5, CalculateSSECustomerKeyMD5(sse_customer_key_string)) + ASSERT_EQ(md5, "97FTa6lj0hE7lshKdBy61g=="); // valid case +} + //////////////////////////////////////////////////////////////////////////// // S3Options tests @@ -300,6 +345,17 @@ TEST_F(S3OptionsTest, FromUri) { ASSERT_EQ(options.scheme, "http"); ASSERT_EQ(options.endpoint_override, "localhost"); ASSERT_EQ(path, "mybucket/foo/bar"); + ASSERT_EQ(options.tls_verify_certificates, true); + + // Explicit tls related configuration + ASSERT_OK_AND_ASSIGN( + options, + S3Options::FromUri("s3://mybucket/foo/bar/?tls_ca_dir_path=/test&tls_ca_file_path=/" + "test/test.pem&tls_verify_certificates=false", + &path)); + ASSERT_EQ(options.tls_ca_dir_path, "/test"); + ASSERT_EQ(options.tls_ca_file_path, "/test/test.pem"); + ASSERT_EQ(options.tls_verify_certificates, false); // Missing bucket name ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path)); @@ -443,6 +499,9 @@ class TestS3FS : public S3TestMixin { // Most tests will create buckets options_.allow_bucket_creation = true; options_.allow_bucket_deletion = true; + if (enable_tls_) { + options_.tls_ca_file_path = minio_->ca_file_path(); + } MakeFileSystem(); // Set up test bucket { @@ -532,7 +591,7 @@ class TestS3FS : public S3TestMixin { Result> MakeNewFileSystem( io::IOContext io_context = io::default_io_context()) { options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); - options_.scheme = "http"; + options_.scheme = minio_->scheme(); options_.endpoint_override = minio_->connect_string(); if (!options_.retry_strategy) { options_.retry_strategy = std::make_shared(); @@ -1298,6 +1357,82 @@ TEST_F(TestS3FS, OpenInputFile) { ASSERT_RAISES(IOError, file->Seek(10)); } +// Minio only allows Server Side Encryption on HTTPS client connections. 
+#ifdef ENABLE_TLS_TESTS +class TestS3FSHTTPS : public TestS3FS { + public: + void SetUp() override { + enable_tls_ = true; + TestS3FS::SetUp(); + } +}; + +TEST_F(TestS3FSHTTPS, SSECustomerKeyMatch) { + // normal write/read with correct SSE-C key + std::shared_ptr stream; + options_.sse_customer_key = "12345678123456781234567812345678"; + for (const auto& allow_delayed_open : {false, true}) { + ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open); + options_.allow_delayed_open = allow_delayed_open; + MakeFileSystem(); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile_with_sse_c")); + ASSERT_OK(stream->Write("some")); + ASSERT_OK(stream->Close()); + ASSERT_OK_AND_ASSIGN(auto file, fs_->OpenInputFile("bucket/newfile_with_sse_c")); + ASSERT_OK_AND_ASSIGN(auto buf, file->Read(5)); + AssertBufferEqual(*buf, "some"); + ASSERT_OK(RestoreTestBucket()); + } +} + +TEST_F(TestS3FSHTTPS, SSECustomerKeyMismatch) { + std::shared_ptr stream; + for (const auto& allow_delayed_open : {false, true}) { + ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open); + options_.allow_delayed_open = allow_delayed_open; + options_.sse_customer_key = "12345678123456781234567812345678"; + MakeFileSystem(); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile_with_sse_c")); + ASSERT_OK(stream->Write("some")); + ASSERT_OK(stream->Close()); + options_.sse_customer_key = "87654321876543218765432187654321"; + MakeFileSystem(); + ASSERT_RAISES(IOError, fs_->OpenInputFile("bucket/newfile_with_sse_c")); + ASSERT_OK(RestoreTestBucket()); + } +} + +TEST_F(TestS3FSHTTPS, SSECustomerKeyMissing) { + std::shared_ptr stream; + for (const auto& allow_delayed_open : {false, true}) { + ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open); + options_.allow_delayed_open = allow_delayed_open; + options_.sse_customer_key = "12345678123456781234567812345678"; + MakeFileSystem(); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile_with_sse_c")); + ASSERT_OK(stream->Write("some")); + ASSERT_OK(stream->Close()); + + options_.sse_customer_key = {}; + MakeFileSystem(); + ASSERT_RAISES(IOError, fs_->OpenInputFile("bucket/newfile_with_sse_c")); + ASSERT_OK(RestoreTestBucket()); + } +} + +TEST_F(TestS3FSHTTPS, SSECustomerKeyCopyFile) { + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenOutputStream("bucket/newfile_with_sse_c")); + ASSERT_OK(stream->Write("some")); + ASSERT_OK(stream->Close()); + ASSERT_OK(fs_->CopyFile("bucket/newfile_with_sse_c", "bucket/copied_with_sse_c")); + + ASSERT_OK_AND_ASSIGN(auto file, fs_->OpenInputFile("bucket/copied_with_sse_c")); + ASSERT_OK_AND_ASSIGN(auto buf, file->Read(5)); + AssertBufferEqual(*buf, "some"); + ASSERT_OK(RestoreTestBucket()); +} +#endif // ENABLE_TLS_TESTS + struct S3OptionsTestParameters { bool background_writes{false}; bool allow_delayed_open{false}; @@ -1420,7 +1555,8 @@ TEST_F(TestS3FS, FileSystemFromUri) { std::stringstream ss; ss << "s3://" << minio_->access_key() << ":" << minio_->secret_key() << "@bucket/somedir/subdir/subfile" - << "?scheme=http&endpoint_override=" << UriEscape(minio_->connect_string()); + << "?scheme=" << minio_->scheme() + << "&endpoint_override=" << UriEscape(minio_->connect_string()); std::string path; ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri(ss.str(), &path)); @@ -1522,7 +1658,7 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest { } options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); - options_.scheme = "http"; + options_.scheme = 
minio_->scheme(); options_.endpoint_override = minio_->connect_string(); options_.retry_strategy = std::make_shared(); ASSERT_OK_AND_ASSIGN(s3fs_, S3FileSystem::Make(options_)); diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 7bef9f7d4756d..e5e53801df949 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -206,6 +206,12 @@ std::string GetListenAddress() { return ss.str(); } +std::string GetListenAddress(const std::string& host) { + std::stringstream ss; + ss << host << ":" << GetListenPort(); + return ss.str(); +} + const std::vector>& all_dictionary_index_types() { static std::vector> types = { int8(), uint8(), int16(), uint16(), int32(), uint32(), int64(), uint64()}; diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index b4b2785a36292..8cc28a8b073a4 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -128,6 +128,10 @@ ARROW_TESTING_EXPORT int GetListenPort(); // port conflicts. ARROW_TESTING_EXPORT std::string GetListenAddress(); +// Get a "host:port" to listen on. Compared to GetListenAddress(), this function would use +// the host passed in. +ARROW_TESTING_EXPORT std::string GetListenAddress(const std::string& host); + ARROW_TESTING_EXPORT const std::vector>& all_dictionary_index_types(); From b6e877208f575df53a424d449b4c04fa2f2e81a5 Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Tue, 5 Nov 2024 06:28:18 +0900 Subject: [PATCH 37/59] GH-44619: [GLib] Add GArrowDecimal32Scalar (#44628) ### Rationale for this change The `arrow::Decimal32Scalar` has been released. GLib needs to implement `GArrowDecimal32Scalar`. ### What changes are included in this PR? Implement `GArrowDecimal32Scalar`. ### Are these changes tested? YES ### Are there any user-facing changes? NO * GitHub Issue: #44619 Lead-authored-by: Hiroyuki Sato Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/scalar.cpp | 126 +++++++++++++++++++++++++++ c_glib/arrow-glib/scalar.h | 16 ++++ c_glib/test/test-decimal32-scalar.rb | 48 ++++++++++ 3 files changed, 190 insertions(+) create mode 100644 c_glib/test/test-decimal32-scalar.rb diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp index 57085a00c4b10..f2093e3e41ae2 100644 --- a/c_glib/arrow-glib/scalar.cpp +++ b/c_glib/arrow-glib/scalar.cpp @@ -104,6 +104,8 @@ G_BEGIN_DECLS * #GArrowMonthDayNanoIntervalScalar is a class for the month day nano * intarval scalar. * + * #GArrowDecimal32Scalar is a class for a 32-bit decimal scalar. + * * #GArrowDecimal64Scalar is a class for a 64-bit decimal scalar. * * #GArrowDecimal128Scalar is a class for a 128-bit decimal scalar. 
@@ -1633,6 +1635,127 @@ garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar return priv->value; } +typedef struct GArrowDecimal32ScalarPrivate_ +{ + GArrowDecimal32 *value; +} GArrowDecimal32ScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal32Scalar, + garrow_decimal32_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_DECIMAL32_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_decimal32_scalar_get_instance_private(GARROW_DECIMAL32_SCALAR(obj))) + +static void +garrow_decimal32_scalar_dispose(GObject *object) +{ + auto priv = GARROW_DECIMAL32_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_decimal32_scalar_parent_class)->dispose(object); +} + +static void +garrow_decimal32_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL32_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_DECIMAL32(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal32_scalar_init(GArrowDecimal32Scalar *object) +{ +} + +static void +garrow_decimal32_scalar_class_init(GArrowDecimal32ScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_decimal32_scalar_dispose; + gobject_class->set_property = garrow_decimal32_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowDecimal32Scalar:value: + * + * The value of the scalar. + * + * Since: 19.0.0 + */ + spec = g_param_spec_object( + "value", + "Value", + "The value of the scalar", + garrow_decimal32_get_type(), + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_decimal32_scalar_new: + * @data_type: A #GArrowDecimal32DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDecimal32Scalar. + * + * Since: 19.0.0 + */ +GArrowDecimal32Scalar * +garrow_decimal32_scalar_new(GArrowDecimal32DataType *data_type, GArrowDecimal32 *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_decimal32_get_raw(value); + auto arrow_scalar = std::static_pointer_cast( + std::make_shared(*arrow_value, arrow_data_type)); + return GARROW_DECIMAL32_SCALAR(garrow_scalar_new_raw(&arrow_scalar, + "scalar", + &arrow_scalar, + "data-type", + data_type, + "value", + value, + NULL)); +} + +/** + * garrow_decimal32_scalar_get_value: + * @scalar: A #GArrowDecimal32Scalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 19.0.0 + */ +GArrowDecimal32 * +garrow_decimal32_scalar_get_value(GArrowDecimal32Scalar *scalar) +{ + auto priv = GARROW_DECIMAL32_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + auto arrow_scalar = std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + auto arrow_value = std::make_shared(arrow_scalar->value); + priv->value = garrow_decimal32_new_raw(&arrow_value); + } + return priv->value; +} + typedef struct GArrowDecimal64ScalarPrivate_ { GArrowDecimal64 *value; @@ -2631,6 +2754,9 @@ garrow_scalar_new_raw_valist(std::shared_ptr *arrow_scalar, case arrow::Type::type::INTERVAL_MONTH_DAY_NANO: type = GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_SCALAR; break; + case arrow::Type::type::DECIMAL32: + type = GARROW_TYPE_DECIMAL32_SCALAR; + break; case arrow::Type::type::DECIMAL64: type = GARROW_TYPE_DECIMAL64_SCALAR; break; diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h index c9de9958ad402..4f2c44199f43b 100644 --- a/c_glib/arrow-glib/scalar.h +++ b/c_glib/arrow-glib/scalar.h @@ -501,6 +501,22 @@ GARROW_AVAILABLE_IN_8_0 GArrowMonthDayNano * garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar *scalar); +#define GARROW_TYPE_DECIMAL32_SCALAR (garrow_decimal32_scalar_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowDecimal32Scalar, garrow_decimal32_scalar, GARROW, DECIMAL32_SCALAR, GArrowScalar) +struct _GArrowDecimal32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32Scalar * +garrow_decimal32_scalar_new(GArrowDecimal32DataType *data_type, GArrowDecimal32 *value); +GARROW_AVAILABLE_IN_19_0 +GArrowDecimal32 * +garrow_decimal32_scalar_get_value(GArrowDecimal32Scalar *scalar); + #define GARROW_TYPE_DECIMAL64_SCALAR (garrow_decimal64_scalar_get_type()) GARROW_AVAILABLE_IN_19_0 G_DECLARE_DERIVABLE_TYPE( diff --git a/c_glib/test/test-decimal32-scalar.rb b/c_glib/test/test-decimal32-scalar.rb new file mode 100644 index 0000000000000..cb54a6bb6a1e5 --- /dev/null +++ b/c_glib/test/test-decimal32-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal32Scalar < Test::Unit::TestCase + def setup + @data_type = Arrow::Decimal32DataType.new(8, 2) + @value = Arrow::Decimal32.new("23423445") + @scalar = Arrow::Decimal32Scalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Decimal32Scalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("234234.45", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end

From dcd0ad4cd0670ae3f39b89ab40d3cc81122ffabd Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 5 Nov 2024 06:33:28 +0900
Subject: [PATCH 38/59] GH-44624: [CI][JS] Increase "AMD64 macOS 13 NodeJS 18"
 timeout (#44625)

### Rationale for this change

It took about 25m when it succeeded. We need to increase the timeout for stable CI.

### What changes are included in this PR?

Increase the timeout to 45m from 30m.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #44624

Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
 .github/workflows/js.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index 810c154aa9c27..5ef5b37c98815 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -89,7 +89,7 @@ jobs:
     name: AMD64 macOS 13 NodeJS ${{ matrix.node }}
     runs-on: macos-13
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-    timeout-minutes: 30
+    timeout-minutes: 45
     strategy:
       fail-fast: false
       matrix:

From 13f4a1dd84f424b5b558434e6cd3cdf07a114334 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 5 Nov 2024 08:48:57 +0900
Subject: [PATCH 39/59] MINOR: [Java] Bump checker.framework.version from
 3.48.1 to 3.48.2 in /java (#44635)

Bumps `checker.framework.version` from 3.48.1 to 3.48.2.
Updates `org.checkerframework:checker-qual` from 3.48.1 to 3.48.2
Changelog

Sourced from org.checkerframework:checker-qual's changelog.

Version 3.48.2 (November 1, 2024)

Closed issues:

#6371, #6867.

Commits
  • 59f4594 new release 3.48.2
  • 93e8fac Prep for release.
  • 013a76c Update lists of aliases for @ NonNull (#6883)
  • f23bf98 Update dependency com.amazonaws:aws-java-sdk-bom to v1.12.777 (#6882)
  • 07d8845 Don't re-compute the enclosing method (#6876)
  • a70e1e9 Update dependency org.plumelib:plume-util to v1.10.0 (#6877)
  • fc99f34 Update versions.errorprone to v2.35.1 (#6875)
  • b7d9092 Update versions.errorprone to v2.34.0 (#6870)
  • cfdd5c9 Expect crash due to javac bug
  • 19419ac Cleaner logic to handle types of extends and implements clauses and fixed `ge...
  • Additional commits viewable in compare view

Updates `org.checkerframework:checker` from 3.48.1 to 3.48.2
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 84fb967f4f1f0..fa1662ad48344 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -109,7 +109,7 @@ under the License. true 2.31.0 5.14.2 - 3.48.1 + 3.48.2 1.5.11 none -Xdoclint:none From 7612d52231db4455ab2be0c338de5a16dbaf0d05 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 08:49:58 +0900 Subject: [PATCH 40/59] MINOR: [Java] Bump com.puppycrawl.tools:checkstyle from 10.18.2 to 10.20.0 in /java (#44637) Bumps [com.puppycrawl.tools:checkstyle](https://github.com/checkstyle/checkstyle) from 10.18.2 to 10.20.0.
Release notes

Sourced from com.puppycrawl.tools:checkstyle's releases.

checkstyle-10.20.0

Checkstyle 10.20.0 - https://checkstyle.org/releasenotes.html#Release_10.20.0

Breaking backward compatibility:

#15687 - JavadocMethodCheck: removed unnecessary tokens from acceptable

New:

#14424 - HideUtilityClassConstructor - Add option to skip validation based on list of annotations

Bug fixes:

#15831 - google_checks.xml not allowing eol left curly for switch statement with lambda-like construct

checkstyle-10.19.0

Checkstyle 10.19.0 - https://checkstyle.org/releasenotes.html#Release_10.19.0

New:

#9540 - WhitespaceAround: new property allowEmptySwitchBlockStatements
#15263 - UnnecessaryParenthesesCheck does not flag unnecessary parentheses in conditional expression

Bug fixes:

#15664 - false-negative in google_checks.xml for not being able to detect requirement of K & R style for FINALLY
#15769 - google_checks.xml: remove xpath suppression and false-positive indentation violations for block codes
#15685 - JavadocParagraph does not work when paragraphs have their corresponding closing tag
#15324 - Enforce preceding line break for opening braces of a case/default under switch in google_checks.xml
#15733 - JavadocParagraph: report violation with column
#15503 - JavadocParagraph: violate preceding P tag before block-level HTML tags
#15716 - google_checks.xml: JavadocParagraph should have allowNewlineParagraph as false

... (truncated)

Commits
  • 41e15b3 [maven-release-plugin] prepare release checkstyle-10.20.0
  • 719ae40 doc: release notes for 10.20.0
  • 4c67922 Issue #15831: enabled allowEmptySwitchBlockStatements property of WhitespaceA...
  • 67b98ab Issue #14814: refactor checkline into iterative method
  • 9db3909 Issue #14814: refactor findmatch into iteration method
  • 853e2ba Issue #13345: Enable examples tests for ExplicitInitializationCheck
  • 8e8df58 Issue #13345: Enable examples tests for CovariantEqualsCheck
  • ca693c7 Issue #15829: Added test class for ConstructorsDeclarationGrouping
  • c256c10 Issue #6207: Added XPath regression test for ClassTypeParameterName
  • 42cf0ad Issue #15456: Specify violation messages for ArrayTrailingComma
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.puppycrawl.tools:checkstyle&package-manager=maven&previous-version=10.18.2&new-version=10.20.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index fa1662ad48344..65d0ca5af47be 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -105,7 +105,7 @@ under the License. 1.12.0 2 - 10.18.2 + 10.20.0 true 2.31.0 5.14.2 From a5fe294b2a75ebfa2a5988223bf61440ac4230e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:59:38 -0500 Subject: [PATCH 41/59] MINOR: [Java] Bump org.codehaus.mojo:exec-maven-plugin from 3.4.1 to 3.5.0 in /java (#44552) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.codehaus.mojo:exec-maven-plugin](https://github.com/mojohaus/exec-maven-plugin) from 3.4.1 to 3.5.0.
Release notes

Sourced from org.codehaus.mojo:exec-maven-plugin's releases.

3.5.0

🚀 New features and improvements

🐛 Bug Fixes

📦 Dependency updates

👻 Maintenance

Commits
  • b80d3d6 [maven-release-plugin] prepare release 3.5.0
  • 226a8ce Update site descriptor to 2.0.0
  • 47eac15 #322, enable to control the exec:java interaction with JVM classloader more f...
  • 582aed0 Bump project version
  • 8e7fa52 Update src/main/java/org/codehaus/mojo/exec/ExecMojo.java
  • d2bdc9c Add toolchain java path to environment variables in ExecMojo - added tests an...
  • eb62d78 Add toolchain java path to environment variables in ExecMojo - added tests an...
  • 8dbbb07 Add toolchain java path to environment variables in ExecMojo - added tests an...
  • 168b368 Add toolchain java path to environment variables in ExecMojo - added tests an...
  • 491526a Add toolchain java path to environment variables in ExecMojo - added tests an...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.codehaus.mojo:exec-maven-plugin&package-manager=maven&previous-version=3.4.1&new-version=3.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 65d0ca5af47be..15c46be50d678 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -514,7 +514,7 @@ under the License. org.codehaus.mojo exec-maven-plugin - 3.4.1 + 3.5.0 org.codehaus.mojo From 640bdd8688a142bbfb32983db6597339026f3131 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:00:04 -0500 Subject: [PATCH 42/59] MINOR: [Java] Bump com.google.api.grpc:proto-google-common-protos from 2.46.0 to 2.48.0 in /java (#44553) Bumps [com.google.api.grpc:proto-google-common-protos](https://github.com/googleapis/sdk-platform-java) from 2.46.0 to 2.48.0.
Release notes

Sourced from com.google.api.grpc:proto-google-common-protos's releases.

v2.48.0

2.48.0 (2024-10-22)

Features

Bug Fixes

  • generator setting incorrect name/class for sample due to region tag (2nd attempt) (#3293) (771bd0e)

Dependencies

  • update dependency com.google.errorprone:error_prone_annotations to v2.34.0 (#3303) (5b01274)
  • update dependency com.google.errorprone:error_prone_annotations to v2.34.0 (#3304) (5bd6c9c)
  • update google api dependencies (#3282) (a9eac85)
  • update google auth library dependencies to v1.29.0 (#3302) (e64eda2)

v2.47.0

2.47.0 (2024-10-04)

Features

  • gax: add API key authentication to ClientSettings (#3137) (df08956)
  • gax: append cred-type header for auth metrics (#3186) (ca3ec24)

Bug Fixes

  • address incorrect universe domain validation when quota project id is set (#3257) (6e70c37), closes #3256
  • Disable automatically retrieving Universe Domain from Metadata Server (#3272) (f4402bf)

Dependencies

  • update dependency com.fasterxml.jackson:jackson-bom to v2.18.0 (#3248) (821e83d)
  • update dependency com.google.errorprone:error_prone_annotations to v2.33.0 (#3265) (94450a9)
  • update dependency com.google.errorprone:error_prone_annotations to v2.33.0 (#3266) (8235463)
  • update dependency com.google.guava:guava to v33.3.1-jre (#3228) (4e76207)
  • update dependency net.bytebuddy:byte-buddy to v1.15.3 (#3246) (2aad71d)
  • update google api dependencies (#3242) (02aae9d)
  • update google auth library dependencies to v1.28.0 (#3267) (6d85864)
  • update googleapis/java-cloud-bom digest to 0cd97b7 (#3260) (2d54a5d)
  • update grpc dependencies to v1.67.1 (#3258) (e08906c)
  • update grpc dependencies to v1.67.1 in dependencies.properties (#3279) (5b46e70)

... (truncated)


Commits
  • d5e74d9 chore(main): release 2.48.0 (#3295)
  • e64eda2 deps: update google auth library dependencies to v1.29.0 (#3302)
  • 5b01274 deps: update dependency com.google.errorprone:error_prone_annotations to v2.3...
  • 5bd6c9c deps: update dependency com.google.errorprone:error_prone_annotations to v2.3...
  • 7512cfa chore: update base image to latest (#3301)
  • fd0b291 chore: update googleapis commit at Sat Oct 5 02:25:08 UTC 2024 (#3283)
  • 16365db chore: suppress pull progress (#3296)
  • a9eac85 deps: update google api dependencies (#3282)
  • dfe1a50 feat: selectively generate libraries (#3290)
  • 771bd0e fix: generator setting incorrect name/class for sample due to region tag (2nd...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.google.api.grpc:proto-google-common-protos&package-manager=maven&previous-version=2.46.0&new-version=2.48.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/flight/flight-core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 3127bc0d949f1..461c415535764 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -134,7 +134,7 @@ under the License. com.google.api.grpc proto-google-common-protos - 2.46.0 + 2.48.0 test From 1a6de9d6590c6cc9afb28d7d5590378ee3d0844f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:01:37 -0500 Subject: [PATCH 43/59] MINOR: [Java] Bump io.grpc:grpc-bom from 1.65.0 to 1.68.1 in /java (#44639) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [io.grpc:grpc-bom](https://github.com/grpc/grpc-java) from 1.65.0 to 1.68.1.
Release notes

Sourced from io.grpc:grpc-bom's releases.

v1.68.1

gRPC Java 1.68.1 Release Notes

v1.68.0 was a mistake. This is the first release of version 1.68.x

Bug Fixes

  • xds: Fix NullPointerException introduced in "Fix load reporting when pick first is used for locality-routing" (#11553). This was in 1.67.1 but not 1.68.0

Behavior Changes

  • core: JSON parsing rejects duplicate keys in objects (#11575) (4be69e3f8). This is the existing behavior in C core. Duplicate keys in objects are dangerous as which value takes effect is undefined. Previously, the last value was used (see the toy sketch after this list).
  • okhttp: Detect transport executors with no remaining threads (#11503) (3a6be9ca1). The transport uses two threads, but one is on-demand. If the executor provided to builder.transportExecutor() runs out of threads (e.g., it is a fixed-size thread pool), all transports can be wedged, unable to run on-demand tasks, until keepalive kills one of them. Two threads are now used when handshaking a new transport, and the transport will time out after 1 second with “Timed out waiting for second handshake thread” if two threads are unavailable
  • gcp-csm-o11y: Get mesh_id value from CSM_MESH_ID environment variable, instead of getting it from bootstrap file (84d30afad)
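As a toy illustration of why the duplicate-key change above matters (this is not gRPC's parser, and the "timeout" key is invented for the example), a naive "last value wins" parse silently drops the earlier value:

```c++
// Toy sketch only: mimics the old "last value wins" behavior that the
// change above now rejects. Not gRPC code; the key name is made up.
#include <iostream>
#include <map>
#include <string>

int main() {
  // Conceptually parsing: {"timeout": "1s", "timeout": "30s"}
  std::map<std::string, std::string> config;
  config["timeout"] = "1s";
  config["timeout"] = "30s";  // silently overwrites the first value
  std::cout << config["timeout"] << "\n";  // prints "30s" -- was that intended?
  return 0;
}
```

Rejecting the duplicate outright removes the ambiguity instead of silently picking one of the two values.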

Improvements

  • New grpc-context-override-opentelemetry artifact (#11523) (782a44ad6) (#11599) (e59ae5fad). This is a io.grpc.Context storage override to store its state in io.opentelemetry.context.Context. Libraries should not add a dependency on this artifact, as applications can only have one storage override in their classpath
  • New grpc-s2a artifact. It is a transport that offloads the handshake similar to ALTS, but for TLS. It provides io.grpc.s2a.S2AChannelCredentials
  • api: Enhance name resolver `ResolutionResult` to hold addresses or error so the single listener API onResult2 is used to convey both success and error cases for name resolution (#11330) (1ded8aff8)
  • core: Handle NameResolver/LoadBalancer exceptions when panicking (b692b9d26). This expands the class of bugs that will fail RPCs with the panic error, versus some undefined behavior
  • core: Use the default service config in case of initial name resolver address resolution error (#11577) (fa26a8bc5)
  • core: StreamTracer.inboundMessageRead() now reports uncompressed message size when the message does not need compression (#11598) (2aae68e11). Previously it always reported -1 (unknown)
  • netty: Avoid TCP_USER_TIMEOUT warning when explicitly specifying a non-epoll channel type to use (#11564) (62f409810)
  • okhttp: Don't warn about missing Conscrypt (6f3542297). This is especially helpful when using TLS but not running on Android
  • android: For UdsChannelBuilder, use fake IP instead of localhost (a908b5e40). This avoids an unnecessary DNS lookup
  • xds: Add xDS node ID in select control plane errors to enable cross-referencing with control plane logs when debugging (f3cf7c3c7)
  • xds: Enhanced how ADS stream terminations are handled, specifically addressing cases where a response has or hasn't been received (2e9c3e19f)
  • binder: Update status code documentation for Android 11's package visibility rules. (#11551) (99be6e985)
  • binder: Update binderDied() error description to spell out the possibilities for those unfamiliar with Android internals. (#11628) (46c1b387f)
  • example-gauth: Use application default creds instead of file argument (#11595) (94a0a0d1c)
  • opentelemetry: Experimental OpenTelemetry tracing is available. Set the GRPC_EXPERIMENTAL_ENABLE_OTEL_TRACING environment variable to true to enable tracing support in GrpcOpenTelemetry (#11409, #11477)(043ba55, 421e237)

Dependencies

  • Updated protobuf-java to 3.25.5. This helps avoid CVE-2024-7254 (2ff837ab6)

Thanks to: @Juneezee, @lgalfaso, @bestbeforetoday, @hlx502, @JoeCqupt

v1.68.0 MISTAKE

This was supposed to be v1.67.0, but there was a mistake during the release process. This has everything in v1.67.1, except for:

  • xds: Fix NullPointerException introduced in "Fix load reporting when pick first is used for locality-routing" (grpc/grpc-java#11553)

v1.67.1

gRPC Java 1.67.1 Release Notes

... (truncated)

Commits
  • 16f93c8 Bump version to 1.68.1
  • 2b53352 Update README etc to reference 1.68.1
  • 135f433 Revert "stub: Ignore unary response on server if status is not OK" (#11636) (...
  • 2d0c158 Bump to 1.68.1-SNAPSHOT (#11637)
  • 46c1b38 Update binderDied() error description to spell out the possibilities for thos...
  • b65cbf5 inprocess: Support tracing message sizes guarded by flag (#11629)
  • 62f4098 netty: Avoid TCP_USER_TIMEOUT warning when not using epoll (#11564)
  • 00c8bc7 Minor grammar fix in Javadoc (#11609)
  • 4be69e3 core: SpiffeUtil API for extracting Spiffe URI and loading TrustBundles (#11575)
  • 1e0928f api: fix javadoc of CallCredentials.applyRequestMetadata
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=io.grpc:grpc-bom&package-manager=maven&previous-version=1.65.0&new-version=1.68.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 15c46be50d678..7e3e908716d0b 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -97,7 +97,7 @@ under the License. 2.0.16 33.3.1-jre 4.1.114.Final - 1.65.0 + 1.68.1 3.25.4 2.18.0 3.4.1 From 447f27b85677e03d8d47984562de55a51a9dfb44 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:19:50 -0500 Subject: [PATCH 44/59] MINOR: [Java] Bump logback.version from 1.5.11 to 1.5.12 in /java (#44550) Bumps `logback.version` from 1.5.11 to 1.5.12. Updates `ch.qos.logback:logback-classic` from 1.5.11 to 1.5.12
Commits

Updates `ch.qos.logback:logback-core` from 1.5.11 to 1.5.12
Commits

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 7e3e908716d0b..c4d6147d63e05 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -110,7 +110,7 @@ under the License. 2.31.0 5.14.2 3.48.2 - 1.5.11 + 1.5.12 none -Xdoclint:none From 593c75c48e8e1ad2a6dd4077d50712bb828abccc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:40:51 -0500 Subject: [PATCH 45/59] MINOR: [Java] Bump com.fasterxml.jackson:jackson-bom from 2.18.0 to 2.18.1 in /java (#44636) Bumps [com.fasterxml.jackson:jackson-bom](https://github.com/FasterXML/jackson-bom) from 2.18.0 to 2.18.1.
Commits
  • ef33ac7 [maven-release-plugin] prepare release jackson-bom-2.18.1
  • f43bf9f Prepare for 2.18.1 release
  • 6f5259d Change to snapshot version of jackson-parent
  • 3f21ec5 Back to snapshot dep
  • bb45933 [maven-release-plugin] prepare for next development iteration
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.fasterxml.jackson:jackson-bom&package-manager=maven&previous-version=2.18.0&new-version=2.18.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index c4d6147d63e05..516186a9230a5 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -99,7 +99,7 @@ under the License. 4.1.114.Final 1.68.1 3.25.4 - 2.18.0 + 2.18.1 3.4.1 24.3.25 1.12.0 From 39c64e045e9c0ce2a49fdb542f7d8d5a691e4b27 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:08:33 -0500 Subject: [PATCH 46/59] MINOR: [Java] Bump com.github.luben:zstd-jni from 1.5.6-6 to 1.5.6-7 in /java (#44548) Bumps [com.github.luben:zstd-jni](https://github.com/luben/zstd-jni) from 1.5.6-6 to 1.5.6-7.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.github.luben:zstd-jni&package-manager=maven&previous-version=1.5.6-6&new-version=1.5.6-7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/compression/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/compression/pom.xml b/java/compression/pom.xml index 094e31afa4738..8cc4909034abe 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -55,7 +55,7 @@ under the License. com.github.luben zstd-jni - 1.5.6-6 + 1.5.6-7 From 4274db81e607f833cd022902eb4d01e841a930c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:20:23 -0500 Subject: [PATCH 47/59] MINOR: [Java] Bump org.bouncycastle:bcpkix-jdk18on from 1.78.1 to 1.79 in /java (#44638) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.bouncycastle:bcpkix-jdk18on](https://github.com/bcgit/bc-java) from 1.78.1 to 1.79.
Changelog

Sourced from org.bouncycastle:bcpkix-jdk18on's changelog.

2.1.1 Version Release: 1.80 Date: TBD.

2.2.1 Version Release: 1.79 Date: 2024, 30th October.

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.bouncycastle:bcpkix-jdk18on&package-manager=maven&previous-version=1.78.1&new-version=1.79)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/flight/flight-sql-jdbc-core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 3e99e4b77ae3b..fc033a5ea7ab1 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -121,7 +121,7 @@ under the License. org.bouncycastle bcpkix-jdk18on - 1.78.1 + 1.79 From 46e7f38af71ed701815593f4b6327ceb464d3686 Mon Sep 17 00:00:00 2001 From: Benedikt Reinartz Date: Tue, 5 Nov 2024 17:02:27 +0100 Subject: [PATCH 48/59] GH-23995: [C#] Make PrimitiveArrayBuilder constructor public (#44596) Fixes #23995. Making these constructors `public` allows for writing custom builders. ### Rationale for this change Allows for writing custom builders. ### What changes are included in this PR? Only change of visibility on the default constructors of `PrimitiveArrayBuilder`. ### Are these changes tested? There is not much to test :) ### Are there any user-facing changes? See above. * GitHub Issue: #23995 Authored-by: Benedikt Reinartz Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs index ae02173fb0df4..b3583842c1ed2 100644 --- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs +++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs @@ -30,7 +30,7 @@ public abstract class PrimitiveArrayBuilder : IArr public int Length => ArrayBuilder.Length; - internal PrimitiveArrayBuilder(IArrowArrayBuilder> builder) + public PrimitiveArrayBuilder(IArrowArrayBuilder> builder) { ArrayBuilder = builder ?? throw new ArgumentNullException(nameof(builder)); } @@ -110,7 +110,7 @@ public abstract class PrimitiveArrayBuilder : IArrowArrayBu public int Length => ValueBuffer.Length; protected int NullCount => ValidityBuffer.UnsetBitCount; - internal PrimitiveArrayBuilder() + public PrimitiveArrayBuilder() { ValueBuffer = new ArrowBuffer.Builder(); ValidityBuffer = new ArrowBuffer.BitmapBuilder(); From c6f076a7e6e85a10e4b61824c33b6a639ee92e13 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 5 Nov 2024 14:44:57 -0600 Subject: [PATCH 49/59] GH-44648: [CI] Remove autotune and rebase from commentbot (#44649) ### Rationale for this change Removing code that doesn't work ### What changes are included in this PR? Deleting from the workflow ### Are these changes tested? Changes are part of CI ### Are there any user-facing changes? No * GitHub Issue: #44648 Authored-by: Jonathan Keane Signed-off-by: Jonathan Keane --- .github/workflows/comment_bot.yml | 123 +----------------------------- 1 file changed, 1 insertion(+), 122 deletions(-) diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 8885171f0ab3f..9e0e8ab47e102 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -26,8 +26,7 @@ on: permissions: contents: read - pull-requests: write - + jobs: crossbow: name: Listen! 
@@ -55,126 +54,6 @@ jobs: --event-name ${{ github.event_name }} \ --event-payload ${{ github.event_path }} - autotune: - name: "Fix all the things" - if: startsWith(github.event.comment.body, '@github-actions autotune') - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - - uses: r-lib/actions/pr-fetch@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: See what is different - run: | - set -ex - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - git remote add upstream https://github.com/apache/arrow - git fetch upstream - - changed() { - git diff --name-only upstream/$DEFAULT_BRANCH... | grep -e "$1" >/dev/null 2>&1 - } - if changed '^r/.*\.R$'; then - echo "R_DOCS=true" >> $GITHUB_ENV - echo "R_CODE=true" >> $GITHUB_ENV - fi - if changed 'cmake' || changed 'CMake'; then - echo "CMAKE_FORMAT=true" >> $GITHUB_ENV - fi - if changed '^cpp/src'; then - echo "CLANG_FORMAT_CPP=true" >> $GITHUB_ENV - fi - if changed '^r/src'; then - echo "CLANG_FORMAT_R=true" >> $GITHUB_ENV - fi - - name: Ensure clang-format has the appropriate version - if: env.CMAKE_FORMAT == 'true' || - env.CLANG_FORMAT_CPP == 'true' || - env.CLANG_FORMAT_R == 'true' || - endsWith(github.event.comment.body, 'everything') - run: | - set -e - . .env # To get the clang version we use - sudo apt update - sudo apt install -y clang-format-${CLANG_TOOLS} - - name: Run cmake_format - if: env.CMAKE_FORMAT == 'true' || endsWith(github.event.comment.body, 'everything') - run: | - set -ex - export PATH=/home/runner/.local/bin:$PATH - python3 -m pip install --upgrade pip setuptools wheel - python3 -m pip install -e dev/archery[lint] - archery lint --cmake-format --fix - - name: Run clang-format on cpp - if: env.CLANG_FORMAT_CPP == 'true' || endsWith(github.event.comment.body, 'everything') - run: | - . .env # To get the clang version we use - cpp/build-support/run_clang_format.py \ - --clang_format_binary=clang-format-${CLANG_TOOLS} \ - --exclude_glob=cpp/build-support/lint_exclusions.txt \ - --source_dir=cpp/src --quiet --fix - - name: Run clang-format on r - if: env.CLANG_FORMAT_R == 'true' || endsWith(github.event.comment.body, 'everything') - run: | - . .env # To get the clang version we use - cpp/build-support/run_clang_format.py \ - --clang_format_binary=clang-format-${CLANG_TOOLS} \ - --exclude_glob=cpp/build-support/lint_exclusions.txt \ - --source_dir=r/src --quiet --fix - - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4 - if: env.R_DOCS == 'true' || env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything') - - name: Update R docs - if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything') - shell: Rscript {0} - run: | - source("ci/etc/rprofile") - install.packages(c("remotes", "roxygen2")) - remotes::install_deps("r") - roxygen2::roxygenize("r") - - name: Style R code - if: env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything') - shell: Rscript {0} - run: | - changed_files <- system("git diff --name-only upstream/${{ github.event.repository.default_branch }}... 
2>&1", intern = TRUE) - # only grab the .R files under r/ - changed_files <- grep('^r/.*\\.R$', changed_files, value = TRUE) - # remove codegen.R and other possible exclusions - changed_files <- changed_files[!changed_files %in% file.path("r", source("r/.styler_excludes.R")$value)] - source("ci/etc/rprofile") - install.packages(c("remotes", "styler")) - remotes::install_deps("r") - styler::style_file(changed_files) - - name: Commit results - run: | - git config user.name "$(git log -1 --pretty=format:%an)" - git config user.email "$(git log -1 --pretty=format:%ae)" - git commit -a -m 'Autoformat/render all the things [automated commit]' || echo "No changes to commit" - - uses: r-lib/actions/pr-push@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - rebase: - name: "Rebase" - if: startsWith(github.event.comment.body, '@github-actions rebase') - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - - uses: r-lib/actions/pr-fetch@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Rebase on ${{ github.repository }} default branch - run: | - set -ex - git config user.name "$(git log -1 --pretty=format:%an)" - git config user.email "$(git log -1 --pretty=format:%ae)" - git remote add upstream https://github.com/${{ github.repository }} - git fetch --unshallow upstream ${{ github.event.repository.default_branch }} - git rebase upstream/${{ github.event.repository.default_branch }} - - uses: r-lib/actions/pr-push@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - args: "--force" - issue_assign: name: "Assign issue" permissions: From f3b8d6b45155b35c902a11443f1b29cd846832a6 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 5 Nov 2024 18:14:09 -0500 Subject: [PATCH 50/59] MINOR: [Java] Revert io.grpc:grpc-bom to 1.65.0 (#44645) ### Rationale for this change I missed that gRPC (Protobuf) cannot be upgraded due to one of our CI jobs using an older libstdc++. ### What changes are included in this PR? This reverts commit 1a6de9d6590c6cc9afb28d7d5590378ee3d0844f. ### Are these changes tested? Yes ### Are there any user-facing changes? No Authored-by: David Li Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 516186a9230a5..5c6719ee0f2c8 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -97,7 +97,7 @@ under the License. 2.0.16 33.3.1-jre 4.1.114.Final - 1.68.1 + 1.65.0 3.25.4 2.18.1 3.4.1 From c3601a97a0718ae47726e6c134cbed4b98bd1a36 Mon Sep 17 00:00:00 2001 From: Maksim Yegorov <997437+myegorov@users.noreply.github.com> Date: Tue, 5 Nov 2024 19:07:06 -0500 Subject: [PATCH 51/59] GH-44344: [Java] fix VectorSchemaRoot.getTransferPair for NullVector (#44631) ### Rationale for this change Do not throw [UnsupportedOperationException("Tried to get allocator from NullVector")](https://github.com/apache/arrow/blob/release-18.0.0-rc0/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java#L160) from [VectorSchemaRoot.slice()](https://github.com/apache/arrow/blob/release-18.0.0-rc0/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java#L341) when slicing a VSR containing a NullVector or ZeroVector. Details in https://github.com/apache/arrow/issues/44344 ### Are these changes tested? Added unit test that would trigger an UnsupportedOperationException on the legacy path. 
* GitHub Issue: #44344 Authored-by: Maksim Yegorov <59841139+maksimyego-db@users.noreply.github.com> Signed-off-by: David Li --- .../org/apache/arrow/vector/NullVector.java | 9 ++++++- .../org/apache/arrow/vector/ValueVector.java | 6 +++++ .../arrow/vector/TestSplitAndTransfer.java | 25 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java index 227ca716f6391..6bfe540d232fc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -155,9 +155,16 @@ public boolean allocateNewSafe() { @Override public void reAlloc() {} + /* + * IMPORTANT NOTE + * It's essential that NullVector (and ZeroVector) do not require BufferAllocator for any data storage. + * However, some methods of the parent interface may require passing in a BufferAllocator, even if null. + * + * @return null + */ @Override public BufferAllocator getAllocator() { - throw new UnsupportedOperationException("Tried to get allocator from NullVector"); + return null; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 724941aa2a1e8..0a45409eb9860 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -80,6 +80,12 @@ public interface ValueVector extends Closeable, Iterable { */ void reAlloc(); + /** + * Get the allocator associated with the vector. CAVEAT: Some ValueVector subclasses (e.g. + * NullVector) do not require an allocator for data storage and may return null. + * + * @return Returns nullable allocator. 
+ */ BufferAllocator getAllocator(); /** diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index a3f25bc5207b6..6aace956214ff 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -198,6 +198,31 @@ public void testWithEmptyVector() { toDUV.clear(); } + @Test + public void testWithNullVector() { + int valueCount = 123; + int startIndex = 10; + NullVector fromNullVector = new NullVector("nullVector"); + fromNullVector.setValueCount(valueCount); + TransferPair transferPair = fromNullVector.getTransferPair(fromNullVector.getAllocator()); + transferPair.splitAndTransfer(startIndex, valueCount - startIndex); + NullVector toNullVector = (NullVector) transferPair.getTo(); + + assertEquals(valueCount - startIndex, toNullVector.getValueCount()); + // no allocations to clear for NullVector + } + + @Test + public void testWithZeroVector() { + ZeroVector fromZeroVector = new ZeroVector("zeroVector"); + TransferPair transferPair = fromZeroVector.getTransferPair(fromZeroVector.getAllocator()); + transferPair.splitAndTransfer(0, 0); + ZeroVector toZeroVector = (ZeroVector) transferPair.getTo(); + + assertEquals(0, toZeroVector.getValueCount()); + // no allocations to clear for ZeroVector + } + @Test /* VarCharVector */ public void test() throws Exception { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { From 6decf1cca077488ec58aadd5a0a43408342ab612 Mon Sep 17 00:00:00 2001 From: Igor Anferov Date: Wed, 6 Nov 2024 08:25:20 +0000 Subject: [PATCH 52/59] GH-44464: [C++] Added rvalue-reference-qualified overload for arrow::Result::status() returning value instead of reference (#44477) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change In the current implementation, `arrow::Result::status()` always returns the internal `status_` field by a const lvalue reference, regardless of the value category of `Result`. This can lead to potential bugs. For example, consider the following code: ```c++ if (auto&& status = functionReturningArrowResult().status(); status.ok()) return 0; return -1; ``` In this case, the call to `status.ok()` results in undefined behavior because `status` is a dangling const lvalue reference that points to an object returned by `functionReturningArrowResult()`, which is destroyed after the semicolon. If `arrow::Result` had two overloads of the `status()` method for different reference qualifiers: ```c++ template <…> class Result { … auto status() const & -> const Status& { ... } auto status() && -> Status { ... } … }; ``` This would prevent such bugs and potentially allow for better optimization, as the `Status` could be moved from an expiring `Result` object. ### What changes are included in this PR? This PR adds the proposed overload for the `arrow::Result::status()` method and makes other rvalue-qualified `arrow::Result` methods preserve object ref-category during tail `status()` calls. Unfortunately, we can't move the `status_` field in the rvalue-qualified `status()` method, as the state of `status_` must be preserved until the destructor is called. This is because the `storage_` field is either destructed or considered empty based on the state of `status_`. ### Are these changes tested? 
Since this change is trivial (the new overload doesn't modify the `Result` object and returns `Status` by value), there's nothing significant to test, so no new tests were added. ### Are there any user-facing changes? No existing code will be broken by this change. In all cases where `status()` is called on an lvalue `Result`, the same reference-returning overload will be called. Meanwhile, code calling `status()` on an rvalue `Result` will invoke the new overload, returning `Status` by value instead. * GitHub Issue: #44464 Authored-by: igor-anferov Signed-off-by: mwish --- cpp/src/arrow/result.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h index 6786d2b3fcbfd..091351154251e 100644 --- a/cpp/src/arrow/result.h +++ b/cpp/src/arrow/result.h @@ -294,7 +294,18 @@ class [[nodiscard]] Result : public util::EqualityComparable> { /// /// \return The stored non-OK status object, or an OK status if this object /// has a value. - constexpr const Status& status() const { return status_; } + constexpr const Status& status() const& { return status_; } + + /// Gets the stored status object, or an OK status if a `T` value is stored. + /// + /// \return The stored non-OK status object, or an OK status if this object + /// has a value. + Status status() && { + if (ok()) return Status::OK(); + auto tmp = Status::UnknownError("Uninitialized Result"); + std::swap(status_, tmp); + return tmp; + } /// Gets the stored `T` value. /// @@ -350,7 +361,7 @@ class [[nodiscard]] Result : public util::EqualityComparable> { std::is_constructible::value>::type> Status Value(U* out) && { if (!ok()) { - return status(); + return std::move(*this).status(); } *out = U(MoveValueUnsafe()); return Status::OK(); @@ -380,7 +391,7 @@ class [[nodiscard]] Result : public util::EqualityComparable> { typename EnsureResult()(std::declval()))>::type Map( M&& m) && { if (!ok()) { - return status(); + return std::move(*this).status(); } return std::forward(m)(MoveValueUnsafe()); } @@ -402,7 +413,7 @@ class [[nodiscard]] Result : public util::EqualityComparable> { std::is_constructible::value>::type> Result As() && { if (!ok()) { - return status(); + return std::move(*this).status(); } return U(MoveValueUnsafe()); } From 40b2fca4742e2692a917755fd8db2939e10fa02d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 6 Nov 2024 04:47:39 -0500 Subject: [PATCH 53/59] GH-43631: [C][Format] Add ArrowAsyncDeviceStreamHandler interface (#43632) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change See https://github.com/apache/arrow-adbc/issues/811 and https://github.com/apache/arrow/issues/43631 ### What changes are included in this PR? Definition of `ArrowAsyncDeviceStreamHandler` and addition of it to the docs. I've sent an [email to the mailing list](https://lists.apache.org/thread/yfokmfkrmmp7tqvq0m3rshcvloq278cq) to start a discussion on this topic, so this may change over time due to those discussions. 
* GitHub Issue: #43631 Lead-authored-by: Matt Topol Co-authored-by: Felipe Oliveira Carvalho Co-authored-by: Sutou Kouhei Co-authored-by: Raúl Cumplido Co-authored-by: Dane Pitkin Co-authored-by: Antoine Pitrou Co-authored-by: David Li Co-authored-by: Ian Cook Signed-off-by: Matt Topol --- cpp/src/arrow/c/abi.h | 206 +++++++++++ docs/source/format/CDeviceDataInterface.rst | 363 ++++++++++++++++++++ 2 files changed, 569 insertions(+) diff --git a/cpp/src/arrow/c/abi.h b/cpp/src/arrow/c/abi.h index db051fff5ff05..9dc142bd080df 100644 --- a/cpp/src/arrow/c/abi.h +++ b/cpp/src/arrow/c/abi.h @@ -228,6 +228,212 @@ struct ArrowDeviceArrayStream { #endif // ARROW_C_DEVICE_STREAM_INTERFACE +#ifndef ARROW_C_ASYNC_STREAM_INTERFACE +# define ARROW_C_ASYNC_STREAM_INTERFACE + +// EXPERIMENTAL: ArrowAsyncTask represents available data from a producer that was passed +// to an invocation of `on_next_task` on the ArrowAsyncDeviceStreamHandler. +// +// The reason for this Task approach instead of the Async interface returning +// the Array directly is to allow for more complex thread handling and reducing +// context switching and data transfers between CPU cores (e.g. from one L1/L2 +// cache to another) if desired. +// +// For example, the `on_next_task` callback can be called when data is ready, while +// the producer puts potential "decoding" logic in the `ArrowAsyncTask` object. This +// allows for the producer to manage the I/O on one thread which calls `on_next_task` +// and the consumer can determine when the decoding (producer logic in the `extract_data` +// callback of the task) occurs and on which thread, to avoid a CPU core transfer +// (data staying in the L2 cache). +struct ArrowAsyncTask { + // This callback should populate the ArrowDeviceArray associated with this task. + // The order of ArrowAsyncTasks provided by the producer enables a consumer to + // ensure the order of data to process. + // + // This function is expected to be synchronous, but should not perform any blocking + // I/O. Ideally it should be as cheap as possible so as to not tie up the consumer + // thread unnecessarily. + // + // Returns: 0 if successful, errno-compatible error otherwise. + // + // If a non-0 value is returned then it should be followed by a call to `on_error` + // on the appropriate ArrowAsyncDeviceStreamHandler. This is because it's highly + // likely that whatever is calling this function may be entirely disconnected from + // the current control flow. Indicating an error here with a non-zero return allows + // the current flow to be aware of the error occurring, while still allowing any + // logging or error handling to still be centralized in the `on_error` callback of + // the original Async handler. + // + // Rather than a release callback, any required cleanup should be performed as part + // of the invocation of `extract_data`. Ownership of the Array is passed to the consumer + // calling this, and so it must be released separately. + // + // It is only valid to call this method exactly once. + int (*extract_data)(struct ArrowArrayTask* self, struct ArrowDeviceArray* out); + + // opaque task-specific data + void* private_data; +}; + +// EXPERIMENTAL: ArrowAsyncProducer represents a 1-to-1 relationship between an async +// producer and consumer. This object allows the consumer to perform backpressure and flow +// control on the asynchronous stream processing. This object must be owned by the +// producer who creates it, and thus is responsible for cleaning it up. 
+struct ArrowAsyncProducer { + // A consumer must call this function to start receiving on_next_task calls. + // + // It *must* be valid to call this synchronously from within `on_next_task` or + // `on_schema`, but this function *must not* immediately call `on_next_task` so as + // to avoid recursion and reentrant callbacks. + // + // After cancel has been called, additional calls to this function must be NOPs, + // but allowed. While not cancelled, calling this function must register the + // given number of additional arrays/batches to be produced with the producer. + // The producer should only call `on_next_task` at most the registered number + // of arrays before propagating backpressure. + // + // Any error encountered by calling request must be propagated by calling the `on_error` + // callback of the ArrowAsyncDeviceStreamHandler. + // + // While not cancelled, any subsequent calls to `on_next_task`, `on_error` or + // `release` should be scheduled by the producer to be called later. + // + // It is invalid for a consumer to call this with a value of n <= 0, producers should + // error if given such a value. + void (*request)(struct ArrowAsyncProducer* self, int64_t n); + + // This cancel callback signals a producer that it must eventually stop making calls + // to on_next_task. It must be idempotent and thread-safe. After calling cancel once, + // subsequent calls must be NOPs. This must not call any consumer-side handlers other + // than `on_error`. + // + // It is not required that calling cancel affect the producer immediately, only that it + // must eventually stop calling on_next_task and subsequently call release on the + // async handler. As such, a consumer must be prepared to receive one or more calls to + // `on_next_task` even after calling cancel if there are still requested arrays pending. + // + // Successful cancellation should *not* result in the producer calling `on_error`, it + // should finish out any remaining tasks and eventually call `release`. + // + // Any error encountered during handling a call to cancel must be reported via the + // on_error callback on the async stream handler. + void (*cancel)(struct ArrowAsyncProducer* self); + + // Any additional metadata tied to a specific stream of data. This must either be NULL + // or a valid pointer to metadata which is encoded in the same way schema metadata + // would be. Non-null metadata must be valid for the lifetime of this object. As an + // example a producer could use this to provide the total number of rows and/or batches + // in the stream if known. + const char* additional_metadata; + + // producer-specific opaque data. + void* private_data; +}; + +// EXPERIMENTAL: Similar to ArrowDeviceArrayStream, except designed for an asynchronous +// style of interaction. While ArrowDeviceArrayStream provides producer +// defined callbacks, this is intended to be created by the consumer instead. +// The consumer passes this handler to the producer, which in turn uses the +// callbacks to inform the consumer of events in the stream. +struct ArrowAsyncDeviceStreamHandler { + // Handler for receiving a schema. The passed in stream_schema must be + // released or moved by the handler (producer is giving ownership of the schema to + // the handler, but not ownership of the top level object itself). + // + // With the exception of an error occurring (on_error), this must be the first + // callback function which is called by a producer and must only be called exactly + // once. 
As such, the producer should provide a valid ArrowAsyncProducer instance + // so the consumer can control the flow. See the documentation on ArrowAsyncProducer + // for how it works. The ArrowAsyncProducer is owned by the producer who calls this + // function and thus the producer is responsible for cleaning it up when calling + // the release callback of this handler. + // + // If there is any additional metadata tied to this stream, it will be provided as + // a non-null value for the `additional_metadata` field of the ArrowAsyncProducer + // which will be valid at least until the release callback is called. + // + // Return value: 0 if successful, `errno`-compatible error otherwise + // + // A producer that receives a non-zero return here should stop producing and eventually + // call release instead. + int (*on_schema)(struct ArrowAsyncDeviceStreamHandler* self, + struct ArrowSchema* stream_schema); + + // Handler for receiving data. This is called when data is available providing an + // ArrowAsyncTask struct to signify it. The producer indicates the end of the stream + // by passing NULL as the value for the task rather than a valid pointer to a task. + // The task object is only valid for the lifetime of this function call, if a consumer + // wants to utilize it after this function returns, it must copy or move the contents + // of it to a new ArrowAsyncTask object. + // + // The `request` callback of a provided ArrowAsyncProducer must be called in order + // to start receiving calls to this handler. + // + // The metadata argument can be null or can be used by a producer + // to pass arbitrary extra information to the consumer (such as total number + // of rows, context info, or otherwise). The data should be passed using the same + // encoding as the metadata within the ArrowSchema struct itself (defined in + // the spec at + // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata) + // + // If metadata is non-null then it only needs to exist for the lifetime of this call, + // a consumer who wants it to live after that must copy it to ensure lifetime. + // + // A producer *must not* call this concurrently from multiple different threads. + // + // A consumer must be prepared to receive one or more calls to this callback even + // after calling cancel on the corresponding ArrowAsyncProducer, as cancel does not + // guarantee it happens immediately. + // + // Return value: 0 if successful, `errno`-compatible error otherwise. + // + // If the consumer returns a non-zero return from this method, that indicates to the + // producer that it should stop propagating data as an error occurred. After receiving + // such a return, the only interaction with this object is for the producer to call + // the `release` callback. + int (*on_next_task)(struct ArrowAsyncDeviceStreamHandler* self, + struct ArrowAsyncTask* task, const char* metadata); + + // Handler for encountering an error. The producer should call release after + // this returns to clean up any resources. The `code` passed in can be any error + // code that a producer wants, but should be errno-compatible for consistency. + // + // If the message or metadata are non-null, they will only last as long as this + // function call. The consumer would need to perform a copy of the data if it is + // necessary for them to live past the lifetime of this call. 
+ // + // Error metadata should be encoded as with metadata in ArrowSchema, defined in + // the spec at + // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata + // + // It is valid for this to be called by a producer with or without a preceding call + // to ArrowAsyncProducer.request. + // + // This callback must not call any methods of an ArrowAsyncProducer object. + void (*on_error)(struct ArrowAsyncDeviceStreamHandler* self, int code, + const char* message, const char* metadata); + + // Release callback to release any resources for the handler. Should always be + // called by a producer when it is done utilizing a handler. No callbacks should + // be called after this is called. + // + // It is valid for the release callback to be called by a producer with or without + // a preceding call to ArrowAsyncProducer.request. + // + // The release callback must not call any methods of an ArrowAsyncProducer object. + void (*release)(struct ArrowAsyncDeviceStreamHandler* self); + + // MUST be populated by the producer BEFORE calling any callbacks other than release. + // This provides the connection between a handler and its producer, and must exist until + // the release callback is called. + struct ArrowAsyncProducer* producer; + + // Opaque handler-specific data + void* private_data; +}; + +#endif // ARROW_C_ASYNC_STREAM_INTERFACE + #ifdef __cplusplus } #endif diff --git a/docs/source/format/CDeviceDataInterface.rst b/docs/source/format/CDeviceDataInterface.rst index 59433bae47e27..fbb2012c3059b 100644 --- a/docs/source/format/CDeviceDataInterface.rst +++ b/docs/source/format/CDeviceDataInterface.rst @@ -506,6 +506,8 @@ could be used for any device: arr->array.release(&arr->array); } +.. _c-device-stream-interface: + Device Stream Interface ======================= @@ -650,6 +652,367 @@ The stream source is not assumed to be thread-safe. Consumers wanting to call ``get_next`` from several threads should ensure those calls are serialized. +Async Device Stream Interface +============================= + +.. warning:: + + Experimental: The Async C Device Stream interface is experimental in its current + form. Based on feedback and usage the protocol definition may change until + it is fully standardized. + +The :ref:`C stream interface ` provides a synchronous +API centered around the consumer calling the producer functions to retrieve +the next record batch. For concurrent communication between producer and consumer, +the ``ArrowAsyncDeviceStreamHandler`` can be used. This interface is non-opinionated +and may fit into different concurrent communication models. + +Semantics +--------- + +Rather than the producer providing a structure of callbacks for a consumer to +call and retrieve records, the Async interface is a structure allocated and populated by the consumer. +The consumer allocated struct provides handler callbacks for the producer to call +when the schema and chunks of data are available. + +In addition to the ``ArrowAsyncDeviceStreamHandler``, there are also two additional +structs used for the full data flow: ``ArrowAsyncTask`` and ``ArrowAsyncProducer``. + +Structure Definition +-------------------- + +The C device async stream interface consists of three ``struct`` definitions: + +.. 
code-block:: c

+   #ifndef ARROW_C_ASYNC_STREAM_INTERFACE
+   #define ARROW_C_ASYNC_STREAM_INTERFACE
+
+   struct ArrowAsyncTask {
+     int (*extract_data)(struct ArrowAsyncTask* self, struct ArrowDeviceArray* out);
+
+     void* private_data;
+   };
+
+   struct ArrowAsyncProducer {
+     void (*request)(struct ArrowAsyncProducer* self, int64_t n);
+     void (*cancel)(struct ArrowAsyncProducer* self);
+
+     const char* additional_metadata;
+     void* private_data;
+   };
+
+   struct ArrowAsyncDeviceStreamHandler {
+     // consumer-specific handlers
+     int (*on_schema)(struct ArrowAsyncDeviceStreamHandler* self,
+                      struct ArrowSchema* stream_schema);
+     int (*on_next_task)(struct ArrowAsyncDeviceStreamHandler* self,
+                         struct ArrowAsyncTask* task, const char* metadata);
+     void (*on_error)(struct ArrowAsyncDeviceStreamHandler* self,
+                      int code, const char* message, const char* metadata);
+
+     // release callback
+     void (*release)(struct ArrowAsyncDeviceStreamHandler* self);
+
+     // must be populated before calling any callbacks
+     struct ArrowAsyncProducer* producer;
+
+     // opaque handler-specific data
+     void* private_data;
+   };
+
+   #endif  // ARROW_C_ASYNC_STREAM_INTERFACE
+
+.. note::
+   The canonical guard ``ARROW_C_ASYNC_STREAM_INTERFACE`` is meant to avoid
+   duplicate definitions if two projects copy the C async stream interface
+   definitions into their own headers, and a third-party project includes
+   from these two projects. It is therefore important that this guard is kept
+   exactly as-is when these definitions are copied.
+
+The ArrowAsyncDeviceStreamHandler structure
+'''''''''''''''''''''''''''''''''''''''''''
+
+The structure has the following fields:
+
+.. c:member:: int (*ArrowAsyncDeviceStreamHandler.on_schema)(struct ArrowAsyncDeviceStreamHandler*, struct ArrowSchema*)
+
+   *Mandatory.* Handler for receiving the schema of the stream. All incoming records should
+   match the provided schema. If successful, the function should return 0, otherwise
+   it should return an ``errno``-compatible error code.
+
+   If there is any extra contextual information that the producer wants to provide, it can set
+   :c:member:`ArrowAsyncProducer.additional_metadata` to a non-NULL value. This is encoded in the
+   same format as :c:member:`ArrowSchema.metadata`. The lifetime of this metadata, if not ``NULL``,
+   should be tied to the lifetime of the ``ArrowAsyncProducer`` object.
+
+   Unless the ``on_error`` handler is called, this will always get called exactly once and will be
+   the first method called on this object. As such, the producer *MUST* populate the ``ArrowAsyncProducer``
+   member before calling this function to allow the consumer to apply back-pressure and control the flow of data.
+   The producer maintains ownership of the ``ArrowAsyncProducer`` and must clean it up *after*
+   calling the release callback on the ``ArrowAsyncDeviceStreamHandler``.
+
+   A producer that receives a non-zero result here must not subsequently call anything other than
+   the release callback on this object.
+
+.. c:member:: int (*ArrowAsyncDeviceStreamHandler.on_next_task)(struct ArrowAsyncDeviceStreamHandler*, struct ArrowAsyncTask*, const char*)
+
+   *Mandatory.* Handler to be called when a new record is available for processing. The
+   schema for each record should be the same as the schema that ``on_schema`` was called with.
+   If successfully handled, the function should return 0, otherwise it should return an
+   ``errno``-compatible error code.
+
+   Rather than passing the record itself, this handler receives an ``ArrowAsyncTask`` to
+   facilitate better consumer-focused thread control for receiving the data. A call to this
+   function simply indicates that data is available via the provided task.
+
+   The producer signals the end of the stream by passing ``NULL`` for the ``ArrowAsyncTask``
+   pointer instead of a valid address. This task object is only valid during the lifetime of
+   this function call. If the consumer wants to use the task beyond the scope of this method, it
+   must copy or move its contents to a new ``ArrowAsyncTask`` object.
+
+   The ``const char*`` parameter exists for producers to provide any extra contextual information
+   they want. This is encoded in the same format as :c:member:`ArrowSchema.metadata`. If not ``NULL``,
+   the lifetime is only the scope of the call to this function. A consumer who wants to maintain
+   the additional metadata beyond the lifetime of this call *MUST* copy the value themselves.
+
+   A producer *MUST NOT* call this concurrently from multiple threads.
+
+   The :c:member:`ArrowAsyncProducer.request` callback must be called to start receiving calls to this
+   handler.
+
+.. c:member:: void (*ArrowAsyncDeviceStreamHandler.on_error)(struct ArrowAsyncDeviceStreamHandler*, int, const char*, const char*)
+
+   *Mandatory.* Handler to be called when an error is encountered by the producer. After calling
+   this, the ``release`` callback will be called as the last call on this struct. The parameters
+   are an ``errno``-compatible error code and an optional error message and metadata.
+
+   If the message and metadata are not ``NULL``, their lifetime is only valid during the scope
+   of this call. A consumer who wants to maintain these values past the return of this function
+   *MUST* copy the values themselves.
+
+   If the metadata parameter is not ``NULL``, providing key-value error metadata, then it should
+   be encoded identically to the way that metadata is encoded in :c:member:`ArrowSchema.metadata`.
+
+   It is valid for this to be called by a producer with or without a preceding call to
+   :c:member:`ArrowAsyncProducer.request`. This callback *MUST NOT* call any methods of an
+   ``ArrowAsyncProducer`` object.
+
+.. c:member:: void (*ArrowAsyncDeviceStreamHandler.release)(struct ArrowAsyncDeviceStreamHandler*)
+
+   *Mandatory.* A pointer to a consumer-provided release callback for the handler.
+
+   It is valid for this to be called by a producer with or without a preceding call to
+   :c:member:`ArrowAsyncProducer.request`. This must not call any methods of an ``ArrowAsyncProducer``
+   object.
+
+.. c:member:: struct ArrowAsyncProducer* ArrowAsyncDeviceStreamHandler.producer
+
+   *Mandatory.* The producer object that the consumer will use to request additional data or cancel.
+
+   This object *MUST* be populated by the producer before calling the :c:member:`ArrowAsyncDeviceStreamHandler.on_schema`
+   callback. The producer maintains ownership of this object and must clean it up *after* calling
+   the release callback on the ``ArrowAsyncDeviceStreamHandler``.
+
+   The consumer *CANNOT* assume that this is valid until the ``on_schema`` callback is called.
+
+.. c:member:: void* ArrowAsyncDeviceStreamHandler.private_data
+
+   *Optional.* An opaque pointer to consumer-provided private data.
+
+   Producers *MUST NOT* process this member. Lifetime of this member is handled by
+   the consumer, and especially by the release callback.
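+
+To make the flow concrete, the following is a minimal consumer-side sketch. It is
+illustrative only and not part of the specification: ``MyConsumerState`` and the
+processing logic are hypothetical, it assumes the ``abi.h`` definitions above are in
+scope, and it assumes a CPU-only stream (a real consumer must be device-aware when
+releasing the ``ArrowDeviceArray``).
+
+.. code-block:: c
+
+   #include <stdint.h>
+
+   // Hypothetical consumer bookkeeping, stored in private_data.
+   struct MyConsumerState {
+     struct ArrowSchema schema;
+     int64_t batches_seen;
+   };
+
+   static int my_on_schema(struct ArrowAsyncDeviceStreamHandler* self,
+                           struct ArrowSchema* stream_schema) {
+     struct MyConsumerState* state = (struct MyConsumerState*)self->private_data;
+     // Move the schema contents into consumer-owned storage and mark the
+     // source as moved, per the C data interface move semantics.
+     state->schema = *stream_schema;
+     stream_schema->release = NULL;
+     // Apply back-pressure: ask for one batch at a time. The producer must not
+     // call on_next_task synchronously from inside this call.
+     self->producer->request(self->producer, 1);
+     return 0;
+   }
+
+   static int my_on_next_task(struct ArrowAsyncDeviceStreamHandler* self,
+                              struct ArrowAsyncTask* task, const char* metadata) {
+     struct MyConsumerState* state = (struct MyConsumerState*)self->private_data;
+     if (task == NULL) {
+       return 0;  // end of stream; the producer will eventually call release
+     }
+     struct ArrowDeviceArray batch;
+     if (task->extract_data(task, &batch) != 0) {
+       return 0;  // the producer reports the failure via on_error
+     }
+     state->batches_seen++;
+     // ... process the batch, then release it (CPU-only assumption) ...
+     batch.array.release(&batch.array);
+     self->producer->request(self->producer, 1);  // keep the stream flowing
+     return 0;
+   }
+
+   static void my_on_error(struct ArrowAsyncDeviceStreamHandler* self, int code,
+                           const char* message, const char* metadata) {
+     // message/metadata are only valid during this call; copy them if needed.
+   }
+
+   static void my_release(struct ArrowAsyncDeviceStreamHandler* self) {
+     // Free MyConsumerState and release any moved schema here.
+   }
+
+   // Wiring it up (the producer fills in handler.producer before on_schema):
+   // struct MyConsumerState state = {0};
+   // struct ArrowAsyncDeviceStreamHandler handler = {
+   //     .on_schema = my_on_schema,
+   //     .on_next_task = my_on_next_task,
+   //     .on_error = my_on_error,
+   //     .release = my_release,
+   //     .private_data = &state,
+   // };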
+
+The ArrowAsyncTask structure
+''''''''''''''''''''''''''''
+
+The purpose of using a Task object rather than passing the array directly to the ``on_next_task``
+callback is to allow for more complex and efficient thread handling. Utilizing a Task
+object allows a producer to separate the "decoding" logic from the I/O, enabling a
+consumer to avoid transferring data between CPU cores (e.g. from one L1/L2 cache to another).
+
+This producer-provided structure has the following fields:
+
+.. c:member:: int (*ArrowAsyncTask.extract_data)(struct ArrowAsyncTask*, struct ArrowDeviceArray*)
+
+   *Mandatory.* A callback to populate the provided ``ArrowDeviceArray`` with the available data.
+   The order of ``ArrowAsyncTasks`` provided by the producer enables a consumer to know the order of
+   the data to process. If the consumer does not care about the data that is owned by this task,
+   it must still call ``extract_data`` so that the producer can perform any required cleanup. ``NULL``
+   should be passed as the device array pointer to indicate that the consumer doesn't want the
+   actual data, letting the task perform necessary cleanup.
+
+   If a non-zero value is returned from this, it should be followed only by the producer calling
+   the ``on_error`` callback of the ``ArrowAsyncDeviceStreamHandler``. Because calling this method
+   is likely to be separate from the current control flow, returning a non-zero value to signal
+   an error occurring allows the current thread to decide how to handle the case accordingly,
+   while still allowing all error logging and handling to be centralized in the
+   :c:member:`ArrowAsyncDeviceStreamHandler.on_error` callback.
+
+   Rather than having a separate release callback, any required cleanup should be performed as part
+   of the invocation of this callback. Ownership of the Array is given to the pointer passed in as
+   a parameter, and this array must be released separately.
+
+   It is only valid to call this method exactly once.
+
+.. c:member:: void* ArrowAsyncTask.private_data
+
+   *Optional.* An opaque pointer to producer-provided private data.
+
+   Consumers *MUST NOT* process this member. Lifetime of this member is handled by
+   the producer who created this object, and should be cleaned up if necessary during
+   the call to :c:member:`ArrowAsyncTask.extract_data`.
+
+The ArrowAsyncProducer structure
+''''''''''''''''''''''''''''''''
+
+This producer-provided and managed object has the following fields:
+
+.. c:member:: void (*ArrowAsyncProducer.request)(struct ArrowAsyncProducer*, int64_t)
+
+   *Mandatory.* This function must be called by a consumer to start receiving calls to
+   :c:member:`ArrowAsyncDeviceStreamHandler.on_next_task`. It *MUST* be valid to call
+   this synchronously from within :c:member:`ArrowAsyncDeviceStreamHandler.on_next_task`
+   or :c:member:`ArrowAsyncDeviceStreamHandler.on_schema`. As a result, this function
+   *MUST NOT* synchronously call ``on_next_task`` or ``on_error`` to avoid recursive
+   and reentrant callbacks.
+
+   After ``cancel`` is called, additional calls to this function must be a NOP, but allowed.
+
+   While not cancelled, calling this function registers the given number of additional
+   arrays/batches to be produced by the producer. A producer should only call
+   the appropriate ``on_next_task`` callback up to a maximum of the total sum of calls to
+   this method before propagating back-pressure / waiting.
+
+   Any error encountered by calling request must be propagated by calling the ``on_error``
+   callback of the ``ArrowAsyncDeviceStreamHandler``.
+
+   It is invalid to call this function with a value of ``n`` that is ``<= 0``. Producers should
+   error (e.g. call ``on_error``) if receiving such a value for ``n``.
+
+.. c:member:: void (*ArrowAsyncProducer.cancel)(struct ArrowAsyncProducer*)
+
+   *Mandatory.* This function signals to the producer that it must *eventually* stop calling
+   ``on_next_task``. Calls to ``cancel`` must be idempotent and thread-safe. After calling
+   it once, subsequent calls *MUST* be a NOP. This *MUST NOT* call any consumer-side handlers
+   other than ``on_error``.
+
+   It is not required that calling ``cancel`` affect the producer *immediately*, only that it
+   must eventually stop calling ``on_next_task`` and then subsequently call ``release``
+   on the async handler object. As such, a consumer *MUST* be prepared to receive one or more
+   calls to ``on_next_task`` or ``on_error`` even after calling ``cancel`` if there are still
+   requested arrays pending.
+
+   Successful cancellation *MUST NOT* result in a producer calling
+   :c:member:`ArrowAsyncDeviceStreamHandler.on_error`; instead it should finish out any remaining
+   tasks (calling ``on_next_task`` accordingly) and eventually just call ``release``.
+
+   Any error encountered during handling a call to cancel must be reported via the ``on_error``
+   callback on the async stream handler.
+
+.. c:member:: const char* ArrowAsyncProducer.additional_metadata
+
+   *Optional.* An additional metadata string to provide any extra context to the consumer. This *MUST*
+   either be ``NULL`` or a valid string that is encoded in the same way as :c:member:`ArrowSchema.metadata`.
+   As an example, a producer could utilize this metadata to provide the total number of rows and/or batches
+   in the stream if known.
+
+   If not ``NULL`` it *MUST* be valid for at least the lifetime of this object.
+
+.. c:member:: void* ArrowAsyncProducer.private_data
+
+   *Optional.* An opaque pointer to producer-specific private data.
+
+   Consumers *MUST NOT* process this member; the lifetime is owned by the producer
+   that constructed this object.
+
+Error Handling
+''''''''''''''
+
+Unlike the regular C Stream interface, the Async interface allows errors to flow in
+both directions. As a result, error handling can be slightly more complex, so this spec
+designates the following rules:
+
+* If the producer encounters an error during processing, it should call the ``on_error``
+  callback, and then call ``release`` after it returns.
+
+* If ``on_schema`` or ``on_next_task`` returns a non-zero integer value, the producer *should not*
+  call the ``on_error`` callback, but instead should eventually call ``release`` at some point
+  before or after any logging or processing of the error code.
+
+Result lifetimes
+''''''''''''''''
+
+The ``ArrowSchema`` passed to the ``on_schema`` callback must be released independently,
+with the object itself needing to be moved to a consumer-owned ``ArrowSchema`` object. The
+``ArrowSchema*`` passed as a parameter to the callback *MUST NOT* be stored.
+
+The ``ArrowAsyncTask`` object provided to ``on_next_task`` is owned by the producer and
+will be cleaned up during the invocation of ``extract_data`` on it. If the consumer
+doesn't care about the data, it should pass ``NULL`` instead of a valid ``ArrowDeviceArray*``.
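+
+As a small sketch of the discard pattern described above (the surrounding control flow
+is hypothetical and not part of the specification):
+
+.. code-block:: c
+
+   // The consumer decided it does not need this batch: calling extract_data
+   // with a NULL output array lets the task clean up its resources without
+   // producing an ArrowDeviceArray.
+   int rc = task->extract_data(task, NULL);
+   if (rc != 0) {
+     // the producer will surface the failure through on_error
+   }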
+ +The ``const char*`` error ``message`` and ``metadata`` which are passed to ``on_error`` +are only valid within the scope of the ``on_error`` function itself. They must be copied +if it is necessary for them to exist after it returns. + +Stream Handler Lifetime +''''''''''''''''''''''' + +Lifetime of the async stream handler is managed using a release callback with similar +usage as in :ref:`C data interface `. + +ArrowAsyncProducer Lifetime +''''''''''''''''''''''''''' + +The lifetime of the ``ArrowAsyncProducer`` is owned by the producer itself and should +be managed by it. It *MUST* be populated before calling any methods other than ``release`` +and *MUST* remain valid at least until just before calling ``release`` on the stream handler object. + +Thread safety +''''''''''''' + +All handler functions on the ``ArrowAsyncDeviceStreamHandler`` should only be called in a +serialized manner, but are not guaranteed to be called from the same thread every time. A +producer should wait for handler callbacks to return before calling the next handler callback, +and before calling the ``release`` callback. + +Back-pressure is managed by the consumer making calls to :c:member:`ArrowAsyncProducer.request` +to indicate how many arrays it is ready to receive. + +The ``ArrowAsyncDeviceStreamHandler`` object should be able to handle callbacks as soon as +it is passed to the producer, any initialization should be performed before it is provided. + +Possible Sequence Diagram +------------------------- + +.. mermaid:: + + sequenceDiagram + Consumer->>+Producer: ArrowAsyncDeviceStreamHandler* + Producer-->>+Consumer: on_schema(ArrowAsyncProducer*, ArrowSchema*) + Consumer->>Producer: ArrowAsyncProducer->request(n) + + par + loop up to n times + Producer-->>Consumer: on_next_task(ArrowAsyncTask*) + end + and for each task + Consumer-->>Producer: ArrowAsyncTask.extract_data(...) + Consumer-->>Producer: ArrowAsyncProducer->request(1) + end + + break Optionally + Consumer->>-Producer: ArrowAsyncProducer->cancel() + end + + loop possible remaining + Producer-->>Consumer: on_next_task(ArrowAsyncTask*) + end + + Producer->>-Consumer: ArrowAsyncDeviceStreamHandler->release() + + Interoperability with other interchange formats =============================================== From bbda6b1e1322c5a9b8c107a03343d18b759694c1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 6 Nov 2024 23:43:39 +0900 Subject: [PATCH 54/59] GH-44657: [CI][Dev] Add write permission to the crossbow comment bot (#44658) ### Rationale for this change It needs to write a comment to the target PR. ### What changes are included in this PR? Add write permission to only the crossbow comment bot. ### Are these changes tested? No. ### Are there any user-facing changes? No. * GitHub Issue: #44657 Authored-by: Sutou Kouhei Signed-off-by: Jacob Wujciak-Jens --- .github/workflows/comment_bot.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 9e0e8ab47e102..83b6f6e31ffc3 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -32,6 +32,8 @@ jobs: name: Listen! 
if: startsWith(github.event.comment.body, '@github-actions crossbow') runs-on: ubuntu-latest + permissions: + pull-requests: write steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 From 9b27f42e02d9c4208698a324357cafaaa3e308ce Mon Sep 17 00:00:00 2001 From: Emmanuel Ferdman Date: Wed, 6 Nov 2024 18:54:42 +0200 Subject: [PATCH 55/59] MINOR: [Docs][Python] Update `python_test.cc` reference (#44622) ### Rationale for this change Commit 21dbf4ac09583651899232c9a80b3f5d6580a588 moved the location of `python_test.cc`. ### What changes are included in this PR? PR updates the reference to `python_test.cc`. ### Are these changes tested? ### Are there any user-facing changes? No. Authored-by: Emmanuel Ferdman Signed-off-by: Joris Van den Bossche --- docs/source/developers/python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index 2ba4b534caeff..ac70c27934b07 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -90,7 +90,7 @@ and look for the "custom options" section. .. note:: There are a few low-level tests written directly in C++. These tests are - implemented in `pyarrow/src/python_test.cc `_, + implemented in `pyarrow/src/arrow/python/python_test.cc `_, but they are also wrapped in a ``pytest``-based `test module `_ run automatically as part of the PyArrow test suite. From ac6b8ffc6a34e490a51760175313f06c99bccdbd Mon Sep 17 00:00:00 2001 From: Hiroyuki Sato Date: Thu, 7 Nov 2024 11:46:34 +0900 Subject: [PATCH 56/59] GH-44656: [GLib] Add GArrowBinaryViewDataType (#44659) ### Rationale for this change The `arrow::BinaryViewType` has been introduced. GLib needs to be implemented as the `GArrowBinaryViewDataType`. ### What changes are included in this PR? Implement `GArrowBinaryViewDataType`. ### Are these changes tested? YES ### Are there any user-facing changes? NO * GitHub Issue: #44656 Lead-authored-by: Hiroyuki Sato Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/basic-data-type.cpp | 30 +++++++++++++++++++++ c_glib/arrow-glib/basic-data-type.h | 16 +++++++++++ c_glib/arrow-glib/type.cpp | 2 ++ c_glib/arrow-glib/type.h | 6 ++++- c_glib/test/test-binary-view-data-type.rb | 33 +++++++++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 c_glib/test/test-binary-view-data-type.rb diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index ecb537aa1f905..f922106065a8d 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -127,6 +127,8 @@ G_BEGIN_DECLS * * #GArrowExtensionDataTypeRegistry is a class to manage extension * data types. + * + * #GArrowBinaryViewDataType is a class for the binary view data type. */ struct GArrowDataTypePrivate @@ -2207,6 +2209,34 @@ garrow_extension_data_type_registry_lookup(GArrowExtensionDataTypeRegistry *regi return GARROW_EXTENSION_DATA_TYPE(data_type); } +G_DEFINE_TYPE(GArrowBinaryViewDataType, + garrow_binary_view_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_binary_view_data_type_init(GArrowBinaryViewDataType *object) +{ +} + +static void +garrow_binary_view_data_type_class_init(GArrowBinaryViewDataTypeClass *klass) +{ +} + +/** + * garrow_binary_view_data_type_new: + * + * Returns: The newly created binary view data type. 
+ */ +GArrowBinaryViewDataType * +garrow_binary_view_data_type_new(void) +{ + auto arrow_data_type = arrow::binary_view(); + GArrowBinaryViewDataType *data_type = GARROW_BINARY_VIEW_DATA_TYPE( + g_object_new(GARROW_TYPE_BINARY_VIEW_DATA_TYPE, "data-type", &arrow_data_type, NULL)); + return data_type; +} + G_END_DECLS GArrowDataType * diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index edbe15e2df521..b98488211a78e 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -770,4 +770,20 @@ GArrowExtensionDataType * garrow_extension_data_type_registry_lookup(GArrowExtensionDataTypeRegistry *registry, const gchar *name); +#define GARROW_TYPE_BINARY_VIEW_DATA_TYPE (garrow_binary_view_data_type_get_type()) +GARROW_AVAILABLE_IN_19_0 +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryViewDataType, + garrow_binary_view_data_type, + GARROW, + BINARY_VIEW_DATA_TYPE, + GArrowDataType) +struct _GArrowBinaryViewDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_19_0 +GArrowBinaryViewDataType * +garrow_binary_view_data_type_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/type.cpp b/c_glib/arrow-glib/type.cpp index 26d21f6d82587..42372bc8dda6e 100644 --- a/c_glib/arrow-glib/type.cpp +++ b/c_glib/arrow-glib/type.cpp @@ -114,6 +114,8 @@ garrow_type_from_raw(arrow::Type::type type) return GARROW_TYPE_MONTH_DAY_NANO_INTERVAL; case arrow::Type::type::RUN_END_ENCODED: return GARROW_TYPE_RUN_END_ENCODED; + case arrow::Type::type::BINARY_VIEW: + return GARROW_TYPE_BINARY_VIEW; default: return GARROW_TYPE_NA; } diff --git a/c_glib/arrow-glib/type.h b/c_glib/arrow-glib/type.h index a817da4b9413e..f85cf3f2ee416 100644 --- a/c_glib/arrow-glib/type.h +++ b/c_glib/arrow-glib/type.h @@ -70,6 +70,8 @@ G_BEGIN_DECLS * @GARROW_TYPE_LARGE_LIST: A list of some logical data type with 64-bit offsets. * @GARROW_TYPE_MONTH_DAY_NANO_INTERVAL: MONTH_DAY_NANO interval in SQL style. * @GARROW_TYPE_RUN_END_ENCODED: Run-end encoded data. + * @GARROW_TYPE_BINARY_VIEW: Bytes view type with 4-byte prefix and inline small string + * optimization. * @GARROW_TYPE_DECIMAL32: Precision- and scale-based decimal * @GARROW_TYPE_DECIMAL64: Precision- and scale-based decimal * type with 64-bit. Storage type depends on the parameters. @@ -116,7 +118,9 @@ typedef enum { GARROW_TYPE_LARGE_LIST, GARROW_TYPE_MONTH_DAY_NANO_INTERVAL, GARROW_TYPE_RUN_END_ENCODED, - /* TODO: Remove = 43 when we add STRING_VIEW(39)..LARGE_LIST_VIEW(42). */ + /* TODO: Remove = 40 when we add STRING_VIEW(39) */ + GARROW_TYPE_BINARY_VIEW = 40, + /* TODO: Remove = 43 when we add LIST_VIEW(41)..LARGE_LIST_VIEW(42). */ GARROW_TYPE_DECIMAL32 = 43, GARROW_TYPE_DECIMAL64, } GArrowType; diff --git a/c_glib/test/test-binary-view-data-type.rb b/c_glib/test/test-binary-view-data-type.rb new file mode 100644 index 0000000000000..f143b62df4ebc --- /dev/null +++ b/c_glib/test/test-binary-view-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestBinaryViewDataType < Test::Unit::TestCase + def test_type + data_type = Arrow::BinaryViewDataType.new + assert_equal(Arrow::Type::BINARY_VIEW, data_type.id) + end + + def test_name + data_type = Arrow::BinaryViewDataType.new + assert_equal("binary_view", data_type.name) + end + + def test_to_s + data_type = Arrow::BinaryViewDataType.new + assert_equal("binary_view", data_type.to_s) + end +end From df24a8225999896eb03db280354fbff42dfea0f5 Mon Sep 17 00:00:00 2001 From: David Li Date: Thu, 7 Nov 2024 01:44:01 -0500 Subject: [PATCH 57/59] MINOR: [Release] Support GitHub token in download_rc_binaries.py (#44666) ### Rationale for this change See apache/arrow-adbc#2307. This script is getting rate-limited on GitHub Actions. ### What changes are included in this PR? Pick up GH_TOKEN if it exists. ### Are these changes tested? N/A ### Are there any user-facing changes? N/A Authored-by: David Li Signed-off-by: Sutou Kouhei --- dev/release/download_rc_binaries.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dev/release/download_rc_binaries.py b/dev/release/download_rc_binaries.py index 49203cd45414e..788d1df0ab3eb 100755 --- a/dev/release/download_rc_binaries.py +++ b/dev/release/download_rc_binaries.py @@ -158,17 +158,22 @@ def __init__(self, repository, tag): raise ValueError("--tag is required") self._repository = repository self._tag = tag + # use the same name as the gh CLI + self._token = os.environ.get("GH_TOKEN") def get_file_list(self, prefix, filter=None): url = (f"https://api.github.com/repos/{self._repository}/" f"releases/tags/{self._tag}") print("Fetching release from", url) + headers = { + "Accept": "application/vnd.github+json", + } + if self._token: + headers["Authorization"] = f"Bearer {self._token}" request = urllib.request.Request( url, method="GET", - headers={ - "Accept": "application/vnd.github+json", - }, + headers=headers, ) raw_response = urllib.request.urlopen(request).read().decode() response = json.loads(raw_response) From 32de498ca7dba5861f22eee5e4527446f6218b7a Mon Sep 17 00:00:00 2001 From: GeorgKreuzmayr <68595015+GeorgKreuzmayr@users.noreply.github.com> Date: Thu, 7 Nov 2024 10:10:51 +0100 Subject: [PATCH 58/59] GH-44668: [Docs] Fix ColumnChunkMetaData offset documentation in pyarrow (#44670) ### Rationale for this change The pyarrow documentation of ColumnMetaData is contradicting the C++ implementation. The pyarrow [documentation](https://arrow.apache.org/docs/dev/python/generated/pyarrow.parquet.ColumnChunkMetaData.html#pyarrow.parquet.ColumnChunkMetaData.data_page_offset) says: The data_page_offset and dictionary_page_offset are relative to the column chunk offset The C++ [comments in the code](https://github.com/apache/arrow/blob/df24a8225999896eb03db280354fbff42dfea0f5/cpp/src/generated/parquet_types.h#L2896) say: The offsets are byte offsets from the beginning of the file to first data_page / dictionary_page ### What changes are included in this PR? Update comments that `data_page_offset` and `dictionary_page_offset` are relative to start of file ### Are these changes tested? 
Verified locally that C++ code comments are correct ### Are there any user-facing changes? Documentation GitHub Issue: https://github.com/apache/arrow/issues/44668 * GitHub Issue: #44668 Authored-by: Georg Kreuzmayr Signed-off-by: mwish --- python/pyarrow/_parquet.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 254bfe3b09a9c..a3abf1865b7b5 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -467,7 +467,7 @@ cdef class ColumnChunkMetaData(_Weakrefable): @property def dictionary_page_offset(self): - """Offset of dictionary page relative to column chunk offset (int).""" + """Offset of dictionary page relative to beginning of the file (int).""" if self.has_dictionary_page: return self.metadata.dictionary_page_offset() else: @@ -475,7 +475,7 @@ cdef class ColumnChunkMetaData(_Weakrefable): @property def data_page_offset(self): - """Offset of data page relative to column chunk offset (int).""" + """Offset of data page relative to beginning of the file (int).""" return self.metadata.data_page_offset() @property From b193c4f701afee4581c25c1489afa0e4be8f6a6a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 7 Nov 2024 12:37:23 +0100 Subject: [PATCH 59/59] GH-36954: [Python] Add more FlightInfo / FlightEndpoint attributes (#43537) ### Rationale for this change The C++ classes `FlightInfo` and `FlightEndpoint` have attributes that are not available via the Python API. ### What changes are included in this PR? Make the following attributes available in Python: - `FlightInfo.ordered` - `FlightInfo.app_metadata` - `FlightEndpoint.expiration_time` - `FlightEndpoint.app_metadata` Also makes existing attributes optional in constructor: - `FlightInfo.total_records` - `FlightInfo.total_bytes` ### Are these changes tested? Existing tests that test existing attributes are extended. ### Are there any user-facing changes? Yes, changes are backward compatible. 
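For illustration, a small usage sketch of the new attributes (the ticket, location, timestamp, and metadata values below are hypothetical, modeled on this PR's tests):

```python
import pyarrow as pa
import pyarrow.flight as flight

endpoint = flight.FlightEndpoint(
    b"ticket-bytes",
    ["grpc://localhost:1234"],
    expiration_time=pa.scalar("2023-04-05T12:34:56").cast(pa.timestamp("s")),
    app_metadata=b"endpoint metadata",
)

# total_records / total_bytes may now be omitted (treated as unknown)
info = flight.FlightInfo(
    pa.schema([("a", pa.int32())]),
    flight.FlightDescriptor.for_command(b"my-query"),
    [endpoint],
    ordered=True,
    app_metadata=b"info metadata",
)

assert info.ordered
assert info.endpoints[0].app_metadata == b"endpoint metadata"
```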
* GitHub Issue: #36954 Lead-authored-by: Enrico Minack Co-authored-by: Adam Reeve Co-authored-by: Joris Van den Bossche Co-authored-by: David Li Signed-off-by: David Li --- python/examples/flight/client.py | 3 + python/pyarrow/_flight.pyx | 108 ++++++++++-- python/pyarrow/includes/chrono.pxd | 23 +++ python/pyarrow/includes/libarrow_flight.pxd | 7 + python/pyarrow/includes/libarrow_python.pxd | 4 + python/pyarrow/src/arrow/python/datetime.h | 14 ++ python/pyarrow/src/arrow/python/flight.cc | 9 +- python/pyarrow/src/arrow/python/flight.h | 3 +- python/pyarrow/tests/test_flight.py | 174 +++++++++++++++++--- python/pyarrow/tests/test_flight_async.py | 4 +- 10 files changed, 307 insertions(+), 42 deletions(-) create mode 100644 python/pyarrow/includes/chrono.pxd diff --git a/python/examples/flight/client.py b/python/examples/flight/client.py index ed6ce54ce62ea..75976674bf2e6 100644 --- a/python/examples/flight/client.py +++ b/python/examples/flight/client.py @@ -48,6 +48,9 @@ def list_flights(args, client, connection_args={}): else: print("Unknown") + print(f"Data are {'ordered' if flight.ordered else 'not ordered'}") + print("App metadata:", flight.app_metadata) + print("Number of endpoints:", len(flight.endpoints)) print("Schema:") print(flight.schema) diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index 8289215de2e29..ba6cdf273ac22 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -31,7 +31,7 @@ from libcpp cimport bool as c_bool from pyarrow.lib cimport * from pyarrow.lib import (ArrowCancelled, ArrowException, ArrowInvalid, SignalStopHandler) -from pyarrow.lib import as_buffer, frombytes, tobytes +from pyarrow.lib import as_buffer, frombytes, timestamp, tobytes from pyarrow.includes.libarrow_flight cimport * from pyarrow.ipc import _get_legacy_format_default, _ReadPandasMixin import pyarrow.lib as lib @@ -704,7 +704,7 @@ cdef class FlightEndpoint(_Weakrefable): cdef: CFlightEndpoint endpoint - def __init__(self, ticket, locations): + def __init__(self, ticket, locations, expiration_time=None, app_metadata=""): """Create a FlightEndpoint from a ticket and list of locations. Parameters @@ -713,6 +713,12 @@ cdef class FlightEndpoint(_Weakrefable): the ticket needed to access this flight locations : list of string URIs locations where this flight is available + expiration_time : TimestampScalar, default None + Expiration time of this stream. If present, clients may assume + they can retry DoGet requests. Otherwise, clients should avoid + retrying DoGet requests. + app_metadata : bytes or str, default "" + Application-defined opaque metadata. 
Raises ------ @@ -724,18 +730,40 @@ cdef class FlightEndpoint(_Weakrefable): if isinstance(ticket, Ticket): self.endpoint.ticket.ticket = tobytes(ticket.ticket) - else: + elif isinstance(ticket, (str, bytes)): self.endpoint.ticket.ticket = tobytes(ticket) + else: + raise TypeError("Argument ticket must be a Ticket instance, string or bytes, " + "not '{}'".format(type(ticket))) for location in locations: if isinstance(location, Location): c_location = ( location).location - else: + elif isinstance(location, (str, bytes)): c_location = CLocation() check_flight_status( CLocation.Parse(tobytes(location)).Value(&c_location)) + else: + raise TypeError("Argument locations must contain Location instances, strings or bytes, " + "not '{}'".format(type(location))) self.endpoint.locations.push_back(c_location) + if expiration_time is not None: + if isinstance(expiration_time, lib.TimestampScalar): + # Convert into OS-dependent std::chrono::system_clock::time_point from + # std::chrono::time_point + # See Timestamp in cpp/src/arrow/flight/types.h + self.endpoint.expiration_time = TimePoint_to_system_time(TimePoint_from_ns( + expiration_time.cast(timestamp("ns")).value)) + else: + raise TypeError("Argument expiration_time must be a TimestampScalar, " + "not '{}'".format(type(expiration_time))) + + if not isinstance(app_metadata, (str, bytes)): + raise TypeError("Argument app_metadata must be a string or bytes, " + "not '{}'".format(type(app_metadata))) + self.endpoint.app_metadata = tobytes(app_metadata) + @property def ticket(self): """Get the ticket in this endpoint.""" @@ -743,9 +771,34 @@ cdef class FlightEndpoint(_Weakrefable): @property def locations(self): + """Get locations where this flight is available.""" return [Location.wrap(location) for location in self.endpoint.locations] + @property + def expiration_time(self): + """Get the expiration time of this stream. + + If present, clients may assume they can retry DoGet requests. + Otherwise, clients should avoid retrying DoGet requests. + + """ + cdef: + int64_t time_since_epoch + if self.endpoint.expiration_time.has_value(): + time_since_epoch = TimePoint_to_ns( + # Convert from OS-dependent std::chrono::system_clock::time_point into + # std::chrono::time_point + # See Timestamp in cpp/src/arrow/flight/types.h + TimePoint_from_system_time(self.endpoint.expiration_time.value())) + return lib.scalar(time_since_epoch, timestamp("ns", "UTC")) + return None + + @property + def app_metadata(self): + """Get application-defined opaque metadata.""" + return self.endpoint.app_metadata + def serialize(self): """Get the wire-format representation of this type. @@ -770,7 +823,9 @@ cdef class FlightEndpoint(_Weakrefable): def __repr__(self): return (f"") + f"locations={self.locations!r} " + f"expiration_time={self.expiration_time} " + f"app_metadata={self.app_metadata}>") def __eq__(self, FlightEndpoint other): return self.endpoint == other.endpoint @@ -844,7 +899,7 @@ cdef class FlightInfo(_Weakrefable): return obj def __init__(self, Schema schema, FlightDescriptor descriptor, endpoints, - total_records, total_bytes): + total_records=None, total_bytes=None, ordered=False, app_metadata=""): """Create a FlightInfo object from a schema, descriptor, and endpoints. Parameters @@ -855,10 +910,14 @@ cdef class FlightInfo(_Weakrefable): the descriptor for this flight. endpoints : list of FlightEndpoint a list of endpoints where this flight is available. 
- total_records : int - the total records in this flight, or -1 if unknown - total_bytes : int - the total bytes in this flight, or -1 if unknown + total_records : int, default None + the total records in this flight, -1 or None if unknown. + total_bytes : int, default None + the total bytes in this flight, -1 or None if unknown. + ordered : boolean, default False + Whether endpoints are in the same order as the data. + app_metadata : bytes or str, default "" + Application-defined opaque metadata. """ cdef: shared_ptr[CSchema] c_schema = pyarrow_unwrap_schema(schema) @@ -874,8 +933,10 @@ cdef class FlightInfo(_Weakrefable): check_flight_status(CreateFlightInfo(c_schema, descriptor.descriptor, c_endpoints, - total_records, - total_bytes, &self.info)) + total_records if total_records is not None else -1, + total_bytes if total_bytes is not None else -1, + ordered, + tobytes(app_metadata), &self.info)) @property def total_records(self): @@ -887,6 +948,25 @@ cdef class FlightInfo(_Weakrefable): """The size in bytes of the data in this flight, or -1 if unknown.""" return self.info.get().total_bytes() + @property + def ordered(self): + """Whether endpoints are in the same order as the data.""" + return self.info.get().ordered() + + @property + def app_metadata(self): + """ + Application-defined opaque metadata. + + There is no inherent or required relationship between this and the + app_metadata fields in the FlightEndpoints or resulting FlightData + messages. Since this metadata is application-defined, a given + application could define there to be a relationship, but there is + none required by the spec. + + """ + return self.info.get().app_metadata() + @property def schema(self): """The schema of the data in this flight.""" @@ -950,7 +1030,9 @@ cdef class FlightInfo(_Weakrefable): f"descriptor={self.descriptor} " f"endpoints={self.endpoints} " f"total_records={self.total_records} " - f"total_bytes={self.total_bytes}>") + f"total_bytes={self.total_bytes} " + f"ordered={self.ordered} " + f"app_metadata={self.app_metadata}>") cdef class FlightStreamChunk(_Weakrefable): diff --git a/python/pyarrow/includes/chrono.pxd b/python/pyarrow/includes/chrono.pxd new file mode 100644 index 0000000000000..e5d22d19751d7 --- /dev/null +++ b/python/pyarrow/includes/chrono.pxd @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# distutils: language = c++ + + +cdef extern from "" namespace "std::chrono::system_clock": + cdef cppclass time_point: + pass diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd index c4cf5830c4128..d2bc3c9d0da23 100644 --- a/python/pyarrow/includes/libarrow_flight.pxd +++ b/python/pyarrow/includes/libarrow_flight.pxd @@ -19,6 +19,7 @@ from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport * +from pyarrow.includes.chrono cimport time_point cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: @@ -134,6 +135,8 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: CTicket ticket vector[CLocation] locations + optional[time_point] expiration_time + c_string app_metadata bint operator==(CFlightEndpoint) CResult[c_string] SerializeToString() @@ -146,6 +149,8 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: CFlightInfo(CFlightInfo info) int64_t total_records() int64_t total_bytes() + c_bool ordered() + c_string app_metadata() CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo) CFlightDescriptor& descriptor() const vector[CFlightEndpoint]& endpoints() @@ -608,6 +613,8 @@ cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil: vector[CFlightEndpoint] endpoints, int64_t total_records, int64_t total_bytes, + c_bool ordered, + const c_string& app_metadata, unique_ptr[CFlightInfo]* out) cdef CStatus CreateSchemaResult" arrow::py::flight::CreateSchemaResult"( diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 96725c9c3862b..da5bca5edd584 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -17,6 +17,7 @@ # distutils: language = c++ +from pyarrow.includes.chrono cimport time_point from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport * @@ -244,6 +245,9 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil: CTimePoint TimePoint_from_s(double val) CTimePoint TimePoint_from_ns(int64_t val) + CTimePoint TimePoint_from_system_time(time_point val) + time_point TimePoint_to_system_time(CTimePoint val) + CResult[c_string] TzinfoToString(PyObject* pytzinfo) CResult[PyObject*] StringToTzinfo(c_string) diff --git a/python/pyarrow/src/arrow/python/datetime.h b/python/pyarrow/src/arrow/python/datetime.h index 9b21eeb434217..3de5ea69fd9da 100644 --- a/python/pyarrow/src/arrow/python/datetime.h +++ b/python/pyarrow/src/arrow/python/datetime.h @@ -144,6 +144,20 @@ inline TimePoint TimePoint_from_ns(int64_t val) { return TimePoint(TimePoint::duration(val)); } +ARROW_PYTHON_EXPORT +// Note: Needed by FlightEndpoint.expiration_time, which is an OS-dependent +// std::chrono::system_clock::time_point +inline std::chrono::system_clock::time_point TimePoint_to_system_time(TimePoint val) { + return std::chrono::time_point_cast(val); +} + +ARROW_PYTHON_EXPORT +// Note: Needed by FlightEndpoint.expiration_time, which is an OS-dependent +// std::chrono::system_clock::time_point +inline TimePoint TimePoint_from_system_time(std::chrono::system_clock::time_point val) { + return std::chrono::time_point_cast(val); +} + ARROW_PYTHON_EXPORT inline int64_t PyDelta_to_s(PyDateTime_Delta* pytimedelta) { return (PyDateTime_DELTA_GET_DAYS(pytimedelta) * 86400LL + diff --git a/python/pyarrow/src/arrow/python/flight.cc b/python/pyarrow/src/arrow/python/flight.cc index bf7af27ac726e..ce5e6dfa94e3b 100644 --- 
a/python/pyarrow/src/arrow/python/flight.cc +++ b/python/pyarrow/src/arrow/python/flight.cc @@ -368,11 +368,12 @@ void PyClientMiddleware::CallCompleted(const Status& call_status) { Status CreateFlightInfo(const std::shared_ptr& schema, const arrow::flight::FlightDescriptor& descriptor, const std::vector& endpoints, - int64_t total_records, int64_t total_bytes, + int64_t total_records, int64_t total_bytes, bool ordered, + const std::string& app_metadata, std::unique_ptr* out) { - ARROW_ASSIGN_OR_RAISE(auto result, - arrow::flight::FlightInfo::Make(*schema, descriptor, endpoints, - total_records, total_bytes)); + ARROW_ASSIGN_OR_RAISE(auto result, arrow::flight::FlightInfo::Make( + *schema, descriptor, endpoints, total_records, + total_bytes, ordered, app_metadata)); *out = std::unique_ptr( new arrow::flight::FlightInfo(std::move(result))); return Status::OK(); diff --git a/python/pyarrow/src/arrow/python/flight.h b/python/pyarrow/src/arrow/python/flight.h index 5243258495778..57d21976bb7ae 100644 --- a/python/pyarrow/src/arrow/python/flight.h +++ b/python/pyarrow/src/arrow/python/flight.h @@ -337,7 +337,8 @@ ARROW_PYFLIGHT_EXPORT Status CreateFlightInfo(const std::shared_ptr& schema, const arrow::flight::FlightDescriptor& descriptor, const std::vector& endpoints, - int64_t total_records, int64_t total_bytes, + int64_t total_records, int64_t total_bytes, bool ordered, + const std::string& app_metadata, std::unique_ptr* out); /// \brief Create a SchemaResult from schema. diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index f0ceba37d6933..b3103c4be8c6d 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -27,6 +27,7 @@ import time import traceback import json +from datetime import datetime try: import numpy as np @@ -152,8 +153,7 @@ def list_flights(self, context, criteria): yield flight.FlightInfo( pa.schema([]), flight.FlightDescriptor.for_path('/foo'), - [], - -1, -1 + [] ) def do_get(self, context, ticket): @@ -251,10 +251,14 @@ def get_flight_info(self, context, descriptor): flight.FlightEndpoint( b'', [flight.Location.for_grpc_tcp('localhost', 5005)], + pa.scalar("2023-04-05T12:34:56.789012345").cast(pa.timestamp("ns")), + "endpoint app metadata" ), ], - -1, - -1, + 1, + 42, + True, + "info app metadata" ) def get_schema(self, context, descriptor): @@ -387,8 +391,7 @@ def list_flights(self, context, criteria): yield flight.FlightInfo( pa.schema([]), flight.FlightDescriptor.for_path('/foo'), - [], - -1, -1 + [] ) raise flight.FlightInternalError("foo") @@ -876,14 +879,18 @@ def test_repr(): descriptor_repr = "" endpoint_repr = (" " - "locations=[]>") + "locations=[] " + "expiration_time=2023-04-05 12:34:56+00:00 " + "app_metadata=b'endpoint app metadata'>") info_repr = ( " " "endpoints=[] " - "total_records=-1 " - "total_bytes=-1>") + "total_records=1 " + "total_bytes=42 " + "ordered=True " + "app_metadata=b'test app metadata'>") location_repr = "" result_repr = "" schema_result_repr = "" @@ -893,9 +900,15 @@ def test_repr(): assert repr(flight.ActionType("foo", "bar")) == action_type_repr assert repr(flight.BasicAuth("user", "pass")) == basic_auth_repr assert repr(flight.FlightDescriptor.for_command("foo")) == descriptor_repr - assert repr(flight.FlightEndpoint(b"foo", [])) == endpoint_repr + endpoint = flight.FlightEndpoint( + b"foo", [], pa.scalar("2023-04-05T12:34:56").cast(pa.timestamp("s")), + b"endpoint app metadata" + ) + assert repr(endpoint) == endpoint_repr info = flight.FlightInfo( - 
pa.schema([]), flight.FlightDescriptor.for_path(), [], -1, -1) + pa.schema([]), flight.FlightDescriptor.for_path(), [], + 1, 42, True, b"test app metadata" + ) assert repr(info) == info_repr assert repr(flight.Location("grpc+tcp://localhost:1234")) == location_repr assert repr(flight.Result(b"foo")) == result_repr @@ -907,25 +920,97 @@ def test_repr(): with pytest.raises(TypeError): flight.Action("foo", None) + with pytest.raises(TypeError): + flight.FlightEndpoint(object(), []) + with pytest.raises(TypeError): + flight.FlightEndpoint("foo", ["grpc://test", b"grpc://test", object()]) + with pytest.raises(TypeError): + flight.FlightEndpoint("foo", [], expiration_time="2023-04-05T01:02:03") + with pytest.raises(TypeError): + flight.FlightEndpoint("foo", [], expiration_time=datetime(2023, 4, 5, 1, 2, 3)) + with pytest.raises(TypeError): + flight.FlightEndpoint("foo", [], app_metadata=object()) + def test_eq(): items = [ + lambda: (flight.Action("foo", b""), flight.Action("bar", b"")), lambda: (flight.Action("foo", b""), flight.Action("foo", b"bar")), lambda: (flight.ActionType("foo", "bar"), flight.ActionType("foo", "baz")), lambda: (flight.BasicAuth("user", "pass"), flight.BasicAuth("user2", "pass")), + lambda: (flight.BasicAuth("user", "pass"), + flight.BasicAuth("user", "pass2")), lambda: (flight.FlightDescriptor.for_command("foo"), flight.FlightDescriptor.for_path("foo")), lambda: (flight.FlightEndpoint(b"foo", []), - flight.FlightEndpoint(b"", [])), + flight.FlightEndpoint(b"bar", [])), + lambda: ( + flight.FlightEndpoint( + b"foo", [flight.Location("grpc+tcp://localhost:1234")]), + flight.FlightEndpoint( + b"foo", [flight.Location("grpc+tls://localhost:1234")]) + ), + lambda: ( + flight.FlightEndpoint( + b"foo", [], pa.scalar("2023-04-05T12:34:56").cast(pa.timestamp("s"))), + flight.FlightEndpoint( + b"foo", [], + pa.scalar("2023-04-05T12:34:56.789").cast(pa.timestamp("ms")))), + lambda: (flight.FlightEndpoint(b"foo", [], app_metadata=b''), + flight.FlightEndpoint(b"foo", [], app_metadata=b'meta')), + lambda: ( + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), []), + flight.FlightInfo( + pa.schema([("ints", pa.int64())]), + flight.FlightDescriptor.for_path(), [])), + lambda: ( + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), []), + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_command(b"foo"), [])), lambda: ( flight.FlightInfo( pa.schema([]), - flight.FlightDescriptor.for_path(), [], -1, -1), + flight.FlightDescriptor.for_path(), + [flight.FlightEndpoint(b"foo", [])]), flight.FlightInfo( pa.schema([]), - flight.FlightDescriptor.for_command(b"foo"), [], -1, 42)), + flight.FlightDescriptor.for_path(), + [flight.FlightEndpoint(b"bar", [])])), + lambda: ( + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], total_records=-1), + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], total_records=1)), + lambda: ( + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], total_bytes=-1), + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], total_bytes=42)), + lambda: ( + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], ordered=False), + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], ordered=True)), + lambda: ( + flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], app_metadata=b""), + flight.FlightInfo( + pa.schema([]), + 
flight.FlightDescriptor.for_path(), [], app_metadata=b"meta")), lambda: (flight.Location("grpc+tcp://localhost:1234"), flight.Location("grpc+tls://localhost:1234")), lambda: (flight.Result(b"foo"), flight.Result(b"bar")), @@ -937,11 +1022,33 @@ def test_eq(): for gen in items: lhs1, rhs1 = gen() lhs2, rhs2 = gen() + assert lhs1 == lhs1 assert lhs1 == lhs2 + assert lhs2 == lhs1 + assert rhs1 == rhs1 assert rhs1 == rhs2 + assert rhs2 == rhs1 assert lhs1 != rhs1 +def test_flight_info_defaults(): + fi1 = flight.FlightInfo(pa.schema([]), flight.FlightDescriptor.for_path(), []) + fi2 = flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], total_records=-1, total_bytes=-1) + fi3 = flight.FlightInfo( + pa.schema([]), + flight.FlightDescriptor.for_path(), [], total_records=None, total_bytes=None) + + assert fi1.total_records == -1 + assert fi2.total_records == -1 + assert fi3.total_records == -1 + + assert fi1.total_bytes == -1 + assert fi2.total_bytes == -1 + assert fi3.total_bytes == -1 + + def test_flight_server_location_argument(): locations = [ None, @@ -1062,12 +1169,30 @@ def test_flight_get_info(): with GetInfoFlightServer() as server: client = FlightClient(('localhost', server.port)) info = client.get_flight_info(flight.FlightDescriptor.for_command(b'')) - assert info.total_records == -1 - assert info.total_bytes == -1 + assert info.total_records == 1 + assert info.total_bytes == 42 + assert info.ordered + assert info.app_metadata == b"info app metadata" assert info.schema == pa.schema([('a', pa.int32())]) assert len(info.endpoints) == 2 assert len(info.endpoints[0].locations) == 1 + assert info.endpoints[0].expiration_time is None + assert info.endpoints[0].app_metadata == b"" assert info.endpoints[0].locations[0] == flight.Location('grpc://test') + # on macOS, system_clock::duration is milliseconds + # on Windows, system_clock::duration is 100 nanoseconds + # on Linux, system_clock::duration is nanoseconds + ts = None + if pa._platform.system() == 'Darwin': + ts = "2023-04-05T12:34:56.789012000+00:00" + elif pa._platform.system() == 'Windows': + ts = "2023-04-05T12:34:56.789012300+00:00" + elif pa._platform.system() == 'Linux': + ts = "2023-04-05T12:34:56.789012345+00:00" + if ts is not None: + assert info.endpoints[1].expiration_time == \ + pa.scalar(ts).cast(pa.timestamp("ns", "UTC")) + assert info.endpoints[1].app_metadata == b"endpoint app metadata" assert info.endpoints[1].locations[0] == \ flight.Location.for_grpc_tcp('localhost', 5005) @@ -1707,21 +1832,29 @@ def test_roundtrip_types(): flight.FlightEndpoint( b'', [flight.Location.for_grpc_tcp('localhost', 5005)], + pa.scalar("2023-04-05T12:34:56.789012345").cast(pa.timestamp("ns")), + b'endpoint app metadata' ), ], - -1, - -1, + 1, + 42, + True, + b'test app metadata' ) info2 = flight.FlightInfo.deserialize(info.serialize()) assert info.schema == info2.schema assert info.descriptor == info2.descriptor assert info.total_bytes == info2.total_bytes assert info.total_records == info2.total_records + assert info.ordered == info2.ordered + assert info.app_metadata == info2.app_metadata assert info.endpoints == info2.endpoints endpoint = flight.FlightEndpoint( ticket, - ['grpc://test', flight.Location.for_grpc_tcp('localhost', 5005)] + ['grpc://test', flight.Location.for_grpc_tcp('localhost', 5005)], + pa.scalar("2023-04-05T12:34:56").cast(pa.timestamp("s")), + b'endpoint app metadata' ) assert endpoint == flight.FlightEndpoint.deserialize(endpoint.serialize()) @@ -2366,8 +2499,7 @@ def get_flight_info(self, 
context, descriptor): return flight.FlightInfo( pa.schema([]), descriptor, - [], - -1, -1 + [] ) class HeadersTrailersMiddlewareFactory(ClientMiddlewareFactory): diff --git a/python/pyarrow/tests/test_flight_async.py b/python/pyarrow/tests/test_flight_async.py index f3cd1bbb58e2f..197c78cc07365 100644 --- a/python/pyarrow/tests/test_flight_async.py +++ b/python/pyarrow/tests/test_flight_async.py @@ -29,9 +29,7 @@ class ExampleServer(flight.FlightServerBase): simple_info = flight.FlightInfo( pyarrow.schema([("a", "int32")]), flight.FlightDescriptor.for_command(b"simple"), - [], - -1, - -1, + [] ) def get_flight_info(self, context, descriptor):