diff --git a/.clang-format b/.clang-format index 9448dc8d8c80d..abd823c103904 100644 --- a/.clang-format +++ b/.clang-format @@ -19,3 +19,4 @@ BasedOnStyle: Google ColumnLimit: 90 DerivePointerAlignment: false IncludeBlocks: Preserve +IndentPPDirectives: AfterHash diff --git a/.dockerignore b/.dockerignore index 3791cca95e3fe..1f1715d8e833d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -27,11 +27,11 @@ # include explicitly !ci/** !c_glib/Gemfile -!dev/archery/setup.py !dev/release/setup-*.sh !docs/requirements*.txt +!go/go.mod +!go/go.sum !python/requirements*.txt -!python/manylinux1/** !r/DESCRIPTION !ruby/Gemfile !ruby/red-arrow/Gemfile @@ -46,20 +46,3 @@ !ruby/red-parquet/Gemfile !ruby/red-parquet/lib/parquet/version.rb !ruby/red-parquet/red-parquet.gemspec -!ruby/red-plasma/Gemfile -!ruby/red-plasma/lib/plasma/version.rb -!ruby/red-plasma/red-plasma.gemspec -!rust/Cargo.toml -!rust/benchmarks/Cargo.toml -!rust/arrow/Cargo.toml -!rust/arrow/benches -!rust/arrow-flight/Cargo.toml -!rust/parquet/Cargo.toml -!rust/parquet/build.rs -!rust/parquet_derive/Cargo.toml -!rust/parquet_derive_test/Cargo.toml -!rust/datafusion/Cargo.toml -!rust/datafusion/benches -!rust/integration-testing/Cargo.toml -!go/go.mod -!go/go.sum \ No newline at end of file diff --git a/.env b/.env index af647fc8b7a7f..c8c236d5ac44b 100644 --- a/.env +++ b/.env @@ -71,6 +71,7 @@ NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 +PYTHON_IMAGE_TAG=3.8 R=4.4 SPARK=master TURBODBC=latest diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index b016f7d11b9fa..e448209056d78 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -20,12 +20,14 @@ name: Archery & Crossbow on: push: paths: + - '.dockerignore' - '.github/workflows/archery.yml' - 'dev/archery/**' - 'dev/tasks/**' - 'docker-compose.yml' pull_request: paths: + - '.dockerignore' - '.github/workflows/archery.yml' - 'dev/archery/**' - 'dev/tasks/**' @@ -58,7 +60,7 @@ jobs: shell: bash run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: '3.9' - name: Install pygit2 binary wheel diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 1138c0a02f812..b7af4c5800835 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -41,7 +41,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index fd23e0cf217e6..4a01d2f8e3aab 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -20,6 +20,7 @@ name: C++ on: push: paths: + - '.dockerignore' - '.github/workflows/cpp.yml' - 'ci/conda_env_*' - 'ci/docker/**' @@ -35,6 +36,7 @@ on: - 'testing' pull_request: paths: + - '.dockerignore' - '.github/workflows/cpp.yml' - 'ci/conda_env_*' - 'ci/docker/**' @@ -243,7 +245,7 @@ jobs: $(brew --prefix bash)/bin/bash \ ci/scripts/install_minio.sh latest ${ARROW_HOME} - name: Set up Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: 3.12 - name: Install Google Cloud Storage Testbench @@ -409,12 +411,10 @@ jobs: ARROW_WITH_SNAPPY: ON 
ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - # Don't use preinstalled Boost by empty BOOST_ROOT and - # -DBoost_NO_BOOST_CMAKE=ON + # Don't use preinstalled Boost by empty BOOST_ROOT BOOST_ROOT: "" ARROW_CMAKE_ARGS: >- -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}} - -DBoost_NO_BOOST_CMAKE=ON -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON # We can't use unity build because we don't have enough memory on # GitHub Actions. @@ -464,7 +464,7 @@ jobs: https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z chmod +x /usr/local/bin/minio.exe - name: Set up Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 id: python-install with: python-version: 3.9 @@ -472,7 +472,7 @@ jobs: shell: msys2 {0} env: PIPX_BIN_DIR: /usr/local/bin - PIPX_PYTHON: ${{ steps.python-install.outputs.python-path }} + PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }} run: | ci/scripts/install_gcs_testbench.sh default - name: Test diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 6e8548dc960f4..c618350affbeb 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -108,7 +108,7 @@ jobs: with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Checkout Arrow diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index cc3ff6330746d..1cc8d993498b6 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -45,7 +45,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install pre-commit @@ -104,7 +104,7 @@ jobs: with: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.12' - name: Install Ruby diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 25db1c39ad89e..1219f7526f9f2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -52,7 +52,7 @@ jobs: key: debian-docs-${{ hashFiles('cpp/**') }} restore-keys: debian-docs- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index ea7fe5d02d7b8..7d540b7cecdc9 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -20,6 +20,7 @@ name: Docs on: pull_request: paths: + - '.dockerignore' - 'docs/**' - '.github/workflows/docs_light.yml' - 'ci/docker/conda.dockerfile' @@ -58,7 +59,7 @@ jobs: key: conda-docs-${{ hashFiles('cpp/**') }} restore-keys: conda-docs- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index b9a19d182d5c4..d463549206471 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -20,6 
+20,7 @@ name: Go on: push: paths: + - '.dockerignore' - '.github/workflows/go.yml' - 'ci/docker/*_go.dockerfile' - 'ci/scripts/go_*' @@ -27,6 +28,7 @@ on: - 'go/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/go.yml' - 'ci/docker/*_go.dockerfile' - 'ci/docker/**' @@ -207,7 +209,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -247,7 +249,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -339,7 +341,7 @@ jobs: github.event_name == 'push' && github.repository == 'apache/arrow' && github.ref_name == 'main' - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.10' - name: Run Benchmarks diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 43f8af0a600d8..ecf89bff8f600 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -20,6 +20,7 @@ name: Integration on: push: paths: + - '.dockerignore' - '.github/workflows/integration.yml' - 'ci/**' - 'dev/archery/**' @@ -33,6 +34,7 @@ on: - 'format/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/integration.yml' - 'ci/**' - 'dev/archery/**' @@ -89,7 +91,7 @@ jobs: key: conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 0317879b580ba..57f834bcbabee 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -20,6 +20,7 @@ name: Java on: push: paths: + - '.dockerignore' - '.github/workflows/java.yml' - 'ci/docker/*java*' - 'ci/scripts/java*.sh' @@ -29,6 +30,7 @@ on: - 'java/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/java.yml' - 'ci/docker/*java*' - 'ci/scripts/java*.sh' @@ -76,7 +78,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index c2bc679e681a2..f2ecc801dc724 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -20,6 +20,7 @@ name: Java JNI on: push: paths: + - '.dockerignore' - '.github/workflows/java_jni.yml' - 'ci/docker/**' - 'ci/scripts/cpp_build.sh' @@ -29,6 +30,7 @@ on: - 'java/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/java_jni.yml' - 'ci/docker/**' - 'ci/scripts/cpp_build.sh' @@ -70,7 +72,7 @@ jobs: key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} restore-keys: java-jni-manylinux-2014- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: 
actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -110,7 +112,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml index 72afb6dbf1c1d..0bf0c27288faf 100644 --- a/.github/workflows/java_nightly.yml +++ b/.github/workflows/java_nightly.yml @@ -58,7 +58,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 630bef61105f6..17b57c42b62f6 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -20,12 +20,14 @@ name: NodeJS on: push: paths: + - '.dockerignore' - '.github/workflows/js.yml' - 'ci/docker/*js.dockerfile' - 'ci/scripts/js_*' - 'js/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/js.yml' - 'ci/docker/*js.dockerfile' - 'ci/scripts/js_*' @@ -54,7 +56,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml index 7dd06b6aeec09..bbb1a2d7228d0 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -82,7 +82,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/pr_review_trigger.yml b/.github/workflows/pr_review_trigger.yml index 0cd89b3206715..68f922ce8b4d9 100644 --- a/.github/workflows/pr_review_trigger.yml +++ b/.github/workflows/pr_review_trigger.yml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: "Upload PR review Payload" - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4.4.0 with: path: "${{ github.event_path }}" name: "pr_review_payload" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 90d3a50af3705..6e83b727593b4 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -20,6 +20,7 @@ name: Python on: push: paths: + - '.dockerignore' - '.github/workflows/python.yml' - 'ci/**' - 'cpp/**' @@ -27,6 +28,7 @@ on: - 'python/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/python.yml' - 'ci/**' - 'cpp/**' @@ -107,7 +109,7 @@ jobs: key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} restore-keys: ${{ matrix.cache }}- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -177,7 +179,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@v5.1.1 + uses: 
actions/setup-python@v5.2.0
         with:
           python-version: '3.11'
       - name: Install Dependencies
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 2820d42470bca..fbc2ebe0bc5f1 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -20,6 +20,7 @@ name: R
 on:
   push:
     paths:
+      - '.dockerignore'
       - ".github/workflows/r.yml"
       - "ci/docker/**"
       - "ci/etc/rprofile"
@@ -32,6 +33,7 @@ on:
       - "r/**"
   pull_request:
     paths:
+      - '.dockerignore'
       - ".github/workflows/r.yml"
       - "ci/docker/**"
       - "ci/etc/rprofile"
@@ -146,7 +148,7 @@ jobs:
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h') }}-
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -169,9 +171,9 @@ jobs:
         if: always()
       - name: Save the test output
         if: always()
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
+        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
         with:
-          name: test-output
+          name: test-output-${{ matrix.ubuntu }}-${{ matrix.r }}
           path: r/check/arrow.Rcheck/tests/testthat.Rout*
       - name: Docker Push
         if: >-
@@ -206,7 +208,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -230,9 +232,9 @@ jobs:
         if: always()
       - name: Save the test output
         if: always()
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
+        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
         with:
-          name: test-output
+          name: test-output-bundled
           path: r/check/arrow.Rcheck/tests/testthat.Rout*
       - name: Docker Push
         if: >-
@@ -292,7 +294,7 @@ jobs:
         # So that they're unique when multiple are downloaded in the next step
         shell: bash
         run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
         with:
           name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
           path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
@@ -330,7 +332,7 @@ jobs:
           echo "$HOME/.local/bin" >> $GITHUB_PATH
       - run: mkdir r/windows
       - name: Download artifacts
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4.1.7
         with:
           name: libarrow-rtools40-ucrt64.zip
           path: r/windows
diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml
index 1ec071b6bbb5e..9817e41d3b61d 100644
--- a/.github/workflows/r_nightly.yml
+++ b/.github/workflows/r_nightly.yml
@@ -60,7 +60,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index e4d650e74a8ad..c4a7f31f4a94c 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -20,6 +20,7 @@ name: C GLib & Ruby
 on:
   push:
     paths:
+      - '.dockerignore'
       - '.github/workflows/ruby.yml'
       - 'ci/docker/**'
       - 'ci/scripts/c_glib_*'
@@ -33,6 +34,7 @@
on: - 'ruby/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/ruby.yml' - 'ci/docker/**' - 'ci/scripts/c_glib_*' @@ -83,7 +85,7 @@ jobs: key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -406,7 +408,10 @@ jobs: -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" - name: Build C++ vcpkg dependencies run: | - vcpkg\vcpkg.exe install --triplet $env:VCPKG_TRIPLET --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed + vcpkg\vcpkg.exe install ` + --triplet $env:VCPKG_TRIPLET ` + --x-manifest-root cpp ` + --x-install-root build\cpp\vcpkg_installed - name: Build C++ shell: cmd run: | diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml index 1b3c9eca1814a..86eb113dfc833 100644 --- a/.github/workflows/swift.yml +++ b/.github/workflows/swift.yml @@ -20,6 +20,7 @@ name: Swift on: push: paths: + - '.dockerignore' - '.github/workflows/swift.yml' - 'ci/docker/*swift*' - 'ci/scripts/swift_*' @@ -27,6 +28,7 @@ on: - 'swift/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/swift.yml' - 'ci/docker/*swift*' - 'ci/scripts/swift_*' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf0bcde14622a..91017969eb502 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,6 +78,26 @@ repos: ?^cpp/src/generated/| ?^cpp/thirdparty/| ) + - repo: https://github.com/cpplint/cpplint + rev: 1.6.1 + hooks: + - id: cpplint + name: C++ Lint + args: + - "--verbose=2" + types_or: + - c++ + files: >- + ^cpp/ + exclude: >- + ( + ?\.grpc\.fb\.(cc|h)$| + ?\.pb\.(cc|h)$| + ?_generated.*\.(cc|h)$| + ?^cpp/src/arrow/vendored/| + ?^cpp/src/generated/| + ?^cpp/thirdparty/| + ) - repo: https://github.com/pre-commit/mirrors-clang-format rev: v14.0.6 hooks: diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 0000000000000..2f47b4dbf57b7 --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
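+#
+# Each "filter" line below subtracts one cpplint category; cpplint merges
+# every CPPLINT.cfg it finds while walking up from a checked file's
+# directory toward the repository root. A roughly equivalent one-off
+# invocation would be:
+#
+#   cpplint --linelength=90 --filter=-build/c++11,-whitespace/comments foo.cc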
+
+filter = -build/c++11
+filter = -build/header_guard
+filter = -build/include_order
+filter = -build/include_what_you_use
+filter = -readability/alt_tokens
+# readability/casting is disabled as it aggressively warns about
+# functions with names like "int32", so "int32(x)", where int32 is a
+# function name, warns with a false positive
+filter = -readability/casting
+filter = -readability/todo
+filter = -runtime/references
+filter = -whitespace/comments
+linelength = 90
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index eb035d887a158..f0084894e19dc 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -44,7 +44,7 @@ RUN mamba install -q -y \
 
 # We want to install the GCS testbench using the Conda base environment's Python,
 # because the test environment's Python may later change.
-ENV PIPX_PYTHON=/opt/conda/bin/python3
+ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3
 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
 RUN /arrow/ci/scripts/install_gcs_testbench.sh default
diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile
index 443ff9c53cbcb..09883f9780a36 100644
--- a/ci/docker/python-wheel-manylinux-test.dockerfile
+++ b/ci/docker/python-wheel-manylinux-test.dockerfile
@@ -19,13 +19,19 @@ ARG arch
 ARG python_image_tag
 FROM ${arch}/python:${python_image_tag}
 
-# RUN pip install --upgrade pip
-
 # pandas doesn't provide wheel for aarch64 yet, so cache the compiled
 # test dependencies in a docker image
 COPY python/requirements-wheel-test.txt /arrow/python/
 RUN pip install -r /arrow/python/requirements-wheel-test.txt
 
+# Install the GCS testbench with the system Python
+RUN apt-get update -y -q && \
+    apt-get install -y -q \
+        build-essential \
+        python3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
-ARG python
-RUN PYTHON_VERSION=${python} /arrow/ci/scripts/install_gcs_testbench.sh default
+ENV PIPX_PYTHON=/usr/bin/python3 PIPX_PIP_ARGS=--prefer-binary
+RUN /arrow/ci/scripts/install_gcs_testbench.sh default
diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile
index 42f088fd8a22a..5cc1711608c03 100644
--- a/ci/docker/python-wheel-manylinux.dockerfile
+++ b/ci/docker/python-wheel-manylinux.dockerfile
@@ -100,6 +100,9 @@ RUN vcpkg install \
         --x-feature=parquet \
         --x-feature=s3
 
+# Make sure auditwheel is up-to-date
+RUN pipx upgrade auditwheel
+
 # Configure Python for applications running in the bash shell of this Dockerfile
 ARG python=3.8
 ENV PYTHON_VERSION=${python}
diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh
index 78826e94d3294..48a5858a358c9 100755
--- a/ci/scripts/install_gcs_testbench.sh
+++ b/ci/scripts/install_gcs_testbench.sh
@@ -39,18 +39,21 @@ if [[ "${version}" -eq "default" ]]; then
   version="v0.39.0"
 fi
 
-: ${PIPX_PYTHON:=$(which python3)}
+# The Python to install pipx with
+: ${PIPX_BASE_PYTHON:=$(which python3)}
+# The Python to install the GCS testbench with
+: ${PIPX_PYTHON:=${PIPX_BASE_PYTHON:-$(which python3)}}
 
 export PIP_BREAK_SYSTEM_PACKAGES=1
-${PIPX_PYTHON} -m pip install -U pipx
+${PIPX_BASE_PYTHON} -m pip install -U pipx
 
-# This script is run with PYTHON undefined in some places,
-# but those only use older pythons.
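# With the array-based flags introduced below, a root invocation with
# PIPX_PIP_ARGS=--prefer-binary expands to roughly:
#
#   python3 -m pipx install --verbose --python /usr/bin/python3 --global \
#     --pip-args '--prefer-binary' \
#     https://github.com/googleapis/storage-testbench/archive/v0.39.0.tar.gz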
-if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
-  pipx_flags=--verbose
-  if [[ $(id -un) == "root" ]]; then
-    # Install globally as /root/.local/bin is typically not in $PATH
-    pipx_flags="${pipx_flags} --global"
-  fi
-  ${PIPX_PYTHON} -m pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
+pipx_flags=(--verbose --python "${PIPX_PYTHON}")
+if [[ $(id -un) == "root" ]]; then
+  # Install globally as /root/.local/bin is typically not in $PATH
+  pipx_flags+=(--global)
 fi
+if [[ -n "${PIPX_PIP_ARGS}" ]]; then
+  pipx_flags+=(--pip-args "'${PIPX_PIP_ARGS}'")
+fi
+${PIPX_BASE_PYTHON} -m pipx install "${pipx_flags[@]}" \
+  "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 92b962f1740bd..d2c392e6b9db3 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -150,7 +150,6 @@ echo "=== (${PYTHON_VERSION}) Building wheel ==="
 export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
 export PYARROW_BUNDLE_ARROW_CPP=1
 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
-export PYARROW_INSTALL_TESTS=1
 export PYARROW_WITH_ACERO=${ARROW_ACERO}
 export PYARROW_WITH_AZURE=${ARROW_AZURE}
 export PYARROW_WITH_DATASET=${ARROW_DATASET}
diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh
index aa86494a9d47d..885019ff3049f 100755
--- a/ci/scripts/python_wheel_manylinux_build.sh
+++ b/ci/scripts/python_wheel_manylinux_build.sh
@@ -140,7 +140,6 @@ echo "=== (${PYTHON_VERSION}) Building wheel ==="
 export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
 export PYARROW_BUNDLE_ARROW_CPP=1
 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
-export PYARROW_INSTALL_TESTS=1
 export PYARROW_WITH_ACERO=${ARROW_ACERO}
 export PYARROW_WITH_AZURE=${ARROW_AZURE}
 export PYARROW_WITH_DATASET=${ARROW_DATASET}
@@ -181,5 +180,5 @@ popd
 rm -rf dist/temp-fix-wheel
 
 echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ==="
-auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels
+auditwheel repair dist/pyarrow-*.whl -w repaired_wheels
 popd
diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh
index cf87a17056783..6bdc3d3621e14 100755
--- a/ci/scripts/python_wheel_unix_test.sh
+++ b/ci/scripts/python_wheel_unix_test.sh
@@ -34,6 +34,7 @@ source_dir=${1}
 : ${ARROW_S3:=ON}
 : ${ARROW_SUBSTRAIT:=ON}
 : ${CHECK_IMPORTS:=ON}
+: ${CHECK_WHEEL_CONTENT:=ON}
 : ${CHECK_UNITTESTS:=ON}
 : ${INSTALL_PYARROW:=ON}
 
@@ -87,6 +88,11 @@ import pyarrow.parquet
   fi
 fi
 
+if [ "${CHECK_WHEEL_CONTENT}" == "ON" ]; then
+  python ${source_dir}/ci/scripts/python_wheel_validate_contents.py \
+    --path ${source_dir}/python/repaired_wheels
+fi
+
 if [ "${CHECK_UNITTESTS}" == "ON" ]; then
   # Install testing dependencies
   pip install -U -r ${source_dir}/python/requirements-wheel-test.txt
diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py
new file mode 100644
index 0000000000000..22b3a890f036b
--- /dev/null
+++ b/ci/scripts/python_wheel_validate_contents.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import argparse +from pathlib import Path +import re +import zipfile + + +def validate_wheel(path): + p = Path(path) + wheels = list(p.glob('*.whl')) + error_msg = f"{len(wheels)} wheels found but only 1 expected ({wheels})" + assert len(wheels) == 1, error_msg + f = zipfile.ZipFile(wheels[0]) + outliers = [ + info.filename for info in f.filelist if not re.match( + r'(pyarrow/|pyarrow-[-.\w\d]+\.dist-info/)', info.filename + ) + ] + assert not outliers, f"Unexpected contents in wheel: {sorted(outliers)}" + print(f"The wheel: {wheels[0]} seems valid.") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--path", type=str, required=True, + help="Directory where wheel is located") + args = parser.parse_args() + validate_wheel(args.path) + + +if __name__ == '__main__': + main() diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 54f02ec6f6ed0..1f1d5dca721d9 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -106,7 +106,6 @@ echo "=== (%PYTHON_VERSION%) Building wheel ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% -set PYARROW_INSTALL_TESTS=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_DATASET=%ARROW_DATASET% set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT% diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index cac3f18434b6c..de5a2c2e965cb 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -64,6 +64,9 @@ set PYTHON_CMD=py -%PYTHON% %PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1 %PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1 +@REM Validate wheel contents +%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\dist || exit /B 1 + @rem Download IANA Timezone Database for ORC C++ curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B mkdir %USERPROFILE%\Downloads\test\tzdata diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5ead9e4b063cd..423744c388471 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -301,7 +301,8 @@ add_custom_target(lint --cpplint_binary ${CPPLINT_BIN} ${COMMON_LINT_OPTIONS} - ${ARROW_LINT_QUIET}) + ${ARROW_LINT_QUIET} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..) 
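# cpplint resolves CPPLINT.cfg by walking up from each checked file, so the
# lint target above runs one level up (the repository root) to pick up the
# new top-level CPPLINT.cfg alongside the per-run command-line options.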
# # "make format" and "make check-format" targets diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py index 76c0fe0aefaca..a81acf2eb2ff9 100755 --- a/cpp/build-support/run_cpplint.py +++ b/cpp/build-support/run_cpplint.py @@ -26,24 +26,6 @@ from functools import partial -# NOTE(wesm): -# -# * readability/casting is disabled as it aggressively warns about functions -# with names like "int32", so "int32(x)", where int32 is a function name, -# warns with -_filters = ''' --whitespace/comments --readability/casting --readability/todo --readability/alt_tokens --build/header_guard --build/c++11 --build/include_what_you_use --runtime/references --build/include_order -'''.split() - - def _get_chunk_key(filenames): # lists are not hashable so key on the first filename in a chunk return filenames[0] @@ -87,8 +69,6 @@ def _check_some_files(completed_processes, filenames): cmd = [ arguments.cpplint_binary, '--verbose=2', - '--linelength=90', - '--filter=' + ','.join(_filters) ] if (arguments.cpplint_binary.endswith('.py') and platform.system() == 'Windows'): diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 63e2c036c9a6f..b31037a973279 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -259,7 +259,7 @@ macro(resolve_dependency DEPENDENCY_NAME) IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) - set(multi_value_args COMPONENTS PC_PACKAGE_NAMES) + set(multi_value_args COMPONENTS OPTIONAL_COMPONENTS PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -287,6 +287,9 @@ macro(resolve_dependency DEPENDENCY_NAME) if(ARG_COMPONENTS) list(APPEND FIND_PACKAGE_ARGUMENTS COMPONENTS ${ARG_COMPONENTS}) endif() + if(ARG_OPTIONAL_COMPONENTS) + list(APPEND FIND_PACKAGE_ARGUMENTS OPTIONAL_COMPONENTS ${ARG_OPTIONAL_COMPONENTS}) + endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") find_package(${FIND_PACKAGE_ARGUMENTS}) set(COMPATIBLE ${${PACKAGE_NAME}_FOUND}) @@ -1289,15 +1292,19 @@ if(ARROW_USE_BOOST) set(Boost_USE_STATIC_LIBS ON) endif() if(ARROW_BOOST_REQUIRE_LIBRARY) - set(ARROW_BOOST_COMPONENTS system filesystem) + set(ARROW_BOOST_COMPONENTS filesystem system) + set(ARROW_BOOST_OPTIONAL_COMPONENTS process) else() set(ARROW_BOOST_COMPONENTS) + set(ARROW_BOOST_OPTIONAL_COMPONENTS) endif() resolve_dependency(Boost REQUIRED_VERSION ${ARROW_BOOST_REQUIRED_VERSION} COMPONENTS ${ARROW_BOOST_COMPONENTS} + OPTIONAL_COMPONENTS + ${ARROW_BOOST_OPTIONAL_COMPONENTS} IS_RUNTIME_DEPENDENCY # libarrow.so doesn't depend on libboost*. FALSE) @@ -1316,14 +1323,35 @@ if(ARROW_USE_BOOST) endif() endforeach() - if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # boost/process/detail/windows/handle_workaround.hpp doesn't work - # without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't - # provide __kernel_entry without winternl.h. 
- # - # See also: - # https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp - target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1") + if(TARGET Boost::process) + # Boost >= 1.86 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V1") + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + else() + # Boost < 1.86 + add_library(Boost::process INTERFACE IMPORTED) + if(TARGET Boost::filesystem) + target_link_libraries(Boost::process INTERFACE Boost::filesystem) + endif() + if(TARGET Boost::system) + target_link_libraries(Boost::process INTERFACE Boost::system) + endif() + if(TARGET Boost::headers) + target_link_libraries(Boost::process INTERFACE Boost::headers) + endif() + if(Boost_VERSION VERSION_GREATER_EQUAL 1.80) + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + # Boost < 1.86 has a bug that + # boost::process::v2::process_environment::on_setup() isn't + # defined. We need to build Boost Process source to define it. + # + # See also: + # https://github.com/boostorg/process/issues/312 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE") + if(WIN32) + target_link_libraries(Boost::process INTERFACE bcrypt ntdll) + endif() + endif() endif() message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}") diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 65343df1291ba..01ac813f4713b 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -644,9 +644,13 @@ else() endif() set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) -set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) -set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static - ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process) +set(ARROW_TESTING_STATIC_LINK_LIBS + arrow::flatbuffers + RapidJSON + Boost::process + arrow_static + ${ARROW_GTEST_GTEST}) set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) # that depend on gtest @@ -667,9 +671,10 @@ set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc testing/fixed_width_test_util.cc + testing/generator.cc testing/gtest_util.cc + testing/process.cc testing/random.cc - testing/generator.cc testing/util.cc) # diff --git a/cpp/src/arrow/acero/aggregate_benchmark.cc b/cpp/src/arrow/acero/aggregate_benchmark.cc index 854862e3e48ca..c0dfba66336af 100644 --- a/cpp/src/arrow/acero/aggregate_benchmark.cc +++ b/cpp/src/arrow/acero/aggregate_benchmark.cc @@ -165,11 +165,11 @@ struct SumSentinelUnrolled : public Summer { static void Sum(const ArrayType& array, SumState* state) { SumState local; -#define SUM_NOT_NULL(ITEM) \ - do { \ - local.total += values[i + ITEM] * Traits::NotNull(values[i + ITEM]); \ - local.valid_count++; \ - } while (0) +# define SUM_NOT_NULL(ITEM) \ + do { \ + local.total += values[i + ITEM] * Traits::NotNull(values[i + ITEM]); \ + local.valid_count++; \ + } while (0) const auto values = array.raw_values(); const auto length = array.length(); @@ -185,7 +185,7 @@ struct SumSentinelUnrolled : public Summer { SUM_NOT_NULL(7); } -#undef SUM_NOT_NULL +# undef SUM_NOT_NULL for (int64_t i = length_rounded * 8; i < length; ++i) { local.total += values[i] * Traits::NotNull(values[i]); @@ -256,7 +256,7 @@ struct SumBitmapVectorizeUnroll : public Summer { 
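// The preprocessor churn below (and in the headers that follow) is purely
// mechanical: with IndentPPDirectives: AfterHash, clang-format indents
// directives nested inside a conditional after the '#', for example:
//
//   #ifdef NDEBUG
//   #  define DEBUG_SYNC(...)
//   #endif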
     for (int64_t i = 0; i < length_rounded; i += 8) {
       const uint8_t valid_byte = bitmap[i / 8];
 
-#define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1))
+#  define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1))
 
       if (valid_byte < 0xFF) {
         // Some nulls
@@ -277,7 +277,7 @@ struct SumBitmapVectorizeUnroll : public Summer {
       }
     }
 
-#undef SUM_SHIFT
+#  undef SUM_SHIFT
 
     for (int64_t i = length_rounded; i < length; ++i) {
       if (bit_util::GetBit(bitmap, i)) {
diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc b/cpp/src/arrow/acero/aggregate_node_test.cc
index d398fb24b73d5..c623271db9fb4 100644
--- a/cpp/src/arrow/acero/aggregate_node_test.cc
+++ b/cpp/src/arrow/acero/aggregate_node_test.cc
@@ -210,5 +210,57 @@ TEST(GroupByNode, NoSkipNulls) {
   AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, out_batches.batches);
 }
 
+TEST(ScalarAggregateNode, AnyAll) {
+  // GH-43768: boolean_any and boolean_all with constant input should work
+  // correctly when min_count != 0.
+  std::shared_ptr<Schema> in_schema = schema({field("not_used", int32())});
+  std::shared_ptr<Schema> out_schema = schema({field("agg_out", boolean())});
+  struct AnyAllCase {
+    std::string batches_json;
+    Expression literal;
+    std::string expected_json;
+    bool skip_nulls = false;
+    uint32_t min_count = 2;
+  };
+  std::vector<AnyAllCase> cases{
+      {"[[42], [42], [42], [42]]", literal(true), "[[true]]"},
+      {"[[42], [42], [42], [42]]", literal(false), "[[false]]"},
+      {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]"},
+      {"[[42]]", literal(true), "[[null]]"},
+      {"[[42], [42], [42]]", literal(true), "[[true]]"},
+      {"[[42], [42], [42]]", literal(true), "[[null]]", /*skip_nulls=*/false,
+       /*min_count=*/4},
+      {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]",
+       /*skip_nulls=*/true},
+  };
+  for (const AnyAllCase& any_all_case : cases) {
+    for (auto func_name : {"any", "all"}) {
+      std::vector<ExecBatch> batches{
+          ExecBatchFromJSON({int32()}, any_all_case.batches_json)};
+      std::vector<Aggregate> aggregates = {
+          Aggregate(func_name,
+                    std::make_shared<ScalarAggregateOptions>(
+                        /*skip_nulls=*/any_all_case.skip_nulls,
+                        /*min_count=*/any_all_case.min_count),
+                    FieldRef("literal"))};
+
+      // And a projection to make the input including a Scalar Boolean
+      Declaration plan = Declaration::Sequence(
+          {{"exec_batch_source", ExecBatchSourceNodeOptions(in_schema, batches)},
+           {"project", ProjectNodeOptions({any_all_case.literal}, {"literal"})},
+           {"aggregate", AggregateNodeOptions(aggregates)}});
+
+      ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches,
+                           DeclarationToExecBatches(plan));
+
+      ExecBatch expected_batch =
+          ExecBatchFromJSON({boolean()}, any_all_case.expected_json);
+
+      AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch},
+                                          out_batches.batches);
+    }
+  }
+}
+
 }  // namespace acero
 }  // namespace arrow
diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc
index 2248362241cd7..c4f11d01f3d5c 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -34,7 +34,7 @@
 #include "arrow/acero/options.h"
 #include "arrow/acero/unmaterialized_table_internal.h"
 #ifndef NDEBUG
-#include "arrow/acero/options_internal.h"
+#  include "arrow/acero/options_internal.h"
 #endif
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/schema_util.h"
@@ -42,7 +42,7 @@
 #include "arrow/array/builder_binary.h"
 #include "arrow/array/builder_primitive.h"
 #ifndef NDEBUG
-#include "arrow/compute/function_internal.h"
+#  include "arrow/compute/function_internal.h"
 #endif
 #include "arrow/acero/time_series_util.h"
 #include "arrow/compute/key_hash_internal.h"
@@ -207,16 +207,16 @@ class DebugSync {
   std::unique_lock<std::mutex> debug_lock_;
 };
 
-#define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__)
-#define DEBUG_MANIP(manip) \
-  DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; })
-#define NDEBUG_EXPLICIT
-#define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__
+#  define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__)
+#  define DEBUG_MANIP(manip) \
+    DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; })
+#  define NDEBUG_EXPLICIT
+#  define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__
 #else
-#define DEBUG_SYNC(...)
-#define DEBUG_MANIP(...)
-#define NDEBUG_EXPLICIT explicit
-#define DEBUG_ADD(ndebug, ...) ndebug
+#  define DEBUG_SYNC(...)
+#  define DEBUG_MANIP(...)
+#  define NDEBUG_EXPLICIT explicit
+#  define DEBUG_ADD(ndebug, ...) ndebug
 #endif
 
 struct MemoStore {
diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc
index 555f580028fac..5d3e9fba08bbf 100644
--- a/cpp/src/arrow/acero/asof_join_node_test.cc
+++ b/cpp/src/arrow/acero/asof_join_node_test.cc
@@ -26,13 +26,13 @@
 #include "arrow/acero/exec_plan.h"
 #include "arrow/testing/future_util.h"
 #ifndef NDEBUG
-#include
+#  include
 #endif
 #include
 
 #include "arrow/acero/options.h"
 #ifndef NDEBUG
-#include "arrow/acero/options_internal.h"
+#  include "arrow/acero/options_internal.h"
 #endif
 #include "arrow/acero/map_node.h"
 #include "arrow/acero/query_context.h"
diff --git a/cpp/src/arrow/acero/bloom_filter.h b/cpp/src/arrow/acero/bloom_filter.h
index 50d07bfd948e0..530beaea64827 100644
--- a/cpp/src/arrow/acero/bloom_filter.h
+++ b/cpp/src/arrow/acero/bloom_filter.h
@@ -18,7 +18,7 @@
 #pragma once
 
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
-#include <immintrin.h>
+#  include <immintrin.h>
 #endif
 
 #include
diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc b/cpp/src/arrow/acero/bloom_filter_test.cc
index a2d6e9575a1aa..30cafd120caea 100644
--- a/cpp/src/arrow/acero/bloom_filter_test.cc
+++ b/cpp/src/arrow/acero/bloom_filter_test.cc
@@ -503,9 +503,9 @@ TEST(BloomFilter, Scaling) {
   num_build.push_back(4000000);
 
   std::vector<BloomFilterBuildStrategy> strategies;
-#ifdef ARROW_ENABLE_THREADING
+#  ifdef ARROW_ENABLE_THREADING
   strategies.push_back(BloomFilterBuildStrategy::PARALLEL);
-#endif
+#  endif
   strategies.push_back(BloomFilterBuildStrategy::SINGLE_THREADED);
 
   for (const auto hardware_flags : HardwareFlagsForTesting()) {
diff --git a/cpp/src/arrow/acero/options_internal.h b/cpp/src/arrow/acero/options_internal.h
index d4bf79a7cd008..fd3ea78116572 100644
--- a/cpp/src/arrow/acero/options_internal.h
+++ b/cpp/src/arrow/acero/options_internal.h
@@ -18,8 +18,8 @@
 #pragma once
 
 #ifndef NDEBUG
-#include
-#include
+#  include
+#  include
 #endif
 
 namespace arrow {
diff --git a/cpp/src/arrow/acero/visibility.h b/cpp/src/arrow/acero/visibility.h
index 02382232b69dd..21a697a56eca9 100644
--- a/cpp/src/arrow/acero/visibility.h
+++ b/cpp/src/arrow/acero/visibility.h
@@ -20,31 +20,31 @@
 #pragma once
 
 #if defined(_WIN32) || defined(__CYGWIN__)
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4251)
-#else
-#pragma GCC diagnostic ignored "-Wattributes"
-#endif
+#  if defined(_MSC_VER)
+#    pragma warning(push)
+#    pragma warning(disable : 4251)
+#  else
+#    pragma GCC diagnostic ignored "-Wattributes"
+#  endif
 
-#ifdef ARROW_ACERO_STATIC
-#define ARROW_ACERO_EXPORT
-#elif defined(ARROW_ACERO_EXPORTING)
-#define ARROW_ACERO_EXPORT __declspec(dllexport)
-#else
-#define ARROW_ACERO_EXPORT __declspec(dllimport)
-#endif
+#  ifdef ARROW_ACERO_STATIC
+#    define ARROW_ACERO_EXPORT
+#  elif defined(ARROW_ACERO_EXPORTING)
+#    define ARROW_ACERO_EXPORT __declspec(dllexport)
+#  else
+#    define ARROW_ACERO_EXPORT __declspec(dllimport)
+#  endif
 
-#define ARROW_ACERO_NO_EXPORT
+#  define ARROW_ACERO_NO_EXPORT
 
 #else  // Not Windows
 
-#ifndef ARROW_ACERO_EXPORT
-#define ARROW_ACERO_EXPORT __attribute__((visibility("default")))
-#endif
-#ifndef ARROW_ACERO_NO_EXPORT
-#define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden")))
-#endif
+#  ifndef ARROW_ACERO_EXPORT
+#    define ARROW_ACERO_EXPORT __attribute__((visibility("default")))
+#  endif
+#  ifndef ARROW_ACERO_NO_EXPORT
+#    define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden")))
+#  endif
 
 #endif  // Not-Windows
 
 #if defined(_MSC_VER)
-#pragma warning(pop)
+#  pragma warning(pop)
 #endif
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 25759f8471365..d16b6cfd2e97d 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -25,7 +25,7 @@
 #include
 
 #ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK
-#include <filesystem>
+#  include <filesystem>
 #endif
 
 #include "arrow/adapters/orc/util.h"
diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index 716ae0722069e..e4af67d7e5f0b 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -232,6 +232,14 @@ class ARROW_EXPORT Array {
   /// \return DeviceAllocationType
   DeviceAllocationType device_type() const { return data_->device_type(); }
 
+  /// \brief Return the statistics of this Array
+  ///
+  /// This just delegates to calling statistics on the underlying ArrayData
+  /// object which backs this Array.
+  ///
+  /// \return std::shared_ptr<ArrayStatistics>
+  std::shared_ptr<ArrayStatistics> statistics() const { return data_->statistics; }
+
  protected:
   Array() = default;
   ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 32806d9d2edb3..73e0c692432b6 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -3709,6 +3709,132 @@ TEST(TestSwapEndianArrayData, InvalidLength) {
   }
 }
 
+class TestArrayDataStatistics : public ::testing::Test {
+ public:
+  void SetUp() {
+    valids_ = {1, 0, 1, 1};
+    null_count_ = std::count(valids_.begin(), valids_.end(), 0);
+    null_buffer_ = *internal::BytesToBits(valids_);
+    values_ = {1, 0, 3, -4};
+    min_ = *std::min_element(values_.begin(), values_.end());
+    max_ = *std::max_element(values_.begin(), values_.end());
+    values_buffer_ = Buffer::FromVector(values_);
+    data_ = ArrayData::Make(int32(), values_.size(), {null_buffer_, values_buffer_},
+                            null_count_);
+    data_->statistics = std::make_shared<ArrayStatistics>();
+    data_->statistics->null_count = null_count_;
+    data_->statistics->min = min_;
+    data_->statistics->is_min_exact = true;
+    data_->statistics->max = max_;
+    data_->statistics->is_max_exact = true;
+  }
+
+ protected:
+  std::vector<uint8_t> valids_;
+  size_t null_count_;
+  std::shared_ptr<Buffer> null_buffer_;
+  std::vector<int32_t> values_;
+  int64_t min_;
+  int64_t max_;
+  std::shared_ptr<Buffer> values_buffer_;
+  std::shared_ptr<ArrayData> data_;
+};
+
+TEST_F(TestArrayDataStatistics, MoveConstructor) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data(std::move(copied_data));
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyConstructor) {
+  ArrayData copied_data(*data_);
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, MoveAssignment) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data;
+  moved_data = std::move(copied_data);
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyAssignment) {
+  ArrayData copied_data;
+  copied_data = *data_;
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyTo) {
+  ASSERT_OK_AND_ASSIGN(auto copied_data,
+                       data_->CopyTo(arrow::default_cpu_memory_manager()));
+
+  ASSERT_TRUE(copied_data->statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data->statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data->statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_TRUE(copied_data->statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data->statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_TRUE(copied_data->statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, Slice) {
+  auto sliced_data = data_->Slice(0, 1);
+  ASSERT_FALSE(sliced_data->statistics);
+}
+
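// A minimal caller-side sketch (names illustrative): Slice() always drops
// statistics, so a caller that knows its view covers the full range must
// re-attach them explicitly:
//
//   auto sliced = data->Slice(0, data->length);  // sliced->statistics == nullptr
//   sliced->statistics = data->statistics;       // valid only for a full-range view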
 template <typename TYPE>
 class TestPrimitiveArray : public ::testing::Test {
  public:
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index 83eeb56c496cf..8e29297a8c175 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -165,6 +165,8 @@ Result<std::shared_ptr<ArrayData>> CopyToImpl(const ArrayData& data,
     ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn));
   }
 
+  output->statistics = data.statistics;
+
   return output;
 }
 }  // namespace
@@ -195,6 +197,7 @@ std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
   } else {
     copy->null_count = null_count != 0 ? kUnknownNullCount : 0;
   }
+  copy->statistics = nullptr;
   return copy;
 }
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index e0508fe6980a7..1e6ee9a1d32ff 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -24,6 +24,7 @@
 #include
 #include
 
+#include "arrow/array/statistics.h"
 #include "arrow/buffer.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
@@ -152,7 +153,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(std::move(other.buffers)),
         child_data(std::move(other.child_data)),
-        dictionary(std::move(other.dictionary)) {
+        dictionary(std::move(other.dictionary)),
+        statistics(std::move(other.statistics)) {
     SetNullCount(other.null_count);
   }
 
@@ -163,7 +165,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(other.buffers),
         child_data(other.child_data),
-        dictionary(other.dictionary) {
+        dictionary(other.dictionary),
+        statistics(other.statistics) {
     SetNullCount(other.null_count);
   }
 
@@ -176,6 +179,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = std::move(other.buffers);
     child_data = std::move(other.child_data);
     dictionary = std::move(other.dictionary);
+    statistics = std::move(other.statistics);
     return *this;
   }
 
@@ -188,6 +192,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = other.buffers;
     child_data = other.child_data;
     dictionary = other.dictionary;
+    statistics = other.statistics;
     return *this;
   }
 
@@ -274,6 +279,18 @@ struct ARROW_EXPORT ArrayData {
   }
 
   /// \brief Construct a zero-copy slice of the data with the given offset and length
+  ///
+  /// The associated `ArrayStatistics` is always discarded in the sliced
+  /// `ArrayData`, because statistics computed for the original
+  /// `ArrayData` may no longer be valid for the slice. Callers that want
+  /// to carry over the original statistics must re-attach them themselves.
+  ///
+  /// This holds even when the requested slice covers the same range as
+  /// the original `ArrayData` (where the statistics would still be
+  /// valid): they are discarded in that case too. Use `Copy()` instead
+  /// for that case.
   std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
 
   /// \brief Input-checking variant of Slice
@@ -390,6 +407,9 @@ struct ARROW_EXPORT ArrayData {
 
   // The dictionary for this Array, if any. Only used for dictionary type
   std::shared_ptr<ArrayData> dictionary;
+
+  // The statistics for this Array.
+  std::shared_ptr<ArrayStatistics> statistics;
 };
 
 /// \brief A non-owning Buffer reference
diff --git a/cpp/src/arrow/c/abi.h b/cpp/src/arrow/c/abi.h
index 6abe866b5f6f6..db051fff5ff05 100644
--- a/cpp/src/arrow/c/abi.h
+++ b/cpp/src/arrow/c/abi.h
@@ -41,11 +41,11 @@ extern "C" {
 #endif
 
 #ifndef ARROW_C_DATA_INTERFACE
-#define ARROW_C_DATA_INTERFACE
+#  define ARROW_C_DATA_INTERFACE
 
-#define ARROW_FLAG_DICTIONARY_ORDERED 1
-#define ARROW_FLAG_NULLABLE 2
-#define ARROW_FLAG_MAP_KEYS_SORTED 4
+#  define ARROW_FLAG_DICTIONARY_ORDERED 1
+#  define ARROW_FLAG_NULLABLE 2
+#  define ARROW_FLAG_MAP_KEYS_SORTED 4
 
 struct ArrowSchema {
   // Array type description
@@ -83,7 +83,7 @@ struct ArrowArray {
 #endif  // ARROW_C_DATA_INTERFACE
 
 #ifndef ARROW_C_DEVICE_DATA_INTERFACE
-#define ARROW_C_DEVICE_DATA_INTERFACE
+#  define ARROW_C_DEVICE_DATA_INTERFACE
 
 // Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html
 
@@ -91,33 +91,33 @@ struct ArrowArray {
 typedef int32_t ArrowDeviceType;
 
 // CPU device, same as using ArrowArray directly
-#define ARROW_DEVICE_CPU 1
+#  define ARROW_DEVICE_CPU 1
 // CUDA GPU Device
-#define ARROW_DEVICE_CUDA 2
+#  define ARROW_DEVICE_CUDA 2
 // Pinned CUDA CPU memory by cudaMallocHost
-#define ARROW_DEVICE_CUDA_HOST 3
+#  define ARROW_DEVICE_CUDA_HOST 3
 // OpenCL Device
-#define ARROW_DEVICE_OPENCL 4
+#  define ARROW_DEVICE_OPENCL 4
 // Vulkan buffer for next-gen graphics
-#define ARROW_DEVICE_VULKAN 7
+#  define ARROW_DEVICE_VULKAN 7
 // Metal for Apple GPU
-#define ARROW_DEVICE_METAL 8
+#  define ARROW_DEVICE_METAL 8
 // Verilog simulator buffer
-#define ARROW_DEVICE_VPI 9
+#  define ARROW_DEVICE_VPI 9
 // ROCm GPUs for AMD GPUs
-#define ARROW_DEVICE_ROCM 10
+#  define ARROW_DEVICE_ROCM 10
 // Pinned ROCm CPU memory allocated by hipMallocHost
-#define ARROW_DEVICE_ROCM_HOST 11
+#  define ARROW_DEVICE_ROCM_HOST 11
 // Reserved for extension
-#define ARROW_DEVICE_EXT_DEV 12
+#  define ARROW_DEVICE_EXT_DEV 12
 // CUDA managed/unified memory allocated by cudaMallocManaged
-#define ARROW_DEVICE_CUDA_MANAGED 13
+#  define ARROW_DEVICE_CUDA_MANAGED 13
 // unified shared memory allocated on a oneAPI non-partitioned device.
-#define ARROW_DEVICE_ONEAPI 14
+#  define ARROW_DEVICE_ONEAPI 14
 // GPU support for next-gen WebGPU standard
-#define ARROW_DEVICE_WEBGPU 15
+#  define ARROW_DEVICE_WEBGPU 15
 // Qualcomm Hexagon DSP
-#define ARROW_DEVICE_HEXAGON 16
+#  define ARROW_DEVICE_HEXAGON 16
 
 struct ArrowDeviceArray {
   // the Allocated Array
@@ -138,7 +138,7 @@ struct ArrowDeviceArray {
 #endif  // ARROW_C_DEVICE_DATA_INTERFACE
 
 #ifndef ARROW_C_STREAM_INTERFACE
-#define ARROW_C_STREAM_INTERFACE
+#  define ARROW_C_STREAM_INTERFACE
 
 struct ArrowArrayStream {
   // Callback to get the stream type
@@ -179,7 +179,7 @@ struct ArrowArrayStream {
 #endif  // ARROW_C_STREAM_INTERFACE
 
 #ifndef ARROW_C_DEVICE_STREAM_INTERFACE
-#define ARROW_C_DEVICE_STREAM_INTERFACE
+#  define ARROW_C_DEVICE_STREAM_INTERFACE
 
 // Equivalent to ArrowArrayStream, but for ArrowDeviceArrays.
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 09bb524adbdf0..01fd56f631d99 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -48,7 +48,7 @@
 
 // TODO(GH-37221): Remove these ifdef checks when compute dependency is removed
 #ifdef ARROW_COMPUTE
-#include "arrow/compute/api_vector.h"
+# include "arrow/compute/api_vector.h"
 #endif
 
 namespace arrow {
diff --git a/cpp/src/arrow/c/dlpack_abi.h b/cpp/src/arrow/c/dlpack_abi.h
index 4af557a7ed5d7..fbe2a56a344b3 100644
--- a/cpp/src/arrow/c/dlpack_abi.h
+++ b/cpp/src/arrow/c/dlpack_abi.h
@@ -12,9 +12,9 @@
  * \brief Compatibility with C++
  */
 #ifdef __cplusplus
-#define DLPACK_EXTERN_C extern "C"
+# define DLPACK_EXTERN_C extern "C"
 #else
-#define DLPACK_EXTERN_C
+# define DLPACK_EXTERN_C
 #endif
 
 /*! \brief The current major version of dlpack */
@@ -25,13 +25,13 @@
 
 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
-#ifdef DLPACK_EXPORTS
-#define DLPACK_DLL __declspec(dllexport)
+# ifdef DLPACK_EXPORTS
+#  define DLPACK_DLL __declspec(dllexport)
+# else
+#  define DLPACK_DLL __declspec(dllimport)
+# endif
 #else
-#define DLPACK_DLL __declspec(dllimport)
-#endif
-#else
-#define DLPACK_DLL
+# define DLPACK_DLL
 #endif
 
 #include <stdint.h>
diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
index 55eec53ced1c7..854127480744e 100644
--- a/cpp/src/arrow/chunk_resolver.cc
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -60,42 +60,38 @@ inline std::vector<int64_t> MakeChunksOffsets(const std::vector<T>& chunks) {
 
 template <typename IndexType>
 void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets,
                        int64_t n_indices, const IndexType* logical_index_vec,
-                       IndexType* out_chunk_index_vec, IndexType chunk_hint,
-                       IndexType* out_index_in_chunk_vec) {
+                       TypedChunkLocation<IndexType>* out_chunk_location_vec,
+                       IndexType chunk_hint) {
   auto* offsets = reinterpret_cast<const uint64_t*>(signed_offsets);
   const auto num_chunks = static_cast<IndexType>(num_offsets - 1);
   // chunk_hint in [0, num_offsets) per the precondition.
   for (int64_t i = 0; i < n_indices; i++) {
-    const auto index = static_cast<uint64_t>(logical_index_vec[i]);
+    auto typed_logical_index = logical_index_vec[i];
+    const auto index = static_cast<uint64_t>(typed_logical_index);
+    // use or update chunk_hint
     if (index >= offsets[chunk_hint] &&
         (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) {
-      out_chunk_index_vec[i] = chunk_hint;  // hint is correct!
-      continue;
+      // hint is correct!
+    } else {
+      // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
+      auto chunk_index =
+          ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
+      chunk_hint = static_cast<IndexType>(chunk_index);
     }
-    // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
-    auto chunk_index =
-        ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
-    chunk_hint = static_cast<IndexType>(chunk_index);
-    out_chunk_index_vec[i] = chunk_hint;
-  }
-  if (out_index_in_chunk_vec != NULLPTR) {
-    for (int64_t i = 0; i < n_indices; i++) {
-      auto logical_index = logical_index_vec[i];
-      auto chunk_index = out_chunk_index_vec[i];
-      // chunk_index is in [0, chunks.size()] no matter what the
-      // value of logical_index is, so it's always safe to dereference
-      // offset_ as it contains chunks.size()+1 values.
-      out_index_in_chunk_vec[i] =
-          logical_index - static_cast<IndexType>(offsets[chunk_index]);
+    out_chunk_location_vec[i].chunk_index = chunk_hint;
+    // chunk_index is in [0, chunks.size()] no matter what the
+    // value of logical_index is, so it's always safe to dereference
+    // offsets as it contains chunks.size()+1 values.
+    out_chunk_location_vec[i].index_in_chunk =
+        typed_logical_index - static_cast<IndexType>(offsets[chunk_hint]);
 #if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)
-      // Make it more likely that Valgrind/ASAN can catch an invalid memory
-      // access by poisoning out_index_in_chunk_vec[i] when the logical
-      // index is out-of-bounds.
-      if (chunk_index == num_chunks) {
-        out_index_in_chunk_vec[i] = std::numeric_limits<IndexType>::max();
-      }
-#endif
+    // Make it more likely that Valgrind/ASAN can catch an invalid memory
+    // access by poisoning the index-in-chunk value when the logical
+    // index is out-of-bounds.
+    if (chunk_hint == num_chunks) {
+      out_chunk_location_vec[i].index_in_chunk = std::numeric_limits<IndexType>::max();
     }
+#endif
   }
 }
@@ -130,31 +126,31 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept {
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec,
-                                    uint8_t* out_chunk_index_vec, uint8_t chunk_hint,
-                                    uint8_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint8_t>* out_chunk_location_vec,
+                                    uint8_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec,
-                                    uint32_t* out_chunk_index_vec, uint32_t chunk_hint,
-                                    uint32_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint32_t>* out_chunk_location_vec,
+                                    uint32_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec,
-                                    uint16_t* out_chunk_index_vec, uint16_t chunk_hint,
-                                    uint16_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint16_t>* out_chunk_location_vec,
+                                    uint16_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec,
-                                    uint64_t* out_chunk_index_vec, uint64_t chunk_hint,
-                                    uint64_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint64_t>* out_chunk_location_vec,
+                                    uint64_t chunk_hint) const {
  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 }  // namespace arrow::internal
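A concrete trace of the hint-then-bisect loop above may help (illustrative values, not from the patch): assume three chunks of lengths 2, 3, and 4, so offsets = [0, 2, 5, 9] and num_chunks = 3.

// chunk_hint starts at 0.
// logical index 1: 1 >= offsets[0] and 1 < offsets[1]  -> hint 0 is correct;
//                  location = {chunk_index=0, index_in_chunk=1 - 0 = 1}
// logical index 6: fails the hint test; Bisect finds chunk 2, hint := 2;
//                  location = {chunk_index=2, index_in_chunk=6 - 5 = 1}
// logical index 7: hint 2 still correct (7 < offsets[3] = 9);
//                  location = {chunk_index=2, index_in_chunk=7 - 5 = 2}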
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
index a2a3d5a864243..83fda62387fe1 100644
--- a/cpp/src/arrow/chunk_resolver.h
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -31,28 +31,34 @@ namespace arrow::internal {
 
 struct ChunkResolver;
 
-struct ChunkLocation {
+template <typename IndexType>
+struct TypedChunkLocation {
   /// \brief Index of the chunk in the array of chunks
   ///
   /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
   /// to represent out-of-bounds locations.
-  int64_t chunk_index = 0;
+  IndexType chunk_index = 0;
 
   /// \brief Index of the value in the chunk
   ///
   /// The value is UNDEFINED if chunk_index >= chunks.size()
-  int64_t index_in_chunk = 0;
+  IndexType index_in_chunk = 0;
 
-  ChunkLocation() = default;
+  TypedChunkLocation() = default;
 
-  ChunkLocation(int64_t chunk_index, int64_t index_in_chunk)
-      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {}
+  TypedChunkLocation(IndexType chunk_index, IndexType index_in_chunk)
+      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {
+    static_assert(sizeof(TypedChunkLocation) == 2 * sizeof(IndexType));
+    static_assert(alignof(TypedChunkLocation) == alignof(IndexType));
+  }
 
-  bool operator==(ChunkLocation other) const {
+  bool operator==(TypedChunkLocation other) const {
     return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk;
   }
 };
 
+using ChunkLocation = TypedChunkLocation<int64_t>;
+
 /// \brief An utility that incrementally resolves logical indices into
 /// physical indices in a chunked array.
 struct ARROW_EXPORT ChunkResolver {
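The static_asserts above are what make the `reinterpret_cast` inside `ResolveMany()` (below) safe: a `TypedChunkLocation<IndexType>` must have exactly the layout of two `IndexType` values, so a buffer of locations over a signed index type can be viewed as locations over its unsigned counterpart. A compile-time sketch of that guarantee (my restatement, not code from the patch):

// int32_t and uint32_t locations share size and alignment, so a
// TypedChunkLocation<int32_t>* buffer can be reinterpreted as
// TypedChunkLocation<uint32_t>* inside ResolveMany().
static_assert(sizeof(arrow::internal::TypedChunkLocation<int32_t>) ==
              sizeof(arrow::internal::TypedChunkLocation<uint32_t>));
static_assert(alignof(arrow::internal::TypedChunkLocation<int32_t>) ==
              alignof(arrow::internal::TypedChunkLocation<uint32_t>));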
@@ -144,26 +150,25 @@ struct ARROW_EXPORT ChunkResolver {
   ///
   /// \pre 0 <= logical_index_vec[i] < logical_array_length()
   ///      (for well-defined and valid chunk index results)
-  /// \pre out_chunk_index_vec has space for `n_indices`
+  /// \pre out_chunk_location_vec has space for `n_indices` locations
   /// \pre chunk_hint in [0, chunks.size()]
-  /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+  /// \post out_chunk_location_vec[i].chunk_index in [0, chunks.size()] for i in [0, n)
   /// \post if logical_index_vec[i] >= chunked_array.length(), then
-  ///       out_chunk_index_vec[i] == chunks.size()
-  ///       and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds)
-  /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and
-  ///       out_index_in_chunk_vec[i] are UNDEFINED
+  ///       out_chunk_location_vec[i].chunk_index == chunks.size()
+  ///       and out_chunk_location_vec[i].index_in_chunk is UNDEFINED (can be
+  ///       out-of-bounds)
+  /// \post if logical_index_vec[i] < 0, then both values in
+  ///       out_chunk_location_vec[i] are UNDEFINED
   ///
   /// \param n_indices The number of logical indices to resolve
   /// \param logical_index_vec The logical indices to resolve
-  /// \param out_chunk_index_vec The output array where the chunk indices will be written
+  /// \param out_chunk_location_vec The output array where the locations will be written
   /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
-  /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
-  ///        within-chunk indices will be written
   /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
   template <typename IndexType>
   [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec,
-                                 IndexType* out_chunk_index_vec, IndexType chunk_hint = 0,
-                                 IndexType* out_index_in_chunk_vec = NULLPTR) const {
+                                 TypedChunkLocation<IndexType>* out_chunk_location_vec,
+                                 IndexType chunk_hint = 0) const {
     if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
       // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()).
      constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits<IndexType>::max();
@@ -188,13 +193,11 @@ struct ARROW_EXPORT ChunkResolver {
       // logical index in the chunked array.
       using U = std::make_unsigned_t<IndexType>;
       ResolveManyImpl(n_indices, reinterpret_cast<const U*>(logical_index_vec),
-                      reinterpret_cast<U*>(out_chunk_index_vec),
-                      static_cast<U>(chunk_hint),
-                      reinterpret_cast<U*>(out_index_in_chunk_vec));
+                      reinterpret_cast<TypedChunkLocation<U>*>(out_chunk_location_vec),
+                      static_cast<U>(chunk_hint));
     } else {
       static_assert(std::is_unsigned_v<IndexType>);
-      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint,
-                      out_index_in_chunk_vec);
+      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_location_vec, chunk_hint);
     }
     return true;
   }
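A caller-side sketch of the new signature (illustrative; assumes `resolver` is an existing arrow::internal::ChunkResolver, which is an internal API):

// Resolve a batch of logical indices in one call, reusing the hint across them.
std::vector<uint32_t> logical = {0, 7, 42};
std::vector<arrow::internal::TypedChunkLocation<uint32_t>> locations(logical.size());
bool ok = resolver.ResolveMany<uint32_t>(
    static_cast<int64_t>(logical.size()), logical.data(), locations.data(),
    /*chunk_hint=*/0);
// ok is false only if the chunk count does not fit in uint32_t.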
@@ -226,10 +229,14 @@ struct ARROW_EXPORT ChunkResolver {
   /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
   /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
-  void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const;
-  void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const;
-  void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const;
-  void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const;
+  void ResolveManyImpl(int64_t, const uint8_t*, TypedChunkLocation<uint8_t>*,
+                       uint8_t) const;
+  void ResolveManyImpl(int64_t, const uint16_t*, TypedChunkLocation<uint16_t>*,
+                       uint16_t) const;
+  void ResolveManyImpl(int64_t, const uint32_t*, TypedChunkLocation<uint32_t>*,
+                       uint32_t) const;
+  void ResolveManyImpl(int64_t, const uint64_t*, TypedChunkLocation<uint64_t>*,
+                       uint64_t) const;
 
  public:
   /// \brief Find the index of the chunk that contains the logical index.
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index b796e9250008a..bf9d4af7c7bb0 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -37,6 +37,7 @@ namespace arrow {
 
 using internal::ChunkLocation;
 using internal::ChunkResolver;
+using internal::TypedChunkLocation;
 
 class TestChunkedArray : public ::testing::Test {
  protected:
@@ -380,24 +381,26 @@ class TestChunkResolverMany : public ::testing::Test {
   Result<std::vector<ChunkLocation>> ResolveMany(
       const ChunkResolver& resolver, const std::vector<IndexType>& logical_index_vec) {
     const size_t n = logical_index_vec.size();
-    std::vector<IndexType> chunk_index_vec;
-    chunk_index_vec.resize(n);
-    std::vector<IndexType> index_in_chunk_vec;
-    index_in_chunk_vec.resize(n);
+    std::vector<TypedChunkLocation<IndexType>> chunk_location_vec;
+    chunk_location_vec.resize(n);
     bool valid = resolver.ResolveMany(
-        static_cast<int64_t>(n), logical_index_vec.data(), chunk_index_vec.data(), 0,
-        index_in_chunk_vec.data());
+        static_cast<int64_t>(n), logical_index_vec.data(), chunk_location_vec.data(), 0);
     if (ARROW_PREDICT_FALSE(!valid)) {
       return Status::Invalid("index type doesn't fit possible chunk indexes");
     }
-    std::vector<ChunkLocation> locations;
-    locations.reserve(n);
-    for (size_t i = 0; i < n; i++) {
-      auto chunk_index = static_cast<int64_t>(chunk_index_vec[i]);
-      auto index_in_chunk = static_cast<int64_t>(index_in_chunk_vec[i]);
-      locations.emplace_back(chunk_index, index_in_chunk);
+    if constexpr (std::is_same<IndexType, int64_t>::value) {
+      return chunk_location_vec;
+    } else {
+      std::vector<ChunkLocation> locations;
+      locations.reserve(n);
+      for (size_t i = 0; i < n; i++) {
+        auto loc = chunk_location_vec[i];
+        auto chunk_index = static_cast<int64_t>(loc.chunk_index);
+        auto index_in_chunk = static_cast<int64_t>(loc.index_in_chunk);
+        locations.emplace_back(chunk_index, index_in_chunk);
+      }
+      return locations;
     }
-    return locations;
   }
 
   void CheckResolveMany(const ChunkResolver& resolver,
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index cfa1cd8193f36..cfb6265f12904 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -42,7 +42,7 @@
 // macOS defines PREALLOCATE as a preprocessor macro in the header sys/vnode.h.
 // No other BSD seems to do so. The name is used as an identifier in MemAllocation enum.
 #if defined(__APPLE__) && defined(PREALLOCATE)
-#undef PREALLOCATE
+# undef PREALLOCATE
 #endif
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 1fbcd6a249093..b545d8bcc1003 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -23,7 +23,9 @@
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/hashing.h"
 
-#include
+// Include templated definitions for aggregate kernels that must be compiled
+// here with the SIMD level configured for this compilation unit in the build.
+#include "arrow/compute/kernels/aggregate_basic.inc.cc"  // NOLINT(build/include)
 
 namespace arrow {
 namespace compute {
@@ -276,11 +278,6 @@ struct SumImplDefault : public SumImpl<ArrowType, SimdLevel::NONE> {
   using SumImpl<ArrowType, SimdLevel::NONE>::SumImpl;
 };
 
-template <typename ArrowType>
-struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {
-  using MeanImpl<ArrowType, SimdLevel::NONE>::MeanImpl;
-};
-
 Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
                                              const KernelInitArgs& args) {
   SumLikeInit<SumImplDefault> visitor(
@@ -289,6 +286,14 @@ Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
   return visitor.Create();
 }
 
+// ----------------------------------------------------------------------
+// Mean implementation
+
+template <typename ArrowType>
+struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {
+  using MeanImpl<ArrowType, SimdLevel::NONE>::MeanImpl;
+};
+
 Result<std::unique_ptr<KernelState>> MeanInit(KernelContext* ctx,
                                               const KernelInitArgs& args) {
   MeanKernelInit<MeanImplDefault> visitor(
@@ -482,8 +487,8 @@ void AddFirstOrLastAggKernel(ScalarAggregateFunction* func,
 // ----------------------------------------------------------------------
 // MinMax implementation
 
-Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
-                                                const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MinMaxInitDefault(KernelContext* ctx,
+                                                       const KernelInitArgs& args) {
   ARROW_ASSIGN_OR_RAISE(TypeHolder out_type,
                         args.kernel->signature->out_type().Resolve(ctx, args.inputs));
   MinMaxInitState<SimdLevel::NONE> visitor(
@@ -532,13 +537,13 @@ struct BooleanAnyImpl : public ScalarAggregator {
     }
     if (batch[0].is_scalar()) {
       const Scalar& scalar = *batch[0].scalar;
-      this->has_nulls = !scalar.is_valid;
-      this->any = scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
-      this->count += scalar.is_valid;
+      this->has_nulls |= !scalar.is_valid;
+      this->any |= scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
+      this->count += scalar.is_valid * batch.length;
       return Status::OK();
     }
     const ArraySpan& data = batch[0].array;
-    this->has_nulls = data.GetNullCount() > 0;
+    this->has_nulls |= data.GetNullCount() > 0;
     this->count += data.length - data.GetNullCount();
     arrow::internal::OptionalBinaryBitBlockCounter counter(
         data.buffers[0].data, data.offset, data.buffers[1].data, data.offset,
@@ -603,13 +608,13 @@ struct BooleanAllImpl : public ScalarAggregator {
     }
     if (batch[0].is_scalar()) {
       const Scalar& scalar = *batch[0].scalar;
-      this->has_nulls = !scalar.is_valid;
-      this->count += scalar.is_valid;
-      this->all = !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
+      this->has_nulls |= !scalar.is_valid;
+      this->count += scalar.is_valid * batch.length;
+      this->all &= !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
      return Status::OK();
     }
     const ArraySpan& data = batch[0].array;
-    this->has_nulls = data.GetNullCount() > 0;
+    this->has_nulls |= data.GetNullCount() > 0;
     this->count += data.length - data.GetNullCount();
     arrow::internal::OptionalBinaryBitBlockCounter counter(
         data.buffers[1].data, data.offset, data.buffers[0].data, data.offset,
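The switch from `=` to `|=`/`&=` and the new `* batch.length` factor matter once Consume() runs more than once and a scalar batch stands for several logical rows. A small trace with illustrative values:

// BooleanAnyImpl::Consume, two consecutive batches:
//   batch 1: boolean array [null, null]    -> has_nulls |= true
//   batch 2: boolean array [false, false]  -> has_nulls |= false (stays true)
// The old `has_nulls = ...` assignment would have reset the flag to false in
// batch 2. Likewise, a valid scalar spanning batch.length == 5 rows now adds
// 5 to `count` (scalar.is_valid * batch.length) instead of 1.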
@@ -1114,14 +1119,14 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
   // Add min max function
   func = std::make_shared<ScalarAggregateFunction>("min_max", Arity::Unary(), min_max_doc,
                                                    &default_scalar_aggregate_options);
-  AddMinMaxKernels(MinMaxInit, {null(), boolean()}, func.get());
-  AddMinMaxKernels(MinMaxInit, NumericTypes(), func.get());
-  AddMinMaxKernels(MinMaxInit, TemporalTypes(), func.get());
-  AddMinMaxKernels(MinMaxInit, BaseBinaryTypes(), func.get());
-  AddMinMaxKernel(MinMaxInit, Type::FIXED_SIZE_BINARY, func.get());
-  AddMinMaxKernel(MinMaxInit, Type::INTERVAL_MONTHS, func.get());
-  AddMinMaxKernel(MinMaxInit, Type::DECIMAL128, func.get());
-  AddMinMaxKernel(MinMaxInit, Type::DECIMAL256, func.get());
+  AddMinMaxKernels(MinMaxInitDefault, {null(), boolean()}, func.get());
+  AddMinMaxKernels(MinMaxInitDefault, NumericTypes(), func.get());
+  AddMinMaxKernels(MinMaxInitDefault, TemporalTypes(), func.get());
+  AddMinMaxKernels(MinMaxInitDefault, BaseBinaryTypes(), func.get());
+  AddMinMaxKernel(MinMaxInitDefault, Type::FIXED_SIZE_BINARY, func.get());
+  AddMinMaxKernel(MinMaxInitDefault, Type::INTERVAL_MONTHS, func.get());
+  AddMinMaxKernel(MinMaxInitDefault, Type::DECIMAL128, func.get());
+  AddMinMaxKernel(MinMaxInitDefault, Type::DECIMAL256, func.get());
 
   // Add the SIMD variants for min max
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
   if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc
new file mode 100644
index 0000000000000..f2151e0a9e029
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc
@@ -0,0 +1,1025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// .inc.cc file to be included in a compilation unit whose kernels are meant
+// to be compiled and auto-vectorized by the compiler, with different SIMD
+// levels passed as compiler flags.
+//
+// There is no include guard, so avoid double inclusion in the compilation
+// unit that includes this .inc.cc file.
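The per-SIMD-level inclusion pattern can be pictured as sketched below. The file name is an illustrative assumption, not taken from this patch; the registration behind ARROW_HAVE_RUNTIME_AVX2 above is what actually wires such variants in.

// Hypothetical sketch of a SIMD-specific translation unit that reuses the
// same kernel definitions. A file like this would be compiled with AVX2
// flags, letting the compiler auto-vectorize the included kernels.
//
// // aggregate_basic_avx2.cc (illustrative)
// #include "arrow/compute/kernels/aggregate_basic.inc.cc"  // NOLINT(build/include)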
+
+#include
+#include
+#include
+#include
+#include
+
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/align_util.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/decimal.h"
+
+namespace arrow::compute::internal {
+namespace {
+
+// ----------------------------------------------------------------------
+// Sum implementation
+
+template <typename ArrowType, SimdLevel::type SimdLevel,
+          typename ResultType = typename FindAccumulatorType<ArrowType>::Type>
+struct SumImpl : public ScalarAggregator {
+  using ThisType = SumImpl<ArrowType, SimdLevel, ResultType>;
+  using CType = typename TypeTraits<ArrowType>::CType;
+  using SumType = ResultType;
+  using SumCType = typename TypeTraits<SumType>::CType;
+  using OutputType = typename TypeTraits<SumType>::ScalarType;
+
+  SumImpl(std::shared_ptr<DataType> out_type, ScalarAggregateOptions options_)
+      : out_type(std::move(out_type)), options(std::move(options_)) {}
+
+  Status Consume(KernelContext*, const ExecSpan& batch) override {
+    if (batch[0].is_array()) {
+      const ArraySpan& data = batch[0].array;
+      this->count += data.length - data.GetNullCount();
+      this->nulls_observed = this->nulls_observed || data.GetNullCount();
+
+      if (!options.skip_nulls && this->nulls_observed) {
+        // Short-circuit
+        return Status::OK();
+      }
+
+      if (is_boolean_type<ArrowType>::value) {
+        this->sum += GetTrueCount(data);
+      } else {
+        this->sum += SumArray<CType, SumCType, SimdLevel>(data);
+      }
+    } else {
+      const Scalar& data = *batch[0].scalar;
+      this->count += data.is_valid * batch.length;
+      this->nulls_observed = this->nulls_observed || !data.is_valid;
+      if (data.is_valid) {
+        this->sum += internal::UnboxScalar<ArrowType>::Unbox(data) * batch.length;
+      }
+    }
+    return Status::OK();
+  }
+
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
+    const auto& other = checked_cast<const ThisType&>(src);
+    this->count += other.count;
+    this->sum += other.sum;
+    this->nulls_observed = this->nulls_observed || other.nulls_observed;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext*, Datum* out) override {
+    if ((!options.skip_nulls && this->nulls_observed) ||
+        (this->count < options.min_count)) {
+      out->value = std::make_shared<OutputType>(out_type);
+    } else {
+      out->value = std::make_shared<OutputType>(this->sum, out_type);
+    }
+    return Status::OK();
+  }
+
+  size_t count = 0;
+  bool nulls_observed = false;
+  SumCType sum = 0;
+  std::shared_ptr<DataType> out_type;
+  ScalarAggregateOptions options;
+};
+
+template <typename ArrowType>
+struct NullImpl : public ScalarAggregator {
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+
+  explicit NullImpl(const ScalarAggregateOptions& options_) : options(options_) {}
+
+  Status Consume(KernelContext*, const ExecSpan& batch) override {
+    if (batch[0].is_scalar() || batch[0].array.GetNullCount() > 0) {
+      // If the batch is a scalar or an array with elements, set is_empty to false
+      is_empty = false;
+    }
+    return Status::OK();
+  }
+
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
+    const auto& other = checked_cast<const NullImpl<ArrowType>&>(src);
+    this->is_empty &= other.is_empty;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext*, Datum* out) override {
+    if ((options.skip_nulls || this->is_empty) && options.min_count == 0) {
+      // Return 0 if the remaining data is empty
+      out->value = output_empty();
+    } else {
+      out->value = MakeNullScalar(TypeTraits<ArrowType>::type_singleton());
+    }
+    return Status::OK();
+  }
+
+  virtual std::shared_ptr<Scalar> output_empty() = 0;
+
+  bool is_empty = true;
+  ScalarAggregateOptions options;
+};
+
+template <typename ArrowType>
+struct NullSumImpl : public NullImpl<ArrowType> {
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+
+  explicit NullSumImpl(const ScalarAggregateOptions& options_)
+      : NullImpl<ArrowType>(options_) {}
+
+  std::shared_ptr<Scalar> output_empty() override {
+    return std::make_shared<ScalarType>(0);
+  }
+};
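For orientation, the accumulator above is the machinery behind the public "sum" function; a hedged usage sketch follows (assumes `array` is a built `std::shared_ptr<arrow::Array>` and the default exec context):

#include "arrow/compute/api_aggregate.h"

// With skip_nulls=false, any observed null yields a null result, mirroring
// the nulls_observed check in SumImpl::Finalize() above.
arrow::compute::ScalarAggregateOptions options(/*skip_nulls=*/false,
                                               /*min_count=*/1);
arrow::Result<arrow::Datum> result = arrow::compute::Sum(array, options);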
+
+template