diff --git a/.clang-format b/.clang-format index 9448dc8d8c80d..abd823c103904 100644 --- a/.clang-format +++ b/.clang-format @@ -19,3 +19,4 @@ BasedOnStyle: Google ColumnLimit: 90 DerivePointerAlignment: false IncludeBlocks: Preserve +IndentPPDirectives: AfterHash diff --git a/.dockerignore b/.dockerignore index 3791cca95e3fe..1f1715d8e833d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -27,11 +27,11 @@ # include explicitly !ci/** !c_glib/Gemfile -!dev/archery/setup.py !dev/release/setup-*.sh !docs/requirements*.txt +!go/go.mod +!go/go.sum !python/requirements*.txt -!python/manylinux1/** !r/DESCRIPTION !ruby/Gemfile !ruby/red-arrow/Gemfile @@ -46,20 +46,3 @@ !ruby/red-parquet/Gemfile !ruby/red-parquet/lib/parquet/version.rb !ruby/red-parquet/red-parquet.gemspec -!ruby/red-plasma/Gemfile -!ruby/red-plasma/lib/plasma/version.rb -!ruby/red-plasma/red-plasma.gemspec -!rust/Cargo.toml -!rust/benchmarks/Cargo.toml -!rust/arrow/Cargo.toml -!rust/arrow/benches -!rust/arrow-flight/Cargo.toml -!rust/parquet/Cargo.toml -!rust/parquet/build.rs -!rust/parquet_derive/Cargo.toml -!rust/parquet_derive_test/Cargo.toml -!rust/datafusion/Cargo.toml -!rust/datafusion/benches -!rust/integration-testing/Cargo.toml -!go/go.mod -!go/go.sum \ No newline at end of file diff --git a/.env b/.env index 21f904c3208f6..c8c236d5ac44b 100644 --- a/.env +++ b/.env @@ -58,8 +58,8 @@ CUDA=11.2.2 DASK=latest DOTNET=8.0 GCC_VERSION="" -GO=1.21.8 -STATICCHECK=v0.4.7 +GO=1.22.6 +STATICCHECK=v0.5.1 HDFS=3.2.1 JDK=11 KARTOTHEK=latest @@ -71,6 +71,7 @@ NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 +PYTHON_IMAGE_TAG=3.8 R=4.4 SPARK=master TURBODBC=latest diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7d9ff2f42e887..7ba9744ef005d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -24,13 +24,6 @@ updates: commit-message: prefix: "MINOR: [CI] " open-pull-requests-limit: 10 - - package-ecosystem: "gomod" - directory: "/go/" - schedule: - interval: "weekly" - commit-message: - prefix: "MINOR: [Go] " - open-pull-requests-limit: 10 - package-ecosystem: "maven" directory: "/java/" schedule: diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index b016f7d11b9fa..e448209056d78 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -20,12 +20,14 @@ name: Archery & Crossbow on: push: paths: + - '.dockerignore' - '.github/workflows/archery.yml' - 'dev/archery/**' - 'dev/tasks/**' - 'docker-compose.yml' pull_request: paths: + - '.dockerignore' - '.github/workflows/archery.yml' - 'dev/archery/**' - 'dev/tasks/**' @@ -58,7 +60,7 @@ jobs: shell: bash run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: '3.9' - name: Install pygit2 binary wheel diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 1138c0a02f812..b7af4c5800835 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -41,7 +41,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index a82e1eb76660b..f5c8b6a7201be 100644 --- 
a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -20,6 +20,7 @@ name: C++ on: push: paths: + - '.dockerignore' - '.github/workflows/cpp.yml' - 'ci/conda_env_*' - 'ci/docker/**' @@ -35,6 +36,7 @@ on: - 'testing' pull_request: paths: + - '.dockerignore' - '.github/workflows/cpp.yml' - 'ci/conda_env_*' - 'ci/docker/**' @@ -99,7 +101,6 @@ jobs: cat <> "$GITHUB_OUTPUT" { "arch": "arm64v8", - "archery-use-legacy-docker-compose": "1", "clang-tools": "10", "image": "ubuntu-cpp", "llvm": "10", @@ -124,9 +125,6 @@ jobs: include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} - # By default, use `docker compose` because docker-compose v1 is obsolete, - # except where the Docker client version is too old. - ARCHERY_USE_LEGACY_DOCKER_COMPOSE: ${{ matrix.archery-use-legacy-docker-compose || '0' }} ARROW_SIMD_LEVEL: ${{ matrix.simd-level }} CLANG_TOOLS: ${{ matrix.clang-tools }} LLVM: ${{ matrix.llvm }} @@ -147,6 +145,7 @@ jobs: run: | sudo apt update sudo apt install -y --no-install-recommends python3 python3-dev python3-pip + python3 -m pip install -U pip - name: Setup Archery run: python3 -m pip install -e dev/archery[docker] - name: Execute Docker Build @@ -156,8 +155,7 @@ jobs: run: | # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes sudo sysctl -w vm.mmap_rnd_bits=28 - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ${{ matrix.image }} - name: Docker Push if: >- @@ -246,7 +244,7 @@ jobs: $(brew --prefix bash)/bin/bash \ ci/scripts/install_minio.sh latest ${ARROW_HOME} - name: Set up Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: 3.12 - name: Install Google Cloud Storage Testbench @@ -273,7 +271,7 @@ jobs: shell: bash run: | sudo sysctl -w kern.coredump=1 - sudo sysctl -w kern.corefile=core.%N.%P + sudo sysctl -w kern.corefile=/tmp/core.%N.%P ulimit -c unlimited # must enable within the same shell ci/scripts/cpp_test.sh $(pwd) $(pwd)/build @@ -412,12 +410,10 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - # Don't use preinstalled Boost by empty BOOST_ROOT and - # -DBoost_NO_BOOST_CMAKE=ON + # Don't use preinstalled Boost by empty BOOST_ROOT BOOST_ROOT: "" ARROW_CMAKE_ARGS: >- -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}} - -DBoost_NO_BOOST_CMAKE=ON -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON # We can't use unity build because we don't have enough memory on # GitHub Actions. 
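One note on the `.clang-format` hunk at the top of this diff: `IndentPPDirectives: AfterHash` tells clang-format to keep `#` in column 0 and indent the directive name itself by nesting depth (the `BeforeHash` variant would indent the `#` instead). A minimal, hypothetical illustration of the resulting layout (not code from this patch):

```c
/* With IndentPPDirectives: AfterHash, nested preprocessor directives
 * are indented after the '#', which stays in column 0. */
#if defined(ARROW_EXAMPLE_OUTER)
#  if defined(ARROW_EXAMPLE_INNER)
#    define ARROW_EXAMPLE_LEVEL 2
#  else
#    define ARROW_EXAMPLE_LEVEL 1
#  endif
#endif
```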
@@ -467,16 +463,18 @@ jobs: https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z chmod +x /usr/local/bin/minio.exe - name: Set up Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 + id: python-install with: python-version: 3.9 - name: Install Google Cloud Storage Testbench - shell: bash + shell: msys2 {0} + env: + PIPX_BIN_DIR: /usr/local/bin + PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }} run: | ci/scripts/install_gcs_testbench.sh default - echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV - name: Test shell: msys2 {0} run: | - PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}" ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build" diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 6e8548dc960f4..c618350affbeb 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -108,7 +108,7 @@ jobs: with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Checkout Arrow diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index cc3ff6330746d..3879a045fd239 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -45,7 +45,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install pre-commit @@ -67,8 +67,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run -e GITHUB_ACTIONS=true ubuntu-lint - name: Docker Push if: >- @@ -104,7 +103,7 @@ jobs: with: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.12' - name: Install Ruby diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 25db1c39ad89e..1219f7526f9f2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -52,7 +52,7 @@ jobs: key: debian-docs-${{ hashFiles('cpp/**') }} restore-keys: debian-docs- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index ea7fe5d02d7b8..7d540b7cecdc9 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -20,6 +20,7 @@ name: Docs on: pull_request: paths: + - '.dockerignore' - 'docs/**' - '.github/workflows/docs_light.yml' - 'ci/docker/conda.dockerfile' @@ -58,7 +59,7 @@ jobs: key: conda-docs-${{ hashFiles('cpp/**') }} restore-keys: conda-docs- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml 
index 20c78d86cb2a3..d463549206471 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -20,6 +20,7 @@ name: Go on: push: paths: + - '.dockerignore' - '.github/workflows/go.yml' - 'ci/docker/*_go.dockerfile' - 'ci/scripts/go_*' @@ -27,6 +28,7 @@ on: - 'go/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/go.yml' - 'ci/docker/*_go.dockerfile' - 'ci/docker/**' @@ -62,13 +64,13 @@ jobs: { "arch-label": "AMD64", "arch": "amd64", - "go": "1.21", + "go": "1.22", "runs-on": "ubuntu-latest" }, { "arch-label": "AMD64", "arch": "amd64", - "go": "1.22", + "go": "1.23", "runs-on": "ubuntu-latest" } JSON @@ -78,15 +80,13 @@ jobs: { "arch-label": "ARM64", "arch": "arm64v8", - "archery-use-legacy-docker-compose": "1", - "go": "1.21", + "go": "1.22", "runs-on": ["self-hosted", "arm", "linux"] }, { "arch-label": "ARM64", "arch": "arm64v8", - "archery-use-legacy-docker-compose": "1", - "go": "1.22", + "go": "1.23", "runs-on": ["self-hosted", "arm", "linux"] } JSON @@ -106,9 +106,6 @@ jobs: include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} - # By default, use Docker CLI because docker-compose v1 is obsolete, - # except where the Docker client version is too old. - ARCHERY_USE_LEGACY_DOCKER_COMPOSE: ${{ matrix.archery-use-legacy-docker-compose || '0' }} GO: ${{ matrix.go }} steps: - name: Checkout Arrow @@ -202,7 +199,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] env: GO: ${{ matrix.go }} steps: @@ -212,7 +209,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -243,7 +240,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] env: GO: ${{ matrix.go }} steps: @@ -252,7 +249,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -282,7 +279,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -315,7 +312,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -328,7 +325,7 @@ jobs: go-version: ${{ matrix.go }} cache: true cache-dependency-path: go/go.sum - - name: Install staticcheck + - name: Install staticcheck run: | . .env go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} @@ -344,7 +341,7 @@ jobs: github.event_name == 'push' && github.repository == 'apache/arrow' && github.ref_name == 'main' - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.10' - name: Run Benchmarks @@ -373,7 +370,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] env: ARROW_GO_TESTCGO: "1" steps: @@ -444,7 +441,7 @@ jobs: ci/scripts/msys2_setup.sh cgo - name: Get required Go version run: | - (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + (. 
.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Update CGO Env vars shell: msys2 {0} run: | diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 43f8af0a600d8..b73f900e616f5 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -20,6 +20,7 @@ name: Integration on: push: paths: + - '.dockerignore' - '.github/workflows/integration.yml' - 'ci/**' - 'dev/archery/**' @@ -33,6 +34,7 @@ on: - 'format/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/integration.yml' - 'ci/**' - 'dev/archery/**' @@ -89,7 +91,7 @@ jobs: key: conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -98,7 +100,8 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: > + run: | + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 0317879b580ba..57f834bcbabee 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -20,6 +20,7 @@ name: Java on: push: paths: + - '.dockerignore' - '.github/workflows/java.yml' - 'ci/docker/*java*' - 'ci/scripts/java*.sh' @@ -29,6 +30,7 @@ on: - 'java/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/java.yml' - 'ci/docker/*java*' - 'ci/scripts/java*.sh' @@ -76,7 +78,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index c2bc679e681a2..e730a5bf3e672 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -20,6 +20,7 @@ name: Java JNI on: push: paths: + - '.dockerignore' - '.github/workflows/java_jni.yml' - 'ci/docker/**' - 'ci/scripts/cpp_build.sh' @@ -29,6 +30,7 @@ on: - 'java/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/java_jni.yml' - 'ci/docker/**' - 'ci/scripts/cpp_build.sh' @@ -70,7 +72,7 @@ jobs: key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} restore-keys: java-jni-manylinux-2014- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -79,7 +81,9 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run java-jni-manylinux-2014 + run: | + source ci/scripts/util_enable_core_dumps.sh + archery docker run java-jni-manylinux-2014 - name: Docker Push if: >- success() && @@ -110,7 +114,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git 
a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml index 72afb6dbf1c1d..0bf0c27288faf 100644 --- a/.github/workflows/java_nightly.yml +++ b/.github/workflows/java_nightly.yml @@ -58,7 +58,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 630bef61105f6..9ab4edf0851cd 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -20,12 +20,14 @@ name: NodeJS on: push: paths: + - '.dockerignore' - '.github/workflows/js.yml' - 'ci/docker/*js.dockerfile' - 'ci/scripts/js_*' - 'js/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/js.yml' - 'ci/docker/*js.dockerfile' - 'ci/scripts/js_*' @@ -54,7 +56,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -64,8 +66,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run debian-js - name: Docker Push if: >- diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml index 7dd06b6aeec09..bbb1a2d7228d0 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -82,7 +82,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/pr_review_trigger.yml b/.github/workflows/pr_review_trigger.yml index 0cd89b3206715..68f922ce8b4d9 100644 --- a/.github/workflows/pr_review_trigger.yml +++ b/.github/workflows/pr_review_trigger.yml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: "Upload PR review Payload" - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4.4.0 with: path: "${{ github.event_path }}" name: "pr_review_payload" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 916db2580e371..45efd305aa8f6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -20,6 +20,7 @@ name: Python on: push: paths: + - '.dockerignore' - '.github/workflows/python.yml' - 'ci/**' - 'cpp/**' @@ -27,6 +28,7 @@ on: - 'python/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/python.yml' - 'ci/**' - 'cpp/**' @@ -59,6 +61,7 @@ jobs: - conda-python-3.9-nopandas - conda-python-3.8-pandas-1.0 - conda-python-3.10-pandas-latest + - conda-python-3.10-no-numpy include: - name: conda-python-docs cache: conda-python-3.9 @@ -83,6 +86,11 @@ jobs: title: AMD64 Conda Python 3.10 Pandas latest python: "3.10" pandas: latest + - name: conda-python-3.10-no-numpy + cache: conda-python-3.10 + image: conda-python-no-numpy + title: AMD64 Conda Python 3.10 without NumPy + python: "3.10" env: PYTHON: ${{ matrix.python || 3.8 }} UBUNTU: ${{ matrix.ubuntu || 20.04 }} @@ -101,7 +109,7 @@ jobs: key: ${{ matrix.cache }}-${{ 
hashFiles('cpp/**') }} restore-keys: ${{ matrix.cache }}- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -111,8 +119,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ${{ matrix.image }} - name: Docker Push if: >- @@ -163,7 +170,7 @@ jobs: ARROW_BUILD_TESTS: OFF PYARROW_TEST_LARGE_MEMORY: ON # Current oldest supported version according to https://endoflife.date/macos - MACOSX_DEPLOYMENT_TARGET: 10.15 + MACOSX_DEPLOYMENT_TARGET: 12.0 steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -171,7 +178,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: '3.11' - name: Install Dependencies diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 2820d42470bca..92e0e63fb7ea5 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -20,6 +20,7 @@ name: R on: push: paths: + - '.dockerignore' - ".github/workflows/r.yml" - "ci/docker/**" - "ci/etc/rprofile" @@ -32,6 +33,7 @@ on: - "r/**" pull_request: paths: + - '.dockerignore' - ".github/workflows/r.yml" - "ci/docker/**" - "ci/etc/rprofile" @@ -146,7 +148,7 @@ jobs: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}- ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -156,8 +158,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh # Setting a non-default and non-probable Marquesas French Polynesia time # it has both with a .45 offset and very very few people who live there. archery docker run -e TZ=MART -e ARROW_R_FORCE_TESTS=${{ matrix.force-tests }} ubuntu-r @@ -169,9 +170,9 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: test-output + name: test-output-${{ matrix.ubuntu }}-${{ matrix.r }} path: r/check/arrow.Rcheck/tests/testthat.Rout* - name: Docker Push if: >- @@ -206,7 +207,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -216,8 +217,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh # Don't set a TZ here to test that case. These builds will have the following warning in them: # System has not been booted with systemd as init system (PID 1). Can't operate. 
# Failed to connect to bus: Host is down @@ -230,9 +230,9 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: test-output + name: test-output-bundled path: r/check/arrow.Rcheck/tests/testthat.Rout* - name: Docker Push if: >- @@ -292,7 +292,7 @@ jobs: # So that they're unique when multiple are downloaded in the next step shell: bash run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # # v4.0.0 with: name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip @@ -330,7 +330,7 @@ jobs: echo "$HOME/.local/bin" >> $GITHUB_PATH - run: mkdir r/windows - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.8 with: name: libarrow-rtools40-ucrt64.zip path: r/windows diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 1ec071b6bbb5e..9817e41d3b61d 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -60,7 +60,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index e4d650e74a8ad..05b7b317ffd96 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -20,6 +20,7 @@ name: C GLib & Ruby on: push: paths: + - '.dockerignore' - '.github/workflows/ruby.yml' - 'ci/docker/**' - 'ci/scripts/c_glib_*' @@ -33,6 +34,7 @@ on: - 'ruby/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/ruby.yml' - 'ci/docker/**' - 'ci/scripts/c_glib_*' @@ -83,7 +85,7 @@ jobs: key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -93,8 +95,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARROW_FLIGHT=ON \ -e ARROW_FLIGHT_SQL=ON \ @@ -406,7 +407,10 @@ jobs: -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" - name: Build C++ vcpkg dependencies run: | - vcpkg\vcpkg.exe install --triplet $env:VCPKG_TRIPLET --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed + vcpkg\vcpkg.exe install ` + --triplet $env:VCPKG_TRIPLET ` + --x-manifest-root cpp ` + --x-install-root build\cpp\vcpkg_installed - name: Build C++ shell: cmd run: | diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml index 1b3c9eca1814a..87aa5cb83f714 100644 --- a/.github/workflows/swift.yml +++ b/.github/workflows/swift.yml @@ -20,6 +20,7 @@ name: Swift on: push: paths: + - '.dockerignore' - '.github/workflows/swift.yml' - 'ci/docker/*swift*' - 
'ci/scripts/swift_*' @@ -27,6 +28,7 @@ on: - 'swift/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/swift.yml' - 'ci/docker/*swift*' - 'ci/scripts/swift_*' @@ -63,8 +65,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ubuntu-swift - name: Docker Push if: >- diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf0bcde14622a..91017969eb502 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,6 +78,26 @@ repos: ?^cpp/src/generated/| ?^cpp/thirdparty/| ) + - repo: https://github.com/cpplint/cpplint + rev: 1.6.1 + hooks: + - id: cpplint + name: C++ Lint + args: + - "--verbose=2" + types_or: + - c++ + files: >- + ^cpp/ + exclude: >- + ( + ?\.grpc\.fb\.(cc|h)$| + ?\.pb\.(cc|h)$| + ?_generated.*\.(cc|h)$| + ?^cpp/src/arrow/vendored/| + ?^cpp/src/generated/| + ?^cpp/thirdparty/| + ) - repo: https://github.com/pre-commit/mirrors-clang-format rev: v14.0.6 hooks: diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 0000000000000..2f47b4dbf57b7 --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +filter = -build/c++11 +filter = -build/header_guard +filter = -build/include_order +filter = -build/include_what_you_use +filter = -readability/alt_tokens +# readability/casting is disabled as it aggressively warns about +# functions with names like "int32", so "int32(x)", where int32 is a +# function name, warns with +filter = -readability/casting +filter = -readability/todo +filter = -runtime/references +filter = -whitespace/comments +linelength = 90 diff --git a/appveyor.yml b/appveyor.yml index 5954251d34733..9e4582f1d8d7f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,6 +24,7 @@ only_commits: - appveyor.yml - ci/appveyor* - ci/conda* + - ci/scripts/*.bat - cpp/ - format/ - python/ diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp index 80c47e336f872..75b02ec25869f 100644 --- a/c_glib/arrow-flight-glib/client.cpp +++ b/c_glib/arrow-flight-glib/client.cpp @@ -33,10 +33,19 @@ G_BEGIN_DECLS * #GAFlightStreamReader is a class for reading record batches from a * server. * + * #GAFlightStreamWriter is a class for writing record batches to a + * server. + * + * #GAFlightMetadataReader is a class for reading metadata from a + * server. + * * #GAFlightCallOptions is a class for options of each call. * * #GAFlightClientOptions is a class for options of each client. * + * #GAFlightDoPutResult is a class that has gaflight_client_do_put() + * result. + * * #GAFlightClient is a class for Apache Arrow Flight client. 
* * Since: 5.0.0 @@ -56,6 +65,128 @@ gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass) { } +G_DEFINE_TYPE(GAFlightStreamWriter, + gaflight_stream_writer, + GAFLIGHT_TYPE_RECORD_BATCH_WRITER) + +static void +gaflight_stream_writer_init(GAFlightStreamWriter *object) +{ +} + +static void +gaflight_stream_writer_class_init(GAFlightStreamWriterClass *klass) +{ +} + +/** + * gaflight_stream_writer_done_writing: + * @writer: A #GAFlightStreamWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error) +{ + auto flight_writer = std::static_pointer_cast( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + return garrow::check(error, + flight_writer->DoneWriting(), + "[flight-stream-writer][done-writing]"); +} + +struct GAFlightMetadataReaderPrivate +{ + arrow::flight::FlightMetadataReader *reader; +}; + +enum { + PROP_METADATA_READER_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightMetadataReader, + gaflight_metadata_reader, + G_TYPE_OBJECT) + +#define GAFLIGHT_METADATA_READER_GET_PRIVATE(object) \ + static_cast( \ + gaflight_metadata_reader_get_instance_private(GAFLIGHT_METADATA_READER(object))) + +static void +gaflight_metadata_reader_finalize(GObject *object) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object); + delete priv->reader; + G_OBJECT_CLASS(gaflight_metadata_reader_parent_class)->finalize(object); +} + +static void +gaflight_metadata_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_METADATA_READER_READER: + priv->reader = + static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_metadata_reader_init(GAFlightMetadataReader *object) +{ +} + +static void +gaflight_metadata_reader_class_init(GAFlightMetadataReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_metadata_reader_finalize; + gobject_class->set_property = gaflight_metadata_reader_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "reader", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_METADATA_READER_READER, spec); +} + +/** + * gaflight_metadata_reader_read: + * @reader: A #GAFlightMetadataReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The metadata on success, %NULL on error. 
+ * + * Since: 18.0.0 + */ +GArrowBuffer * +gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error) +{ + auto flight_reader = gaflight_metadata_reader_get_raw(reader); + std::shared_ptr metadata; + if (garrow::check(error, + flight_reader->ReadMetadata(&metadata), + "[flight-metadata-reader][read]")) { + return garrow_buffer_new_raw(&metadata); + } else { + return nullptr; + } +} + typedef struct GAFlightCallOptionsPrivate_ { arrow::flight::FlightCallOptions options; @@ -385,6 +516,139 @@ gaflight_client_options_new(void) g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL)); } +struct GAFlightDoPutResultPrivate +{ + GAFlightStreamWriter *writer; + GAFlightMetadataReader *reader; +}; + +enum { + PROP_DO_PUT_RESULT_RESULT = 1, + PROP_DO_PUT_RESULT_WRITER, + PROP_DO_PUT_RESULT_READER, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDoPutResult, gaflight_do_put_result, G_TYPE_OBJECT) + +#define GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object) \ + static_cast( \ + gaflight_do_put_result_get_instance_private(GAFLIGHT_DO_PUT_RESULT(object))) + +static void +gaflight_do_put_result_dispose(GObject *object) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + if (priv->writer) { + g_object_unref(priv->writer); + priv->writer = nullptr; + } + + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = nullptr; + } + + G_OBJECT_CLASS(gaflight_do_put_result_parent_class)->dispose(object); +} + +static void +gaflight_do_put_result_init(GAFlightDoPutResult *object) +{ +} + +static void +gaflight_do_put_result_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DO_PUT_RESULT_RESULT: + { + auto result = static_cast( + g_value_get_pointer(value)); + std::shared_ptr writer = + std::move(result->writer); + priv->writer = gaflight_stream_writer_new_raw(&writer); + priv->reader = gaflight_metadata_reader_new_raw(result->reader.release()); + break; + } + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_do_put_result_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DO_PUT_RESULT_WRITER: + g_value_set_object(value, priv->writer); + break; + case PROP_DO_PUT_RESULT_READER: + g_value_set_object(value, priv->reader); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_do_put_result_class_init(GAFlightDoPutResultClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_do_put_result_dispose; + gobject_class->set_property = gaflight_do_put_result_set_property; + gobject_class->get_property = gaflight_do_put_result_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "result", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_RESULT, spec); + + /** + * GAFlightDoPutResult:writer: + * + * A writer to write record batches to. 
+   *
+   * Since: 18.0.0
+   */
+  spec = g_param_spec_object("writer",
+                             nullptr,
+                             nullptr,
+                             GAFLIGHT_TYPE_STREAM_WRITER,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_WRITER, spec);
+
+  /**
+   * GAFlightDoPutResult:reader:
+   *
+   * A reader for application metadata from the server.
+   *
+   * Since: 18.0.0
+   */
+  spec = g_param_spec_object("reader",
+                             nullptr,
+                             nullptr,
+                             GAFLIGHT_TYPE_METADATA_READER,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_READER, spec);
+}
+
 struct GAFlightClientPrivate
 {
   std::shared_ptr<arrow::flight::FlightClient> client;
@@ -661,6 +925,51 @@ gaflight_client_do_get(GAFlightClient *client,
   return gaflight_stream_reader_new_raw(flight_reader.release(), TRUE);
 }
 
+/**
+ * gaflight_client_do_put:
+ * @client: A #GAFlightClient.
+ * @descriptor: A #GAFlightDescriptor.
+ * @schema: A #GArrowSchema.
+ * @options: (nullable): A #GAFlightCallOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Upload data to a Flight described by the given descriptor. The
+ * caller must call garrow_record_batch_writer_close() on the
+ * returned stream once they are done writing.
+ *
+ * The reader and writer are linked; closing the writer will also
+ * close the reader. Use gaflight_stream_writer_done_writing() to
+ * only close the write side of the channel.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The #GAFlightDoPutResult holding a reader and a writer on success,
+ *   %NULL on error.
+ *
+ * Since: 18.0.0
+ */
+GAFlightDoPutResult *
+gaflight_client_do_put(GAFlightClient *client,
+                       GAFlightDescriptor *descriptor,
+                       GArrowSchema *schema,
+                       GAFlightCallOptions *options,
+                       GError **error)
+{
+  auto flight_client = gaflight_client_get_raw(client);
+  auto flight_descriptor = gaflight_descriptor_get_raw(descriptor);
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  arrow::flight::FlightCallOptions flight_default_options;
+  auto flight_options = &flight_default_options;
+  if (options) {
+    flight_options = gaflight_call_options_get_raw(options);
+  }
+  auto result = flight_client->DoPut(*flight_options, *flight_descriptor, arrow_schema);
+  if (!garrow::check(error, result, "[flight-client][do-put]")) {
+    return nullptr;
+  }
+  auto flight_result = std::move(*result);
+  return gaflight_do_put_result_new_raw(&flight_result);
+}
+
 G_END_DECLS
 
 GAFlightStreamReader *
@@ -672,7 +981,31 @@ gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader,
                                              flight_reader,
                                              "is-owner",
                                              is_owner,
-                                             NULL));
+                                             nullptr));
+}
+
+GAFlightStreamWriter *
+gaflight_stream_writer_new_raw(
+  std::shared_ptr<arrow::flight::FlightStreamWriter> *flight_writer)
+{
+  return GAFLIGHT_STREAM_WRITER(g_object_new(GAFLIGHT_TYPE_STREAM_WRITER,
+                                             "record-batch-writer",
+                                             flight_writer,
+                                             nullptr));
+}
+
+GAFlightMetadataReader *
+gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader)
+{
+  return GAFLIGHT_METADATA_READER(
+    g_object_new(GAFLIGHT_TYPE_METADATA_READER, "reader", flight_reader, nullptr));
+}
+
+arrow::flight::FlightMetadataReader *
+gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader)
+{
+  auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(reader);
+  return priv->reader;
 }
 
 arrow::flight::FlightCallOptions *
@@ -689,6 +1022,13 @@ gaflight_client_options_get_raw(GAFlightClientOptions *options)
   return &(priv->options);
 }
 
+GAFlightDoPutResult *
+gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result)
+{
+  return GAFLIGHT_DO_PUT_RESULT(
+
g_object_new(GAFLIGHT_TYPE_DO_PUT_RESULT, "result", flight_result, nullptr)); +} + std::shared_ptr gaflight_client_get_raw(GAFlightClient *client) { diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h index a91bbe55e3c04..12c5a06b810e1 100644 --- a/c_glib/arrow-flight-glib/client.h +++ b/c_glib/arrow-flight-glib/client.h @@ -35,6 +35,35 @@ struct _GAFlightStreamReaderClass GAFlightRecordBatchReaderClass parent_class; }; +#define GAFLIGHT_TYPE_STREAM_WRITER (gaflight_stream_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamWriter, + gaflight_stream_writer, + GAFLIGHT, + STREAM_WRITER, + GAFlightRecordBatchWriter) +struct _GAFlightStreamWriterClass +{ + GAFlightRecordBatchWriterClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error); + +#define GAFLIGHT_TYPE_METADATA_READER (gaflight_metadata_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightMetadataReader, gaflight_metadata_reader, GAFLIGHT, METADATA_READER, GObject) +struct _GAFlightMetadataReaderClass +{ + GObjectClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +GArrowBuffer * +gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error); + #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( @@ -75,6 +104,15 @@ GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClientOptions * gaflight_client_options_new(void); +#define GAFLIGHT_TYPE_DO_PUT_RESULT (gaflight_do_put_result_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightDoPutResult, gaflight_do_put_result, GAFLIGHT, DO_PUT_RESULT, GObject) +struct _GAFlightDoPutResultClass +{ + GObjectClass parent_class; +}; + #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject) @@ -124,4 +162,12 @@ gaflight_client_do_get(GAFlightClient *client, GAFlightCallOptions *options, GError **error); +GAFLIGHT_AVAILABLE_IN_18_0 +GAFlightDoPutResult * +gaflight_client_do_put(GAFlightClient *client, + GAFlightDescriptor *descriptor, + GArrowSchema *schema, + GAFlightCallOptions *options, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp index 185a28e6dc4bd..32ad35845aa12 100644 --- a/c_glib/arrow-flight-glib/client.hpp +++ b/c_glib/arrow-flight-glib/client.hpp @@ -28,6 +28,19 @@ GAFlightStreamReader * gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, gboolean is_owner); +GAFLIGHT_EXTERN +GAFlightStreamWriter * +gaflight_stream_writer_new_raw( + std::shared_ptr *flight_writer); + +GAFLIGHT_EXTERN +GAFlightMetadataReader * +gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader); + +GAFLIGHT_EXTERN +arrow::flight::FlightMetadataReader * +gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader); + GAFLIGHT_EXTERN arrow::flight::FlightCallOptions * gaflight_call_options_get_raw(GAFlightCallOptions *options); @@ -36,6 +49,10 @@ GAFLIGHT_EXTERN arrow::flight::FlightClientOptions * gaflight_client_options_get_raw(GAFlightClientOptions *options); +GAFLIGHT_EXTERN +GAFlightDoPutResult * +gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result); + GAFLIGHT_EXTERN std::shared_ptr gaflight_client_get_raw(GAFlightClient *client); diff --git 
a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp index f7eea08c264b3..3deaf67cc14e8 100644 --- a/c_glib/arrow-flight-glib/common.cpp +++ b/c_glib/arrow-flight-glib/common.cpp @@ -1196,7 +1196,7 @@ gaflight_record_batch_reader_finalize(GObject *object) if (priv->is_owner) { delete priv->reader; } - G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); + G_OBJECT_CLASS(gaflight_record_batch_reader_parent_class)->finalize(object); } static void @@ -1300,57 +1300,9 @@ gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError } } -typedef struct GAFlightRecordBatchWriterPrivate_ -{ - arrow::flight::MetadataRecordBatchWriter *writer; - bool is_owner; -} GAFlightRecordBatchWriterPrivate; - -enum { - PROP_RECORD_BATCH_WRITER_WRITER = 1, - PROP_RECORD_BATCH_WRITER_IS_OWNER, -}; - -G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightRecordBatchWriter, - gaflight_record_batch_writer, - GARROW_TYPE_RECORD_BATCH_WRITER) - -#define GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object) \ - static_cast( \ - gaflight_record_batch_writer_get_instance_private( \ - GAFLIGHT_RECORD_BATCH_WRITER(object))) - -static void -gaflight_record_batch_writer_finalize(GObject *object) -{ - auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object); - if (priv->is_owner) { - delete priv->writer; - } - G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); -} - -static void -gaflight_record_batch_writer_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) -{ - auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_RECORD_BATCH_WRITER_WRITER: - priv->writer = - static_cast(g_value_get_pointer(value)); - break; - case PROP_RECORD_BATCH_WRITER_IS_OWNER: - priv->is_owner = g_value_get_boolean(value); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} +G_DEFINE_ABSTRACT_TYPE(GAFlightRecordBatchWriter, + gaflight_record_batch_writer, + GARROW_TYPE_RECORD_BATCH_WRITER) static void gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object) @@ -1360,26 +1312,6 @@ gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object) static void gaflight_record_batch_writer_class_init(GAFlightRecordBatchWriterClass *klass) { - auto gobject_class = G_OBJECT_CLASS(klass); - - gobject_class->finalize = gaflight_record_batch_writer_finalize; - gobject_class->set_property = gaflight_record_batch_writer_set_property; - - GParamSpec *spec; - spec = g_param_spec_pointer( - "writer", - nullptr, - nullptr, - static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER_WRITER, spec); - - spec = g_param_spec_boolean( - "is-owner", - nullptr, - nullptr, - TRUE, - static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER_IS_OWNER, spec); } /** @@ -1402,7 +1334,8 @@ gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer, GArrowWriteOptions *options, GError **error) { - auto flight_writer = gaflight_record_batch_writer_get_raw(writer); + auto flight_writer = std::static_pointer_cast( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); auto arrow_schema = garrow_schema_get_raw(schema); arrow::ipc::IpcWriteOptions arrow_write_options; if (options) { @@ -1432,7 +1365,8 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer, GArrowBuffer *metadata, GError **error) { 
-  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>(
+    garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer)));
   auto arrow_metadata = garrow_buffer_get_raw(metadata);
   return garrow::check(error,
                        flight_writer->WriteMetadata(arrow_metadata),
@@ -1440,7 +1374,7 @@
 }
 
 /**
- * gaflight_record_batch_writer_write:
+ * gaflight_record_batch_writer_write_record_batch:
  * @writer: A #GAFlightRecordBatchWriter.
  * @record_batch: A #GArrowRecordBatch.
  * @metadata: (nullable): A #GArrowBuffer.
@@ -1453,12 +1387,13 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
  * Since: 18.0.0
  */
 gboolean
-gaflight_record_batch_writer_write(GAFlightRecordBatchWriter *writer,
-                                   GArrowRecordBatch *record_batch,
-                                   GArrowBuffer *metadata,
-                                   GError **error)
+gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer,
+                                                GArrowRecordBatch *record_batch,
+                                                GArrowBuffer *metadata,
+                                                GError **error)
 {
-  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>(
+    garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer)));
   auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
   auto arrow_metadata = garrow_buffer_get_raw(metadata);
   return garrow::check(
@@ -1599,10 +1534,3 @@ gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader)
   auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(reader);
   return priv->reader;
 }
-
-arrow::flight::MetadataRecordBatchWriter *
-gaflight_record_batch_writer_get_raw(GAFlightRecordBatchWriter *writer)
-{
-  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(writer);
-  return priv->writer;
-}
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
index 91c828caabb36..726132fe4921b 100644
--- a/c_glib/arrow-flight-glib/common.h
+++ b/c_glib/arrow-flight-glib/common.h
@@ -259,9 +259,9 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
 
 GAFLIGHT_AVAILABLE_IN_18_0
 gboolean
-gaflight_record_batch_writer_write(GAFlightRecordBatchWriter *writer,
-                                   GArrowRecordBatch *record_batch,
-                                   GArrowBuffer *metadata,
-                                   GError **error);
+gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer,
+                                                GArrowRecordBatch *record_batch,
+                                                GArrowBuffer *metadata,
+                                                GError **error);
 
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/server.cpp b/c_glib/arrow-flight-glib/server.cpp
index f7444918e90f6..e39fd97b0d06c 100644
--- a/c_glib/arrow-flight-glib/server.cpp
+++ b/c_glib/arrow-flight-glib/server.cpp
@@ -45,6 +45,9 @@ G_BEGIN_DECLS
  * client. Also allows reading application-defined metadata via the
  * Flight protocol.
  *
+ * #GAFlightMetadataWriter is a class for sending application-specific
+ * metadata back to the client during an upload.
+ *
  * #GAFlightServerAuthSender is a class for sending messages to the
 * client during an authentication handshake.
* @@ -290,6 +293,98 @@ gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader) return gaflight_descriptor_new_raw(&flight_descriptor); } +struct GAFlightMetadataWriterPrivate +{ + arrow::flight::FlightMetadataWriter *writer; +}; + +enum { + PROP_WRITER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightMetadataWriter, + gaflight_metadata_writer, + G_TYPE_OBJECT) + +#define GAFLIGHT_METADATA_WRITER_GET_PRIVATE(object) \ + static_cast( \ + gaflight_metadata_writer_get_instance_private(GAFLIGHT_METADATA_WRITER(object))) + +static void +gaflight_metadata_writer_finalize(GObject *object) +{ + auto priv = GAFLIGHT_METADATA_WRITER_GET_PRIVATE(object); + + delete priv->writer; + + G_OBJECT_CLASS(gaflight_metadata_writer_parent_class)->finalize(object); +} + +static void +gaflight_metadata_writer_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_METADATA_WRITER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_WRITER: + priv->writer = + static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_metadata_writer_init(GAFlightMetadataWriter *object) +{ +} + +static void +gaflight_metadata_writer_class_init(GAFlightMetadataWriterClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_metadata_writer_finalize; + gobject_class->set_property = gaflight_metadata_writer_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "writer", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_WRITER, spec); +} + +/** + * gaflight_metadata_writer_write: + * @writer: A #GAFlightMetadataWriter. + * @metadata: A #GArrowBuffer to be sent. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Writes metadata to the client. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 18.0.0 + */ +gboolean +gaflight_metadata_writer_write(GAFlightMetadataWriter *writer, + GArrowBuffer *metadata, + GError **error) +{ + auto flight_writer = gaflight_metadata_writer_get_raw(writer); + auto flight_metadata = garrow_buffer_get_raw(metadata); + return garrow::check(error, + flight_writer->WriteMetadata(*flight_metadata), + "[flight-metadata-writer][write]"); +} + struct GAFlightServerCallContextPrivate { arrow::flight::ServerCallContext *call_context; @@ -1034,6 +1129,34 @@ namespace gaflight { return arrow::Status::OK(); } + arrow::Status + DoPut(const arrow::flight::ServerCallContext &context, + std::unique_ptr reader, + std::unique_ptr writer) override + { + auto gacontext = gaflight_server_call_context_new_raw(&context); + auto gareader = gaflight_message_reader_new_raw(reader.release(), TRUE); + auto gawriter = gaflight_metadata_writer_new_raw(writer.release()); + GError *gerror = nullptr; + auto success = + gaflight_server_do_put(gaserver_, gacontext, gareader, gawriter, &gerror); + g_object_unref(gawriter); + g_object_unref(gareader); + g_object_unref(gacontext); + if (!success && !gerror) { + g_set_error(&gerror, + GARROW_ERROR, + GARROW_ERROR_UNKNOWN, + "GAFlightServerClass::do_put() returns FALSE but error isn't set"); + } + if (gerror) { + return garrow_error_to_status(gerror, + arrow::StatusCode::UnknownError, + "[flight-server][do-put]"); + } + return arrow::Status::OK(); + } + private: GAFlightServer *gaserver_; }; @@ -1228,6 +1351,35 @@ gaflight_server_do_get(GAFlightServer *server, return (*(klass->do_get))(server, context, ticket, error); } +/** + * gaflight_server_do_put: + * @server: A #GAFlightServer. + * @context: A #GAFlightServerCallContext. + * @reader: A #GAFlightMessageReader. + * @writer: A #GAFlightMetadataWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Processes a stream of IPC payloads sent from a client. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 18.0.0 + */ +gboolean +gaflight_server_do_put(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightMessageReader *reader, + GAFlightMetadataWriter *writer, + GError **error) +{ + auto klass = GAFLIGHT_SERVER_GET_CLASS(server); + if (!(klass && klass->do_put)) { + g_set_error(error, GARROW_ERROR, GARROW_ERROR_NOT_IMPLEMENTED, "not implemented"); + return false; + } + return klass->do_put(server, context, reader, writer, error); +} + G_END_DECLS arrow::flight::FlightDataStream * @@ -1257,6 +1409,20 @@ gaflight_message_reader_get_raw(GAFlightMessageReader *reader) return static_cast(flight_reader); } +GAFlightMetadataWriter * +gaflight_metadata_writer_new_raw(arrow::flight::FlightMetadataWriter *flight_writer) +{ + return GAFLIGHT_METADATA_WRITER( + g_object_new(GAFLIGHT_TYPE_METADATA_WRITER, "writer", flight_writer, nullptr)); +} + +arrow::flight::FlightMetadataWriter * +gaflight_metadata_writer_get_raw(GAFlightMetadataWriter *writer) +{ + auto priv = GAFLIGHT_METADATA_WRITER_GET_PRIVATE(writer); + return priv->writer; +} + GAFlightServerCallContext * gaflight_server_call_context_new_raw( const arrow::flight::ServerCallContext *flight_call_context) diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h index 7e594febb172f..e3a469098b32c 100644 --- a/c_glib/arrow-flight-glib/server.h +++ b/c_glib/arrow-flight-glib/server.h @@ -65,6 +65,21 @@ GAFLIGHT_AVAILABLE_IN_14_0 GAFlightDescriptor * gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader); +#define GAFLIGHT_TYPE_METADATA_WRITER (gaflight_metadata_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightMetadataWriter, gaflight_metadata_writer, GAFLIGHT, METADATA_WRITER, GObject) +struct _GAFlightMetadataWriterClass +{ + GObjectClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_metadata_writer_write(GAFlightMetadataWriter *writer, + GArrowBuffer *metadata, + GError **error); + #define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT (gaflight_server_call_context_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, @@ -199,6 +214,7 @@ G_DECLARE_DERIVABLE_TYPE(GAFlightServer, gaflight_server, GAFLIGHT, SERVER, GObj * GAFlightServerClass: * @list_flights: A virtual function to implement `ListFlights` API. * @do_get: A virtual function to implement `DoGet` API. + * @do_put: A virtual function to implement `DoPut` API. 
* * Since: 5.0.0 */ @@ -218,6 +234,11 @@ struct _GAFlightServerClass GAFlightServerCallContext *context, GAFlightTicket *ticket, GError **error); + gboolean (*do_put)(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightMessageReader *reader, + GAFlightMetadataWriter *writer, + GError **error); }; GAFLIGHT_AVAILABLE_IN_5_0 @@ -254,4 +275,12 @@ gaflight_server_do_get(GAFlightServer *server, GAFlightTicket *ticket, GError **error); +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_server_do_put(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightMessageReader *reader, + GAFlightMetadataWriter *writer, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp index ec4815751c8d8..f68eef83781ec 100644 --- a/c_glib/arrow-flight-glib/server.hpp +++ b/c_glib/arrow-flight-glib/server.hpp @@ -36,6 +36,14 @@ GAFLIGHT_EXTERN arrow::flight::FlightMessageReader * gaflight_message_reader_get_raw(GAFlightMessageReader *reader); +GAFLIGHT_EXTERN +GAFlightMetadataWriter * +gaflight_metadata_writer_new_raw(arrow::flight::FlightMetadataWriter *flight_writer); + +GAFLIGHT_EXTERN +arrow::flight::FlightMetadataWriter * +gaflight_metadata_writer_get_raw(GAFlightMetadataWriter *writer); + GAFLIGHT_EXTERN GAFlightServerCallContext * gaflight_server_call_context_new_raw( diff --git a/c_glib/arrow-glib/writer.cpp b/c_glib/arrow-glib/writer.cpp index b0321d51b3ba4..08af1c7976965 100644 --- a/c_glib/arrow-glib/writer.cpp +++ b/c_glib/arrow-glib/writer.cpp @@ -45,14 +45,14 @@ G_BEGIN_DECLS * batches in file format into output. */ -typedef struct GArrowRecordBatchWriterPrivate_ +struct GArrowRecordBatchWriterPrivate { std::shared_ptr<arrow::ipc::RecordBatchWriter> record_batch_writer; -} GArrowRecordBatchWriterPrivate; + bool is_closed; +}; enum { - PROP_0, - PROP_RECORD_BATCH_WRITER + PROP_RECORD_BATCH_WRITER = 1, }; G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchWriter, @@ -111,6 +111,7 @@ garrow_record_batch_writer_init(GArrowRecordBatchWriter *object) { auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(object); new (&priv->record_batch_writer) std::shared_ptr<arrow::ipc::RecordBatchWriter>; + priv->is_closed = false; } static void @@ -193,7 +194,27 @@ garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error auto arrow_writer = garrow_record_batch_writer_get_raw(writer); auto status = arrow_writer->Close(); - return garrow_error_check(error, status, "[record-batch-writer][close]"); + auto success = garrow_error_check(error, status, "[record-batch-writer][close]"); + if (success) { + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(writer); + priv->is_closed = true; + } + return success; +} + +/** + * garrow_record_batch_writer_is_closed: + * @writer: A #GArrowRecordBatchWriter. + * + * Returns: %TRUE if the writer is closed, %FALSE otherwise.
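One detail worth noting: the flag only flips to %TRUE after Close() succeeds, so a failed close can be reported and retried. A minimal sketch of the intended guard, assuming an existing writer variable:

/* Close at most once; a failed Close() leaves is_closed unset. */
GError *error = NULL;
if (!garrow_record_batch_writer_is_closed(writer)) {
  if (!garrow_record_batch_writer_close(writer, &error)) {
    g_printerr("failed to close record batch writer: %s\n", error->message);
    g_clear_error(&error);
  }
}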
+ * + * Since: 18.0.0 + */ +gboolean +garrow_record_batch_writer_is_closed(GArrowRecordBatchWriter *writer) +{ + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(writer); + return priv->is_closed; } G_DEFINE_TYPE(GArrowRecordBatchStreamWriter, diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h index 46bbdddec8c9d..cea8390d9028f 100644 --- a/c_glib/arrow-glib/writer.h +++ b/c_glib/arrow-glib/writer.h @@ -53,6 +53,10 @@ GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error); +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_record_batch_writer_is_closed(GArrowRecordBatchWriter *writer); + #define GARROW_TYPE_RECORD_BATCH_STREAM_WRITER \ (garrow_record_batch_stream_writer_get_type()) GARROW_AVAILABLE_IN_ALL diff --git a/c_glib/arrow-glib/writer.hpp b/c_glib/arrow-glib/writer.hpp index aa87ffe77d79b..1d85ac52f88d1 100644 --- a/c_glib/arrow-glib/writer.hpp +++ b/c_glib/arrow-glib/writer.hpp @@ -25,16 +25,20 @@ #include +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchWriter * garrow_record_batch_writer_new_raw( std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); +GARROW_AVAILABLE_IN_ALL std::shared_ptr<arrow::ipc::RecordBatchWriter> garrow_record_batch_writer_get_raw(GArrowRecordBatchWriter *writer); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamWriter * garrow_record_batch_stream_writer_new_raw( std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileWriter * garrow_record_batch_file_writer_new_raw( std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp index b6f019ed27d46..7a672f1f21dcc 100644 --- a/c_glib/parquet-glib/arrow-file-writer.cpp +++ b/c_glib/parquet-glib/arrow-file-writer.cpp @@ -316,14 +316,13 @@ gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properti return parquet_properties->data_pagesize(); } -typedef struct GParquetArrowFileWriterPrivate_ +struct GParquetArrowFileWriterPrivate { parquet::arrow::FileWriter *arrow_file_writer; -} GParquetArrowFileWriterPrivate; +}; enum { - PROP_0, - PROP_ARROW_FILE_WRITER + PROP_ARROW_FILE_WRITER = 1, }; G_DEFINE_TYPE_WITH_PRIVATE(GParquetArrowFileWriter, @@ -496,6 +495,45 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, } } +/** + * gparquet_arrow_file_writer_get_schema: + * @writer: A #GParquetArrowFileWriter. + * + * Returns: (transfer full): The schema to be written. + * + * Since: 18.0.0 + */ +GArrowSchema * +gparquet_arrow_file_writer_get_schema(GParquetArrowFileWriter *writer) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_schema = parquet_arrow_file_writer->schema(); + return garrow_schema_new_raw(&arrow_schema); +} + +/** + * gparquet_arrow_file_writer_write_record_batch: + * @writer: A #GParquetArrowFileWriter. + * @record_batch: A record batch to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error.
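For reference, batch-at-a-time writing with the two new entry points looks roughly as follows. This is a sketch only: schema, record_batch, and the output path are hypothetical, and it assumes %NULL is accepted for the writer properties argument of gparquet_arrow_file_writer_new_path().

/* Hypothetical batch-at-a-time Parquet writing via the new API. */
GError *error = NULL;
GParquetArrowFileWriter *writer =
  gparquet_arrow_file_writer_new_path(schema, "/tmp/data.parquet", NULL, &error);
if (!writer) {
  /* report error ... */
}
/* The writer reports the schema it is going to write. */
GArrowSchema *written_schema = gparquet_arrow_file_writer_get_schema(writer);
g_object_unref(written_schema);
/* Append one record batch; the writer manages row groups itself. */
if (!gparquet_arrow_file_writer_write_record_batch(writer, record_batch, &error)) {
  /* report error ... */
}
gparquet_arrow_file_writer_close(writer, &error);
g_object_unref(writer);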
+ * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_write_record_batch(GParquetArrowFileWriter *writer, + GArrowRecordBatch *record_batch, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch).get(); + auto status = parquet_arrow_file_writer->WriteRecordBatch(*arrow_record_batch); + return garrow_error_check(error, + status, + "[parquet][arrow][file-writer][write-record-batch]"); +} + /** * gparquet_arrow_file_writer_write_table: * @writer: A #GParquetArrowFileWriter. @@ -510,13 +548,57 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, - guint64 chunk_size, + gsize chunk_size, GError **error) { auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); auto arrow_table = garrow_table_get_raw(table).get(); - auto status = parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size); - return garrow_error_check(error, status, "[parquet][arrow][file-writer][write-table]"); + return garrow::check(error, + parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size), + "[parquet][arrow][file-writer][write-table]"); +} + +/** + * gparquet_arrow_file_writer_new_row_group: + * @writer: A #GParquetArrowFileWriter. + * @chunk_size: The max number of rows in a row group. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, + gsize chunk_size, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + return garrow::check(error, + parquet_arrow_file_writer->NewRowGroup(chunk_size), + "[parquet][arrow][file-writer][new-row-group]"); +} + +/** + * gparquet_arrow_file_writer_write_chunked_array: + * @writer: A #GParquetArrowFileWriter. + * @chunked_array: A #GArrowChunkedArray to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
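The row-group functions pair up for column-at-a-time writing: each gparquet_arrow_file_writer_new_row_group() call starts a fresh row group, and each column of that group is then supplied as a chunked array, in schema order. A sketch under the same assumptions as above (one boolean column; chunked_array and last_chunk are hypothetical), mirroring the Ruby test later in the patch:

/* Hypothetical explicit row-group writing. */
GError *error = NULL;
/* First row group: up to two rows. */
gparquet_arrow_file_writer_new_row_group(writer, 2, &error);
gparquet_arrow_file_writer_write_chunked_array(writer, chunked_array, &error);
/* Second row group: one row. */
gparquet_arrow_file_writer_new_row_group(writer, 1, &error);
gparquet_arrow_file_writer_write_chunked_array(writer, last_chunk, &error);
gparquet_arrow_file_writer_close(writer, &error);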
+ * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer, + GArrowChunkedArray *chunked_array, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + return garrow::check(error, + parquet_arrow_file_writer->WriteColumnChunk(arrow_chunked_array), + "[parquet][arrow][file-writer][write-chunked-array]"); } /** diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 71cbfa195e842..40595bdfef4b9 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -116,13 +116,35 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, GParquetWriterProperties *writer_properties, GError **error); +GPARQUET_AVAILABLE_IN_18_0 +GArrowSchema * +gparquet_arrow_file_writer_get_schema(GParquetArrowFileWriter *writer); + +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_write_record_batch(GParquetArrowFileWriter *writer, + GArrowRecordBatch *record_batch, + GError **error); + GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, - guint64 chunk_size, + gsize chunk_size, GError **error); +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, + gsize chunk_size, + GError **error); + +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer, + GArrowChunkedArray *chunked_array, + GError **error); + GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError **error); diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb index 7eb093d3cab80..f1e3f31234ab4 100644 --- a/c_glib/test/flight/test-client.rb +++ b/c_glib/test/flight/test-client.rb @@ -84,4 +84,37 @@ def test_error end end end + + sub_test_case("#do_put") do + def test_success + client = ArrowFlight::Client.new(@location) + generator = Helper::FlightInfoGenerator.new + descriptor = generator.page_view_descriptor + table = generator.page_view_table + result = client.do_put(descriptor, table.schema) + writer = result.writer + writer.write_table(table) + writer.done_writing + reader = result.reader + metadata = reader.read + writer.close + assert_equal(["done", table], + [metadata.data.to_s, @server.uploaded_table]) + end + + def test_error + client = ArrowFlight::Client.new(@location) + generator = Helper::FlightInfoGenerator.new + descriptor = generator.page_view_descriptor + table = generator.page_view_table + result = client.do_put(descriptor, table.schema) + assert_raise(Arrow::Error::Invalid) do + writer = result.writer + writer.done_writing + reader = result.reader + reader.read + writer.close + end + end + end end diff --git a/c_glib/test/helper/flight-server.rb b/c_glib/test/helper/flight-server.rb index 8c47029d41791..80b8a5c96cf9f 100644 --- a/c_glib/test/helper/flight-server.rb +++ b/c_glib/test/helper/flight-server.rb @@ -34,6 +34,8 @@ def virtual_do_is_valid(context, token) class FlightServer < ArrowFlight::Server type_register + attr_reader :uploaded_table + private def virtual_do_list_flights(context, criteria) generator = FlightInfoGenerator.new @@ -54,5 +56,14 @@ def virtual_do_do_get(context, ticket) reader = Arrow::TableBatchReader.new(table) ArrowFlight::RecordBatchStream.new(reader) end + + 
def virtual_do_do_put(context, reader, writer) + @uploaded_table = reader.read_all + writer.write(Arrow::Buffer.new("done")) + if @uploaded_table.n_rows.zero? + raise Arrow::Error::Invalid.new("empty table") + end + true + end end end diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb index f899e7273b2a2..89db16c6fb90b 100644 --- a/c_glib/test/parquet/test-arrow-file-writer.rb +++ b/c_glib/test/parquet/test-arrow-file-writer.rb @@ -26,7 +26,39 @@ def setup end end - def test_write + def test_schema + schema = build_schema("enabled" => :boolean) + writer = Parquet::ArrowFileWriter.new(schema, @file.path) + assert_equal(schema, writer.schema) + writer.close + end + + def test_write_record_batch + enabled_values = [true, nil, false, true] + record_batch = + build_record_batch("enabled" => build_boolean_array(enabled_values)) + + writer = Parquet::ArrowFileWriter.new(record_batch.schema, @file.path) + writer.write_record_batch(record_batch) + writer.close + + reader = Parquet::ArrowFileReader.new(@file.path) + begin + reader.use_threads = true + assert_equal([ + 1, + Arrow::Table.new(record_batch.schema, [record_batch]), + ], + [ + reader.n_row_groups, + reader.read_table, + ]) + ensure + reader.unref + end + end + + def test_write_table enabled_values = [true, nil, false, true] table = build_table("enabled" => build_boolean_array(enabled_values)) chunk_size = 2 @@ -40,11 +72,41 @@ def test_write reader.use_threads = true assert_equal([ enabled_values.length / chunk_size, - true, + table, + ], + [ + reader.n_row_groups, + reader.read_table, + ]) + ensure + reader.unref + end + end + + def test_write_chunked_array + schema = build_schema("enabled" => :boolean) + writer = Parquet::ArrowFileWriter.new(schema, @file.path) + writer.new_row_group(2) + chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])]) + writer.write_chunked_array(chunked_array) + writer.new_row_group(1) + chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])]) + writer.write_chunked_array(chunked_array) + writer.close + + reader = Parquet::ArrowFileReader.new(@file.path) + begin + reader.use_threads = true + assert_equal([ + 2, + build_table("enabled" => [ + build_boolean_array([true, nil]), + build_boolean_array([false]), + ]), ], [ reader.n_row_groups, - table.equal_metadata(reader.read_table, false), + reader.read_table, ]) ensure reader.unref diff --git a/c_glib/test/test-file-writer.rb b/c_glib/test/test-file-writer.rb index 5f9c3c4e19aa9..06c9dfa25c7fc 100644 --- a/c_glib/test/test-file-writer.rb +++ b/c_glib/test/test-file-writer.rb @@ -34,6 +34,9 @@ def test_write_record_batch file_writer.write_record_batch(record_batch) ensure file_writer.close + assert do + file_writer.closed? + end end ensure output.close @@ -68,6 +71,9 @@ def test_write_table file_writer.write_table(table) ensure file_writer.close + assert do + file_writer.closed? + end end ensure output.close diff --git a/c_glib/test/test-stream-writer.rb b/c_glib/test/test-stream-writer.rb index 32754e20838b4..261732ae91e15 100644 --- a/c_glib/test/test-stream-writer.rb +++ b/c_glib/test/test-stream-writer.rb @@ -35,6 +35,9 @@ def test_write_record_batch stream_writer.write_record_batch(record_batch) ensure stream_writer.close + assert do + stream_writer.closed? 
+ end end ensure output.close diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index f688fbb63a9ad..08a052e82f24d 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -46,7 +46,9 @@ set ARROW_CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON set ARROW_CXXFLAGS=/WX /MP @rem Install GCS testbench +set PIPX_BIN_DIR=C:\Windows\ call %CD%\ci\scripts\install_gcs_testbench.bat +storage-testbench -h || exit /B @rem @rem Build and test Arrow C++ libraries (including Parquet) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index dff1f2224809a..f0084894e19dc 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,17 +42,19 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# We want to install the GCS testbench using the Conda base environment's Python, +# because the test environment's Python may later change. +ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3 +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + # Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to -# be on the path for the tests to run. +# be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_azurite.sh -# We want to install the GCS testbench using the same Python binary that the Conda code will use. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index c602490d6b729..7ad2e5c0e8008 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -24,7 +24,7 @@ ARG maven=3.8.7 ARG node=16 ARG yarn=1.22 ARG jdk=11 -ARG go=1.21.8 +ARG go=1.22.6 # Install Archery and integration dependencies COPY ci/conda_env_archery.txt /arrow/ci/ diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile index 027fd589cecca..7e8dbe76f6248 100644 --- a/ci/docker/conda-python.dockerfile +++ b/ci/docker/conda-python.dockerfile @@ -32,11 +32,6 @@ RUN mamba install -q -y \ nomkl && \ mamba clean --all -# XXX The GCS testbench was already installed in conda-cpp.dockerfile, -# but we changed the installed Python version above, so we need to reinstall it. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - ENV ARROW_ACERO=ON \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ diff --git a/ci/docker/debian-12-go.dockerfile b/ci/docker/debian-12-go.dockerfile index c958e6bdee211..4bc683c109eb8 100644 --- a/ci/docker/debian-12-go.dockerfile +++ b/ci/docker/debian-12-go.dockerfile @@ -16,8 +16,8 @@ # under the License. 
ARG arch=amd64 -ARG go=1.21 -ARG staticcheck=v0.4.7 +ARG go=1.22 +ARG staticcheck=v0.5.1 FROM ${arch}/golang:${go}-bookworm # FROM collects all the args, get back the staticcheck version arg diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index 33d11823094ce..2ac5afe7b91f6 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -34,6 +34,7 @@ RUN dnf update -y && \ curl-devel \ gcc \ gcc-c++ \ + gdb \ gflags-devel \ git \ glog-devel \ diff --git a/ci/docker/linux-apt-python-313-freethreading.dockerfile b/ci/docker/linux-apt-python-313-freethreading.dockerfile new file mode 100644 index 0000000000000..f5505e67f00bb --- /dev/null +++ b/ci/docker/linux-apt-python-313-freethreading.dockerfile @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +RUN apt-get update -y -q && \ + apt install -y -q --no-install-recommends software-properties-common gpg-agent && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update -y -q && \ + apt install -y -q --no-install-recommends python3.13-dev python3.13-nogil python3.13-venv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY python/requirements-build.txt \ + python/requirements-test.txt \ + /arrow/python/ + +ENV ARROW_PYTHON_VENV /arrow-dev +RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN ${ARROW_PYTHON_VENV}/bin/python -m pip install -U pip setuptools wheel +RUN ${ARROW_PYTHON_VENV}/bin/python -m pip install \ + --pre \ + --prefer-binary \ + --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" \ + -r arrow/python/requirements-build.txt \ + -r arrow/python/requirements-test.txt + +# We want to run the PyArrow test suite with the GIL disabled, but cffi +# (more precisely, the `_cffi_backend` module) currently doesn't declare +# itself safe to run without the GIL. +# Therefore set PYTHON_GIL to 0. 
+ENV ARROW_ACERO=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ + ARROW_GDB=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ + ARROW_USE_GLOG=OFF \ + PYTHON_GIL=0 diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile index 443ff9c53cbcb..09883f9780a36 100644 --- a/ci/docker/python-wheel-manylinux-test.dockerfile +++ b/ci/docker/python-wheel-manylinux-test.dockerfile @@ -19,13 +19,19 @@ ARG arch ARG python_image_tag FROM ${arch}/python:${python_image_tag} -# RUN pip install --upgrade pip - # pandas doesn't provide wheel for aarch64 yet, so cache the compiled # test dependencies in a docker image COPY python/requirements-wheel-test.txt /arrow/python/ RUN pip install -r /arrow/python/requirements-wheel-test.txt +# Install the GCS testbench with the system Python +RUN apt-get update -y -q && \ + apt-get install -y -q \ + build-essential \ + python3-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ -ARG python -RUN PYTHON_VERSION=${python} /arrow/ci/scripts/install_gcs_testbench.sh default +ENV PIPX_PYTHON=/usr/bin/python3 PIPX_PIP_ARGS=--prefer-binary +RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 42f088fd8a22a..5cc1711608c03 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -100,6 +100,9 @@ RUN vcpkg install \ --x-feature=parquet \ --x-feature=s3 +# Make sure auditwheel is up-to-date +RUN pipx upgrade auditwheel + # Configure Python for applications running in the bash shell of this Dockerfile ARG python=3.8 ENV PYTHON_VERSION=${python} diff --git a/ci/docker/python-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile index 5f488a4c285ff..bffc1bd13d6b7 100644 --- a/ci/docker/python-wheel-windows-test-vs2019.dockerfile +++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile @@ -27,24 +27,38 @@ FROM abrarov/msvc-2019:2.11.0 # Add unix tools to path RUN setx path "%path%;C:\Program Files\Git\usr\bin" -# Remove previous installations of python from the base image +# 1. Remove previous installations of python from the base image # NOTE: a more recent base image (tried with 2.12.1) comes with python 3.9.7 # and the msi installers are failing to remove pip and tcl/tk "products" making # the subsequent choco python installation step failing for installing python # version 3.9.* due to existing python version +# 2. Install Minio for S3 testing. RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ - rm -rf Python* + rm -rf Python* && \ + curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z \ + --output "C:\Windows\Minio.exe" + +# Install the GCS testbench using a well-known Python version. +# NOTE: cannot use pipx's `--fetch-missing-python` because of +# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves. 
+RUN choco install -r -y --pre --no-progress python --version=3.11.9 +ENV PIPX_BIN_DIR=C:\\Windows\\ +ENV PIPX_PYTHON="C:\Python311\python.exe" +COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/ +RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \ + storage-testbench -h # Define the full version number otherwise choco falls back to patch number 0 (3.8 => 3.8.0) ARG python=3.8 -RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \ - (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \ - (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \ - (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \ - (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts") +RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10") & \ + (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \ + (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \ + (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \ + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1") # Install archiver to extract xz archives -RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% & \ - python -m pip install --no-cache-dir -U pip setuptools & \ +RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \ choco install --no-progress -r -y archiver + +ENV PYTHON=$python diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index e17c0306f115d..1b342df596c9d 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -29,10 +29,12 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ wget && \ apt-get clean && \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 341d8a87e8661..ce31c457e909e 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -29,10 +29,12 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ wget && \ apt-get clean && \ diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile index a995ab2a8bc2d..a1fd178a2c754 100644 --- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -29,10 +29,12 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ tzdata-legacy \ wget && \ diff --git a/ci/scripts/install_gcs_testbench.bat b/ci/scripts/install_gcs_testbench.bat index b03d0c2ad6608..f54f98db7cac8 100644 --- a/ci/scripts/install_gcs_testbench.bat +++ b/ci/scripts/install_gcs_testbench.bat @@ -17,9 +17,18 @@ @echo on -set GCS_TESTBENCH_VERSION="v0.36.0" +set GCS_TESTBENCH_VERSION="v0.40.0" + +set PIPX_FLAGS=--verbose +if NOT "%PIPX_PYTHON%"=="" ( + set PIPX_FLAGS=--python %PIPX_PYTHON% %PIPX_FLAGS% 
+) + +python -m pip install -U pipx || exit /B 1 @REM Install GCS testbench %GCS_TESTBENCH_VERSION% -python -m pip install ^ +pipx install %PIPX_FLAGS% ^ "https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^ || exit /B 1 + +pipx list --verbose diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 5471b3cc238ca..48a5858a358c9 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -e +set -ex if [ "$#" -ne 1 ]; then echo "Usage: $0 " @@ -34,19 +34,26 @@ case "$(uname -m)" in ;; esac -# On newer pythons install into the system will fail, so override that -export PIP_BREAK_SYSTEM_PACKAGES=1 - version=$1 if [[ "${version}" -eq "default" ]]; then version="v0.39.0" - # Latests versions of Testbench require newer setuptools - python3 -m pip install --upgrade setuptools fi -# This script is run with PYTHON undefined in some places, -# but those only use older pythons. -if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then - python3 -m pip install \ - "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" +# The Python to install pipx with +: ${PIPX_BASE_PYTHON:=$(which python3)} +# The Python to install the GCS testbench with +: ${PIPX_PYTHON:=${PIPX_BASE_PYTHON:-$(which python3)}} + +export PIP_BREAK_SYSTEM_PACKAGES=1 +${PIPX_BASE_PYTHON} -m pip install -U pipx + +pipx_flags=(--verbose --python ${PIPX_PYTHON}) +if [[ $(id -un) == "root" ]]; then + # Install globally as /root/.local/bin is typically not in $PATH + pipx_flags+=(--global) +fi +if [[ -n "${PIPX_PIP_ARGS}" ]]; then + pipx_flags+=(--pip-args "'${PIPX_PIP_ARGS}'") fi +${PIPX_BASE_PYTHON} -m pipx install ${pipx_flags[@]} \ + "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index 2eb58e8dc75ec..3050ad3111198 100755 --- a/ci/scripts/integration_arrow.sh +++ b/ci/scripts/integration_arrow.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -ex +set -e arrow_dir=${1} build_dir=${2} @@ -30,8 +30,13 @@ gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration : ${ARROW_INTEGRATION_JAVA:=ON} : ${ARROW_INTEGRATION_JS:=ON} +. ${arrow_dir}/ci/scripts/util_log.sh + +github_actions_group_begin "Integration: Prepare: Archery" pip install -e $arrow_dir/dev/archery[integration] +github_actions_group_end +github_actions_group_begin "Integration: Prepare: Dependencies" # For C Data Interface testing if [ "${ARROW_INTEGRATION_CSHARP}" == "ON" ]; then pip install pythonnet @@ -39,6 +44,7 @@ fi if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then pip install jpype1 fi +github_actions_group_end export ARROW_BUILD_ROOT=${build_dir} diff --git a/ci/scripts/integration_arrow_build.sh b/ci/scripts/integration_arrow_build.sh index 9b54049a2b803..8fca0d434b75e 100755 --- a/ci/scripts/integration_arrow_build.sh +++ b/ci/scripts/integration_arrow_build.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -ex +set -e arrow_dir=${1} build_dir=${2} @@ -28,22 +28,35 @@ build_dir=${2} : ${ARROW_INTEGRATION_JAVA:=ON} : ${ARROW_INTEGRATION_JS:=ON} +. 
${arrow_dir}/ci/scripts/util_log.sh + +github_actions_group_begin "Integration: Build: Rust" ${arrow_dir}/ci/scripts/rust_build.sh ${arrow_dir} ${build_dir} +github_actions_group_end +github_actions_group_begin "Integration: Build: nanoarrow" ${arrow_dir}/ci/scripts/nanoarrow_build.sh ${arrow_dir} ${build_dir} +github_actions_group_end +github_actions_group_begin "Integration: Build: C++" if [ "${ARROW_INTEGRATION_CPP}" == "ON" ]; then ${arrow_dir}/ci/scripts/cpp_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end +github_actions_group_begin "Integration: Build: C#" if [ "${ARROW_INTEGRATION_CSHARP}" == "ON" ]; then ${arrow_dir}/ci/scripts/csharp_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end +github_actions_group_begin "Integration: Build: Go" if [ "${ARROW_INTEGRATION_GO}" == "ON" ]; then ${arrow_dir}/ci/scripts/go_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end +github_actions_group_begin "Integration: Build: Java" if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then export ARROW_JAVA_CDATA="ON" export JAVA_JNI_CMAKE_ARGS="-DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF -DARROW_JAVA_JNI_ENABLE_C=ON" @@ -51,7 +64,10 @@ if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then ${arrow_dir}/ci/scripts/java_jni_build.sh ${arrow_dir} ${ARROW_HOME} ${build_dir} /tmp/dist/java ${arrow_dir}/ci/scripts/java_build.sh ${arrow_dir} ${build_dir} /tmp/dist/java fi +github_actions_group_end +github_actions_group_begin "Integration: Build: JavaScript" if [ "${ARROW_INTEGRATION_JS}" == "ON" ]; then ${arrow_dir}/ci/scripts/js_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index d5430f26748eb..d2c392e6b9db3 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -34,7 +34,7 @@ rm -rf ${source_dir}/python/pyarrow/*.so.* echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ===" export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}" -export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.15} +export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-12.0} export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)} if [ $arch = "arm64" ]; then @@ -150,7 +150,6 @@ echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=1 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} -export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index aa86494a9d47d..885019ff3049f 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -140,7 +140,6 @@ echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=1 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} -export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} @@ -181,5 +180,5 @@ popd rm -rf dist/temp-fix-wheel echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ===" -auditwheel repair -L . 
dist/pyarrow-*.whl -w repaired_wheels +auditwheel repair dist/pyarrow-*.whl -w repaired_wheels popd diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index cf87a17056783..6bdc3d3621e14 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -34,6 +34,7 @@ source_dir=${1} : ${ARROW_S3:=ON} : ${ARROW_SUBSTRAIT:=ON} : ${CHECK_IMPORTS:=ON} +: ${CHECK_WHEEL_CONTENT:=ON} : ${CHECK_UNITTESTS:=ON} : ${INSTALL_PYARROW:=ON} @@ -87,6 +88,11 @@ import pyarrow.parquet fi fi +if [ "${CHECK_WHEEL_CONTENT}" == "ON" ]; then + python ${source_dir}/ci/scripts/python_wheel_validate_contents.py \ + --path ${source_dir}/python/repaired_wheels +fi + if [ "${CHECK_UNITTESTS}" == "ON" ]; then # Install testing dependencies pip install -U -r ${source_dir}/python/requirements-wheel-test.txt diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py new file mode 100644 index 0000000000000..22b3a890f036b --- /dev/null +++ b/ci/scripts/python_wheel_validate_contents.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import argparse +from pathlib import Path +import re +import zipfile + + +def validate_wheel(path): + p = Path(path) + wheels = list(p.glob('*.whl')) + error_msg = f"{len(wheels)} wheels found but only 1 expected ({wheels})" + assert len(wheels) == 1, error_msg + f = zipfile.ZipFile(wheels[0]) + outliers = [ + info.filename for info in f.filelist if not re.match( + r'(pyarrow/|pyarrow-[-.\w\d]+\.dist-info/)', info.filename + ) + ] + assert not outliers, f"Unexpected contents in wheel: {sorted(outliers)}" + print(f"The wheel: {wheels[0]} seems valid.") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--path", type=str, required=True, + help="Directory where wheel is located") + args = parser.parse_args() + validate_wheel(args.path) + + +if __name__ == '__main__': + main() diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 54f02ec6f6ed0..1f1d5dca721d9 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -106,7 +106,6 @@ echo "=== (%PYTHON_VERSION%) Building wheel ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% -set PYARROW_INSTALL_TESTS=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_DATASET=%ARROW_DATASET% set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT% diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index 87c0bb1252024..ae5b7e36ad7ab 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -28,7 +28,7 @@ set PYARROW_TEST_ORC=ON set PYARROW_TEST_PARQUET=ON set PYARROW_TEST_PARQUET_ENCRYPTION=ON set PYARROW_TEST_SUBSTRAIT=ON -set PYARROW_TEST_S3=OFF +set PYARROW_TEST_S3=ON set PYARROW_TEST_TENSORFLOW=ON @REM Enable again once https://github.com/scipy/oldest-supported-numpy/pull/27 gets merged @@ -37,28 +37,35 @@ set PYARROW_TEST_TENSORFLOW=ON set ARROW_TEST_DATA=C:\arrow\testing\data set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data -@REM Install testing dependencies -pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 +@REM List installed Pythons +py -0p + +set PYTHON_CMD=py -%PYTHON% -@REM Install GCS testbench -call "C:\arrow\ci\scripts\install_gcs_testbench.bat" +%PYTHON_CMD% -m pip install -U pip setuptools || exit /B 1 + +@REM Install testing dependencies +%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 @REM Install the built wheels -python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 +%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 @REM Test that the modules are importable -python -c "import pyarrow" || exit /B 1 -python -c "import pyarrow._gcsfs" || exit /B 1 -python -c "import pyarrow._hdfs" || exit /B 1 -python -c "import pyarrow._s3fs" || exit /B 1 -python -c "import pyarrow.csv" || exit /B 1 -python -c "import pyarrow.dataset" || exit /B 1 -python -c "import pyarrow.flight" || exit /B 1 -python -c "import pyarrow.fs" || exit /B 1 -python -c "import pyarrow.json" || exit /B 1 -python -c "import pyarrow.orc" || exit /B 1 -python -c "import pyarrow.parquet" || exit /B 1 -python -c "import pyarrow.substrait" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._s3fs" || exit 
/B 1 +%PYTHON_CMD% -c "import pyarrow.csv" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.dataset" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.flight" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.fs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.json" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.orc" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1 + +@REM Validate wheel contents +%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\dist || exit /B 1 @rem Download IANA Timezone Database for ORC C++ curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B @@ -67,4 +74,4 @@ arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo @REM Execute unittest -pytest -r s --pyargs pyarrow || exit /B 1 +%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 diff --git a/ci/scripts/util_enable_core_dumps.sh b/ci/scripts/util_enable_core_dumps.sh new file mode 100644 index 0000000000000..09f8d2d727099 --- /dev/null +++ b/ci/scripts/util_enable_core_dumps.sh @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: this script is not marked executable as it should be source'd +# for `ulimit` to take effect. + +set -e + +platform=$(uname) + +if [ "${platform}" = "Linux" ]; then + # We need to override `core_pattern` because + # 1. the original setting may reference apport, which is not available under + # most Docker containers; + # 2. we want to write the core file in a well-known directory. + sudo sysctl -w kernel.core_pattern="/tmp/core.%e.%p" +fi + +ulimit -c unlimited diff --git a/ci/scripts/util_log.sh b/ci/scripts/util_log.sh new file mode 100644 index 0000000000000..b34c44059adb2 --- /dev/null +++ b/ci/scripts/util_log.sh @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +github_actions_group_begin() { + echo "::group::$1" + set -x +} + +github_actions_group_end() { + set +x + echo "::endgroup::" +} diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake index f511819a2edd9..32ae7bc433489 100644 --- a/ci/vcpkg/arm64-osx-static-debug.cmake +++ b/ci/vcpkg/arm64-osx-static-debug.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES arm64) -set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake index 43d65efb2651b..dde46cd763afe 100644 --- a/ci/vcpkg/arm64-osx-static-release.cmake +++ b/ci/vcpkg/arm64-osx-static-release.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES arm64) -set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE release) diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake index 8abc1ebf838f1..d3ef0d67eb719 100644 --- a/ci/vcpkg/universal2-osx-static-debug.cmake +++ b/ci/vcpkg/universal2-osx-static-debug.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") -set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake index 2eb36c15175b2..3018aa93e5fbb 100644 --- a/ci/vcpkg/universal2-osx-static-release.cmake +++ b/ci/vcpkg/universal2-osx-static-release.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") -set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE release) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5ead9e4b063cd..423744c388471 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -301,7 +301,8 @@ add_custom_target(lint --cpplint_binary ${CPPLINT_BIN} ${COMMON_LINT_OPTIONS} - ${ARROW_LINT_QUIET}) + ${ARROW_LINT_QUIET} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..) # # "make format" and "make check-format" targets diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index 8e42438a23c1c..55e3fe0980749 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -121,12 +121,15 @@ function print_coredumps() { # patterns must be set with prefix `core.{test-executable}*`: # # In case of macOS: - # sudo sysctl -w kern.corefile=core.%N.%P + # sudo sysctl -w kern.corefile=/tmp/core.%N.%P # On Linux: - # sudo sysctl -w kernel.core_pattern=core.%e.%p + # sudo sysctl -w kernel.core_pattern=/tmp/core.%e.%p # # and the ulimit must be increased: # ulimit -c unlimited + # + # If the tests are run in a Docker container, the instructions are slightly + # different: see the 'Coredumps' comment section in `docker-compose.yml`. 
# filename is truncated to the first 15 characters in case of linux, so limit # the pattern for the first 15 characters @@ -134,19 +137,21 @@ function print_coredumps() { FILENAME=$(echo ${FILENAME} | cut -c-15) PATTERN="^core\.${FILENAME}" - COREFILES=$(ls | grep $PATTERN) + COREFILES=$(ls /tmp | grep $PATTERN) if [ -n "$COREFILES" ]; then - echo "Found core dump, printing backtrace:" - for COREFILE in $COREFILES; do + COREPATH="/tmp/${COREFILE}" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "Running '${TEST_EXECUTABLE}' produced core dump at '${COREPATH}', printing backtrace:" # Print backtrace if [ "$(uname)" == "Darwin" ]; then - lldb -c "${COREFILE}" --batch --one-line "thread backtrace all -e true" + lldb -c "${COREPATH}" --batch --one-line "thread backtrace all -e true" else - gdb -c "${COREFILE}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch + gdb -c "${COREPATH}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch fi - # Remove the coredump, regenerate it via running the test case directly - rm "${COREFILE}" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + # Remove the coredump, it can be regenerated via running the test case directly + rm "${COREPATH}" done fi } diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py index 76c0fe0aefaca..a81acf2eb2ff9 100755 --- a/cpp/build-support/run_cpplint.py +++ b/cpp/build-support/run_cpplint.py @@ -26,24 +26,6 @@ from functools import partial -# NOTE(wesm): -# -# * readability/casting is disabled as it aggressively warns about functions -# with names like "int32", so "int32(x)", where int32 is a function name, -# warns with -_filters = ''' --whitespace/comments --readability/casting --readability/todo --readability/alt_tokens --build/header_guard --build/c++11 --build/include_what_you_use --runtime/references --build/include_order -'''.split() - - def _get_chunk_key(filenames): # lists are not hashable so key on the first filename in a chunk return filenames[0] @@ -87,8 +69,6 @@ def _check_some_files(completed_processes, filenames): cmd = [ arguments.cpplint_binary, '--verbose=2', - '--linelength=90', - '--filter=' + ','.join(_filters) ] if (arguments.cpplint_binary.endswith('.py') and platform.system() == 'Windows'): diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 41466a1c22404..755887314d110 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -303,7 +303,10 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_IPC) define_option(ARROW_AZURE - "Build Arrow with Azure support (requires the Azure SDK for C++)" OFF) + "Build Arrow with Azure support (requires the Azure SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF) @@ -346,9 +349,16 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_WITH_UTF8PROC) define_option(ARROW_GCS - "Build Arrow with GCS support (requires the GCloud SDK for C++)" OFF) + "Build Arrow with GCS support (requires the GCloud SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) - define_option(ARROW_HDFS "Build the Arrow HDFS bridge" OFF) + define_option(ARROW_HDFS + "Build the Arrow HDFS bridge" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_IPC "Build the Arrow IPC extensions" ON) @@ -398,7 +408,11 @@ takes precedence over ccache if a 
storage backend is configured" ON) ARROW_HDFS ARROW_JSON) - define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) + define_option(ARROW_S3 + "Build Arrow with S3 support (requires the AWS SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_SKYHOOK "Build the Skyhook libraries" diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 63e2c036c9a6f..b31037a973279 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -259,7 +259,7 @@ macro(resolve_dependency DEPENDENCY_NAME) IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) - set(multi_value_args COMPONENTS PC_PACKAGE_NAMES) + set(multi_value_args COMPONENTS OPTIONAL_COMPONENTS PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -287,6 +287,9 @@ macro(resolve_dependency DEPENDENCY_NAME) if(ARG_COMPONENTS) list(APPEND FIND_PACKAGE_ARGUMENTS COMPONENTS ${ARG_COMPONENTS}) endif() + if(ARG_OPTIONAL_COMPONENTS) + list(APPEND FIND_PACKAGE_ARGUMENTS OPTIONAL_COMPONENTS ${ARG_OPTIONAL_COMPONENTS}) + endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") find_package(${FIND_PACKAGE_ARGUMENTS}) set(COMPATIBLE ${${PACKAGE_NAME}_FOUND}) @@ -1289,15 +1292,19 @@ if(ARROW_USE_BOOST) set(Boost_USE_STATIC_LIBS ON) endif() if(ARROW_BOOST_REQUIRE_LIBRARY) - set(ARROW_BOOST_COMPONENTS system filesystem) + set(ARROW_BOOST_COMPONENTS filesystem system) + set(ARROW_BOOST_OPTIONAL_COMPONENTS process) else() set(ARROW_BOOST_COMPONENTS) + set(ARROW_BOOST_OPTIONAL_COMPONENTS) endif() resolve_dependency(Boost REQUIRED_VERSION ${ARROW_BOOST_REQUIRED_VERSION} COMPONENTS ${ARROW_BOOST_COMPONENTS} + OPTIONAL_COMPONENTS + ${ARROW_BOOST_OPTIONAL_COMPONENTS} IS_RUNTIME_DEPENDENCY # libarrow.so doesn't depend on libboost*. FALSE) @@ -1316,14 +1323,35 @@ if(ARROW_USE_BOOST) endif() endforeach() - if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # boost/process/detail/windows/handle_workaround.hpp doesn't work - # without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't - # provide __kernel_entry without winternl.h. - # - # See also: - # https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp - target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1") + if(TARGET Boost::process) + # Boost >= 1.86 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V1") + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + else() + # Boost < 1.86 + add_library(Boost::process INTERFACE IMPORTED) + if(TARGET Boost::filesystem) + target_link_libraries(Boost::process INTERFACE Boost::filesystem) + endif() + if(TARGET Boost::system) + target_link_libraries(Boost::process INTERFACE Boost::system) + endif() + if(TARGET Boost::headers) + target_link_libraries(Boost::process INTERFACE Boost::headers) + endif() + if(Boost_VERSION VERSION_GREATER_EQUAL 1.80) + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + # Boost < 1.86 has a bug that + # boost::process::v2::process_environment::on_setup() isn't + # defined. We need to build Boost Process source to define it. 
+ # + # See also: + # https://github.com/boostorg/process/issues/312 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE") + if(WIN32) + target_link_libraries(Boost::process INTERFACE bcrypt ntdll) + endif() + endif() endif() message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}") diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 89f28ee416ede..e77a02d0c0800 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -373,8 +373,11 @@ set(ARROW_SRCS config.cc datum.cc device.cc + device_allocation_type_set.cc extension_type.cc extension/bool8.cc + extension/json.cc + extension/uuid.cc pretty_print.cc record_batch.cc result.cc @@ -642,9 +645,13 @@ else() endif() set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) -set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) -set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static - ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process) +set(ARROW_TESTING_STATIC_LINK_LIBS + arrow::flatbuffers + RapidJSON + Boost::process + arrow_static + ${ARROW_GTEST_GTEST}) set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) # that depend on gtest @@ -665,9 +672,10 @@ set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc testing/fixed_width_test_util.cc + testing/generator.cc testing/gtest_util.cc + testing/process.cc testing/random.cc - testing/generator.cc testing/util.cc) # @@ -1225,6 +1233,7 @@ add_subdirectory(testing) add_subdirectory(array) add_subdirectory(c) add_subdirectory(compute) +add_subdirectory(extension) add_subdirectory(io) add_subdirectory(tensor) add_subdirectory(util) @@ -1267,7 +1276,6 @@ endif() if(ARROW_JSON) add_subdirectory(json) - add_subdirectory(extension) endif() if(ARROW_ORC) diff --git a/cpp/src/arrow/acero/aggregate_benchmark.cc b/cpp/src/arrow/acero/aggregate_benchmark.cc index 854862e3e48ca..c0dfba66336af 100644 --- a/cpp/src/arrow/acero/aggregate_benchmark.cc +++ b/cpp/src/arrow/acero/aggregate_benchmark.cc @@ -165,11 +165,11 @@ struct SumSentinelUnrolled : public Summer<T> { static void Sum(const ArrayType& array, SumState<T>* state) { SumState<T> local; -#define SUM_NOT_NULL(ITEM) \ - do { \ - local.total += values[i + ITEM] * Traits<T>::NotNull(values[i + ITEM]); \ - local.valid_count++; \ - } while (0) +# define SUM_NOT_NULL(ITEM) \ + do { \ + local.total += values[i + ITEM] * Traits<T>::NotNull(values[i + ITEM]); \ + local.valid_count++; \ + } while (0) const auto values = array.raw_values(); const auto length = array.length(); @@ -185,7 +185,7 @@ struct SumSentinelUnrolled : public Summer<T> { SUM_NOT_NULL(7); } -#undef SUM_NOT_NULL +# undef SUM_NOT_NULL for (int64_t i = length_rounded * 8; i < length; ++i) { local.total += values[i] * Traits<T>::NotNull(values[i]); @@ -256,7 +256,7 @@ struct SumBitmapVectorizeUnroll : public Summer<T> { for (int64_t i = 0; i < length_rounded; i += 8) { const uint8_t valid_byte = bitmap[i / 8]; -#define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1)) +# define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1)) if (valid_byte < 0xFF) { // Some nulls @@ -277,7 +277,7 @@ struct SumBitmapVectorizeUnroll : public Summer<T> { } } -#undef SUM_SHIFT +# undef SUM_SHIFT for (int64_t i = length_rounded; i < length; ++i) { if (bit_util::GetBit(bitmap, i)) { diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc
b/cpp/src/arrow/acero/aggregate_node_test.cc index d398fb24b73d5..c623271db9fb4 100644 --- a/cpp/src/arrow/acero/aggregate_node_test.cc +++ b/cpp/src/arrow/acero/aggregate_node_test.cc @@ -210,5 +210,57 @@ TEST(GroupByNode, NoSkipNulls) { AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, out_batches.batches); } +TEST(ScalarAggregateNode, AnyAll) { + // GH-43768: boolean_any and boolean_all with constant input should work well + // when min_count != 0. + std::shared_ptr<Schema> in_schema = schema({field("not_used", int32())}); + std::shared_ptr<Schema> out_schema = schema({field("agg_out", boolean())}); + struct AnyAllCase { + std::string batches_json; + Expression literal; + std::string expected_json; + bool skip_nulls = false; + uint32_t min_count = 2; + }; + std::vector<AnyAllCase> cases{ + {"[[42], [42], [42], [42]]", literal(true), "[[true]]"}, + {"[[42], [42], [42], [42]]", literal(false), "[[false]]"}, + {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]"}, + {"[[42]]", literal(true), "[[null]]"}, + {"[[42], [42], [42]]", literal(true), "[[true]]"}, + {"[[42], [42], [42]]", literal(true), "[[null]]", /*skip_nulls=*/false, + /*min_count=*/4}, + {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]", + /*skip_nulls=*/true}, + }; + for (const AnyAllCase& any_all_case : cases) { + for (auto func_name : {"any", "all"}) { + std::vector<ExecBatch> batches{ + ExecBatchFromJSON({int32()}, any_all_case.batches_json)}; + std::vector<Aggregate> aggregates = { + Aggregate(func_name, + std::make_shared<ScalarAggregateOptions>( + /*skip_nulls=*/any_all_case.skip_nulls, + /*min_count=*/any_all_case.min_count), + FieldRef("literal"))}; + + // And a projection to make the input include a Scalar Boolean + Declaration plan = Declaration::Sequence( + {{"exec_batch_source", ExecBatchSourceNodeOptions(in_schema, batches)}, + {"project", ProjectNodeOptions({any_all_case.literal}, {"literal"})}, + {"aggregate", AggregateNodeOptions(aggregates)}}); + + ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches, + DeclarationToExecBatches(plan)); + + ExecBatch expected_batch = + ExecBatchFromJSON({boolean()}, any_all_case.expected_json); + + AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, + out_batches.batches); + } + } +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 2248362241cd7..c4f11d01f3d5c 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -34,7 +34,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/unmaterialized_table_internal.h" #ifndef NDEBUG -#include "arrow/acero/options_internal.h" +# include "arrow/acero/options_internal.h" #endif #include "arrow/acero/query_context.h" #include "arrow/acero/schema_util.h" @@ -42,7 +42,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/array/builder_primitive.h" #ifndef NDEBUG -#include "arrow/compute/function_internal.h" +# include "arrow/compute/function_internal.h" #endif #include "arrow/acero/time_series_util.h" #include "arrow/compute/key_hash_internal.h" @@ -207,16 +207,16 @@ class DebugSync { std::unique_lock<std::mutex> debug_lock_; }; -#define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__) -#define DEBUG_MANIP(manip) \ - DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; }) -#define NDEBUG_EXPLICIT -#define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__ +# define DEBUG_SYNC(node, ...)
diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc
index 2248362241cd7..c4f11d01f3d5c 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -34,7 +34,7 @@
 #include "arrow/acero/options.h"
 #include "arrow/acero/unmaterialized_table_internal.h"
 #ifndef NDEBUG
-#include "arrow/acero/options_internal.h"
+#  include "arrow/acero/options_internal.h"
 #endif
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/schema_util.h"
@@ -42,7 +42,7 @@
 #include "arrow/array/builder_binary.h"
 #include "arrow/array/builder_primitive.h"
 #ifndef NDEBUG
-#include "arrow/compute/function_internal.h"
+#  include "arrow/compute/function_internal.h"
 #endif
 #include "arrow/acero/time_series_util.h"
 #include "arrow/compute/key_hash_internal.h"
@@ -207,16 +207,16 @@ class DebugSync {
   std::unique_lock<std::mutex> debug_lock_;
 };

-#define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__)
-#define DEBUG_MANIP(manip) \
-  DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; })
-#define NDEBUG_EXPLICIT
-#define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__
+#  define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__)
+#  define DEBUG_MANIP(manip) \
+    DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; })
+#  define NDEBUG_EXPLICIT
+#  define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__
 #else
-#define DEBUG_SYNC(...)
-#define DEBUG_MANIP(...)
-#define NDEBUG_EXPLICIT explicit
-#define DEBUG_ADD(ndebug, ...) ndebug
+#  define DEBUG_SYNC(...)
+#  define DEBUG_MANIP(...)
+#  define NDEBUG_EXPLICIT explicit
+#  define DEBUG_ADD(ndebug, ...) ndebug
 #endif

 struct MemoStore {
diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc
index 555f580028fac..5d3e9fba08bbf 100644
--- a/cpp/src/arrow/acero/asof_join_node_test.cc
+++ b/cpp/src/arrow/acero/asof_join_node_test.cc
@@ -26,13 +26,13 @@
 #include "arrow/acero/exec_plan.h"
 #include "arrow/testing/future_util.h"
 #ifndef NDEBUG
-#include
+#  include
 #endif
 #include

 #include "arrow/acero/options.h"
 #ifndef NDEBUG
-#include "arrow/acero/options_internal.h"
+#  include "arrow/acero/options_internal.h"
 #endif
 #include "arrow/acero/map_node.h"
 #include "arrow/acero/query_context.h"
diff --git a/cpp/src/arrow/acero/bloom_filter.h b/cpp/src/arrow/acero/bloom_filter.h
index 50d07bfd948e0..530beaea64827 100644
--- a/cpp/src/arrow/acero/bloom_filter.h
+++ b/cpp/src/arrow/acero/bloom_filter.h
@@ -18,7 +18,7 @@
 #pragma once

 #if defined(ARROW_HAVE_RUNTIME_AVX2)
-#include
+#  include
 #endif

 #include
diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc b/cpp/src/arrow/acero/bloom_filter_test.cc
index a2d6e9575a1aa..30cafd120caea 100644
--- a/cpp/src/arrow/acero/bloom_filter_test.cc
+++ b/cpp/src/arrow/acero/bloom_filter_test.cc
@@ -503,9 +503,9 @@ TEST(BloomFilter, Scaling) {
   num_build.push_back(4000000);

   std::vector<BloomFilterBuildStrategy> strategies;
-#ifdef ARROW_ENABLE_THREADING
+#  ifdef ARROW_ENABLE_THREADING
   strategies.push_back(BloomFilterBuildStrategy::PARALLEL);
-#endif
+#  endif
   strategies.push_back(BloomFilterBuildStrategy::SINGLE_THREADED);

   for (const auto hardware_flags : HardwareFlagsForTesting()) {
diff --git a/cpp/src/arrow/acero/groupby_aggregate_node.cc b/cpp/src/arrow/acero/groupby_aggregate_node.cc
index 723c8b7377e13..06b034ab2d459 100644
--- a/cpp/src/arrow/acero/groupby_aggregate_node.cc
+++ b/cpp/src/arrow/acero/groupby_aggregate_node.cc
@@ -369,13 +369,14 @@ Status GroupByNode::InputReceived(ExecNode* input, ExecBatch batch) {
   DCHECK_EQ(input, inputs_[0]);

   auto handler = [this](const ExecBatch& full_batch, const Segment& segment) {
-    if (!segment.extends && segment.offset == 0) RETURN_NOT_OK(OutputResult(false));
+    if (!segment.extends && segment.offset == 0)
+      RETURN_NOT_OK(OutputResult(/*is_last=*/false));
     auto exec_batch = full_batch.Slice(segment.offset, segment.length);
     auto batch = ExecSpan(exec_batch);
     RETURN_NOT_OK(Consume(batch));
     RETURN_NOT_OK(
         ExtractSegmenterValues(&segmenter_values_, exec_batch, segment_key_field_ids_));
-    if (!segment.is_open) RETURN_NOT_OK(OutputResult(false));
+    if (!segment.is_open) RETURN_NOT_OK(OutputResult(/*is_last=*/false));
     return Status::OK();
   };
   ARROW_RETURN_NOT_OK(
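The segment handler above emits a result as soon as a segment group closes, instead of buffering the whole input. Stripped of the Acero machinery, the pattern looks like this (a plain C++ sketch under the assumption of segment-key-ordered input; Row and the sum aggregate are illustrative):

#include <cstdint>
#include <iostream>
#include <vector>

struct Row {
  int64_t segment_key;
  int64_t value;
};

// Emit one aggregated result per contiguous run of equal segment keys:
// when the current segment "closes" (the key changes), output the
// accumulated state and reset it, mirroring OutputResult(/*is_last=*/false).
void SegmentedSum(const std::vector<Row>& ordered_rows) {
  bool has_open_segment = false;
  int64_t current_key = 0;
  int64_t sum = 0;
  for (const Row& row : ordered_rows) {
    if (has_open_segment && row.segment_key != current_key) {
      std::cout << current_key << " -> " << sum << "\n";  // segment closed
      sum = 0;
    }
    current_key = row.segment_key;
    has_open_segment = true;
    sum += row.value;
  }
  if (has_open_segment) std::cout << current_key << " -> " << sum << "\n";
}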
diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc
index 470960b1c5062..e3e37e249e6a3 100644
--- a/cpp/src/arrow/acero/hash_join_benchmark.cc
+++ b/cpp/src/arrow/acero/hash_join_benchmark.cc
@@ -104,7 +104,7 @@ class JoinBenchmark {
       key_cmp.push_back(JoinKeyCmp::EQ);
     }

-    for (size_t i = 0; i < settings.build_payload_types.size(); i++) {
+    for (size_t i = 0; i < settings.probe_payload_types.size(); i++) {
       std::string name = "lp" + std::to_string(i);
       DCHECK_OK(l_schema_builder.AddField(field(name, settings.probe_payload_types[i])));
     }
@@ -279,7 +279,7 @@ static void BM_HashJoinBasic_MatchesPerRow(benchmark::State& st) {
   settings.cardinality = 1.0 / static_cast<double>(st.range(0));
   settings.num_build_batches = static_cast<int>(st.range(1));
-  settings.num_probe_batches = settings.num_probe_batches;
+  settings.num_probe_batches = settings.num_build_batches;

   HashJoinBasicBenchmarkImpl(st, settings);
 }
@@ -291,7 +291,7 @@ static void BM_HashJoinBasic_PayloadSize(benchmark::State& st) {
   settings.cardinality = 1.0 / static_cast<double>(st.range(1));
   settings.num_build_batches = static_cast<int>(st.range(2));
-  settings.num_probe_batches = settings.num_probe_batches;
+  settings.num_probe_batches = settings.num_build_batches;

   HashJoinBasicBenchmarkImpl(st, settings);
 }
diff --git a/cpp/src/arrow/acero/hash_join_dict.cc b/cpp/src/arrow/acero/hash_join_dict.cc
index 3aef08e6e9ccf..8db9dddb2c3a0 100644
--- a/cpp/src/arrow/acero/hash_join_dict.cc
+++ b/cpp/src/arrow/acero/hash_join_dict.cc
@@ -225,21 +225,20 @@ Status HashJoinDictBuild::Init(ExecContext* ctx, std::shared_ptr<Array> dictiona
     return Status::OK();
   }

-  dictionary_ = dictionary;
+  dictionary_ = std::move(dictionary);

   // Initialize encoder
   RowEncoder encoder;
-  std::vector<TypeHolder> encoder_types;
-  encoder_types.emplace_back(value_type_);
+  std::vector<TypeHolder> encoder_types{value_type_};
   encoder.Init(encoder_types, ctx);

   // Encode all dictionary values
-  int64_t length = dictionary->data()->length;
+  int64_t length = dictionary_->data()->length;
   if (length >= std::numeric_limits<int32_t>::max()) {
     return Status::Invalid(
         "Dictionary length in hash join must fit into signed 32-bit integer.");
   }
-  RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary->data()}, length)));
+  RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary_->data()}, length)));

   std::vector<int32_t> entries_to_take;
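The hash_join_dict.cc hunk above is a use-after-move fix: once `dictionary` is moved into the member `dictionary_`, the local parameter must not be dereferenced again. A minimal standalone illustration of the hazard and the fix pattern (the Builder type and names are illustrative, not Arrow code):

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

class Builder {
 public:
  void Init(std::shared_ptr<std::vector<int>> values) {
    values_ = std::move(values);
    // BUG (the pre-fix pattern): `values->size()` here would dereference a
    // moved-from shared_ptr, which is null after the move above.
    // Fix: consistently go through the member once ownership has moved.
    size_ = values_->size();
  }

 private:
  std::shared_ptr<std::vector<int>> values_;
  size_t size_ = 0;
};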
diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc
index 67f902e64be93..80dd163ced740 100644
--- a/cpp/src/arrow/acero/hash_join_node.cc
+++ b/cpp/src/arrow/acero/hash_join_node.cc
@@ -61,30 +61,30 @@ Result<std::vector<FieldRef>> HashJoinSchema::ComputePayload(
     const std::vector<FieldRef>& filter, const std::vector<FieldRef>& keys) {
   // payload = (output + filter) - keys, with no duplicates
   std::unordered_set<int> payload_fields;
-  for (auto ref : output) {
+  for (const auto& ref : output) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.insert(match[0]);
   }

-  for (auto ref : filter) {
+  for (const auto& ref : filter) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.insert(match[0]);
   }

-  for (auto ref : keys) {
+  for (const auto& ref : keys) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.erase(match[0]);
   }

   std::vector<FieldRef> payload_refs;
-  for (auto ref : output) {
+  for (const auto& ref : output) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     if (payload_fields.find(match[0]) != payload_fields.end()) {
       payload_refs.push_back(ref);
       payload_fields.erase(match[0]);
     }
   }
-  for (auto ref : filter) {
+  for (const auto& ref : filter) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     if (payload_fields.find(match[0]) != payload_fields.end()) {
       payload_refs.push_back(ref);
@@ -198,7 +198,7 @@ Status HashJoinSchema::ValidateSchemas(JoinType join_type, const Schema& left_sc
     return Status::Invalid("Different number of key fields on left (", left_keys.size(),
                            ") and right (", right_keys.size(), ") side of the join");
   }
-  if (left_keys.size() < 1) {
+  if (left_keys.empty()) {
     return Status::Invalid("Join key cannot be empty");
   }
   for (size_t i = 0; i < left_keys.size() + right_keys.size(); ++i) {
@@ -432,7 +432,7 @@ Status HashJoinSchema::CollectFilterColumns(std::vector<FieldRef>& left_filter,
           indices[0] -= left_schema.num_fields();
           FieldPath corrected_path(std::move(indices));
           if (right_seen_paths.find(*path) == right_seen_paths.end()) {
-            right_filter.push_back(corrected_path);
+            right_filter.emplace_back(corrected_path);
             right_seen_paths.emplace(std::move(corrected_path));
           }
         } else if (left_seen_paths.find(*path) == left_seen_paths.end()) {
@@ -698,7 +698,7 @@ class HashJoinNode : public ExecNode, public TracedNode {
                std::shared_ptr<Schema> output_schema,
                std::unique_ptr<HashJoinSchema> schema_mgr, Expression filter,
                std::unique_ptr<HashJoinImpl> impl)
-      : ExecNode(plan, inputs, {"left", "right"},
+      : ExecNode(plan, std::move(inputs), {"left", "right"},
                  /*output_schema=*/std::move(output_schema)),
         TracedNode(this),
         join_type_(join_options.join_type),
diff --git a/cpp/src/arrow/acero/hash_join_node.h b/cpp/src/arrow/acero/hash_join_node.h
index ad60019ceabc4..19745b8675cf0 100644
--- a/cpp/src/arrow/acero/hash_join_node.h
+++ b/cpp/src/arrow/acero/hash_join_node.h
@@ -65,9 +65,9 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
   std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
                                            const std::string& right_field_name_suffix);

-  bool LeftPayloadIsEmpty() { return PayloadIsEmpty(0); }
+  bool LeftPayloadIsEmpty() const { return PayloadIsEmpty(0); }

-  bool RightPayloadIsEmpty() { return PayloadIsEmpty(1); }
+  bool RightPayloadIsEmpty() const { return PayloadIsEmpty(1); }

   static int kMissingField() {
     return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
   }
@@ -88,7 +88,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
                               const SchemaProjectionMap& right_to_filter,
                               const Expression& filter);

-  bool PayloadIsEmpty(int side) {
+  bool PayloadIsEmpty(int side) const {
     assert(side == 0 || side == 1);
     return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
   }
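The comment in ComputePayload above, "payload = (output + filter) - keys, with no duplicates", is ordinary set arithmetic. A self-contained sketch with plain strings standing in for field references (not the Arrow implementation, which must also resolve FieldRefs against a schema):

#include <string>
#include <unordered_set>
#include <vector>

// payload = (output + filter) - keys, preserving first-seen order and
// dropping duplicates, as in HashJoinSchema::ComputePayload.
std::vector<std::string> ComputePayloadColumns(const std::vector<std::string>& output,
                                               const std::vector<std::string>& filter,
                                               const std::vector<std::string>& keys) {
  std::unordered_set<std::string> candidates(output.begin(), output.end());
  candidates.insert(filter.begin(), filter.end());
  for (const auto& key : keys) candidates.erase(key);

  std::vector<std::string> payload;
  for (const auto& name : output) {
    if (candidates.erase(name) > 0) payload.push_back(name);  // erased == first sighting
  }
  for (const auto& name : filter) {
    if (candidates.erase(name) > 0) payload.push_back(name);
  }
  return payload;
}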
diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc
index 9065e286a2228..76ad9c7d650eb 100644
--- a/cpp/src/arrow/acero/hash_join_node_test.cc
+++ b/cpp/src/arrow/acero/hash_join_node_test.cc
@@ -29,6 +29,7 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/row_encoder_internal.h"
+#include "arrow/extension/uuid.h"
 #include "arrow/testing/extension_type.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
diff --git a/cpp/src/arrow/acero/options_internal.h b/cpp/src/arrow/acero/options_internal.h
index d4bf79a7cd008..fd3ea78116572 100644
--- a/cpp/src/arrow/acero/options_internal.h
+++ b/cpp/src/arrow/acero/options_internal.h
@@ -18,8 +18,8 @@
 #pragma once

 #ifndef NDEBUG
-#include
-#include
+#  include
+#  include
 #endif

 namespace arrow {
diff --git a/cpp/src/arrow/acero/scalar_aggregate_node.cc b/cpp/src/arrow/acero/scalar_aggregate_node.cc
index c7805f4d24eb2..b34f7511cc12b 100644
--- a/cpp/src/arrow/acero/scalar_aggregate_node.cc
+++ b/cpp/src/arrow/acero/scalar_aggregate_node.cc
@@ -234,7 +234,8 @@ Status ScalarAggregateNode::InputReceived(ExecNode* input, ExecBatch batch) {
     // (1) The segment is starting of a new segment group and points to
     // the beginning of the batch, then it means no data in the batch belongs
     // to the current segment group. We can output and reset kernel states.
-    if (!segment.extends && segment.offset == 0) RETURN_NOT_OK(OutputResult(false));
+    if (!segment.extends && segment.offset == 0)
+      RETURN_NOT_OK(OutputResult(/*is_last=*/false));

     // We add segment to the current segment group aggregation
     auto exec_batch = full_batch.Slice(segment.offset, segment.length);
@@ -244,7 +245,7 @@ Status ScalarAggregateNode::InputReceived(ExecNode* input, ExecBatch batch) {

     // If the segment closes the current segment group, we can output segment group
     // aggregation.
-    if (!segment.is_open) RETURN_NOT_OK(OutputResult(false));
+    if (!segment.is_open) RETURN_NOT_OK(OutputResult(/*is_last=*/false));

     return Status::OK();
   };
diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc
index 4d0c8187ac6e2..6c783110af571 100644
--- a/cpp/src/arrow/acero/swiss_join.cc
+++ b/cpp/src/arrow/acero/swiss_join.cc
@@ -1667,7 +1667,7 @@ Result<std::shared_ptr<ArrayData>> JoinResultMaterialize::FlushBuildColumn(
     const std::shared_ptr<DataType>& data_type, const RowArray* row_array, int column_id,
     uint32_t* row_ids) {
   ResizableArrayData output;
-  output.Init(data_type, pool_, bit_util::Log2(num_rows_));
+  RETURN_NOT_OK(output.Init(data_type, pool_, bit_util::Log2(num_rows_)));

   for (size_t i = 0; i <= null_ranges_.size(); ++i) {
     int row_id_begin =
@@ -2247,8 +2247,9 @@ Result<ExecBatch> JoinResidualFilter::MaterializeFilterInput(
       build_schemas_->map(HashJoinProjection::FILTER, HashJoinProjection::PAYLOAD);
   for (int i = 0; i < num_build_cols; ++i) {
     ResizableArrayData column_data;
-    column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i), pool_,
-                     bit_util::Log2(num_batch_rows));
+    RETURN_NOT_OK(
+        column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i),
+                         pool_, bit_util::Log2(num_batch_rows)));
     if (auto idx = to_key.get(i); idx != SchemaProjectionMap::kMissingField) {
       RETURN_NOT_OK(build_keys_->DecodeSelected(&column_data, idx, num_batch_rows,
                                                 key_ids_maybe_null, pool_));
diff --git a/cpp/src/arrow/acero/visibility.h b/cpp/src/arrow/acero/visibility.h
index 02382232b69dd..21a697a56eca9 100644
--- a/cpp/src/arrow/acero/visibility.h
+++ b/cpp/src/arrow/acero/visibility.h
@@ -20,31 +20,31 @@
 #pragma once

 #if defined(_WIN32) || defined(__CYGWIN__)
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4251)
-#else
-#pragma GCC diagnostic ignored "-Wattributes"
-#endif
+#  if defined(_MSC_VER)
+#    pragma warning(push)
+#    pragma warning(disable : 4251)
+#  else
+#    pragma GCC diagnostic ignored "-Wattributes"
+#  endif

-#ifdef ARROW_ACERO_STATIC
-#define ARROW_ACERO_EXPORT
-#elif defined(ARROW_ACERO_EXPORTING)
-#define ARROW_ACERO_EXPORT __declspec(dllexport)
-#else
-#define ARROW_ACERO_EXPORT __declspec(dllimport)
-#endif
+#  ifdef ARROW_ACERO_STATIC
+#    define ARROW_ACERO_EXPORT
+#  elif defined(ARROW_ACERO_EXPORTING)
+#    define ARROW_ACERO_EXPORT __declspec(dllexport)
+#  else
+#    define ARROW_ACERO_EXPORT __declspec(dllimport)
+#  endif

-#define ARROW_ACERO_NO_EXPORT
+#  define ARROW_ACERO_NO_EXPORT
 #else  // Not Windows
-#ifndef ARROW_ACERO_EXPORT
-#define ARROW_ACERO_EXPORT __attribute__((visibility("default")))
-#endif
-#ifndef ARROW_ACERO_NO_EXPORT
-#define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden")))
-#endif
+#  ifndef ARROW_ACERO_EXPORT
+#    define ARROW_ACERO_EXPORT __attribute__((visibility("default")))
+#  endif
+#  ifndef ARROW_ACERO_NO_EXPORT
+#    define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden")))
+#  endif
 #endif  // Not-Windows

 #if defined(_MSC_VER)
-#pragma warning(pop)
+#  pragma warning(pop)
 #endif
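The swiss_join.cc hunks above wrap `ResizableArrayData::Init` in RETURN_NOT_OK because Init reports allocation failure through a Status that was previously dropped on the floor. The general shape of that fix, as a hedged sketch using Arrow's public status macros (InitBuffers and Materialize are illustrative names):

#include <cstdint>
#include "arrow/status.h"

// A fallible initializer in the style of ResizableArrayData::Init: the
// returned Status carries allocation failures and must be checked.
arrow::Status InitBuffers(int64_t log2_rows) {
  if (log2_rows < 0) return arrow::Status::Invalid("negative size hint");
  return arrow::Status::OK();  // real allocation elided in this sketch
}

arrow::Status Materialize() {
  // Propagate the error instead of silently ignoring it.
  ARROW_RETURN_NOT_OK(InitBuffers(10));
  return arrow::Status::OK();
}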
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 25759f8471365..d16b6cfd2e97d 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -25,7 +25,7 @@
 #include

 #ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK
-#include
+#  include
 #endif

 #include "arrow/adapters/orc/util.h"
diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index 716ae0722069e..e4af67d7e5f0b 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -232,6 +232,14 @@ class ARROW_EXPORT Array {
   /// \return DeviceAllocationType
   DeviceAllocationType device_type() const { return data_->device_type(); }

+  /// \brief Return the statistics of this Array
+  ///
+  /// This just delegates to calling statistics on the underlying ArrayData
+  /// object which backs this Array.
+  ///
+  /// \return std::shared_ptr<ArrayStatistics>
+  std::shared_ptr<ArrayStatistics> statistics() const { return data_->statistics; }
+
  protected:
   Array() = default;
   ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
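Reading the new accessor from consumer code looks like the sketch below. Hedged assumptions: the statistics pointer is null unless a producer (e.g. a file reader) attached ArrayStatistics to the backing ArrayData, and the field shapes (optional null_count, variant-typed min/max, is_min_exact) follow the test usage later in this diff.

#include <cstdint>
#include <iostream>
#include <variant>
#include "arrow/api.h"
#include "arrow/array/statistics.h"

void PrintStatistics(const arrow::Array& array) {
  std::shared_ptr<arrow::ArrayStatistics> stats = array.statistics();
  if (!stats) {
    std::cout << "no statistics attached\n";  // the common case today
    return;
  }
  if (stats->null_count.has_value()) {
    std::cout << "null count: " << *stats->null_count << "\n";
  }
  // min/max are optional variants; int64_t is one of the alternatives.
  if (stats->min.has_value() && std::holds_alternative<int64_t>(*stats->min)) {
    std::cout << "min: " << std::get<int64_t>(*stats->min)
              << (stats->is_min_exact ? " (exact)" : " (approximate)") << "\n";
  }
}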
diff --git a/cpp/src/arrow/array/array_dict.cc b/cpp/src/arrow/array/array_dict.cc
index 7fd76a1dae81b..55e086af30bc2 100644
--- a/cpp/src/arrow/array/array_dict.cc
+++ b/cpp/src/arrow/array/array_dict.cc
@@ -349,7 +349,7 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
   using MemoTableType = typename DictTraits<T>::MemoTableType;

   DictionaryUnifierImpl(MemoryPool* pool, std::shared_ptr<DataType> value_type)
-      : pool_(pool), value_type_(value_type), memo_table_(pool) {}
+      : pool_(pool), value_type_(std::move(value_type)), memo_table_(pool) {}

   Status Unify(const Array& dictionary, std::shared_ptr<Buffer>* out) override {
     if (dictionary.null_count() > 0) {
@@ -432,7 +432,7 @@ struct MakeUnifier {
   std::unique_ptr<DictionaryUnifier> result;

   MakeUnifier(MemoryPool* pool, std::shared_ptr<DataType> value_type)
-      : pool(pool), value_type(value_type) {}
+      : pool(pool), value_type(std::move(value_type)) {}

   template <typename T>
   enable_if_no_memoize<T, Status> Visit(const T&) {
diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc
index 47c0fd35829a1..bb469df1ad6b4 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -542,7 +542,7 @@ Result<std::shared_ptr<ListArray>> ListArray::FromArrays(
     const Array& offsets, const Array& values, MemoryPool* pool,
     std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
   return ListArrayFromArrays(std::make_shared<ListType>(values.type()), offsets,
-                             values, pool, null_bitmap, null_count);
+                             values, pool, std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<ListArray>> ListArray::FromListView(const ListViewArray& source,
@@ -563,7 +563,7 @@ Result<std::shared_ptr<ListArray>> ListArray::FromArrays(
     return Status::TypeError("Mismatching list value type");
   }
   return ListArrayFromArrays(std::move(type), offsets, values, pool,
-                             null_bitmap, null_count);
+                             std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<Array>> ListArray::Flatten(MemoryPool* memory_pool) const {
@@ -599,8 +599,8 @@ Result<std::shared_ptr<LargeListArray>> LargeListArray::FromArrays(
     const Array& offsets, const Array& values, MemoryPool* pool,
     std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
   return ListArrayFromArrays(
-      std::make_shared<LargeListType>(values.type()), offsets, values, pool, null_bitmap,
-      null_count);
+      std::make_shared<LargeListType>(values.type()), offsets, values, pool,
+      std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<LargeListArray>> LargeListArray::FromListView(
@@ -622,7 +622,7 @@ Result<std::shared_ptr<LargeListArray>> LargeListArray::FromArrays(
     return Status::TypeError("Mismatching list value type");
   }
   return ListArrayFromArrays(std::move(type), offsets, values, pool,
-                             null_bitmap, null_count);
+                             std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<Array>> LargeListArray::Flatten(MemoryPool* memory_pool) const {
@@ -662,7 +662,7 @@ Result<std::shared_ptr<ListViewArray>> ListViewArray::FromArrays(
     std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
   return ListViewArrayFromArrays(
       std::make_shared<ListViewType>(values.type()), offsets, sizes, values, pool,
-      null_bitmap, null_count);
+      std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<ListViewArray>> ListViewArray::FromArrays(
@@ -677,7 +677,7 @@ Result<std::shared_ptr<ListViewArray>> ListViewArray::FromArrays(
     return Status::TypeError("Mismatching list-view value type");
   }
   return ListViewArrayFromArrays(std::move(type), offsets, sizes, values,
-                                 pool, null_bitmap, null_count);
+                                 pool, std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<ListViewArray>> ListViewArray::FromList(const ListArray& source,
@@ -722,7 +722,7 @@ LargeListViewArray::LargeListViewArray(std::shared_ptr<DataType> type, int64_t l
                                        std::shared_ptr<Buffer> null_bitmap,
                                        int64_t null_count, int64_t offset) {
   LargeListViewArray::SetData(ArrayData::Make(
-      type, length,
+      std::move(type), length,
      {std::move(null_bitmap), std::move(value_offsets), std::move(value_sizes)},
      /*child_data=*/{values->data()}, null_count, offset));
 }
@@ -737,7 +737,7 @@ Result<std::shared_ptr<LargeListViewArray>> LargeListViewArray::FromArrays(
     std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
   return ListViewArrayFromArrays(
       std::make_shared<LargeListViewType>(values.type()), offsets, sizes, values, pool,
-      null_bitmap, null_count);
+      std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<LargeListViewArray>> LargeListViewArray::FromArrays(
@@ -752,7 +752,7 @@ Result<std::shared_ptr<LargeListViewArray>> LargeListViewArray::FromArrays(
     return Status::TypeError("Mismatching large list-view value type");
   }
   return ListViewArrayFromArrays(
-      std::move(type), offsets, sizes, values, pool, null_bitmap, null_count);
+      std::move(type), offsets, sizes, values, pool, std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<Array>> LargeListViewArray::Flatten(
@@ -854,8 +854,9 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
     null_count = kUnknownNullCount;
   }
   buffers[1] = typed_offsets.values();
-  return std::make_shared<MapArray>(type, offsets->length() - 1, std::move(buffers), keys,
-                                    items, /*null_count=*/null_count, offsets->offset());
+  return std::make_shared<MapArray>(std::move(type), offsets->length() - 1,
+                                    std::move(buffers), keys, items,
+                                    /*null_count=*/null_count, offsets->offset());
 }

 Result<std::shared_ptr<Array>> MapArray::FromArrays(const std::shared_ptr<Array>& offsets,
@@ -971,8 +972,8 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
   int64_t length = values->length() / list_size;
   auto list_type = std::make_shared<FixedSizeListType>(values->type(), list_size);
-  return std::make_shared<FixedSizeListArray>(list_type, length, values, null_bitmap,
-                                              null_count);
+  return std::make_shared<FixedSizeListArray>(list_type, length, values,
+                                              std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
@@ -992,8 +993,8 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
   }
   int64_t length = values->length() / list_type.list_size();
-  return std::make_shared<FixedSizeListArray>(type, length, values, null_bitmap,
-                                              null_count);
+  return std::make_shared<FixedSizeListArray>(std::move(type), length, values,
+                                              std::move(null_bitmap), null_count);
 }

 Result<std::shared_ptr<Array>> FixedSizeListArray::Flatten(
@@ -1015,7 +1016,7 @@ StructArray::StructArray(const std::shared_ptr<DataType>& type, int64_t length,
                          std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
                          int64_t offset) {
   ARROW_CHECK_EQ(type->id(), Type::STRUCT);
-  SetData(ArrayData::Make(type, length, {null_bitmap}, null_count, offset));
+  SetData(ArrayData::Make(type, length, {std::move(null_bitmap)}, null_count, offset));
   for (const auto& child : children) {
     data_->child_data.push_back(child->data());
   }
@@ -1048,7 +1049,7 @@ Result<std::shared_ptr<StructArray>> StructArray::Make(
     null_count = 0;
   }
   return std::make_shared<StructArray>(struct_(fields), length - offset, children,
-                                       null_bitmap, null_count, offset);
+                                       std::move(null_bitmap), null_count, offset);
 }

 Result<std::shared_ptr<StructArray>> StructArray::Make(
@@ -1085,8 +1086,8 @@ const std::shared_ptr<Array>& StructArray::field(int i) const {
     } else {
       field_data = data_->child_data[i];
     }
-    std::shared_ptr<Array> result = MakeArray(field_data);
-    std::atomic_store(&boxed_fields_[i], result);
+    result = MakeArray(field_data);
+    std::atomic_store(&boxed_fields_[i], std::move(result));
     return boxed_fields_[i];
   }
   return boxed_fields_[i];
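For readers who have not used the FromArrays factories the hunks above touch, here is a short usage example against the public API (the offsets/values are illustrative data):

#include <iostream>
#include "arrow/api.h"

// Build a ListArray from raw offsets and flattened values. FromArrays
// validates the offsets and derives the list type from the value type.
arrow::Status BuildList() {
  arrow::Int32Builder offsets_builder;
  ARROW_RETURN_NOT_OK(offsets_builder.AppendValues({0, 2, 2, 5}));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> offsets,
                        offsets_builder.Finish());

  arrow::Int64Builder values_builder;
  ARROW_RETURN_NOT_OK(values_builder.AppendValues({1, 2, 3, 4, 5}));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> values,
                        values_builder.Finish());

  // Three lists: [1, 2], [], [3, 4, 5]
  ARROW_ASSIGN_OR_RAISE(auto list_array,
                        arrow::ListArray::FromArrays(*offsets, *values));
  std::cout << list_array->ToString() << std::endl;
  return arrow::Status::OK();
}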
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 32806d9d2edb3..73e0c692432b6 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -3709,6 +3709,132 @@ TEST(TestSwapEndianArrayData, InvalidLength) {
   }
 }

+class TestArrayDataStatistics : public ::testing::Test {
+ public:
+  void SetUp() {
+    valids_ = {1, 0, 1, 1};
+    null_count_ = std::count(valids_.begin(), valids_.end(), 0);
+    null_buffer_ = *internal::BytesToBits(valids_);
+    values_ = {1, 0, 3, -4};
+    min_ = *std::min_element(values_.begin(), values_.end());
+    max_ = *std::max_element(values_.begin(), values_.end());
+    values_buffer_ = Buffer::FromVector(values_);
+    data_ = ArrayData::Make(int32(), values_.size(), {null_buffer_, values_buffer_},
+                            null_count_);
+    data_->statistics = std::make_shared<ArrayStatistics>();
+    data_->statistics->null_count = null_count_;
+    data_->statistics->min = min_;
+    data_->statistics->is_min_exact = true;
+    data_->statistics->max = max_;
+    data_->statistics->is_max_exact = true;
+  }
+
+ protected:
+  std::vector<uint8_t> valids_;
+  size_t null_count_;
+  std::shared_ptr<Buffer> null_buffer_;
+  std::vector<int32_t> values_;
+  int64_t min_;
+  int64_t max_;
+  std::shared_ptr<Buffer> values_buffer_;
+  std::shared_ptr<ArrayData> data_;
+};
+
+TEST_F(TestArrayDataStatistics, MoveConstructor) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data(std::move(copied_data));
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyConstructor) {
+  ArrayData copied_data(*data_);
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, MoveAssignment) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data;
+  moved_data = std::move(copied_data);
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyAssignment) {
+  ArrayData copied_data;
+  copied_data = *data_;
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyTo) {
+  ASSERT_OK_AND_ASSIGN(auto copied_data,
+                       data_->CopyTo(arrow::default_cpu_memory_manager()));
+
+  ASSERT_TRUE(copied_data->statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data->statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data->statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_TRUE(copied_data->statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data->statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_TRUE(copied_data->statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, Slice) {
+  auto sliced_data = data_->Slice(0, 1);
+  ASSERT_FALSE(sliced_data->statistics);
+}
+
 template <typename T>
 class TestPrimitiveArray : public ::testing::Test {
  public:
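Taken together, the tests above pin down the propagation rules: copies (and CopyTo) carry statistics along, while Slice drops them. A compact sketch of what a consumer can rely on (assumes the caller attached statistics beforehand, as the test fixture does):

#include <cassert>
#include <memory>
#include "arrow/api.h"

// Statistics survive copies but are dropped by Slice(), since min/max/null
// counts computed for the whole array may not hold for a sub-range.
void SliceDropsStatistics(const std::shared_ptr<arrow::ArrayData>& data) {
  assert(data->statistics != nullptr);  // assumption: stats were attached

  std::shared_ptr<arrow::ArrayData> sliced = data->Slice(0, 1);
  assert(sliced->statistics == nullptr);  // always discarded on slice

  arrow::ArrayData copied(*data);  // the copy shares the statistics object
  assert(copied.statistics == data->statistics);
}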
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index 83eeb56c496cf..8e29297a8c175 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -165,6 +165,8 @@ Result<std::shared_ptr<ArrayData>> CopyToImpl(const ArrayData& data,
     ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn));
   }

+  output->statistics = data.statistics;
+
   return output;
 }
 }  // namespace
@@ -195,6 +197,7 @@ std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
   } else {
     copy->null_count = null_count != 0 ? kUnknownNullCount : 0;
   }
+  copy->statistics = nullptr;
   return copy;
 }
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index e0508fe6980a7..1e6ee9a1d32ff 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -24,6 +24,7 @@
 #include
 #include

+#include "arrow/array/statistics.h"
 #include "arrow/buffer.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
@@ -152,7 +153,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(std::move(other.buffers)),
         child_data(std::move(other.child_data)),
-        dictionary(std::move(other.dictionary)) {
+        dictionary(std::move(other.dictionary)),
+        statistics(std::move(other.statistics)) {
     SetNullCount(other.null_count);
   }

@@ -163,7 +165,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(other.buffers),
         child_data(other.child_data),
-        dictionary(other.dictionary) {
+        dictionary(other.dictionary),
+        statistics(other.statistics) {
     SetNullCount(other.null_count);
   }

@@ -176,6 +179,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = std::move(other.buffers);
     child_data = std::move(other.child_data);
     dictionary = std::move(other.dictionary);
+    statistics = std::move(other.statistics);
     return *this;
   }

@@ -188,6 +192,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = other.buffers;
     child_data = other.child_data;
     dictionary = other.dictionary;
+    statistics = other.statistics;
     return *this;
   }

@@ -274,6 +279,18 @@ struct ARROW_EXPORT ArrayData {
   }

   /// \brief Construct a zero-copy slice of the data with the given offset and length
+  ///
+  /// The associated `ArrayStatistics` is always discarded in a sliced
+  /// `ArrayData`, because statistics computed for the original `ArrayData`
+  /// may not be valid for the sliced range. If you want to reuse statistics
+  /// from the original `ArrayData`, you need to do so explicitly.
+  ///
+  /// Even if the specified slice range covers the entire original
+  /// `ArrayData` (in which case the statistics would still apply), the
+  /// associated `ArrayStatistics` is discarded as well. Use `Copy()`
+  /// instead for that case.
   std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;

   /// \brief Input-checking variant of Slice
@@ -390,6 +407,9 @@ struct ARROW_EXPORT ArrayData {

   // The dictionary for this Array, if any. Only used for dictionary type
   std::shared_ptr<ArrayData> dictionary;
+
+  // The statistics for this Array.
+  std::shared_ptr<ArrayStatistics> statistics;
 };

 /// \brief A non-owning Buffer reference
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 0d940d3bc869e..69f1646054f4c 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -985,10 +985,22 @@ Status ValidateArrayFull(const Array& array) { return ValidateArrayFull(*array.d

 ARROW_EXPORT
 Status ValidateUTF8(const ArrayData& data) {
-  DCHECK(data.type->id() == Type::STRING || data.type->id() == Type::STRING_VIEW ||
-         data.type->id() == Type::LARGE_STRING);
-  UTF8DataValidator validator{data};
-  return VisitTypeInline(*data.type, &validator);
+  const auto& storage_type =
+      (data.type->id() == Type::EXTENSION)
+          ? checked_cast<const ExtensionType&>(*data.type).storage_type()
+          : data.type;
+  DCHECK(storage_type->id() == Type::STRING || storage_type->id() == Type::STRING_VIEW ||
+         storage_type->id() == Type::LARGE_STRING);
+
+  if (data.type->id() == Type::EXTENSION) {
+    ArrayData ext_data(data);
+    ext_data.type = storage_type;
+    UTF8DataValidator validator{ext_data};
+    return VisitTypeInline(*storage_type, &validator);
+  } else {
+    UTF8DataValidator validator{data};
+    return VisitTypeInline(*storage_type, &validator);
+  }
 }

 ARROW_EXPORT
diff --git a/cpp/src/arrow/c/abi.h b/cpp/src/arrow/c/abi.h
index 6abe866b5f6f6..db051fff5ff05 100644
--- a/cpp/src/arrow/c/abi.h
+++ b/cpp/src/arrow/c/abi.h
@@ -41,11 +41,11 @@ extern "C" {
 #endif

 #ifndef ARROW_C_DATA_INTERFACE
-#define ARROW_C_DATA_INTERFACE
+#  define ARROW_C_DATA_INTERFACE

-#define ARROW_FLAG_DICTIONARY_ORDERED 1
-#define ARROW_FLAG_NULLABLE 2
-#define ARROW_FLAG_MAP_KEYS_SORTED 4
+#  define ARROW_FLAG_DICTIONARY_ORDERED 1
+#  define ARROW_FLAG_NULLABLE 2
+#  define ARROW_FLAG_MAP_KEYS_SORTED 4

 struct ArrowSchema {
   // Array type description
@@ -83,7 +83,7 @@ struct ArrowArray {
 #endif  // ARROW_C_DATA_INTERFACE

 #ifndef ARROW_C_DEVICE_DATA_INTERFACE
-#define ARROW_C_DEVICE_DATA_INTERFACE
+#  define ARROW_C_DEVICE_DATA_INTERFACE

 // Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html

@@ -91,33 +91,33 @@ struct ArrowArray {
 typedef int32_t ArrowDeviceType;

 // CPU device, same as using ArrowArray directly
-#define ARROW_DEVICE_CPU 1
+#  define ARROW_DEVICE_CPU 1
 // CUDA GPU Device
-#define ARROW_DEVICE_CUDA 2
+#  define ARROW_DEVICE_CUDA 2
 // Pinned CUDA CPU memory by cudaMallocHost
-#define ARROW_DEVICE_CUDA_HOST 3
+#  define ARROW_DEVICE_CUDA_HOST 3
 // OpenCL Device
-#define ARROW_DEVICE_OPENCL 4
+#  define ARROW_DEVICE_OPENCL 4
 // Vulkan buffer for next-gen graphics
-#define ARROW_DEVICE_VULKAN 7
+#  define ARROW_DEVICE_VULKAN 7
 // Metal for Apple GPU
-#define ARROW_DEVICE_METAL 8
+#  define ARROW_DEVICE_METAL 8
 // Verilog simulator buffer
-#define ARROW_DEVICE_VPI 9
+#  define ARROW_DEVICE_VPI 9
 // ROCm GPUs for AMD GPUs
-#define ARROW_DEVICE_ROCM 10
+#  define ARROW_DEVICE_ROCM 10
 // Pinned ROCm CPU memory allocated by hipMallocHost
-#define ARROW_DEVICE_ROCM_HOST 11
+#  define ARROW_DEVICE_ROCM_HOST 11
 // Reserved for extension
-#define ARROW_DEVICE_EXT_DEV 12
+#  define ARROW_DEVICE_EXT_DEV 12
 // CUDA managed/unified memory allocated by cudaMallocManaged
-#define ARROW_DEVICE_CUDA_MANAGED 13
+#  define ARROW_DEVICE_CUDA_MANAGED 13
 // unified shared memory allocated on a oneAPI non-partitioned device.
-#define ARROW_DEVICE_ONEAPI 14
+#  define ARROW_DEVICE_ONEAPI 14
 // GPU support for next-gen WebGPU standard
-#define ARROW_DEVICE_WEBGPU 15
+#  define ARROW_DEVICE_WEBGPU 15
 // Qualcomm Hexagon DSP
-#define ARROW_DEVICE_HEXAGON 16
+#  define ARROW_DEVICE_HEXAGON 16

 struct ArrowDeviceArray {
   // the Allocated Array
@@ -138,7 +138,7 @@ struct ArrowDeviceArray {
 #endif  // ARROW_C_DEVICE_DATA_INTERFACE

 #ifndef ARROW_C_STREAM_INTERFACE
-#define ARROW_C_STREAM_INTERFACE
+#  define ARROW_C_STREAM_INTERFACE

 struct ArrowArrayStream {
   // Callback to get the stream type
@@ -179,7 +179,7 @@ struct ArrowArrayStream {
 #endif  // ARROW_C_STREAM_INTERFACE

 #ifndef ARROW_C_DEVICE_STREAM_INTERFACE
-#define ARROW_C_DEVICE_STREAM_INTERFACE
+#  define ARROW_C_DEVICE_STREAM_INTERFACE

 // Equivalent to ArrowArrayStream, but for ArrowDeviceArrays.
 //
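The ARROW_DEVICE_* constants above exist so a consumer can dispatch on where an imported ArrowDeviceArray's buffers actually live. A minimal sketch of that dispatch (the classification of which device types count as CPU-accessible is an assumption based on the comments above, not normative):

#include <cstdio>
#include "arrow/c/abi.h"

// A CPU consumer should reject (or copy) device-resident data instead of
// dereferencing device pointers.
bool CanReadDirectly(const ArrowDeviceArray* device_array) {
  switch (device_array->device_type) {
    case ARROW_DEVICE_CPU:
    case ARROW_DEVICE_CUDA_HOST:  // pinned host memory is CPU-accessible
    case ARROW_DEVICE_ROCM_HOST:
      return true;
    default:
      std::fprintf(stderr, "device type %d requires a device-aware reader\n",
                   static_cast<int>(device_array->device_type));
      return false;
  }
}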
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 09bb524adbdf0..01fd56f631d99 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -48,7 +48,7 @@

 // TODO(GH-37221): Remove these ifdef checks when compute dependency is removed
 #ifdef ARROW_COMPUTE
-#include "arrow/compute/api_vector.h"
+#  include "arrow/compute/api_vector.h"
 #endif

 namespace arrow {
diff --git a/cpp/src/arrow/c/dlpack_abi.h b/cpp/src/arrow/c/dlpack_abi.h
index 4af557a7ed5d7..fbe2a56a344b3 100644
--- a/cpp/src/arrow/c/dlpack_abi.h
+++ b/cpp/src/arrow/c/dlpack_abi.h
@@ -12,9 +12,9 @@
  * \brief Compatibility with C++
  */
 #ifdef __cplusplus
-#define DLPACK_EXTERN_C extern "C"
+#  define DLPACK_EXTERN_C extern "C"
 #else
-#define DLPACK_EXTERN_C
+#  define DLPACK_EXTERN_C
 #endif

 /*! \brief The current major version of dlpack */
@@ -25,13 +25,13 @@

 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
-#ifdef DLPACK_EXPORTS
-#define DLPACK_DLL __declspec(dllexport)
+#  ifdef DLPACK_EXPORTS
+#    define DLPACK_DLL __declspec(dllexport)
+#  else
+#    define DLPACK_DLL __declspec(dllimport)
+#  endif
 #else
-#define DLPACK_DLL __declspec(dllimport)
-#endif
-#else
-#define DLPACK_DLL
+#  define DLPACK_DLL
 #endif

 #include
diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
index 55eec53ced1c7..854127480744e 100644
--- a/cpp/src/arrow/chunk_resolver.cc
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -60,42 +60,38 @@ inline std::vector<int64_t> MakeChunksOffsets(const std::vector<T>& chunks) {

 template <typename IndexType>
 void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets,
                        int64_t n_indices, const IndexType* logical_index_vec,
-                       IndexType* out_chunk_index_vec, IndexType chunk_hint,
-                       IndexType* out_index_in_chunk_vec) {
+                       TypedChunkLocation<IndexType>* out_chunk_location_vec,
+                       IndexType chunk_hint) {
   auto* offsets = reinterpret_cast<const uint64_t*>(signed_offsets);
   const auto num_chunks = static_cast<IndexType>(num_offsets - 1);
   // chunk_hint in [0, num_offsets) per the precondition.
   for (int64_t i = 0; i < n_indices; i++) {
-    const auto index = static_cast<uint64_t>(logical_index_vec[i]);
+    auto typed_logical_index = logical_index_vec[i];
+    const auto index = static_cast<uint64_t>(typed_logical_index);
+    // use or update chunk_hint
     if (index >= offsets[chunk_hint] &&
        (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) {
-      out_chunk_index_vec[i] = chunk_hint;  // hint is correct!
-      continue;
+      // hint is correct!
+    } else {
+      // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
+      auto chunk_index =
+          ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
+      chunk_hint = static_cast<IndexType>(chunk_index);
     }
-    // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
-    auto chunk_index =
-        ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
-    chunk_hint = static_cast<IndexType>(chunk_index);
-    out_chunk_index_vec[i] = chunk_hint;
-  }
-  if (out_index_in_chunk_vec != NULLPTR) {
-    for (int64_t i = 0; i < n_indices; i++) {
-      auto logical_index = logical_index_vec[i];
-      auto chunk_index = out_chunk_index_vec[i];
-      // chunk_index is in [0, chunks.size()] no matter what the
-      // value of logical_index is, so it's always safe to dereference
-      // offset_ as it contains chunks.size()+1 values.
-      out_index_in_chunk_vec[i] =
-          logical_index - static_cast<IndexType>(offsets[chunk_index]);
+    out_chunk_location_vec[i].chunk_index = chunk_hint;
+    // chunk_index is in [0, chunks.size()] no matter what the
+    // value of logical_index is, so it's always safe to dereference
+    // offset_ as it contains chunks.size()+1 values.
+    out_chunk_location_vec[i].index_in_chunk =
+        typed_logical_index - static_cast<IndexType>(offsets[chunk_hint]);
 #if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)
-      // Make it more likely that Valgrind/ASAN can catch an invalid memory
-      // access by poisoning out_index_in_chunk_vec[i] when the logical
-      // index is out-of-bounds.
-      if (chunk_index == num_chunks) {
-        out_index_in_chunk_vec[i] = std::numeric_limits<IndexType>::max();
-      }
-#endif
+    // Make it more likely that Valgrind/ASAN can catch an invalid memory
+    // access by poisoning the index-in-chunk value when the logical
+    // index is out-of-bounds.
+    if (chunk_hint == num_chunks) {
+      out_chunk_location_vec[i].index_in_chunk = std::numeric_limits<IndexType>::max();
     }
+#endif
   }
 }

@@ -130,31 +126,31 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept {
 }

 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec,
-                                    uint8_t* out_chunk_index_vec, uint8_t chunk_hint,
-                                    uint8_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint8_t>* out_chunk_location_vec,
+                                    uint8_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }

 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec,
-                                    uint32_t* out_chunk_index_vec, uint32_t chunk_hint,
-                                    uint32_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint32_t>* out_chunk_location_vec,
+                                    uint32_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }

 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec,
-                                    uint16_t* out_chunk_index_vec, uint16_t chunk_hint,
-                                    uint16_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint16_t>* out_chunk_location_vec,
+                                    uint16_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }

 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec,
-                                    uint64_t* out_chunk_index_vec, uint64_t chunk_hint,
-                                    uint64_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint64_t>* out_chunk_location_vec,
+                                    uint64_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }

 }  // namespace arrow::internal
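With the refactor above, callers receive (chunk_index, index_in_chunk) pairs in one output array instead of two parallel ones. A sketch of the new call shape, mirroring the unit-test usage rather than a public contract (this is an internal API; the offsets-based constructor is assumed from the tests):

#include <cstdint>
#include <iostream>
#include <vector>
#include "arrow/chunk_resolver.h"

void ResolveExample() {
  // Three chunks with lengths 3, 2, 4 (logical length 9).
  arrow::internal::ChunkResolver resolver(std::vector<int64_t>{0, 3, 5, 9});

  std::vector<uint32_t> logical{0, 4, 8, 100};  // 100 is out of bounds
  std::vector<arrow::internal::TypedChunkLocation<uint32_t>> locations(logical.size());
  bool ok = resolver.ResolveMany<uint32_t>(
      static_cast<int64_t>(logical.size()), logical.data(), locations.data(),
      /*chunk_hint=*/0);
  if (!ok) return;  // chunk count does not fit in uint32_t

  for (const auto& loc : locations) {
    // chunk_index == chunks.size() (here 3) marks out-of-bounds indices;
    // index_in_chunk is undefined for those entries.
    std::cout << loc.chunk_index << ":" << loc.index_in_chunk << "\n";
  }
}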
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
index a2a3d5a864243..83fda62387fe1 100644
--- a/cpp/src/arrow/chunk_resolver.h
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -31,28 +31,34 @@ namespace arrow::internal {

 struct ChunkResolver;

-struct ChunkLocation {
+template <typename IndexType>
+struct TypedChunkLocation {
   /// \brief Index of the chunk in the array of chunks
   ///
   /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
   /// to represent out-of-bounds locations.
-  int64_t chunk_index = 0;
+  IndexType chunk_index = 0;

   /// \brief Index of the value in the chunk
   ///
   /// The value is UNDEFINED if chunk_index >= chunks.size()
-  int64_t index_in_chunk = 0;
+  IndexType index_in_chunk = 0;

-  ChunkLocation() = default;
+  TypedChunkLocation() = default;

-  ChunkLocation(int64_t chunk_index, int64_t index_in_chunk)
-      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {}
+  TypedChunkLocation(IndexType chunk_index, IndexType index_in_chunk)
+      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {
+    static_assert(sizeof(TypedChunkLocation) == 2 * sizeof(IndexType));
+    static_assert(alignof(TypedChunkLocation) == alignof(IndexType));
+  }

-  bool operator==(ChunkLocation other) const {
+  bool operator==(TypedChunkLocation other) const {
     return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk;
   }
 };

+using ChunkLocation = TypedChunkLocation<int64_t>;
+
 /// \brief An utility that incrementally resolves logical indices into
 /// physical indices in a chunked array.
 struct ARROW_EXPORT ChunkResolver {
@@ -144,26 +150,25 @@ struct ARROW_EXPORT ChunkResolver {
   ///
   /// \pre 0 <= logical_index_vec[i] < logical_array_length()
   ///      (for well-defined and valid chunk index results)
-  /// \pre out_chunk_index_vec has space for `n_indices`
+  /// \pre out_chunk_location_vec has space for `n_indices` locations
   /// \pre chunk_hint in [0, chunks.size()]
-  /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+  /// \post out_chunk_location_vec[i].chunk_index in [0, chunks.size()] for i in [0, n)
   /// \post if logical_index_vec[i] >= chunked_array.length(), then
-  ///       out_chunk_index_vec[i] == chunks.size()
-  ///       and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds)
-  /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and
-  ///       out_index_in_chunk_vec[i] are UNDEFINED
+  ///       out_chunk_location_vec[i].chunk_index == chunks.size()
+  ///       and out_chunk_location_vec[i].index_in_chunk is UNDEFINED (can be
+  ///       out-of-bounds)
+  /// \post if logical_index_vec[i] < 0, then both values in
+  ///       out_chunk_location_vec[i] are UNDEFINED
   ///
   /// \param n_indices The number of logical indices to resolve
   /// \param logical_index_vec The logical indices to resolve
-  /// \param out_chunk_index_vec The output array where the chunk indices will be written
+  /// \param out_chunk_location_vec The output array where the locations will be written
   /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
-  /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
-  ///        within-chunk indices will be written
   /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
   template <typename IndexType>
   [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec,
-                                 IndexType* out_chunk_index_vec, IndexType chunk_hint = 0,
-                                 IndexType* out_index_in_chunk_vec = NULLPTR) const {
+                                 TypedChunkLocation<IndexType>* out_chunk_location_vec,
+                                 IndexType chunk_hint = 0) const {
     if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
       // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()).
       constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits<IndexType>::max();
@@ -188,13 +193,11 @@ struct ARROW_EXPORT ChunkResolver {
       // logical index in the chunked array.
       using U = std::make_unsigned_t<IndexType>;
       ResolveManyImpl(n_indices, reinterpret_cast<const U*>(logical_index_vec),
-                      reinterpret_cast<U*>(out_chunk_index_vec),
-                      static_cast<U>(chunk_hint),
-                      reinterpret_cast<U*>(out_index_in_chunk_vec));
+                      reinterpret_cast<TypedChunkLocation<U>*>(out_chunk_location_vec),
+                      static_cast<U>(chunk_hint));
     } else {
       static_assert(std::is_unsigned_v<IndexType>);
-      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint,
-                      out_index_in_chunk_vec);
+      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_location_vec, chunk_hint);
     }
     return true;
   }
@@ -226,10 +229,14 @@ struct ARROW_EXPORT ChunkResolver {
   /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
   /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
-  void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const;
-  void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const;
-  void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const;
-  void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const;
+  void ResolveManyImpl(int64_t, const uint8_t*, TypedChunkLocation<uint8_t>*,
+                       uint8_t) const;
+  void ResolveManyImpl(int64_t, const uint16_t*, TypedChunkLocation<uint16_t>*,
+                       uint16_t) const;
+  void ResolveManyImpl(int64_t, const uint32_t*, TypedChunkLocation<uint32_t>*,
+                       uint32_t) const;
+  void ResolveManyImpl(int64_t, const uint64_t*, TypedChunkLocation<uint64_t>*,
+                       uint64_t) const;

  public:
   /// \brief Find the index of the chunk that contains the logical index.
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index c36b736d5d5df..dd6aa51534fcb 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -27,6 +27,7 @@
 #include "arrow/array/array_nested.h"
 #include "arrow/array/util.h"
 #include "arrow/array/validate.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/pretty_print.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
@@ -86,6 +87,18 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::MakeEmpty(
   return std::make_shared<ChunkedArray>(std::move(new_chunks));
 }

+DeviceAllocationTypeSet ChunkedArray::device_types() const {
+  if (chunks_.empty()) {
+    // An empty ChunkedArray is considered to be CPU-only.
+    return DeviceAllocationTypeSet::CpuOnly();
+  }
+  DeviceAllocationTypeSet set;
+  for (const auto& chunk : chunks_) {
+    set.add(chunk->device_type());
+  }
+  return set;
+}
+
 bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const {
   if (length_ != other.length()) {
     return false;
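The new accessor makes it cheap to guard CPU-only code paths over chunked data. A short usage sketch (for CPU-built arrays this always passes; with device memory, e.g. CUDA, is_cpu() would be false and raw-value access would be invalid):

#include <iostream>
#include "arrow/api.h"

void ProcessIfCpu(const arrow::ChunkedArray& chunked) {
  if (!chunked.is_cpu()) {
    std::cout << "non-CPU chunks present; copy to host first\n";
    return;
  }
  for (const auto& chunk : chunked.chunks()) {
    std::cout << chunk->ToString() << "\n";  // safe: CPU-accessible memory
  }
}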
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 5d300861d85c2..c65b6cb6e227f 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -25,6 +25,7 @@

 #include "arrow/chunk_resolver.h"
 #include "arrow/compare.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
@@ -116,6 +117,13 @@ class ARROW_EXPORT ChunkedArray {
   /// \return an ArrayVector of chunks
   const ArrayVector& chunks() const { return chunks_; }

+  /// \return The set of device allocation types used by the chunks in this
+  ///         chunked array.
+  DeviceAllocationTypeSet device_types() const;
+
+  /// \return true if all chunks are allocated on CPU-accessible memory.
+  bool is_cpu() const { return device_types().is_cpu_only(); }
+
   /// \brief Construct a zero-copy slice of the chunked array with the
   /// indicated offset and length
   ///
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index e9cc283b53cd5..bf9d4af7c7bb0 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -37,6 +37,7 @@ namespace arrow {

 using internal::ChunkLocation;
 using internal::ChunkResolver;
+using internal::TypedChunkLocation;

 class TestChunkedArray : public ::testing::Test {
  protected:
@@ -61,12 +62,17 @@ TEST_F(TestChunkedArray, Make) {
                        ChunkedArray::Make({}, int64()));
   AssertTypeEqual(*int64(), *result->type());
   ASSERT_EQ(result->num_chunks(), 0);
+  // Empty chunked arrays are treated as CPU-allocated.
+  ASSERT_TRUE(result->is_cpu());

   auto chunk0 = ArrayFromJSON(int8(), "[0, 1, 2]");
   auto chunk1 = ArrayFromJSON(int16(), "[3, 4, 5]");

   ASSERT_OK_AND_ASSIGN(result, ChunkedArray::Make({chunk0, chunk0}));
   ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({chunk0, chunk0}, int8()));
+  // All chunks are CPU-accessible.
+  ASSERT_TRUE(result->is_cpu());
+  ASSERT_TRUE(result2->is_cpu());
   AssertChunkedEqual(*result, *result2);

   ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0, chunk1}));
@@ -375,24 +381,26 @@ class TestChunkResolverMany : public ::testing::Test {
   Result<std::vector<ChunkLocation>> ResolveMany(
       const ChunkResolver& resolver, const std::vector<IndexType>& logical_index_vec) {
     const size_t n = logical_index_vec.size();
-    std::vector<IndexType> chunk_index_vec;
-    chunk_index_vec.resize(n);
-    std::vector<IndexType> index_in_chunk_vec;
-    index_in_chunk_vec.resize(n);
+    std::vector<TypedChunkLocation<IndexType>> chunk_location_vec;
+    chunk_location_vec.resize(n);
     bool valid = resolver.ResolveMany(
-        static_cast<int64_t>(n), logical_index_vec.data(), chunk_index_vec.data(), 0,
-        index_in_chunk_vec.data());
+        static_cast<int64_t>(n), logical_index_vec.data(), chunk_location_vec.data(), 0);
     if (ARROW_PREDICT_FALSE(!valid)) {
       return Status::Invalid("index type doesn't fit possible chunk indexes");
     }
-    std::vector<ChunkLocation> locations;
-    locations.reserve(n);
-    for (size_t i = 0; i < n; i++) {
-      auto chunk_index = static_cast<int64_t>(chunk_index_vec[i]);
-      auto index_in_chunk = static_cast<int64_t>(index_in_chunk_vec[i]);
-      locations.emplace_back(chunk_index, index_in_chunk);
+    if constexpr (std::is_same<IndexType, int64_t>::value) {
+      return chunk_location_vec;
+    } else {
+      std::vector<ChunkLocation> locations;
+      locations.reserve(n);
+      for (size_t i = 0; i < n; i++) {
+        auto loc = chunk_location_vec[i];
+        auto chunk_index = static_cast<int64_t>(loc.chunk_index);
+        auto index_in_chunk = static_cast<int64_t>(loc.index_in_chunk);
+        locations.emplace_back(chunk_index, index_in_chunk);
+      }
+      return locations;
     }
-    return locations;
   }

   void CheckResolveMany(const ChunkResolver& resolver,
This is ok for the + /// predicate pushdown use case because the overall aim is to simplify to an + /// unsatisfiable expression. + /// + /// \pre `is_in_call` is a call to the `is_in` function + /// \return a simplified expression, or nullopt if no simplification occurred + static Result> SimplifyIsIn( + const Inequality& guarantee, const Expression::Call* is_in_call) { + DCHECK_EQ(is_in_call->function_name, "is_in"); + + auto options = checked_pointer_cast(is_in_call->options); + + const auto& lhs = Comparison::StripOrderPreservingCasts(is_in_call->arguments[0]); + if (!lhs.field_ref()) return std::nullopt; + if (*lhs.field_ref() != guarantee.target) return std::nullopt; + + FilterOptions::NullSelectionBehavior null_selection; + switch (options->null_matching_behavior) { + case SetLookupOptions::MATCH: + null_selection = + guarantee.nullable ? FilterOptions::EMIT_NULL : FilterOptions::DROP; + break; + case SetLookupOptions::SKIP: + null_selection = FilterOptions::DROP; + break; + case SetLookupOptions::EMIT_NULL: + if (guarantee.nullable) return std::nullopt; + null_selection = FilterOptions::DROP; + break; + case SetLookupOptions::INCONCLUSIVE: + if (guarantee.nullable) return std::nullopt; + ARROW_ASSIGN_OR_RAISE(Datum is_null, IsNull(options->value_set)); + ARROW_ASSIGN_OR_RAISE(Datum any_null, Any(is_null)); + if (any_null.scalar_as().value) return std::nullopt; + null_selection = FilterOptions::DROP; + break; + } + + std::string func_name = Comparison::GetName(guarantee.cmp); + DCHECK_NE(func_name, "na"); + std::vector args{options->value_set, guarantee.bound}; + ARROW_ASSIGN_OR_RAISE(Datum filter_mask, CallFunction(func_name, args)); + FilterOptions filter_options(null_selection); + ARROW_ASSIGN_OR_RAISE(Datum simplified_value_set, + Filter(options->value_set, filter_mask, filter_options)); + + if (simplified_value_set.length() == 0) return literal(false); + if (simplified_value_set.length() == options->value_set.length()) return std::nullopt; + + ExecContext exec_context; + Expression::Call simplified_call; + simplified_call.function_name = "is_in"; + simplified_call.arguments = is_in_call->arguments; + simplified_call.options = std::make_shared( + simplified_value_set, options->null_matching_behavior); + ARROW_ASSIGN_OR_RAISE( + Expression simplified_expr, + BindNonRecursive(std::move(simplified_call), + /*insert_implicit_casts=*/false, &exec_context)); + return simplified_expr; + } + /// \brief Simplify the given expression given this inequality as a guarantee. Result Simplify(Expression expr) { const auto& guarantee = *this; @@ -1258,6 +1325,12 @@ struct Inequality { return call->function_name == "is_valid" ? 
diff --git a/cpp/src/arrow/compute/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc
index d94a17b6ffadf..0b7e8a9c23b13 100644
--- a/cpp/src/arrow/compute/expression_test.cc
+++ b/cpp/src/arrow/compute/expression_test.cc
@@ -27,6 +27,7 @@
 #include
 #include

+#include "arrow/array/builder_primitive.h"
 #include "arrow/compute/expression_internal.h"
 #include "arrow/compute/function_internal.h"
 #include "arrow/compute/registry.h"
@@ -1616,6 +1617,144 @@ TEST(Expression, SimplifyWithComparisonAndNullableCaveat) {
                        true_unless_null(field_ref("i32"))));  // not satisfiable, will drop row group
 }

+TEST(Expression, SimplifyIsIn) {
+  auto is_in = [](Expression field, std::shared_ptr<DataType> value_set_type,
+                  std::string json_array,
+                  SetLookupOptions::NullMatchingBehavior null_matching_behavior) {
+    SetLookupOptions options{ArrayFromJSON(value_set_type, json_array),
+                             null_matching_behavior};
+    return call("is_in", {field}, options);
+  };
+
+  for (SetLookupOptions::NullMatchingBehavior null_matching : {
+           SetLookupOptions::MATCH,
+           SetLookupOptions::SKIP,
+           SetLookupOptions::EMIT_NULL,
+           SetLookupOptions::INCONCLUSIVE,
+       }) {
+    Simplify{is_in(field_ref("i32"), int32(), "[]", null_matching)}
+        .WithGuarantee(greater(field_ref("i32"), literal(2)))
+        .Expect(false);
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(equal(field_ref("i32"), literal(6)))
+        .Expect(false);
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(greater(field_ref("i32"), literal(3)))
+        .Expect(is_in(field_ref("i32"), int32(), "[5,7,9]", null_matching));
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(greater(field_ref("i32"), literal(9)))
+        .Expect(false);
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(less_equal(field_ref("i32"), literal(0)))
+        .Expect(false);
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(greater(field_ref("i32"), literal(0)))
+        .ExpectUnchanged();
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(less_equal(field_ref("i32"), literal(9)))
+        .ExpectUnchanged();
+
+    Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(and_(less_equal(field_ref("i32"), literal(7)),
+                            greater(field_ref("i32"), literal(4))))
+        .Expect(is_in(field_ref("i32"), int32(), "[5,7]", null_matching));
+
+    Simplify{is_in(field_ref("u32"), int8(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(greater(field_ref("u32"), literal(3)))
+        .Expect(is_in(field_ref("u32"), int8(), "[5,7,9]", null_matching));
+
+    Simplify{is_in(field_ref("u32"), int64(), "[1,3,5,7,9]", null_matching)}
+        .WithGuarantee(greater(field_ref("u32"), literal(3)))
+        .Expect(is_in(field_ref("u32"), int64(), "[5,7,9]", null_matching));
+  }
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::MATCH),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::MATCH));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::MATCH),
+  }
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::MATCH),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::MATCH));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::MATCH),
+  }
+      .WithGuarantee(greater(field_ref("i32"), literal(2)))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::MATCH));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::MATCH),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .Expect(is_in(field_ref("i32"), int32(), "[3,null]", SetLookupOptions::MATCH));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::SKIP),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::SKIP));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::SKIP),
+  }
+      .WithGuarantee(greater(field_ref("i32"), literal(2)))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::SKIP));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::SKIP),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::SKIP));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::EMIT_NULL),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .ExpectUnchanged();
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::EMIT_NULL),
+  }
+      .WithGuarantee(greater(field_ref("i32"), literal(2)))
+      .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::EMIT_NULL));
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::EMIT_NULL),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .ExpectUnchanged();
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::INCONCLUSIVE),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .ExpectUnchanged();
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::INCONCLUSIVE),
+  }
+      .WithGuarantee(greater(field_ref("i32"), literal(2)))
+      .ExpectUnchanged();
+
+  Simplify{
+      is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::INCONCLUSIVE),
+  }
+      .WithGuarantee(
+          or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32"))))
+      .ExpectUnchanged();
+}
+
 TEST(Expression, SimplifyThenExecute) {
   auto filter =
       or_({equal(field_ref("f32"), literal(0)),
@@ -1643,6 +1782,40 @@ TEST(Expression, SimplifyThenExecute) {
   AssertDatumsEqual(evaluated, simplified_evaluated, /*verbose=*/true);
 }
 
+TEST(Expression, SimplifyIsInThenExecute) {
+  auto input = RecordBatchFromJSON(kBoringSchema, R"([
+      {"i64": 2, "i32": 5},
+      {"i64": 5, "i32": 6},
+      {"i64": 3, "i32": 6},
+      {"i64": 3, "i32": 5},
+      {"i64": 4, "i32": 5},
+      {"i64": 2, "i32": 7},
+      {"i64": 5, "i32": 5}
+  ])");
+
+  std::vector<Expression> guarantees{greater(field_ref("i64"), literal(1)),
+                                     greater_equal(field_ref("i32"), literal(5)),
+                                     less_equal(field_ref("i64"), literal(5))};
+
+  for (const Expression& guarantee : guarantees) {
+    auto filter =
+        call("is_in", {guarantee.call()->arguments[0]},
+             compute::SetLookupOptions{ArrayFromJSON(int32(), "[1,2,3]"), true});
+    ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*kBoringSchema));
+    ASSERT_OK_AND_ASSIGN(auto simplified, SimplifyWithGuarantee(filter, guarantee));
+
+    Datum evaluated, simplified_evaluated;
+    ExpectExecute(filter, input, &evaluated);
+    ExpectExecute(simplified, input, &simplified_evaluated);
+    if (simplified_evaluated.is_scalar()) {
+      ASSERT_OK_AND_ASSIGN(
+          simplified_evaluated,
+          MakeArrayFromScalar(*simplified_evaluated.scalar(), evaluated.length()));
+    }
+    AssertDatumsEqual(evaluated, simplified_evaluated, /*verbose=*/true);
+  }
+}
+
 TEST(Expression, Filter) {
   auto ExpectFilter = [](Expression filter, std::string batch_json) {
     ASSERT_OK_AND_ASSIGN(auto s, kBoringSchema->AddField(0, field("in", boolean())));
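End to end, the behavior those tests pin down looks like this from a caller's perspective. A hedged sketch, assuming a schema with an int32 field "i32"; arrow::ArrayFromJSON comes from Arrow's testing headers and is used here only for brevity, and RunExample is a made-up name:

// Sketch: how predicate pushdown benefits from the new is_in simplification.
#include <iostream>

#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <arrow/compute/expression.h>
#include <arrow/testing/gtest_util.h>  // arrow::ArrayFromJSON (test utility)

namespace cp = arrow::compute;

arrow::Status RunExample() {
  auto schema = arrow::schema({arrow::field("i32", arrow::int32())});

  // Filter: i32 IN (1, 3, 5, 7, 9)
  cp::Expression filter = cp::call(
      "is_in", {cp::field_ref("i32")},
      cp::SetLookupOptions{arrow::ArrayFromJSON(arrow::int32(), "[1,3,5,7,9]"),
                           cp::SetLookupOptions::MATCH});
  ARROW_ASSIGN_OR_RAISE(filter, filter.Bind(*schema));

  // Guarantee from fragment statistics: every row satisfies i32 > 3.
  cp::Expression guarantee = cp::greater(cp::field_ref("i32"), cp::literal(3));

  // The value set shrinks to [5, 7, 9]; with a guarantee of i32 > 9 the whole
  // filter would collapse to literal(false) and the fragment could be skipped.
  ARROW_ASSIGN_OR_RAISE(cp::Expression simplified,
                        cp::SimplifyWithGuarantee(filter, guarantee));
  std::cout << simplified.ToString() << std::endl;
  return arrow::Status::OK();
}

int main() { return RunExample().ok() ? 0 : 1; }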
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index e1a2e8c5d8879..0478a3d1e801a 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -30,6 +30,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/datum.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/tracing_internal.h"
diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc
index 5c87ef2cd0561..5e7461cc52d0e 100644
--- a/cpp/src/arrow/compute/kernel.cc
+++ b/cpp/src/arrow/compute/kernel.cc
@@ -24,6 +24,7 @@
 
 #include "arrow/buffer.h"
 #include "arrow/compute/exec.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/result.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 1adb3e96c97c8..cfb6265f12904 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -31,6 +31,7 @@
 #include "arrow/buffer.h"
 #include "arrow/compute/exec.h"
 #include "arrow/datum.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/memory_pool.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
@@ -41,7 +42,7 @@
 // macOS defines PREALLOCATE as a preprocessor macro in the header sys/vnode.h.
 // No other BSD seems to do so. The name is used as an identifier in MemAllocation enum.
 #if defined(__APPLE__) && defined(PREALLOCATE)
-#undef PREALLOCATE
+#  undef PREALLOCATE
 #endif
 
 namespace arrow {
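Besides the kernel renames, the aggregate_basic.cc hunks below change plain assignment to `|=`/`&=` in BooleanAnyImpl::Consume and BooleanAllImpl::Consume, because Consume runs once per batch and assignment would discard state from earlier batches (the `* batch.length` change likewise counts a broadcast scalar once per row). A toy model of the assignment bug, with made-up names, not Arrow code:

// Why `|=` matters when Consume is called once per batch.
#include <cassert>

struct AnyState {
  bool has_nulls = false;
  bool any = false;

  void Consume(bool batch_has_nulls, bool batch_any) {
    has_nulls |= batch_has_nulls;  // plain `=` would forget earlier batches
    any |= batch_any;
  }
};

int main() {
  AnyState state;
  state.Consume(/*batch_has_nulls=*/true, /*batch_any=*/true);    // batch 1
  state.Consume(/*batch_has_nulls=*/false, /*batch_any=*/false);  // batch 2
  // With plain assignment the second batch would reset both flags to false.
  assert(state.has_nulls && state.any);
  return 0;
}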
+#include "arrow/compute/kernels/aggregate_basic.inc.cc" // NOLINT(build/include) namespace arrow { namespace compute { @@ -276,11 +278,6 @@ struct SumImplDefault : public SumImpl { using SumImpl::SumImpl; }; -template -struct MeanImplDefault : public MeanImpl { - using MeanImpl::MeanImpl; -}; - Result> SumInit(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( @@ -289,6 +286,14 @@ Result> SumInit(KernelContext* ctx, return visitor.Create(); } +// ---------------------------------------------------------------------- +// Mean implementation + +template +struct MeanImplDefault : public MeanImpl { + using MeanImpl::MeanImpl; +}; + Result> MeanInit(KernelContext* ctx, const KernelInitArgs& args) { MeanKernelInit visitor( @@ -482,8 +487,8 @@ void AddFirstOrLastAggKernel(ScalarAggregateFunction* func, // ---------------------------------------------------------------------- // MinMax implementation -Result> MinMaxInit(KernelContext* ctx, - const KernelInitArgs& args) { +Result> MinMaxInitDefault(KernelContext* ctx, + const KernelInitArgs& args) { ARROW_ASSIGN_OR_RAISE(TypeHolder out_type, args.kernel->signature->out_type().Resolve(ctx, args.inputs)); MinMaxInitState visitor( @@ -532,13 +537,13 @@ struct BooleanAnyImpl : public ScalarAggregator { } if (batch[0].is_scalar()) { const Scalar& scalar = *batch[0].scalar; - this->has_nulls = !scalar.is_valid; - this->any = scalar.is_valid && checked_cast(scalar).value; - this->count += scalar.is_valid; + this->has_nulls |= !scalar.is_valid; + this->any |= scalar.is_valid && checked_cast(scalar).value; + this->count += scalar.is_valid * batch.length; return Status::OK(); } const ArraySpan& data = batch[0].array; - this->has_nulls = data.GetNullCount() > 0; + this->has_nulls |= data.GetNullCount() > 0; this->count += data.length - data.GetNullCount(); arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[0].data, data.offset, data.buffers[1].data, data.offset, @@ -603,13 +608,13 @@ struct BooleanAllImpl : public ScalarAggregator { } if (batch[0].is_scalar()) { const Scalar& scalar = *batch[0].scalar; - this->has_nulls = !scalar.is_valid; - this->count += scalar.is_valid; - this->all = !scalar.is_valid || checked_cast(scalar).value; + this->has_nulls |= !scalar.is_valid; + this->count += scalar.is_valid * batch.length; + this->all &= !scalar.is_valid || checked_cast(scalar).value; return Status::OK(); } const ArraySpan& data = batch[0].array; - this->has_nulls = data.GetNullCount() > 0; + this->has_nulls |= data.GetNullCount() > 0; this->count += data.length - data.GetNullCount(); arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[1].data, data.offset, data.buffers[0].data, data.offset, @@ -1114,14 +1119,14 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { // Add min max function func = std::make_shared("min_max", Arity::Unary(), min_max_doc, &default_scalar_aggregate_options); - AddMinMaxKernels(MinMaxInit, {null(), boolean()}, func.get()); - AddMinMaxKernels(MinMaxInit, NumericTypes(), func.get()); - AddMinMaxKernels(MinMaxInit, TemporalTypes(), func.get()); - AddMinMaxKernels(MinMaxInit, BaseBinaryTypes(), func.get()); - AddMinMaxKernel(MinMaxInit, Type::FIXED_SIZE_BINARY, func.get()); - AddMinMaxKernel(MinMaxInit, Type::INTERVAL_MONTHS, func.get()); - AddMinMaxKernel(MinMaxInit, Type::DECIMAL128, func.get()); - AddMinMaxKernel(MinMaxInit, Type::DECIMAL256, func.get()); + AddMinMaxKernels(MinMaxInitDefault, {null(), boolean()}, func.get()); + AddMinMaxKernels(MinMaxInitDefault, 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc
new file mode 100644
index 0000000000000..f2151e0a9e029
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc
@@ -0,0 +1,1025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// .inc.cc file to be included in compilation unit where kernels are meant to be
+// compiled auto-vectorized by the compiler with different SIMD levels passed
+// as compiler flags.
+//
+// It contains no includes to avoid double inclusion in the compilation unit
+// that includes this .inc.cc file.
+
+#include
+#include
+#include
+#include
+#include
+
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/align_util.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/decimal.h"
+
+namespace arrow::compute::internal {
+namespace {
+
+// ----------------------------------------------------------------------
+// Sum implementation
+
+template <typename ArrowType, SimdLevel::type SimdLevel,
+          typename ResultType = typename FindAccumulatorType<ArrowType>::Type>
+struct SumImpl : public ScalarAggregator {
+  using ThisType = SumImpl<ArrowType, SimdLevel, ResultType>;
+  using CType = typename TypeTraits<ArrowType>::CType;
+  using SumType = ResultType;
+  using SumCType = typename TypeTraits<SumType>::CType;
+  using OutputType = typename TypeTraits<SumType>::ScalarType;
+
+  SumImpl(std::shared_ptr<DataType> out_type, ScalarAggregateOptions options_)
+      : out_type(std::move(out_type)), options(std::move(options_)) {}
+
+  Status Consume(KernelContext*, const ExecSpan& batch) override {
+    if (batch[0].is_array()) {
+      const ArraySpan& data = batch[0].array;
+      this->count += data.length - data.GetNullCount();
+      this->nulls_observed = this->nulls_observed || data.GetNullCount();
+
+      if (!options.skip_nulls && this->nulls_observed) {
+        // Short-circuit
+        return Status::OK();
+      }
+
+      if (is_boolean_type<ArrowType>::value) {
+        this->sum += GetTrueCount(data);
+      } else {
+        this->sum += SumArray<CType, SumCType, SimdLevel>(data);
+      }
+    } else {
+      const Scalar& data = *batch[0].scalar;
+      this->count += data.is_valid * batch.length;
+      this->nulls_observed = this->nulls_observed || !data.is_valid;
+      if (data.is_valid) {
+        this->sum += internal::UnboxScalar<ArrowType>::Unbox(data) * batch.length;
+      }
+    }
+    return Status::OK();
+  }
+
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
+    const auto& other = checked_cast<const ThisType&>(src);
+    this->count += other.count;
+    this->sum += other.sum;
+    this->nulls_observed = this->nulls_observed || other.nulls_observed;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext*, Datum* out) override {
+    if ((!options.skip_nulls && this->nulls_observed) ||
+        (this->count < options.min_count)) {
+      out->value = std::make_shared<OutputType>(out_type);
+    } else {
+      out->value = std::make_shared<OutputType>(this->sum, out_type);
+    }
+    return Status::OK();
+  }
+
+  size_t count = 0;
+  bool nulls_observed = false;
+  SumCType sum = 0;
+  std::shared_ptr<DataType> out_type;
+  ScalarAggregateOptions options;
+};
+
+template <typename ArrowType>
+struct NullImpl : public ScalarAggregator {
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+
+  explicit NullImpl(const ScalarAggregateOptions& options_) : options(options_) {}
+
+  Status Consume(KernelContext*, const ExecSpan& batch) override {
+    if (batch[0].is_scalar() || batch[0].array.GetNullCount() > 0) {
+      // If the batch is a scalar or an array with elements, set is_empty to false
+      is_empty = false;
+    }
+    return Status::OK();
+  }
+
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
+    const auto& other = checked_cast<const NullImpl&>(src);
+    this->is_empty &= other.is_empty;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext*, Datum* out) override {
+    if ((options.skip_nulls || this->is_empty) && options.min_count == 0) {
+      // Return 0 if the remaining data is empty
+      out->value = output_empty();
+    } else {
+      out->value = MakeNullScalar(TypeTraits<ArrowType>::type_singleton());
+    }
+    return Status::OK();
+  }
+
+  virtual std::shared_ptr<Scalar> output_empty() = 0;
+
+  bool is_empty = true;
+  ScalarAggregateOptions options;
+};
+
+template <typename ArrowType>
+struct NullSumImpl : public NullImpl<ArrowType> {
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+
+  explicit NullSumImpl(const ScalarAggregateOptions& options_)
+      : NullImpl<ArrowType>(options_) {}
+
+  std::shared_ptr<Scalar> output_empty() override {
+    return std::make_shared<ScalarType>(0);
+  }
+};
+
+template