diff --git a/.asf.yaml b/.asf.yaml index ba325c2abf231..f3a8ed9fee90f 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -23,7 +23,6 @@ github: - benibus - jbonofre - js8544 - - laurentgo - vibhatha - ZhangHuiGui diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 5ccefa32725f3..7dfe987d2eaff 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -69,65 +69,37 @@ env: DOCKER_VOLUME_PREFIX: ".docker/" jobs: - docker-targets: - name: Docker targets - runs-on: ubuntu-latest - outputs: - targets: ${{ steps.detect-targets.outputs.targets }} - steps: - - name: Detect targets - id: detect-targets - run: | - echo "targets<<JSON" >> "$GITHUB_OUTPUT" - echo "[" >> "$GITHUB_OUTPUT" - cat <<JSON >> "$GITHUB_OUTPUT" - { - "arch": "amd64", - "clang-tools": "14", - "image": "conda-cpp", - "llvm": "14", - "runs-on": "ubuntu-latest", - "simd-level": "AVX2", - "title": "AMD64 Conda C++ AVX2", - "ubuntu": "22.04" - }, - { - "arch": "amd64", - "clang-tools": "14", - "image": "ubuntu-cpp-sanitizer", - "llvm": "14", - "runs-on": "ubuntu-latest", - "title": "AMD64 Ubuntu 22.04 C++ ASAN UBSAN", - "ubuntu": "22.04" - } - JSON - if [ "$GITHUB_REPOSITORY_OWNER" = "apache" ]; then - echo "," >> "$GITHUB_OUTPUT" - cat <<JSON >> "$GITHUB_OUTPUT" - { - "arch": "arm64v8", - "clang-tools": "10", - "image": "ubuntu-cpp", - "llvm": "10", - "runs-on": ["self-hosted", "arm", "linux"], - "title": "ARM64 Ubuntu 20.04 C++", - "ubuntu": "20.04" - } - JSON - fi - echo "]" >> "$GITHUB_OUTPUT" - echo "JSON" >> "$GITHUB_OUTPUT" - docker: name: ${{ matrix.title }} - needs: docker-targets runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 75 strategy: fail-fast: false matrix: - include: ${{ fromJson(needs.docker-targets.outputs.targets) }} + include: + - arch: amd64 + clang-tools: 14 + image: conda-cpp + llvm: 14 + runs-on: ubuntu-latest + simd-level: AVX2 + title: AMD64 Conda C++ AVX2 + ubuntu: 22.04 + - arch: amd64 + clang-tools: 14 + image: ubuntu-cpp-sanitizer + llvm: 14 + runs-on: ubuntu-latest + title: AMD64 Ubuntu 22.04 C++ ASAN UBSAN + ubuntu: 22.04 + - arch: arm64v8 + clang-tools: 10 + image: ubuntu-cpp + llvm: 10 + runs-on: ubuntu-24.04-arm + title: ARM64 Ubuntu 20.04 C++ + ubuntu: 20.04 env: ARCH: ${{ matrix.arch }} ARROW_SIMD_LEVEL: ${{ matrix.simd-level }} diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index d59da447612a6..f9718cbf7bb18 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -124,7 +124,7 @@ jobs: shell: bash run: | gem install test-unit - pip install "cython>=0.29.31" setuptools pytest requests setuptools-scm + pip install "cython>=3" setuptools pytest requests setuptools-scm - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/pr_review_trigger.yml b/.github/workflows/pr_review_trigger.yml index 2c840e95c8db6..a6dd5f1275331 100644 --- a/.github/workflows/pr_review_trigger.yml +++ b/.github/workflows/pr_review_trigger.yml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: "Upload PR review Payload" - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: path: "${{ github.event_path }}" name: "pr_review_payload" diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index bc7db519b64f7..cb000f8b95c1b 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -177,7 +177,7 @@ jobs: if: always() - name: 
Save the test output if: always() - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: test-output-${{ matrix.ubuntu }}-${{ matrix.r }} path: r/check/arrow.Rcheck/tests/testthat.Rout* @@ -237,7 +237,7 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: test-output-bundled path: r/check/arrow.Rcheck/tests/testthat.Rout* @@ -299,7 +299,7 @@ jobs: # So that they're unique when multiple are downloaded in the next step shell: bash run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip - - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 + - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip diff --git a/c_glib/meson.build b/c_glib/meson.build index bd7843d8bc362..017765cd14626 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -35,7 +35,7 @@ project('arrow-glib', 'c', 'cpp', # * 22.04: 0.61.2 meson_version: '>=0.53.2') -version = '19.0.0-SNAPSHOT' +version = '20.0.0-SNAPSHOT' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp index 2b8e2bdeac026..738fb4fd824c8 100644 --- a/c_glib/parquet-glib/arrow-file-writer.cpp +++ b/c_glib/parquet-glib/arrow-file-writer.cpp @@ -574,7 +574,6 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, /** * gparquet_arrow_file_writer_new_row_group: * @writer: A #GParquetArrowFileWriter. - * @chunk_size: The max number of rows in a row group. * @error: (nullable): Return location for a #GError or %NULL. * * Start a new row group. 
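/* Reviewer sketch, not part of this patch: with @chunk_size gone, a caller
 * starts a row group and controls its size through what it writes next.
 * `writer` and `chunked_array` are assumed to be constructed elsewhere; the
 * write_chunked_array signature is inferred from the Ruby test further down. */
static gboolean
write_one_row_group(GParquetArrowFileWriter *writer,
                    GArrowChunkedArray *chunked_array,
                    GError **error)
{
  return gparquet_arrow_file_writer_new_row_group(writer, error) &&
         gparquet_arrow_file_writer_write_chunked_array(writer,
                                                        chunked_array,
                                                        error);
}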
@@ -584,13 +583,11 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, * Since: 18.0.0 */ gboolean -gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, - gsize chunk_size, - GError **error) +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error) { auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); return garrow::check(error, - parquet_arrow_file_writer->NewRowGroup(chunk_size), + parquet_arrow_file_writer->NewRowGroup(), "[parquet][arrow][file-writer][new-row-group]"); } diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 2c82f7c1f87de..4986430c951d0 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -135,9 +135,7 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GPARQUET_AVAILABLE_IN_18_0 gboolean -gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, - gsize chunk_size, - GError **error); +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error); GPARQUET_AVAILABLE_IN_18_0 gboolean diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb index d8344bf1c50b0..418de4782d0b0 100644 --- a/c_glib/test/parquet/test-arrow-file-writer.rb +++ b/c_glib/test/parquet/test-arrow-file-writer.rb @@ -89,10 +89,10 @@ def test_write_table def test_write_chunked_array schema = build_schema("enabled" => :boolean) writer = Parquet::ArrowFileWriter.new(schema, @file.path) - writer.new_row_group(2) + writer.new_row_group chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])]) writer.write_chunked_array(chunked_array) - writer.new_row_group(1) + writer.new_row_group chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])]) writer.write_chunked_array(chunked_array) writer.close diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py index 4995ce570aeb0..6a8976204c05a 100755 --- a/c_glib/tool/generate-version-header.py +++ b/c_glib/tool/generate-version-header.py @@ -140,6 +140,7 @@ def generate_availability_macros(library: str) -> str: ALL_VERSIONS = [ + (20, 0), (19, 0), (18, 0), (17, 0), diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index f2717f7e27cf2..5873fd9f28ec2 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow-glib", - "version-string": "19.0.0-SNAPSHOT", + "version-string": "20.0.0-SNAPSHOT", "dependencies": [ "glib", "gobject-introspection", diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index 912b130acff45..ff159bd0b4b59 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -70,7 +70,6 @@ conda create -n arrow ^ "ninja" ^ "nomkl" ^ "pandas" ^ - "fsspec" ^ "python=%PYTHON%" ^ || exit /B conda list -n arrow @@ -86,7 +85,7 @@ set CXX=cl.exe @rem Download Minio somewhere on PATH, for unit tests @rem if "%ARROW_S3%" == "ON" ( - appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z -FileName C:\Windows\Minio.exe || exit /B + appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2025-01-20T14-49-07Z -FileName C:\Windows\Minio.exe || exit /B ) @rem diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index bf915493de302..9a48f26b79c6e 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -15,10 
+15,12 @@ # specific language governing permissions and limitations # under the License. -# don't add pandas here, because it is not a mandatory test dependency -boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture +# Don't add pandas here, because it is not a mandatory test dependency + +# Not a direct dependency of s3fs, but needed for our s3fs fixture +boto3 cffi -cython>=0.29.31 +cython>=3 cloudpickle fsspec hypothesis diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index 4665a32e24bbe..751df9b2f3c01 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -30,9 +30,5 @@ sphinx-lint sphinxcontrib-jquery sphinxcontrib-mermaid sphinx==6.2 -# Requirement for doctest-cython -# Needs upper pin of 0.3.0, see: -# https://github.com/lgpage/pytest-cython/issues/67 -# With 0.3.* bug fix release, the pin can be removed -pytest-cython==0.2.2 +pytest-cython pandas diff --git a/ci/docker/conda-python-cython2.dockerfile b/ci/docker/conda-python-cython2.dockerfile deleted file mode 100644 index 859ad868b0c71..0000000000000 --- a/ci/docker/conda-python-cython2.dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -ARG repo -ARG arch -ARG python=3.9 -FROM ${repo}:${arch}-conda-python-${python} - -RUN mamba install -q -y "cython<3" && \ - mamba clean --all diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile index fbd81903b0a3a..0d48fb3ef83d0 100644 --- a/ci/docker/conda.dockerfile +++ b/ci/docker/conda.dockerfile @@ -21,9 +21,15 @@ FROM ${arch}/ubuntu:22.04 # install build essentials RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update -y -q && \ - apt-get install -y -q curl wget tzdata libc6-dbg gdb \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + apt-get install -y -q \ + curl \ + gdb \ + libc6-dbg \ + patch \ + tzdata \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # install conda and mamba via miniforge COPY ci/scripts/install_conda.sh /arrow/ci/scripts/ diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index f486d07ff8894..fe3976248cc86 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -84,6 +84,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 8dc778d544a6d..259c5fb77fa41 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -106,6 +106,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ python3-dev \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 28cef2946385c..721b37dcae842 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -111,6 +111,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 3f486b09f95ff..592a9a6a232e5 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -111,6 +111,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index 4c567d550b92a..efeed954006c1 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=18.1.0.9000 +pkgver=19.0.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/install_minio.sh b/ci/scripts/install_minio.sh index 6f9701ab5a150..8685ced0bd1ab 100755 --- a/ci/scripts/install_minio.sh +++ b/ci/scripts/install_minio.sh @@ -63,7 +63,7 @@ if [ "${version}" != "latest" ]; then fi # Use specific versions for minio server and client to avoid CI failures on new releases. 
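# Reviewer sketch, not part of this patch: what the pinned download amounts
# to. A dated RELEASE tag fetches an immutable archive build instead of the
# moving "latest" channel; the platform string and install path below are
# illustrative assumptions, not values taken from this script.
minio_version="minio.RELEASE.2025-01-20T14-49-07Z"
curl -fsSL "https://dl.min.io/server/minio/release/linux-amd64/archive/${minio_version}" \
  -o /usr/local/bin/minio
chmod +x /usr/local/bin/minio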
-minio_version="minio.RELEASE.2024-09-13T20-26-02Z" +minio_version="minio.RELEASE.2025-01-20T14-49-07Z" mc_version="mc.RELEASE.2024-09-16T17-43-14Z" download() diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 103e0f08445d9..a7d80c2e96c23 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -79,7 +79,7 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "19.0.0-SNAPSHOT") +set(ARROW_VERSION "20.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index abfe6d274f7b8..f9459f4175c83 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4573,11 +4573,16 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") function(build_orc) message(STATUS "Building Apache ORC from source") + # Remove this and "patch" in "ci/docker/{debian,ubuntu}-*.dockerfile" once we have a patch for ORC 2.1.1 + find_program(PATCH patch REQUIRED) + set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff) + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) fetchcontent_declare(orc ${FC_DECLARE_COMMON_OPTIONS} URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${ORC_PATCH_COMMAND}) prepare_fetchcontent() set(CMAKE_UNITY_BUILD FALSE) @@ -4667,16 +4672,10 @@ function(build_orc) OFF CACHE BOOL "" FORCE) - # We can remove this with ORC 2.0.2 or later. - list(PREPEND CMAKE_MODULE_PATH - ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) - fetchcontent_makeavailable(orc) add_library(orc::orc INTERFACE IMPORTED) target_link_libraries(orc::orc INTERFACE orc) - target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" - "${orc_SOURCE_DIR}/c++/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) else() @@ -4701,6 +4700,9 @@ function(build_orc) get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + get_target_property(ORC_ZLIB_ROOT ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_ROOT}" DIRECTORY) + set(ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" @@ -4710,7 +4712,6 @@ -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DINSTALL_VENDORED_LIBS=OFF - "-DLZ4_HOME=${ORC_LZ4_ROOT}" "-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>" "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" "-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>" @@ -4718,16 +4719,17 @@ "-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>" "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" "-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>" + "-DLZ4_HOME=${ORC_LZ4_ROOT}" "-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>" "-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>" "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" "-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>" - "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>") - if(ZLIB_ROOT) - set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") - endif() + "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>" + "-DZLIB_HOME=${ORC_ZLIB_ROOT}" + "-DZLIB_INCLUDE_DIR=$<TARGET_PROPERTY:ZLIB::ZLIB,INTERFACE_INCLUDE_DIRECTORIES>" + "-DZLIB_LIBRARY=$<TARGET_FILE:ZLIB::ZLIB>") # Work around CMake bug file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) @@ -4743,7 +4745,8 @@ ${ARROW_ZSTD_LIBZSTD} ${Snappy_TARGET} LZ4::lz4 - ZLIB::ZLIB) + ZLIB::ZLIB + PATCH_COMMAND ${ORC_PATCH_COMMAND}) add_library(orc::orc STATIC IMPORTED) set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") 
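# Reviewer sketch, not part of this patch: both the FetchContent and
# ExternalProject paths above now run the same ORC_PATCH_COMMAND. A
# standalone reproduction of that hook looks like this; the dependency
# name, URL, and diff file are placeholders, not Arrow's values.
include(FetchContent)
find_program(PATCH patch REQUIRED)
fetchcontent_declare(example_dep
                     URL https://example.com/example_dep-1.0.tar.gz
                     PATCH_COMMAND ${PATCH} -p1 -i
                                   ${CMAKE_CURRENT_LIST_DIR}/example_dep.diff)
fetchcontent_makeavailable(example_dep)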
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff new file mode 100644 index 0000000000000..7bdbfa1cf5d33 --- /dev/null +++ b/cpp/cmake_modules/orc.diff @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 1f8931508..f8e57bf5f 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -30,8 +30,8 @@ SET(CPACK_PACKAGE_VERSION_MAJOR "2") + SET(CPACK_PACKAGE_VERSION_MINOR "1") + SET(CPACK_PACKAGE_VERSION_PATCH "0") + SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") +-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy. ++list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + + option (BUILD_JAVA + "Include ORC Java library in the build process" +@@ -225,5 +225,3 @@ if (BUILD_CPP_TESTS) + ) + endif () + endif () +- +-INCLUDE(CheckFormat) +diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt +index 694667c06..af13a94aa 100644 +--- a/c++/src/CMakeLists.txt ++++ b/c++/src/CMakeLists.txt +@@ -218,8 +218,8 @@ target_include_directories (orc + INTERFACE + $ + PUBLIC +- $ +- $ ++ $ ++ $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake +index 017e6c5b8..fe376ed16 100644 +--- a/cmake_modules/ThirdpartyToolchain.cmake ++++ b/cmake_modules/ThirdpartyToolchain.cmake +@@ -103,13 +103,13 @@ endif () + + # ---------------------------------------------------------------------- + # Macros for adding third-party libraries +-macro (add_resolved_library target_name link_lib include_dir) +- add_library (${target_name} INTERFACE IMPORTED) ++macro (orc_add_resolved_library target_name link_lib include_dir) ++ add_library (${target_name} INTERFACE IMPORTED GLOBAL) + target_link_libraries (${target_name} INTERFACE ${link_lib}) + target_include_directories (${target_name} SYSTEM INTERFACE ${include_dir}) + endmacro () + +-macro (add_built_library external_project_name target_name link_lib include_dir) ++macro (orc_add_built_library external_project_name target_name link_lib include_dir) + file (MAKE_DIRECTORY "${include_dir}") + + add_library (${target_name} STATIC IMPORTED) +@@ -122,7 +122,7 @@ macro (add_built_library external_project_name target_name link_lib include_dir) + endif () + endmacro () + +-function(provide_cmake_module MODULE_NAME) ++function(orc_provide_cmake_module MODULE_NAME) + set(module "${CMAKE_SOURCE_DIR}/cmake_modules/${MODULE_NAME}.cmake") + if(EXISTS "${module}") + message(STATUS "Providing CMake module for 
${MODULE_NAME} as part of CMake package") +@@ -130,8 +130,8 @@ function(provide_cmake_module MODULE_NAME) + endif() + endfunction() + +-function(provide_find_module PACKAGE_NAME) +- provide_cmake_module("Find${PACKAGE_NAME}") ++function(orc_provide_find_module PACKAGE_NAME) ++ orc_provide_cmake_module("Find${PACKAGE_NAME}") + endfunction() + + # ---------------------------------------------------------------------- +@@ -156,7 +156,7 @@ ExternalProject_Add (orc-format_ep + # Snappy + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (Snappy REQUIRED CONFIG) +- add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -168,13 +168,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${SNAPPY_HOME}" STREQUAL "") + find_package (Snappy REQUIRED) + if (ORC_PREFER_STATIC_SNAPPY AND SNAPPY_STATIC_LIB) +- add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + else () +- add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (Snappy) ++ orc_provide_find_module (Snappy) + else () + set(SNAPPY_HOME "${THIRDPARTY_DIR}/snappy_ep-install") + set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include") +@@ -194,7 +194,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") + +- add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_snappy|${SNAPPY_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -207,7 +207,7 @@ add_library (orc::snappy ALIAS orc_snappy) + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZLIB REQUIRED CONFIG) +- add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -219,13 +219,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${ZLIB_HOME}" STREQUAL "") + find_package (ZLIB REQUIRED) + if (ORC_PREFER_STATIC_ZLIB AND ZLIB_STATIC_LIB) +- add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + else () +- add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (ZLIB) ++ orc_provide_find_module (ZLIB) + else () + set(ZLIB_PREFIX "${THIRDPARTY_DIR}/zlib_ep-install") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") +@@ -252,7 +252,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}") + +- add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) ++ orc_add_built_library (zlib_ep orc_zlib 
${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zlib|${ZLIB_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -265,7 +265,7 @@ add_library (orc::zlib ALIAS orc_zlib) + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZSTD REQUIRED CONFIG) +- add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -277,14 +277,14 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${ZSTD_HOME}" STREQUAL "") + find_package (ZSTD REQUIRED) + if (ORC_PREFER_STATIC_ZSTD AND ZSTD_STATIC_LIB) +- add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + else () +- add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) +- provide_find_module (ZSTD) ++ orc_provide_find_module (ZSTD) + else () + set(ZSTD_HOME "${THIRDPARTY_DIR}/zstd_ep-install") + set(ZSTD_INCLUDE_DIR "${ZSTD_HOME}/include") +@@ -318,7 +318,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB}) + +- add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) ++ orc_add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zstd|${ZSTD_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -330,7 +330,7 @@ add_library (orc::zstd ALIAS orc_zstd) + # LZ4 + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (LZ4 REQUIRED CONFIG) +- add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -342,13 +342,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${LZ4_HOME}" STREQUAL "") + find_package (LZ4 REQUIRED) + if (ORC_PREFER_STATIC_LZ4 AND LZ4_STATIC_LIB) +- add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + else () +- add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (LZ4) ++ orc_provide_find_module (LZ4) + else () + set(LZ4_PREFIX "${THIRDPARTY_DIR}/lz4_ep-install") + set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include") +@@ -375,7 +375,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}) + +- add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) ++ orc_add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_lz4|${LZ4_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -491,7 +491,7 @@ endif () + + if (ORC_PACKAGE_KIND 
STREQUAL "conan") + find_package (Protobuf REQUIRED CONFIG) +- add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -505,20 +505,20 @@ elseif (NOT "${PROTOBUF_HOME}" STREQUAL "") + find_package (Protobuf REQUIRED) + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOBUF_STATIC_LIB) +- add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () +- add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOC_STATIC_LIB) +- add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () +- add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (Protobuf) ++ orc_provide_find_module (Protobuf) + else () + set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install") + set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") +@@ -556,8 +556,8 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}") + +- add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) +- add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_protobuf|${PROTOBUF_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -610,7 +610,7 @@ if(BUILD_LIBHDFSPP) + BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}" + CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS}) + +- add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) ++ orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) + + set (LIBHDFSPP_LIBRARIES + libhdfspp diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 6e2294371e7a6..eb9860b240f16 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -771,13 +771,14 @@ if(ARROW_COMPUTE) compute/kernels/scalar_validity.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_cumulative_ops.cc - compute/kernels/vector_pairwise.cc compute/kernels/vector_nested.cc + compute/kernels/vector_pairwise.cc compute/kernels/vector_rank.cc compute/kernels/vector_replace.cc compute/kernels/vector_run_end_encode.cc compute/kernels/vector_select_k.cc compute/kernels/vector_sort.cc + compute/kernels/vector_swizzle.cc compute/key_hash_internal.cc compute/key_map_internal.cc compute/light_array_internal.cc diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index 0a2536b11e33c..54269f1df0eb6 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ 
b/cpp/src/arrow/acero/CMakeLists.txt @@ -117,7 +117,7 @@ if(ARROW_TESTING) if(ARROW_WITH_OPENTELEMETRY) target_link_libraries(arrow_acero_testing PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) endif() - list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_testing) + list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_testing arrow_compute_testing) endif() # Only for hash_aggregate_test.cc. if(ARROW_USE_BOOST) diff --git a/cpp/src/arrow/acero/accumulation_queue.h b/cpp/src/arrow/acero/accumulation_queue.h index a173f9840388f..92d62d5d99d16 100644 --- a/cpp/src/arrow/acero/accumulation_queue.h +++ b/cpp/src/arrow/acero/accumulation_queue.h @@ -22,6 +22,7 @@ #include #include +#include "arrow/acero/visibility.h" #include "arrow/compute/exec.h" #include "arrow/result.h" @@ -70,7 +71,7 @@ class AccumulationQueue { /// For example, in a top-n node, the process callback should determine how many /// rows need to be delivered for the given batch, and then return a task to actually /// deliver those rows. -class SequencingQueue { +class ARROW_ACERO_EXPORT SequencingQueue { public: using Task = std::function; @@ -123,7 +124,7 @@ class SequencingQueue { /// /// It can be helpful to think of this as if a dedicated thread is running Process as /// batches arrive -class SerialSequencingQueue { +class ARROW_ACERO_EXPORT SerialSequencingQueue { public: /// Strategy that describes how to handle items class Processor { diff --git a/cpp/src/arrow/acero/aggregate_node.h b/cpp/src/arrow/acero/aggregate_node.h index 790264b208305..0c6fea16a8acc 100644 --- a/cpp/src/arrow/acero/aggregate_node.h +++ b/cpp/src/arrow/acero/aggregate_node.h @@ -24,6 +24,7 @@ #include "arrow/acero/visibility.h" #include "arrow/compute/api_aggregate.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc b/cpp/src/arrow/acero/aggregate_node_test.cc index c623271db9fb4..f980496d527d1 100644 --- a/cpp/src/arrow/acero/aggregate_node_test.cc +++ b/cpp/src/arrow/acero/aggregate_node_test.cc @@ -24,6 +24,7 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/compute/api_aggregate.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/result.h" #include "arrow/table.h" #include "arrow/testing/gtest_util.h" @@ -32,6 +33,8 @@ namespace arrow { +using compute::ExecBatchFromJSON; + namespace acero { Result> TableGroupBy( diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index 64d41ccb1ab20..c726ac7c821a7 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -41,8 +41,9 @@ #include "arrow/acero/util.h" #include "arrow/api.h" #include "arrow/compute/api_scalar.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/cast.h" #include "arrow/compute/row/row_encoder_internal.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/random.h" @@ -67,6 +68,7 @@ namespace arrow { using compute::Cast; using compute::Divide; +using compute::ExecBatchFromJSON; using compute::Multiply; using compute::Subtract; diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 1e2975afc91b3..7f4b6dd75272f 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -42,7 +42,6 @@ #include 
"arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" -#include "arrow/compute/row/grouper_internal.h" #include "arrow/table.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" @@ -70,9 +69,11 @@ using internal::checked_cast; using internal::checked_pointer_cast; using internal::ToChars; +using compute::ArgShape; using compute::CallFunction; using compute::CountOptions; using compute::default_exec_context; +using compute::ExecBatchFromJSON; using compute::ExecSpan; using compute::FunctionOptions; using compute::Grouper; @@ -84,6 +85,7 @@ using compute::SortKey; using compute::SortOrder; using compute::Take; using compute::TDigestOptions; +using compute::ValidateOutput; using compute::VarianceOptions; namespace acero { @@ -159,8 +161,6 @@ TEST(AggregateSchema, SingleKeyAndSegmentKey) { output_schema); } -namespace { - using GroupByFunction = std::function( const std::vector&, const std::vector&, const std::vector&, const std::vector&, bool, bool)>; @@ -538,930 +538,6 @@ Result GroupByTest(GroupByFunction group_by, const std::vector& ar return GroupByTest(group_by, arguments, keys, {}, aggregates, use_threads); } -template -void TestGroupClassSupportedKeys( - std::function>(const std::vector&)> - make_func) { - ASSERT_OK(make_func({boolean()})); - - ASSERT_OK(make_func({int8(), uint16(), int32(), uint64()})); - - ASSERT_OK(make_func({dictionary(int64(), utf8())})); - - ASSERT_OK(make_func({float16(), float32(), float64()})); - - ASSERT_OK(make_func({utf8(), binary(), large_utf8(), large_binary()})); - - ASSERT_OK(make_func({fixed_size_binary(16), fixed_size_binary(32)})); - - ASSERT_OK(make_func({decimal128(32, 10), decimal256(76, 20)})); - - ASSERT_OK(make_func({date32(), date64()})); - - for (auto unit : { - TimeUnit::SECOND, - TimeUnit::MILLI, - TimeUnit::MICRO, - TimeUnit::NANO, - }) { - ASSERT_OK(make_func({timestamp(unit), duration(unit)})); - } - - ASSERT_OK( - make_func({day_time_interval(), month_interval(), month_day_nano_interval()})); - - ASSERT_OK(make_func({null()})); - - ASSERT_RAISES(NotImplemented, make_func({struct_({field("", int64())})})); - - ASSERT_RAISES(NotImplemented, make_func({struct_({})})); - - ASSERT_RAISES(NotImplemented, make_func({list(int32())})); - - ASSERT_RAISES(NotImplemented, make_func({fixed_size_list(int32(), 5)})); - - ASSERT_RAISES(NotImplemented, make_func({dense_union({field("", int32())})})); -} - -void TestSegments(std::unique_ptr& segmenter, const ExecSpan& batch, - std::vector expected_segments) { - ASSERT_OK_AND_ASSIGN(auto actual_segments, segmenter->GetSegments(batch)); - ASSERT_EQ(actual_segments.size(), expected_segments.size()); - for (size_t i = 0; i < actual_segments.size(); ++i) { - SCOPED_TRACE("segment #" + ToChars(i)); - ASSERT_EQ(actual_segments[i], expected_segments[i]); - } -} - -Result> MakeGrouper(const std::vector& key_types) { - return Grouper::Make(key_types, default_exec_context()); -} - -Result> MakeRowSegmenter( - const std::vector& key_types) { - return RowSegmenter::Make(key_types, /*nullable_leys=*/false, default_exec_context()); -} - -Result> MakeGenericSegmenter( - const std::vector& key_types) { - return MakeAnyKeysSegmenter(key_types, default_exec_context()); -} - -} // namespace - -TEST(RowSegmenter, SupportedKeys) { - TestGroupClassSupportedKeys(MakeRowSegmenter); -} - -TEST(RowSegmenter, Basics) { - std::vector bad_types2 = {int32(), float32()}; - std::vector types2 = {int32(), int32()}; - std::vector bad_types1 = 
{float32()}; - std::vector types1 = {int32()}; - std::vector types0 = {}; - auto batch2 = ExecBatchFromJSON(types2, "[[1, 1], [1, 2], [2, 2]]"); - auto batch1 = ExecBatchFromJSON(types1, "[[1], [1], [2]]"); - ExecBatch batch0({}, 3); - { - SCOPED_TRACE("types0 segmenting of batch2"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types0)); - ExecSpan span2(batch2); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 0 "), - segmenter->GetSegments(span2)); - ExecSpan span0(batch0); - TestSegments(segmenter, span0, {{0, 3, true, true}}); - } - { - SCOPED_TRACE("bad_types1 segmenting of batch1"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(bad_types1)); - ExecSpan span1(batch1); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch value 0 of type "), - segmenter->GetSegments(span1)); - } - { - SCOPED_TRACE("types1 segmenting of batch2"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types1)); - ExecSpan span2(batch2); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 1 "), - segmenter->GetSegments(span2)); - ExecSpan span1(batch1); - TestSegments(segmenter, span1, {{0, 2, false, true}, {2, 1, true, false}}); - } - { - SCOPED_TRACE("bad_types2 segmenting of batch2"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(bad_types2)); - ExecSpan span2(batch2); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch value 1 of type "), - segmenter->GetSegments(span2)); - } - { - SCOPED_TRACE("types2 segmenting of batch1"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types2)); - ExecSpan span1(batch1); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 2 "), - segmenter->GetSegments(span1)); - ExecSpan span2(batch2); - TestSegments(segmenter, span2, - {{0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}}); - } -} - -TEST(RowSegmenter, NonOrdered) { - for (int num_keys = 1; num_keys <= 2; ++num_keys) { - SCOPED_TRACE("non-ordered " + ToChars(num_keys) + " int32(s)"); - std::vector types(num_keys, int32()); - std::vector values(num_keys, ArrayFromJSON(int32(), "[1, 1, 2, 1, 2]")); - ExecBatch batch(std::move(values), 5); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 1, true, false}}); - } -} - -TEST(RowSegmenter, EmptyBatches) { - { - SCOPED_TRACE("empty batches {int32}"); - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), {}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); - } - { - SCOPED_TRACE("empty batches {int32, int32}"); - std::vector types = {int32(), int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), - 
ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1, 1]]"), - ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1, 1]]"), - ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2, 2], [2, 2]]"), - ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), {}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); - } -} - -TEST(RowSegmenter, MultipleSegments) { - auto test_with_keys = [](int num_keys, const std::shared_ptr& key) { - SCOPED_TRACE("multiple segments " + ToChars(num_keys) + " " + - key->type()->ToString()); - std::vector types(num_keys, key->type()); - std::vector values(num_keys, key); - ExecBatch batch(std::move(values), key->length()); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 2, false, false}, - {6, 2, false, false}, - {8, 1, true, false}}); - }; - for (int num_keys = 1; num_keys <= 2; ++num_keys) { - test_with_keys(num_keys, ArrayFromJSON(int32(), "[1, 1, 2, 5, 3, 3, 5, 5, 4]")); - test_with_keys( - num_keys, - ArrayFromJSON(fixed_size_binary(2), - R"(["aa", "aa", "bb", "ee", "cc", "cc", "ee", "ee", "dd"])")); - test_with_keys(num_keys, DictArrayFromJSON(dictionary(int8(), utf8()), - "[0, 0, 1, 4, 2, 2, 4, 4, 3]", - R"(["a", "b", "c", "d", "e"])")); - } -} - -TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { - { - SCOPED_TRACE("multiple segments multiple batches {int32}"); - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[[1], [2]]"), - ExecBatchFromJSON(types, "[[5], [3]]"), - ExecBatchFromJSON(types, "[[3], [5], [5]]"), ExecBatchFromJSON(types, "[[4]]")}; - - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[1]), - {{0, 1, false, true}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[2]), - {{0, 1, false, false}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[3]), - {{0, 1, false, true}, {1, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); - } - { - SCOPED_TRACE("multiple segments multiple batches {int32, int32}"); - std::vector types = {int32(), int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[[1, 1]]"), - ExecBatchFromJSON(types, "[[1, 1], [2, 2]]"), - ExecBatchFromJSON(types, "[[5, 5], [3, 3]]"), - ExecBatchFromJSON(types, "[[3, 3], [5, 5], [5, 5]]"), - ExecBatchFromJSON(types, "[[4, 4]]")}; - - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[1]), - {{0, 1, false, true}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[2]), - {{0, 1, false, false}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[3]), - {{0, 1, false, true}, {1, 2, true, false}}); - TestSegments(segmenter, 
ExecSpan(batches[4]), {{0, 1, true, false}}); - } -} - -namespace { - -void TestRowSegmenterConstantBatch( - const std::shared_ptr& type, - std::function shape_func, - std::function>(int64_t key)> value_func, - std::function>(const std::vector&)> - make_segmenter) { - constexpr int64_t n_keys = 3, n_rows = 3, repetitions = 3; - std::vector types(n_keys, type); - std::vector full_values(n_keys); - for (int64_t i = 0; i < n_keys; i++) { - auto shape = shape_func(i); - ASSERT_OK_AND_ASSIGN(auto scalar, value_func(i)); - if (shape == ArgShape::SCALAR) { - full_values[i] = std::move(scalar); - } else { - ASSERT_OK_AND_ASSIGN(full_values[i], MakeArrayFromScalar(*scalar, n_rows)); - } - } - auto test_with_keys = [&](int64_t keys) -> Status { - SCOPED_TRACE("constant-batch with " + ToChars(keys) + " key(s)"); - std::vector values(full_values.begin(), full_values.begin() + keys); - ExecBatch batch(values, n_rows); - std::vector key_types(types.begin(), types.begin() + keys); - ARROW_ASSIGN_OR_RAISE(auto segmenter, make_segmenter(key_types)); - for (int64_t i = 0; i < repetitions; i++) { - TestSegments(segmenter, ExecSpan(batch), {{0, n_rows, true, true}}); - ARROW_RETURN_NOT_OK(segmenter->Reset()); - } - return Status::OK(); - }; - for (int64_t i = 0; i <= n_keys; i++) { - ASSERT_OK(test_with_keys(i)); - } -} - -} // namespace - -TEST(RowSegmenter, ConstantArrayBatch) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantScalarBatch) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::SCALAR; }, - [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantMixedBatch) { - TestRowSegmenterConstantBatch( - int32(), - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantArrayBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantScalarBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::SCALAR; }, - [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantMixedBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch( - int32(), - [](int64_t key) { return key % 2 == 0 ? 
ArgShape::SCALAR : ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatch) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryScalarBatch) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryMixedBatch) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatchWithAnyKeysSegmenter) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryScalarBatchWithAnyKeysSegmenter) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryMixedBatchWithAnyKeysSegmenter) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), - [](int64_t key) { return key % 2 == 0 ? 
ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryArrayBatch) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryScalarBatch) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return dict_value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryMixedBatch) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryArrayBatchWithAnyKeysSegmenter) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryScalarBatchWithAnyKeysSegmenter) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryMixedBatchWithAnyKeysSegmenter) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, - [](int64_t key) { return key % 2 == 0 ? 
ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, RowConstantBatch) { - constexpr size_t n = 3; - std::vector types = {int32(), int32(), int32()}; - auto full_batch = ExecBatchFromJSON(types, "[[1, 1, 1], [2, 2, 2], [3, 3, 3]]"); - std::vector expected_segments_for_size_0 = {{0, 3, true, true}}; - std::vector expected_segments = { - {0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}}; - auto test_by_size = [&](size_t size) -> Status { - SCOPED_TRACE("constant-batch with " + ToChars(size) + " key(s)"); - std::vector values(full_batch.values.begin(), - full_batch.values.begin() + size); - ExecBatch batch(values, full_batch.length); - std::vector key_types(types.begin(), types.begin() + size); - ARROW_ASSIGN_OR_RAISE(auto segmenter, MakeRowSegmenter(key_types)); - TestSegments(segmenter, ExecSpan(batch), - size == 0 ? expected_segments_for_size_0 : expected_segments); - return Status::OK(); - }; - for (size_t i = 0; i <= n; i++) { - ASSERT_OK(test_by_size(i)); - } -} - -TEST(Grouper, SupportedKeys) { TestGroupClassSupportedKeys(MakeGrouper); } - -struct TestGrouper { - explicit TestGrouper(std::vector types, std::vector shapes = {}) - : types_(std::move(types)), shapes_(std::move(shapes)) { - grouper_ = Grouper::Make(types_).ValueOrDie(); - - FieldVector fields; - for (const auto& type : types_) { - fields.push_back(field("", type.GetSharedPtr())); - } - key_schema_ = schema(std::move(fields)); - } - - void ExpectConsume(const std::string& key_json, const std::string& expected) { - auto expected_arr = ArrayFromJSON(uint32(), expected); - if (shapes_.size() > 0) { - ExpectConsume(ExecBatchFromJSON(types_, shapes_, key_json), expected_arr); - } else { - ExpectConsume(ExecBatchFromJSON(types_, key_json), expected_arr); - } - } - - void ExpectConsume(const std::vector& key_values, Datum expected) { - ASSERT_OK_AND_ASSIGN(auto key_batch, ExecBatch::Make(key_values)); - ExpectConsume(key_batch, expected); - } - - void ExpectConsume(const ExecBatch& key_batch, Datum expected) { - Datum ids; - ConsumeAndValidate(key_batch, &ids); - AssertEquivalentIds(expected, ids); - } - - void ExpectUniques(const ExecBatch& uniques) { - EXPECT_THAT(grouper_->GetUniques(), ResultWith(Eq(uniques))); - } - - void ExpectUniques(const std::string& uniques_json) { - if (shapes_.size() > 0) { - ExpectUniques(ExecBatchFromJSON(types_, shapes_, uniques_json)); - } else { - ExpectUniques(ExecBatchFromJSON(types_, uniques_json)); - } - } - - void AssertEquivalentIds(const Datum& expected, const Datum& actual) { - auto left = expected.make_array(); - auto right = actual.make_array(); - ASSERT_EQ(left->length(), right->length()) << "#ids unequal"; - int64_t num_ids = left->length(); - auto left_data = left->data(); - auto right_data = right->data(); - auto left_ids = reinterpret_cast(left_data->buffers[1]->data()); - auto right_ids = reinterpret_cast(right_data->buffers[1]->data()); - uint32_t max_left_id = 0; - uint32_t max_right_id = 0; - for (int64_t i = 0; i < num_ids; ++i) { - if (left_ids[i] > max_left_id) { - max_left_id = left_ids[i]; - } - if (right_ids[i] > max_right_id) { - max_right_id = right_ids[i]; - } - } - std::vector right_to_left_present(max_right_id + 1, false); - std::vector left_to_right_present(max_left_id + 1, false); - std::vector right_to_left(max_right_id + 1); - std::vector left_to_right(max_left_id + 1); - for (int64_t i = 0; i < num_ids; ++i) { - uint32_t left_id = left_ids[i]; - uint32_t right_id = 
right_ids[i]; - if (!left_to_right_present[left_id]) { - left_to_right[left_id] = right_id; - left_to_right_present[left_id] = true; - } - if (!right_to_left_present[right_id]) { - right_to_left[right_id] = left_id; - right_to_left_present[right_id] = true; - } - ASSERT_EQ(left_id, right_to_left[right_id]); - ASSERT_EQ(right_id, left_to_right[left_id]); - } - } - - void ConsumeAndValidate(const ExecBatch& key_batch, Datum* ids = nullptr) { - ASSERT_OK_AND_ASSIGN(Datum id_batch, grouper_->Consume(ExecSpan(key_batch))); - - ValidateConsume(key_batch, id_batch); - - if (ids) { - *ids = std::move(id_batch); - } - } - - void ValidateConsume(const ExecBatch& key_batch, const Datum& id_batch) { - if (uniques_.length == -1) { - ASSERT_OK_AND_ASSIGN(uniques_, grouper_->GetUniques()); - } else if (static_cast(grouper_->num_groups()) > uniques_.length) { - ASSERT_OK_AND_ASSIGN(ExecBatch new_uniques, grouper_->GetUniques()); - - // check that uniques_ are prefixes of new_uniques - for (int i = 0; i < uniques_.num_values(); ++i) { - auto new_unique = new_uniques[i].make_array(); - ValidateOutput(*new_unique); - - AssertDatumsEqual(uniques_[i], new_unique->Slice(0, uniques_.length), - /*verbose=*/true); - } - - uniques_ = std::move(new_uniques); - } - - // check that the ids encode an equivalent key sequence - auto ids = id_batch.make_array(); - ValidateOutput(*ids); - - for (int i = 0; i < key_batch.num_values(); ++i) { - SCOPED_TRACE(ToChars(i) + "th key array"); - auto original = - key_batch[i].is_array() - ? key_batch[i].make_array() - : *MakeArrayFromScalar(*key_batch[i].scalar(), key_batch.length); - ASSERT_OK_AND_ASSIGN(auto encoded, Take(*uniques_[i].make_array(), *ids)); - AssertArraysEqual(*original, *encoded, /*verbose=*/true, - EqualOptions().nans_equal(true)); - } - } - - std::vector types_; - std::vector shapes_; - std::shared_ptr key_schema_; - std::unique_ptr grouper_; - ExecBatch uniques_ = ExecBatch({}, -1); -}; - -TEST(Grouper, BooleanKey) { - TestGrouper g({boolean()}); - - g.ExpectConsume("[[true], [true]]", "[0, 0]"); - - g.ExpectConsume("[[true], [true]]", "[0, 0]"); - - g.ExpectConsume("[[false], [null]]", "[1, 2]"); - - g.ExpectConsume("[[true], [false], [true], [false], [null], [false], [null]]", - "[0, 1, 0, 1, 2, 1, 2]"); -} - -TEST(Grouper, NumericKey) { - for (auto ty : { - uint8(), - int8(), - uint16(), - int16(), - uint32(), - int32(), - uint64(), - int64(), - float16(), - float32(), - float64(), - }) { - SCOPED_TRACE("key type: " + ty->ToString()); - - TestGrouper g({ty}); - - g.ExpectConsume("[[3], [3]]", "[0, 0]"); - g.ExpectUniques("[[3]]"); - - g.ExpectConsume("[[3], [3]]", "[0, 0]"); - g.ExpectUniques("[[3]]"); - - g.ExpectConsume("[[27], [81], [81]]", "[1, 2, 2]"); - g.ExpectUniques("[[3], [27], [81]]"); - - g.ExpectConsume("[[3], [27], [3], [27], [null], [81], [27], [81]]", - "[0, 1, 0, 1, 3, 2, 1, 2]"); - g.ExpectUniques("[[3], [27], [81], [null]]"); - } -} - -TEST(Grouper, FloatingPointKey) { - TestGrouper g({float32()}); - - // -0.0 hashes differently from 0.0 - g.ExpectConsume("[[0.0], [-0.0]]", "[0, 1]"); - - g.ExpectConsume("[[Inf], [-Inf]]", "[2, 3]"); - - // assert(!(NaN == NaN)) does not cause spurious new groups - g.ExpectConsume("[[NaN], [NaN]]", "[4, 4]"); - - // TODO(bkietz) test denormal numbers, more NaNs -} - -TEST(Grouper, StringKey) { - for (auto ty : {utf8(), large_utf8(), fixed_size_binary(2)}) { - SCOPED_TRACE("key type: " + ty->ToString()); - - TestGrouper g({ty}); - - g.ExpectConsume(R"([["eh"], ["eh"]])", "[0, 0]"); - - 
g.ExpectConsume(R"([["eh"], ["eh"]])", "[0, 0]"); - - g.ExpectConsume(R"([["be"], [null]])", "[1, 2]"); - } -} - -TEST(Grouper, DictKey) { - TestGrouper g({dictionary(int32(), utf8())}); - - // For dictionary keys, all batches must share a single dictionary. - // Eventually, differing dictionaries will be unified and indices transposed - // during encoding to relieve this restriction. - const auto dict = ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); - - auto WithIndices = [&](const std::string& indices) { - return Datum(*DictionaryArray::FromArrays(ArrayFromJSON(int32(), indices), dict)); - }; - - // NB: null index is not considered equivalent to index=3 (which encodes null in dict) - g.ExpectConsume({WithIndices(" [3, 1, null, 0, 2]")}, - ArrayFromJSON(uint32(), "[0, 1, 2, 3, 4]")); - - g = TestGrouper({dictionary(int32(), utf8())}); - - g.ExpectConsume({WithIndices(" [0, 1, 2, 3, null]")}, - ArrayFromJSON(uint32(), "[0, 1, 2, 3, 4]")); - - g.ExpectConsume({WithIndices(" [3, 1, null, 0, 2]")}, - ArrayFromJSON(uint32(), "[3, 1, 4, 0, 2]")); - - auto dict_arr = *DictionaryArray::FromArrays( - ArrayFromJSON(int32(), "[0, 1]"), - ArrayFromJSON(utf8(), R"(["different", "dictionary"])")); - ExecSpan dict_span({*dict_arr->data()}, 2); - EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, - HasSubstr("Unifying differing dictionaries"), - g.grouper_->Consume(dict_span)); -} - -TEST(Grouper, StringInt64Key) { - TestGrouper g({utf8(), int64()}); - - g.ExpectConsume(R"([["eh", 0], ["eh", 0]])", "[0, 0]"); - - g.ExpectConsume(R"([["eh", 0], ["eh", null]])", "[0, 1]"); - - g.ExpectConsume(R"([["eh", 1], ["bee", 1]])", "[2, 3]"); - - g.ExpectConsume(R"([["eh", null], ["bee", 1]])", "[1, 3]"); - - g = TestGrouper({utf8(), int64()}); - - g.ExpectConsume(R"([ - ["ex", 0], - ["ex", 0], - ["why", 0], - ["ex", 1], - ["why", 0], - ["ex", 1], - ["ex", 0], - ["why", 1] - ])", - "[0, 0, 1, 2, 1, 2, 0, 3]"); - - g.ExpectConsume(R"([ - ["ex", 0], - [null, 0], - [null, 0], - ["ex", 1], - [null, null], - ["ex", 1], - ["ex", 0], - ["why", null] - ])", - "[0, 4, 4, 2, 5, 2, 0, 6]"); -} - -TEST(Grouper, DoubleStringInt64Key) { - TestGrouper g({float64(), utf8(), int64()}); - - g.ExpectConsume(R"([[1.5, "eh", 0], [1.5, "eh", 0]])", "[0, 0]"); - - g.ExpectConsume(R"([[1.5, "eh", 0], [1.5, "eh", 0]])", "[0, 0]"); - - g.ExpectConsume(R"([[1.0, "eh", 0], [1.0, "be", null]])", "[1, 2]"); - - // note: -0 and +0 hash differently - g.ExpectConsume(R"([[-0.0, "be", 7], [0.0, "be", 7]])", "[3, 4]"); -} - -TEST(Grouper, RandomInt64Keys) { - TestGrouper g({int64()}); - for (int i = 0; i < 4; ++i) { - SCOPED_TRACE(ToChars(i) + "th key batch"); - - ExecBatch key_batch{ - *random::GenerateBatch(g.key_schema_->fields(), 1 << 12, 0xDEADBEEF)}; - g.ConsumeAndValidate(key_batch); - } -} - -TEST(Grouper, RandomStringInt64Keys) { - TestGrouper g({utf8(), int64()}); - for (int i = 0; i < 4; ++i) { - SCOPED_TRACE(ToChars(i) + "th key batch"); - - ExecBatch key_batch{ - *random::GenerateBatch(g.key_schema_->fields(), 1 << 12, 0xDEADBEEF)}; - g.ConsumeAndValidate(key_batch); - } -} - -TEST(Grouper, RandomStringInt64DoubleInt32Keys) { - TestGrouper g({utf8(), int64(), float64(), int32()}); - for (int i = 0; i < 4; ++i) { - SCOPED_TRACE(ToChars(i) + "th key batch"); - - ExecBatch key_batch{ - *random::GenerateBatch(g.key_schema_->fields(), 1 << 12, 0xDEADBEEF)}; - g.ConsumeAndValidate(key_batch); - } -} - -TEST(Grouper, NullKeys) { - TestGrouper g({null()}); - g.ExpectConsume("[[null], [null]]", "[0, 0]"); -} - -TEST(Grouper, 
MultipleNullKeys) { - TestGrouper g({null(), null(), null(), null()}); - g.ExpectConsume("[[null, null, null, null], [null, null, null, null]]", "[0, 0]"); -} - -TEST(Grouper, Int64NullKeys) { - TestGrouper g({int64(), null()}); - g.ExpectConsume("[[1, null], [2, null], [1, null]]", "[0, 1, 0]"); -} - -TEST(Grouper, StringNullKeys) { - TestGrouper g({utf8(), null()}); - g.ExpectConsume(R"([["be", null], ["eh", null]])", "[0, 1]"); -} - -TEST(Grouper, DoubleNullStringKey) { - TestGrouper g({float64(), null(), utf8()}); - - g.ExpectConsume(R"([[1.5, null, "eh"], [1.5, null, "eh"]])", "[0, 0]"); - g.ExpectConsume(R"([[null, null, "eh"], [1.0, null, null]])", "[1, 2]"); - g.ExpectConsume(R"([ - [1.0, null, "wh"], - [4.4, null, null], - [5.2, null, "eh"], - [6.5, null, "be"], - [7.3, null, null], - [1.0, null, "wh"], - [9.1, null, "eh"], - [10.2, null, "be"], - [1.0, null, null] - ])", - "[3, 4, 5, 6, 7, 3, 8, 9, 2]"); -} - -TEST(Grouper, EmptyNullKeys) { - TestGrouper g({null()}); - g.ExpectConsume("[]", "[]"); -} - -TEST(Grouper, MakeGroupings) { - auto ExpectGroupings = [](std::string ids_json, std::string expected_json) { - auto ids = checked_pointer_cast(ArrayFromJSON(uint32(), ids_json)); - auto expected = ArrayFromJSON(list(int32()), expected_json); - - auto num_groups = static_cast(expected->length()); - ASSERT_OK_AND_ASSIGN(auto actual, Grouper::MakeGroupings(*ids, num_groups)); - AssertArraysEqual(*expected, *actual, /*verbose=*/true); - - // validate ApplyGroupings - ASSERT_OK_AND_ASSIGN(auto grouped_ids, Grouper::ApplyGroupings(*actual, *ids)); - - for (uint32_t group = 0; group < num_groups; ++group) { - auto ids_slice = checked_pointer_cast(grouped_ids->value_slice(group)); - for (auto slot : *ids_slice) { - EXPECT_EQ(slot, group); - } - } - }; - - ExpectGroupings("[]", "[[]]"); - - ExpectGroupings("[0, 0, 0]", "[[0, 1, 2]]"); - - ExpectGroupings("[0, 0, 0, 1, 1, 2]", "[[0, 1, 2], [3, 4], [5], []]"); - - ExpectGroupings("[2, 1, 2, 1, 1, 2]", "[[], [1, 3, 4], [0, 2, 5], [], []]"); - - ExpectGroupings("[2, 2, 5, 5, 2, 3]", "[[], [], [0, 1, 4], [5], [], [2, 3], [], []]"); - - auto ids = checked_pointer_cast(ArrayFromJSON(uint32(), "[0, null, 1]")); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("MakeGroupings with null ids"), - Grouper::MakeGroupings(*ids, 5)); -} - -TEST(Grouper, ScalarValues) { - // large_utf8 forces GrouperImpl over GrouperFastImpl - for (const auto& str_type : {utf8(), large_utf8()}) { - { - TestGrouper g( - {boolean(), int32(), decimal128(3, 2), decimal256(3, 2), fixed_size_binary(2), - str_type, int32()}, - {ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, - ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::ARRAY}); - g.ExpectConsume( - R"([ -[true, 1, "1.00", "2.00", "ab", "foo", 2], -[true, 1, "1.00", "2.00", "ab", "foo", 2], -[true, 1, "1.00", "2.00", "ab", "foo", 3] -])", - "[0, 0, 1]"); - } - { - auto dict_type = dictionary(int32(), utf8()); - TestGrouper g({dict_type, str_type}, {ArgShape::SCALAR, ArgShape::SCALAR}); - const auto dict = R"(["foo", null])"; - g.ExpectConsume( - {DictScalarFromJSON(dict_type, "0", dict), ScalarFromJSON(str_type, R"("")")}, - ArrayFromJSON(uint32(), "[0]")); - g.ExpectConsume( - {DictScalarFromJSON(dict_type, "1", dict), ScalarFromJSON(str_type, R"("")")}, - ArrayFromJSON(uint32(), "[1]")); - } - } -} - void TestSegmentKey(GroupByFunction group_by, const std::shared_ptr& table, Datum output, const std::vector& segment_keys); diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc 
b/cpp/src/arrow/acero/hash_join_node_test.cc index 76ad9c7d650eb..654fd59c45d5a 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -26,9 +26,9 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" #include "arrow/api.h" -#include "arrow/compute/kernels/test_util.h" #include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/row_encoder_internal.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/extension/uuid.h" #include "arrow/testing/extension_type.h" #include "arrow/testing/generator.h" @@ -49,6 +49,7 @@ using compute::and_; using compute::call; using compute::default_exec_context; using compute::ExecBatchBuilder; +using compute::ExecBatchFromJSON; using compute::ExecSpan; using compute::field_ref; using compute::SortIndices; @@ -2350,7 +2351,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both2", null, "r_payload"], ["right_only", null, "r_payload"], @@ -2519,7 +2520,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both1", 0, "r_payload"], ["both1", 42, "r_payload"], @@ -2704,7 +2705,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["right_only", null, "r_payload"], ["right_only", 0, "r_payload"], ["right_only", 42, "r_payload"]])"); @@ -2879,7 +2880,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both1", 0, "r_payload"], ["both2", null, "r_payload"], @@ -3054,7 +3055,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both2", null, "r_payload"], ["right_only", null, "r_payload"], @@ -3370,8 +3371,10 @@ TEST(HashJoin, LARGE_MEMORY_TEST(BuildSideOver4GBVarLength)) { constexpr int value_no_match_length_min = 128; constexpr int value_no_match_length_max = 129; constexpr int value_match_length = 130; + // The value "DDD..." will be hashed to the partition beyond 4GB of the hash table. + // Matching in this area gives us more coverage. const auto value_match = - std::make_shared(std::string(value_match_length, 'X')); + std::make_shared(std::string(value_match_length, 'D')); constexpr int16_t num_rows_per_batch_left = 128; constexpr int16_t num_rows_per_batch_right = 4096; const int64_t num_batches_left = 8; @@ -3446,5 +3449,104 @@ TEST(HashJoin, LARGE_MEMORY_TEST(BuildSideOver4GBVarLength)) { num_batches_left * num_rows_per_batch_left * num_batches_right); } +// GH-45334: The row ids of the matching rows on the right side (the build side) are very +// large, causing the index calculation to overflow.
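+// (Back-of-envelope sketch, assuming each build-side row occupies 16 bytes of +// fixed-length data, i.e. one int64 key plus one int64 payload: ~7.2e8 rows * 16 B is +// roughly 11.5 GB of row data, past the 4 GB a 32-bit byte offset can address, so the +// row_id-to-offset computation must be carried out in 64 bits.)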
+TEST(HashJoin, BuildSideLargeRowIds) { + GTEST_SKIP() << "Test disabled because it is excessively time and resource consuming; " + "for local debugging only."; + + // A fair number of match rows to trigger both SIMD and non-SIMD code paths. + const int64_t num_match_rows = 35; + const int64_t num_rows_per_match_batch = 35; + const int64_t num_match_batches = num_match_rows / num_rows_per_match_batch; + + const int64_t num_unmatch_rows_large = 720898048; + const int64_t num_rows_per_unmatch_batch_large = 352001; + const int64_t num_unmatch_batches_large = + num_unmatch_rows_large / num_rows_per_unmatch_batch_large; + + auto schema_small = + schema({field("small_key", int64()), field("small_payload", int64())}); + auto schema_large = + schema({field("large_key", int64()), field("large_payload", int64())}); + + // A carefully chosen key value which hashes to 0xFFFFFFFE, causing the match rows to + // be placed at the high end of the row table. + const int64_t match_key = 289339070; + const int64_t match_payload = 42; + + // Match arrays of length num_rows_per_match_batch. + ASSERT_OK_AND_ASSIGN( + auto match_key_arr, + Constant(MakeScalar(match_key))->Generate(num_rows_per_match_batch)); + ASSERT_OK_AND_ASSIGN( + auto match_payload_arr, + Constant(MakeScalar(match_payload))->Generate(num_rows_per_match_batch)); + // Append 1 null row to trigger null processing code paths. + ASSERT_OK_AND_ASSIGN(auto null_arr, MakeArrayOfNull(int64(), 1)); + ASSERT_OK_AND_ASSIGN(match_key_arr, Concatenate({match_key_arr, null_arr})); + ASSERT_OK_AND_ASSIGN(match_payload_arr, Concatenate({match_payload_arr, null_arr})); + // Match batch. + ExecBatch match_batch({match_key_arr, match_payload_arr}, num_rows_per_match_batch + 1); + + // Small batch. + ExecBatch batch_small = match_batch; + + // Large unmatch batches. + const int64_t seed = 42; + std::vector<ExecBatch> unmatch_batches_large; + unmatch_batches_large.reserve(num_unmatch_batches_large); + ASSERT_OK_AND_ASSIGN(auto unmatch_payload_arr_large, + MakeArrayOfNull(int64(), num_rows_per_unmatch_batch_large)); + int64_t unmatch_range_per_batch = + (std::numeric_limits<int64_t>::max() - match_key) / num_unmatch_batches_large; + for (int i = 0; i < num_unmatch_batches_large; ++i) { + auto unmatch_key_arr_large = RandomArrayGenerator(seed).Int64( + num_rows_per_unmatch_batch_large, + /*min=*/match_key + 1 + i * unmatch_range_per_batch, + /*max=*/match_key + 1 + (i + 1) * unmatch_range_per_batch); + unmatch_batches_large.push_back( + ExecBatch({unmatch_key_arr_large, unmatch_payload_arr_large}, + num_rows_per_unmatch_batch_large)); + } + // Large match batch. + ExecBatch match_batch_large = match_batch; + + // Batches with schemas. + auto batches_small = BatchesWithSchema{ + std::vector<ExecBatch>(num_match_batches, batch_small), schema_small}; + auto batches_large = BatchesWithSchema{std::move(unmatch_batches_large), schema_large}; + for (int i = 0; i < num_match_batches; i++) { + batches_large.batches.push_back(match_batch_large); + } + + Declaration source_small{ + "exec_batch_source", + ExecBatchSourceNodeOptions(batches_small.schema, batches_small.batches)}; + Declaration source_large{ + "exec_batch_source", + ExecBatchSourceNodeOptions(batches_large.schema, batches_large.batches)}; + + HashJoinNodeOptions join_opts(JoinType::INNER, /*left_keys=*/{"small_key"}, + /*right_keys=*/{"large_key"}); + Declaration join{ + "hashjoin", {std::move(source_small), std::move(source_large)}, join_opts}; + + // Join should emit num_match_rows * num_match_rows rows.
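+ // (The materialized result is re-wrapped in an exec_batch_source below so that the + // same batches can feed both the row-count assertion and the filter-based content + // assertion.)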
+ ASSERT_OK_AND_ASSIGN(auto batches_result, DeclarationToExecBatches(std::move(join))); + Declaration result{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_result.schema), + std::move(batches_result.batches))}; + AssertRowCountEq(result, num_match_rows * num_match_rows); + + // All rows should be match_key/payload. + auto predicate = and_({equal(field_ref("small_key"), literal(match_key)), + equal(field_ref("small_payload"), literal(match_payload)), + equal(field_ref("large_key"), literal(match_key)), + equal(field_ref("large_payload"), literal(match_payload))}); + Declaration filter{"filter", {result}, FilterNodeOptions{std::move(predicate)}}; + AssertRowCountEq(std::move(filter), num_match_rows * num_match_rows); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/plan_test.cc b/cpp/src/arrow/acero/plan_test.cc index e74ad6a6665a4..61ab09f6674d9 100644 --- a/cpp/src/arrow/acero/plan_test.cc +++ b/cpp/src/arrow/acero/plan_test.cc @@ -27,6 +27,7 @@ #include "arrow/acero/util.h" #include "arrow/compute/exec.h" #include "arrow/compute/expression.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/io/util_internal.h" #include "arrow/record_batch.h" #include "arrow/table.h" @@ -51,8 +52,10 @@ using testing::UnorderedElementsAreArray; namespace arrow { +using compute::ArgShape; using compute::call; using compute::CountOptions; +using compute::ExecBatchFromJSON; using compute::field_ref; using compute::ScalarAggregateOptions; using compute::SortKey; diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 200a75d1dcc6c..0ef014c6ff540 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -439,11 +439,11 @@ Status RowArrayMerge::PrepareForMerge(RowArray* target, num_rows = 0; num_bytes = 0; for (size_t i = 0; i < sources.size(); ++i) { - target->rows_.mutable_offsets()[num_rows] = static_cast(num_bytes); + target->rows_.mutable_offsets()[num_rows] = num_bytes; num_rows += sources[i]->rows_.length(); num_bytes += sources[i]->rows_.offsets()[sources[i]->rows_.length()]; } - target->rows_.mutable_offsets()[num_rows] = static_cast(num_bytes); + target->rows_.mutable_offsets()[num_rows] = num_bytes; } return Status::OK(); @@ -477,14 +477,15 @@ void RowArrayMerge::CopyFixedLength(RowTableImpl* target, const RowTableImpl& so const int64_t* source_rows_permutation) { int64_t num_source_rows = source.length(); - int64_t fixed_length = target->metadata().fixed_length; + uint32_t fixed_length = target->metadata().fixed_length; // Permutation of source rows is optional. Without permutation all that is // needed is memcpy. // if (!source_rows_permutation) { - memcpy(target->mutable_data(1) + fixed_length * first_target_row_id, source.data(1), - fixed_length * num_source_rows); + DCHECK_LE(first_target_row_id, std::numeric_limits::max()); + memcpy(target->mutable_fixed_length_rows(static_cast(first_target_row_id)), + source.fixed_length_rows(/*row_id=*/0), fixed_length * num_source_rows); } else { // Row length must be a multiple of 64-bits due to enforced alignment. // Loop for each output row copying a fixed number of 64-bit words. 
@@ -494,10 +495,13 @@ void RowArrayMerge::CopyFixedLength(RowTableImpl* target, const RowTableImpl& so int64_t num_words_per_row = fixed_length / sizeof(uint64_t); for (int64_t i = 0; i < num_source_rows; ++i) { int64_t source_row_id = source_rows_permutation[i]; + DCHECK_LE(source_row_id, std::numeric_limits::max()); const uint64_t* source_row_ptr = reinterpret_cast( - source.data(1) + fixed_length * source_row_id); + source.fixed_length_rows(static_cast(source_row_id))); + int64_t target_row_id = first_target_row_id + i; + DCHECK_LE(target_row_id, std::numeric_limits::max()); uint64_t* target_row_ptr = reinterpret_cast( - target->mutable_data(1) + fixed_length * (first_target_row_id + i)); + target->mutable_fixed_length_rows(static_cast(target_row_id))); for (int64_t word = 0; word < num_words_per_row; ++word) { target_row_ptr[word] = source_row_ptr[word]; @@ -529,16 +533,16 @@ void RowArrayMerge::CopyVaryingLength(RowTableImpl* target, const RowTableImpl& // We can simply memcpy bytes of rows if their order has not changed. // - memcpy(target->mutable_data(2) + target_offsets[first_target_row_id], source.data(2), - source_offsets[num_source_rows] - source_offsets[0]); + memcpy(target->mutable_var_length_rows() + target_offsets[first_target_row_id], + source.var_length_rows(), source_offsets[num_source_rows] - source_offsets[0]); } else { int64_t target_row_offset = first_target_row_offset; - uint64_t* target_row_ptr = - reinterpret_cast(target->mutable_data(2) + target_row_offset); + uint64_t* target_row_ptr = reinterpret_cast( + target->mutable_var_length_rows() + target_row_offset); for (int64_t i = 0; i < num_source_rows; ++i) { int64_t source_row_id = source_rows_permutation[i]; const uint64_t* source_row_ptr = reinterpret_cast( - source.data(2) + source_offsets[source_row_id]); + source.var_length_rows() + source_offsets[source_row_id]); int64_t length = source_offsets[source_row_id + 1] - source_offsets[source_row_id]; // Though the row offset is 64-bit, the length of a single row must be 32-bit as // required by current row table implementation. @@ -564,14 +568,18 @@ void RowArrayMerge::CopyNulls(RowTableImpl* target, const RowTableImpl& source, const int64_t* source_rows_permutation) { int64_t num_source_rows = source.length(); int num_bytes_per_row = target->metadata().null_masks_bytes_per_row; - uint8_t* target_nulls = target->null_masks() + num_bytes_per_row * first_target_row_id; + DCHECK_LE(first_target_row_id, std::numeric_limits::max()); + uint8_t* target_nulls = + target->mutable_null_masks(static_cast(first_target_row_id)); if (!source_rows_permutation) { - memcpy(target_nulls, source.null_masks(), num_bytes_per_row * num_source_rows); + memcpy(target_nulls, source.null_masks(/*row_id=*/0), + num_bytes_per_row * num_source_rows); } else { - for (int64_t i = 0; i < num_source_rows; ++i) { + for (uint32_t i = 0; i < num_source_rows; ++i) { int64_t source_row_id = source_rows_permutation[i]; + DCHECK_LE(source_row_id, std::numeric_limits::max()); const uint8_t* source_nulls = - source.null_masks() + num_bytes_per_row * source_row_id; + source.null_masks(static_cast(source_row_id)); for (int64_t byte = 0; byte < num_bytes_per_row; ++byte) { *target_nulls++ = *source_nulls++; } diff --git a/cpp/src/arrow/acero/swiss_join_avx2.cc b/cpp/src/arrow/acero/swiss_join_avx2.cc index 1d6b7eda6e6a0..86d08870e58d8 100644 --- a/cpp/src/arrow/acero/swiss_join_avx2.cc +++ b/cpp/src/arrow/acero/swiss_join_avx2.cc @@ -16,6 +16,7 @@ // under the License. 
#include "arrow/acero/swiss_join_internal.h" +#include "arrow/compute/row/row_util_avx2_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/simd.h" @@ -46,7 +47,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu if (!is_fixed_length_column) { int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); - const uint8_t* row_ptr_base = rows.data(2); + const uint8_t* row_ptr_base = rows.var_length_rows(); const RowTableImpl::offset_type* row_offsets = rows.offsets(); auto row_offsets_i64 = reinterpret_cast(row_offsets); @@ -172,7 +173,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu if (is_fixed_length_row) { // Case 3: This is a fixed length column in fixed length row // - const uint8_t* row_ptr_base = rows.data(1); + const uint8_t* row_ptr_base = rows.fixed_length_rows(/*row_id=*/0); for (int i = 0; i < num_rows / kUnroll; ++i) { // Load 8 32-bit row ids. __m256i row_id = @@ -197,7 +198,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu } else { // Case 4: This is a fixed length column in varying length row // - const uint8_t* row_ptr_base = rows.data(2); + const uint8_t* row_ptr_base = rows.var_length_rows(); const RowTableImpl::offset_type* row_offsets = rows.offsets(); auto row_offsets_i64 = reinterpret_cast(row_offsets); @@ -237,31 +238,16 @@ int RowArrayAccessor::VisitNulls_avx2(const RowTableImpl& rows, int column_id, // constexpr int kUnroll = 8; - const uint8_t* null_masks = rows.null_masks(); - __m256i null_bits_per_row = - _mm256_set1_epi32(8 * rows.metadata().null_masks_bytes_per_row); - __m256i pos_after_encoding = - _mm256_set1_epi32(rows.metadata().pos_after_encoding(column_id)); + uint32_t pos_after_encoding = rows.metadata().pos_after_encoding(column_id); for (int i = 0; i < num_rows / kUnroll; ++i) { __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); - __m256i bit_id = _mm256_mullo_epi32(row_id, null_bits_per_row); - bit_id = _mm256_add_epi32(bit_id, pos_after_encoding); - __m256i bytes = _mm256_i32gather_epi32(reinterpret_cast(null_masks), - _mm256_srli_epi32(bit_id, 3), 1); - __m256i bit_in_word = _mm256_sllv_epi32( - _mm256_set1_epi32(1), _mm256_and_si256(bit_id, _mm256_set1_epi32(7))); - // `result` will contain one 32-bit word per tested null bit, either 0xffffffff if the - // null bit was set or 0 if it was unset. - __m256i result = - _mm256_cmpeq_epi32(_mm256_and_si256(bytes, bit_in_word), bit_in_word); - // NB: Be careful about sign-extension when casting the return value of - // _mm256_movemask_epi8 (signed 32-bit) to unsigned 64-bit, which will pollute the - // higher bits of the following OR. 
- uint32_t null_bytes_lo = static_cast( - _mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_castsi256_si128(result)))); - uint64_t null_bytes_hi = - _mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_extracti128_si256(result, 1))); - uint64_t null_bytes = null_bytes_lo | (null_bytes_hi << 32); + __m256i null32 = GetNullBitInt32(rows, pos_after_encoding, row_id); + null32 = _mm256_cmpeq_epi32(null32, _mm256_set1_epi32(1)); + uint32_t null32_lo = + _mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_castsi256_si128(null32))); + uint32_t null32_hi = + _mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_extracti128_si256(null32, 1))); + uint64_t null_bytes = null32_lo | (static_cast(null32_hi) << 32); process_8_values_fn(i * kUnroll, null_bytes); } diff --git a/cpp/src/arrow/acero/swiss_join_internal.h b/cpp/src/arrow/acero/swiss_join_internal.h index 6d48a35ecd0eb..d0d97aa1cc0fe 100644 --- a/cpp/src/arrow/acero/swiss_join_internal.h +++ b/cpp/src/arrow/acero/swiss_join_internal.h @@ -72,7 +72,7 @@ class RowArrayAccessor { if (!is_fixed_length_column) { int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); - const uint8_t* row_ptr_base = rows.data(2); + const uint8_t* row_ptr_base = rows.var_length_rows(); const RowTableImpl::offset_type* row_offsets = rows.offsets(); uint32_t field_offset_within_row, field_length; @@ -108,22 +108,21 @@ class RowArrayAccessor { if (field_length == 0) { field_length = 1; } - uint32_t row_length = rows.metadata().fixed_length; bool is_fixed_length_row = rows.metadata().is_fixed_length; if (is_fixed_length_row) { // Case 3: This is a fixed length column in a fixed length row // - const uint8_t* row_ptr_base = rows.data(1) + field_offset_within_row; for (int i = 0; i < num_rows; ++i) { uint32_t row_id = row_ids[i]; - const uint8_t* row_ptr = row_ptr_base + row_length * row_id; + const uint8_t* row_ptr = + rows.fixed_length_rows(row_id) + field_offset_within_row; process_value_fn(i, row_ptr, field_length); } } else { // Case 4: This is a fixed length column in a varying length row // - const uint8_t* row_ptr_base = rows.data(2) + field_offset_within_row; + const uint8_t* row_ptr_base = rows.var_length_rows() + field_offset_within_row; const RowTableImpl::offset_type* row_offsets = rows.offsets(); for (int i = 0; i < num_rows; ++i) { uint32_t row_id = row_ids[i]; @@ -142,13 +141,10 @@ class RowArrayAccessor { template static void VisitNulls(const RowTableImpl& rows, int column_id, int num_rows, const uint32_t* row_ids, PROCESS_VALUE_FN process_value_fn) { - const uint8_t* null_masks = rows.null_masks(); - uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row; uint32_t pos_after_encoding = rows.metadata().pos_after_encoding(column_id); for (int i = 0; i < num_rows; ++i) { uint32_t row_id = row_ids[i]; - int64_t bit_id = row_id * null_mask_num_bytes * 8 + pos_after_encoding; - process_value_fn(i, bit_util::GetBit(null_masks, bit_id) ? 0xff : 0); + process_value_fn(i, rows.is_null(row_id, pos_after_encoding) ? 
0xff : 0); } } diff --git a/cpp/src/arrow/acero/test_util_internal.cc b/cpp/src/arrow/acero/test_util_internal.cc index 107a20354c0e7..2748d4107ed36 100644 --- a/cpp/src/arrow/acero/test_util_internal.cc +++ b/cpp/src/arrow/acero/test_util_internal.cc @@ -38,6 +38,7 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/datum.h" #include "arrow/io/interfaces.h" #include "arrow/record_batch.h" @@ -59,67 +60,12 @@ namespace arrow { using arrow::internal::CpuInfo; using arrow::internal::Executor; +using compute::ExecBatchFromJSON; using compute::SortKey; using compute::Take; namespace acero { -namespace { - -void ValidateOutputImpl(const ArrayData& output) { - ASSERT_OK(::arrow::internal::ValidateArrayFull(output)); - TestInitialized(output); -} - -void ValidateOutputImpl(const ChunkedArray& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& chunk : output.chunks()) { - TestInitialized(*chunk); - } -} - -void ValidateOutputImpl(const RecordBatch& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& column : output.column_data()) { - TestInitialized(*column); - } -} - -void ValidateOutputImpl(const Table& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& column : output.columns()) { - for (const auto& chunk : column->chunks()) { - TestInitialized(*chunk); - } - } -} - -void ValidateOutputImpl(const Scalar& output) { ASSERT_OK(output.ValidateFull()); } - -} // namespace - -void ValidateOutput(const Datum& output) { - switch (output.kind()) { - case Datum::ARRAY: - ValidateOutputImpl(*output.array()); - break; - case Datum::CHUNKED_ARRAY: - ValidateOutputImpl(*output.chunked_array()); - break; - case Datum::RECORD_BATCH: - ValidateOutputImpl(*output.record_batch()); - break; - case Datum::TABLE: - ValidateOutputImpl(*output.table()); - break; - case Datum::SCALAR: - ValidateOutputImpl(*output.scalar()); - break; - default: - break; - } -} - std::vector HardwareFlagsForTesting() { // Acero currently only has AVX2 optimizations return arrow::GetSupportedHardwareFlags({CpuInfo::AVX2}); @@ -199,36 +145,6 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& types, std::string_view json) { - auto fields = ::arrow::internal::MapVector( - [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); - - ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)}; - - return batch; -} - -ExecBatch ExecBatchFromJSON(const std::vector& types, - const std::vector& shapes, std::string_view json) { - DCHECK_EQ(types.size(), shapes.size()); - - ExecBatch batch = ExecBatchFromJSON(types, json); - - auto value_it = batch.values.begin(); - for (ArgShape shape : shapes) { - if (shape == ArgShape::SCALAR) { - if (batch.length == 0) { - *value_it = MakeNullScalar(value_it->type()); - } else { - *value_it = value_it->make_array()->GetScalar(0).ValueOrDie(); - } - } - ++value_it; - } - - return batch; -} - Future<> StartAndFinish(ExecPlan* plan) { RETURN_NOT_OK(plan->Validate()); plan->StartProducing(); diff --git a/cpp/src/arrow/acero/test_util_internal.h b/cpp/src/arrow/acero/test_util_internal.h index 569fb1254db4a..2367524a5600c 100644 --- a/cpp/src/arrow/acero/test_util_internal.h +++ b/cpp/src/arrow/acero/test_util_internal.h @@ -36,8 +36,6 @@ namespace arrow::acero { -void ValidateOutput(const Datum& output); - // Enumerate all hardware flags that can be tested on this platform // and would lead to 
different code paths being tested in Acero. std::vector HardwareFlagsForTesting(); @@ -50,16 +48,6 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& types, std::string_view json); - -/// \brief Shape qualifier for value types. In certain instances -/// (e.g. "map_lookup" kernel), an argument may only be a scalar, where in -/// other kernels arguments can be arrays or scalars -enum class ArgShape { ANY, ARRAY, SCALAR }; - -ExecBatch ExecBatchFromJSON(const std::vector& types, - const std::vector& shapes, std::string_view json); - struct BatchesWithSchema { std::vector batches; std::shared_ptr schema; diff --git a/cpp/src/arrow/acero/tpch_node_test.cc b/cpp/src/arrow/acero/tpch_node_test.cc index 17fb43452bc58..f484d6c9d523e 100644 --- a/cpp/src/arrow/acero/tpch_node_test.cc +++ b/cpp/src/arrow/acero/tpch_node_test.cc @@ -27,7 +27,6 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/tpch_node.h" #include "arrow/acero/util.h" -#include "arrow/compute/kernels/test_util.h" #include "arrow/compute/row/row_encoder_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index b9d6c53215b41..b3c314fccc0b3 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -235,7 +235,7 @@ void AssertTableWriteReadEqual(const std::vector>& input_ write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); @@ -272,7 +272,7 @@ void AssertBatchWriteReadEqual( write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); @@ -330,7 +330,7 @@ std::unique_ptr CreateWriter(uint64_t stripe_size, liborc::OutputStream* stream) { liborc::WriterOptions options; options.setStripeSize(stripe_size); - options.setCompressionBlockSize(1024); + options.setCompressionBlockSize(64 * 1024); options.setMemoryPool(liborc::getDefaultPool()); options.setRowIndexStride(0); return liborc::createWriter(type, stream, options); @@ -668,7 +668,7 @@ TEST_F(TestORCWriterTrivialNoWrite, noWrite) { write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index ca811dac041fe..6deb2cbad8cb3 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -28,6 +28,14 @@ endif() # Unit tests # +# Define arrow_compute_testing object library for common test files +if(ARROW_TESTING) + add_library(arrow_compute_testing OBJECT test_util_internal.cc) + # Even though this is 
still just an object library we still need to "link" our + # dependencies so that include paths are configured correctly + target_link_libraries(arrow_compute_testing PUBLIC ${ARROW_GTEST_GMOCK}) +endif() + set(ARROW_COMPUTE_TEST_PREFIX "arrow-compute") set(ARROW_COMPUTE_TEST_LABELS "arrow-compute-tests") set(ARROW_COMPUTE_TEST_ARGS PREFIX ${ARROW_COMPUTE_TEST_PREFIX} LABELS @@ -87,9 +95,16 @@ add_arrow_test(internals_test function_test.cc exec_test.cc kernel_test.cc - registry_test.cc) + registry_test.cc + EXTRA_LINK_LIBS + arrow_compute_testing) + +add_arrow_compute_test(expression_test + SOURCES + expression_test.cc + EXTRA_LINK_LIBS + arrow_compute_testing) -add_arrow_compute_test(expression_test SOURCES expression_test.cc) add_arrow_compute_test(row_test SOURCES key_hash_test.cc @@ -98,7 +113,9 @@ add_arrow_compute_test(row_test row/grouper_test.cc row/row_encoder_internal_test.cc row/row_test.cc - util_internal_test.cc) + util_internal_test.cc + EXTRA_LINK_LIBS + arrow_compute_testing) add_arrow_benchmark(function_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index f0d5c0fcc3d72..22ecf1cc87844 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -155,6 +155,12 @@ static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>( DataMember("periods", &PairwiseOptions::periods)); static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>( DataMember("recursive", &ListFlattenOptions::recursive)); +static auto kInversePermutationOptionsType = + GetFunctionOptionsType<InversePermutationOptions>( + DataMember("max_index", &InversePermutationOptions::max_index), + DataMember("output_type", &InversePermutationOptions::output_type)); +static auto kScatterOptionsType = GetFunctionOptionsType<ScatterOptions>( + DataMember("max_index", &ScatterOptions::max_index)); } // namespace } // namespace internal @@ -230,6 +236,17 @@ ListFlattenOptions::ListFlattenOptions(bool recursive) : FunctionOptions(internal::kListFlattenOptionsType), recursive(recursive) {} constexpr char ListFlattenOptions::kTypeName[]; +InversePermutationOptions::InversePermutationOptions( + int64_t max_index, std::shared_ptr<DataType> output_type) + : FunctionOptions(internal::kInversePermutationOptionsType), + max_index(max_index), + output_type(std::move(output_type)) {} +constexpr char InversePermutationOptions::kTypeName[]; + +ScatterOptions::ScatterOptions(int64_t max_index) + : FunctionOptions(internal::kScatterOptionsType), max_index(max_index) {} +constexpr char ScatterOptions::kTypeName[]; + namespace internal { void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType)); @@ -244,6 +261,8 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPairwiseOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kListFlattenOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kInversePermutationOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kScatterOptionsType)); } } // namespace internal @@ -429,5 +448,19 @@ Result<Datum> CumulativeMean(const Datum& values, const CumulativeOptions& optio return CallFunction("cumulative_mean", {Datum(values)}, &options, ctx); } +// ---------------------------------------------------------------------- +// Swizzle functions + +Result<Datum> InversePermutation(const Datum& indices, + const InversePermutationOptions& options, + ExecContext* ctx) { + return
CallFunction("inverse_permutation", {indices}, &options, ctx); +} + +Result Scatter(const Datum& values, const Datum& indices, + const ScatterOptions& options, ExecContext* ctx) { + return CallFunction("scatter", {values, indices}, &options, ctx); +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index e5bcc37329661..ada1665b3ec7c 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -257,6 +257,40 @@ class ARROW_EXPORT ListFlattenOptions : public FunctionOptions { bool recursive = false; }; +/// \brief Options for inverse_permutation function +class ARROW_EXPORT InversePermutationOptions : public FunctionOptions { + public: + explicit InversePermutationOptions(int64_t max_index = -1, + std::shared_ptr output_type = NULLPTR); + static constexpr char const kTypeName[] = "InversePermutationOptions"; + static InversePermutationOptions Defaults() { return InversePermutationOptions(); } + + /// \brief The max value in the input indices to allow. The length of the function's + /// output will be this value plus 1. If negative, this value will be set to the length + /// of the input indices minus 1 and the length of the function's output will be the + /// length of the input indices. + int64_t max_index = -1; + /// \brief The type of the output inverse permutation. If null, the output will be of + /// the same type as the input indices, otherwise must be signed integer type. An + /// invalid error will be reported if this type is not able to store the length of the + /// input indices. + std::shared_ptr output_type = NULLPTR; +}; + +/// \brief Options for scatter function +class ARROW_EXPORT ScatterOptions : public FunctionOptions { + public: + explicit ScatterOptions(int64_t max_index = -1); + static constexpr char const kTypeName[] = "ScatterOptions"; + static ScatterOptions Defaults() { return ScatterOptions(); } + + /// \brief The max value in the input indices to allow. The length of the function's + /// output will be this value plus 1. If negative, this value will be set to the length + /// of the input indices minus 1 and the length of the function's output will be the + /// length of the input indices. + int64_t max_index = -1; +}; + /// @} /// \brief Filter with a boolean selection filter @@ -705,5 +739,58 @@ Result> PairwiseDiff(const Array& array, bool check_overflow = false, ExecContext* ctx = NULLPTR); +/// \brief Return the inverse permutation of the given indices. +/// +/// For indices[i] = x, inverse_permutation[x] = i. And inverse_permutation[x] = null if x +/// does not appear in the input indices. Indices must be in the range of [0, max_index], +/// or null, which will be ignored. If multiple indices point to the same value, the last +/// one is used. +/// +/// For example, with +/// indices = [null, 0, null, 2, 4, 1, 1] +/// the inverse permutation is +/// [1, 6, 3, null, 4, null, null] +/// if max_index = 6. +/// +/// \param[in] indices array-like indices +/// \param[in] options configures the max index and the output type +/// \param[in] ctx the function execution context, optional +/// \return the resulting inverse permutation +/// +/// \since 20.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result InversePermutation( + const Datum& indices, + const InversePermutationOptions& options = InversePermutationOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +/// \brief Scatter the values into specified positions according to the indices. 
+/// +/// For indices[i] = x, output[x] = values[i], and output[x] = null if x does not appear +/// in the input indices. Indices must be in the range [0, max_index] or null, in which +/// case the corresponding value will be ignored. If multiple indices share the same +/// value, the last one is used. +/// +/// For example, with +/// values = [a, b, c, d, e, f, g] +/// indices = [null, 0, null, 2, 4, 1, 1] +/// the output is +/// [b, g, d, null, e, null, null] +/// if max_index = 6. +/// +/// \param[in] values datum to scatter +/// \param[in] indices array-like indices +/// \param[in] options configures the max index to scatter to +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 20.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Scatter(const Datum& values, const Datum& indices, + const ScatterOptions& options = ScatterOptions::Defaults(), + ExecContext* ctx = NULLPTR); + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index c269de0763217..b7d017d482013 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -136,6 +136,10 @@ TEST(FunctionOptions, Equality) { options.emplace_back(new SelectKOptions(5, {{SortKey("key", SortOrder::Ascending)}})); options.emplace_back(new Utf8NormalizeOptions()); options.emplace_back(new Utf8NormalizeOptions(Utf8NormalizeOptions::NFD)); + options.emplace_back( + new InversePermutationOptions(/*max_index=*/42, /*output_type=*/int32())); + options.emplace_back(new ScatterOptions()); + options.emplace_back(new ScatterOptions(/*max_index=*/42)); for (size_t i = 0; i < options.size(); i++) { const size_t prev_i = i == 0 ?
options.size() - 1 : i - 1; diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 7c7b9c8b68d45..4dedd1f23e090 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -18,9 +18,9 @@ # ---------------------------------------------------------------------- # Tests that don't require the full kernel library -# Define arrow_compute_testing object library for common test files +# Define arrow_compute_kernels_testing object library for common test files if(ARROW_TESTING) - add_library(arrow_compute_kernels_testing OBJECT test_util.cc) + add_library(arrow_compute_kernels_testing OBJECT test_util_internal.cc) # Even though this is still just an object library we still need to "link" our # dependencies so that include paths are configured correctly target_link_libraries(arrow_compute_kernels_testing PUBLIC ${ARROW_GTEST_GMOCK}) @@ -31,12 +31,14 @@ add_arrow_test(scalar_cast_test SOURCES scalar_cast_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) # ---------------------------------------------------------------------- # Scalar kernels -set(ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS arrow_compute_kernels_testing) +set(ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS arrow_compute_kernels_testing + arrow_compute_testing) if(ARROW_WITH_UTF8PROC) list(APPEND ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS utf8proc::utf8proc) endif() @@ -52,13 +54,15 @@ add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(scalar_math_test SOURCES @@ -66,7 +70,8 @@ add_arrow_compute_test(scalar_math_test scalar_compare_test.cc scalar_round_arithmetic_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(scalar_utility_test SOURCES @@ -74,7 +79,8 @@ add_arrow_compute_test(scalar_utility_test scalar_set_lookup_test.cc scalar_validity_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute") @@ -101,19 +107,29 @@ add_arrow_compute_test(vector_test vector_run_end_encode_test.cc select_k_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(vector_sort_test SOURCES vector_sort_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(vector_selection_test SOURCES vector_selection_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) + +add_arrow_compute_test(vector_swizzle_test + SOURCES + vector_swizzle_test.cc + EXTRA_LINK_LIBS + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute") add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute") @@ -132,6 +148,7 @@ add_arrow_compute_test(aggregate_test aggregate_test.cc EXTRA_LINK_LIBS arrow_compute_kernels_testing + 
arrow_compute_testing Boost::headers) # ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index 65439af2748b5..e6ad915fd5667 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -32,7 +32,7 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernels/aggregate_internal.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/type.h" #include "arrow/type_traits.h" diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 594bd1fce0b84..2a492f581f53b 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -1037,8 +1037,9 @@ ArrayKernelExec GenerateFloatingPoint(detail::GetTypeId get_id) { // Generate a kernel given a templated functor for integer types // // See "Numeric" above for description of the generator functor -template