diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index cb783dd66c3fb..c698baba2c816 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -32,7 +32,9 @@ on:
       - 'docker-compose.yml'
 
 env:
+  ARCHERY_DEBUG: 1
   ARCHERY_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+  ARCHERY_USE_DOCKER_CLI: 1
 
 concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -59,7 +61,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5.1.0
         with:
-          python-version: '3.12'
+          python-version: '3.9'
       - name: Install pygit2 binary wheel
         run: pip install pygit2 --only-binary pygit2
      - name: Install Archery, Crossbow- and Test Dependencies
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index e8e41f1bcb90c..1d10be3b5bc82 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -53,6 +53,7 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
   ARROW_ENABLE_TIMING_TESTS: OFF
   DOCKER_VOLUME_PREFIX: ".docker/"
 
@@ -94,6 +95,7 @@ jobs:
           cat <> "$GITHUB_OUTPUT"
           {
             "arch": "arm64v8",
+            "archery-use-docker-cli": "0",
             "clang-tools": "10",
             "image": "ubuntu-cpp",
             "llvm": "10",
@@ -118,6 +120,9 @@
         include: ${{ fromJson(needs.docker-targets.outputs.targets) }}
     env:
       ARCH: ${{ matrix.arch }}
+      # By default, use Docker CLI because docker-compose v1 is obsolete,
+      # except where the Docker client version is too old.
+      ARCHERY_USE_DOCKER_CLI: ${{ matrix.archery-use-docker-cli || '1' }}
       ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
       CLANG_TOOLS: ${{ matrix.clang-tools }}
       LLVM: ${{ matrix.llvm }}
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 37fda2e313ae2..8af5832f15948 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -29,6 +29,10 @@ concurrency:
 
 permissions:
   contents: read
 
+env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
+
 jobs:
 
   lint:
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 9c7701f25f756..fe49e275d908d 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -24,6 +24,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   ARROW_ENABLE_TIMING_TESTS: OFF
   DOCKER_VOLUME_PREFIX: ".docker/"
diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml
index 6ec4c3d53d0e3..376c87651d2d0 100644
--- a/.github/workflows/docs_light.yml
+++ b/.github/workflows/docs_light.yml
@@ -33,6 +33,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   ARROW_ENABLE_TIMING_TESTS: OFF
   DOCKER_VOLUME_PREFIX: ".docker/"
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 7fca38528260f..11dc29dcae54e 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -41,6 +41,10 @@ concurrency:
 
 permissions:
   contents: read
 
+env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
+
 jobs:
 
   docker-targets:
@@ -75,12 +79,14 @@ jobs:
           {
             "arch-label": "ARM64",
             "arch": "arm64v8",
+            "archery-use-docker-cli": "0",
             "go": "1.21",
             "runs-on": ["self-hosted", "arm", "linux"]
           },
           {
             "arch-label": "ARM64",
             "arch": "arm64v8",
+            "archery-use-docker-cli": "0",
             "go": "1.22",
             "runs-on": ["self-hosted", "arm", "linux"]
           }
@@ -101,6 +107,9 @@
         include: ${{ fromJson(needs.docker-targets.outputs.targets) }}
     env:
       ARCH: ${{ matrix.arch }}
+      # By default, use Docker CLI because docker-compose v1 is obsolete,
+      # except where the Docker client version is too old.
+      ARCHERY_USE_DOCKER_CLI: ${{ matrix.archery-use-docker-cli || '1' }}
       GO: ${{ matrix.go }}
     steps:
       - name: Checkout Arrow
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 0f186ff6a4527..2c3499c160f9c 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -51,6 +51,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 423f54cd93547..e92d3f4fc5877 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -45,6 +45,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
@@ -56,8 +58,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        jdk: [8, 11, 17, 21]
-        maven: [3.9.5]
+        jdk: [8, 11, 17, 21, 22]
+        maven: [3.9.6]
         image: [java]
     env:
       JDK: ${{ matrix.jdk }}
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 790ffd5c650e0..958216ac7669d 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -45,6 +45,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index dab89da44c861..c9b7d7b742d88 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -38,6 +38,10 @@ concurrency:
 
 permissions:
   contents: read
 
+env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
+
 jobs:
 
   docker:
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 1147ac13e6f93..2db9b17e895b0 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -41,6 +41,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 78677499f3e45..05c85fa6dc2c2 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -51,6 +51,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 311c1c822baf6..ea3e61d55787d 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -53,6 +53,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml
index f55e9e77503c0..3f039315b505a 100644
--- a/.github/workflows/swift.yml
+++ b/.github/workflows/swift.yml
@@ -41,6 +41,8 @@ permissions:
   contents: read
 
 env:
+  ARCHERY_DEBUG: 1
+  ARCHERY_USE_DOCKER_CLI: 1
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
diff --git a/c_glib/example/vala/meson.build b/c_glib/example/vala/meson.build
index 474f0b1e9a51a..b7eb86200ddd6 100644
--- a/c_glib/example/vala/meson.build
+++ b/c_glib/example/vala/meson.build
@@ -18,11 +18,15 @@
 # under the License.
 
 if generate_vapi
+  c_flags = [
+    '-Wno-unused-but-set-variable',
+  ]
+  c_flags = meson.get_compiler('c').get_supported_arguments(c_flags)
   vala_example_executable_kwargs = {
     'c_args': [
       '-I' + project_build_root,
       '-I' + project_source_root,
-    ],
+    ] + c_flags,
     'dependencies': [
       arrow_glib_vapi,
       dependency('gio-2.0'),
diff --git a/c_glib/example/vala/read-file.vala b/c_glib/example/vala/read-file.vala
index a0a06275c4b24..287eddac76352 100644
--- a/c_glib/example/vala/read-file.vala
+++ b/c_glib/example/vala/read-file.vala
@@ -119,8 +119,8 @@ void print_array(GArrow.Array array) {
 void print_record_batch(GArrow.RecordBatch record_batch) {
   var n_columns = record_batch.get_n_columns();
-  for (var nth_column = 0; nth_column < n_columns; nth_column++) {
-    stdout.printf("columns[%" + int64.FORMAT + "](%s): ",
+  for (int nth_column = 0; nth_column < n_columns; nth_column++) {
+    stdout.printf("columns[%d](%s): ",
                   nth_column,
                   record_batch.get_column_name(nth_column));
     var array = record_batch.get_column_data(nth_column);
diff --git a/c_glib/example/vala/read-stream.vala b/c_glib/example/vala/read-stream.vala
index c58dc848930a8..4520c8609bdaf 100644
--- a/c_glib/example/vala/read-stream.vala
+++ b/c_glib/example/vala/read-stream.vala
@@ -119,8 +119,8 @@ void print_array(GArrow.Array array) {
 void print_record_batch(GArrow.RecordBatch record_batch) {
   var n_columns = record_batch.get_n_columns();
-  for (var nth_column = 0; nth_column < n_columns; nth_column++) {
-    stdout.printf("columns[%" + int64.FORMAT + "](%s): ",
+  for (int nth_column = 0; nth_column < n_columns; nth_column++) {
+    stdout.printf("columns[%d](%s): ",
                   nth_column,
                   record_batch.get_column_name(nth_column));
     var array = record_batch.get_column_data(nth_column);
diff --git a/c_glib/test/test-half-float-scalar.rb b/c_glib/test/test-half-float-scalar.rb
index ac41f91ece621..3073d84d796cf 100644
--- a/c_glib/test/test-half-float-scalar.rb
+++ b/c_glib/test/test-half-float-scalar.rb
@@ -41,7 +41,7 @@ def test_equal
   end
 
   def test_to_s
-    assert_equal("[\n #{@half_float}\n]", @scalar.to_s)
+    assert_equal("1.0009765625", @scalar.to_s)
   end
 
   def test_value
diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt
index 6899f9c36a7f6..0a356d5722c42 100644
--- a/ci/conda_env_sphinx.txt
+++ b/ci/conda_env_sphinx.txt
@@ -29,5 +29,8 @@ sphinx-copybutton
 sphinxcontrib-jquery
 sphinx==6.2
 # Requirement for doctest-cython
-pytest-cython
+# Needs upper pin of 0.3.0, see:
+# https://github.com/lgpage/pytest-cython/issues/67
+# With 0.3.* bug fix release, the pin can be removed
+pytest-cython==0.2.2
 pandas
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 3e3b7ac3a6d99..124256378b287 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -101,6 +101,7 @@ RUN apt-get update -y -q && \
         libutf8proc-dev \
         libxml2-dev \
         libzstd-dev \
+        lld \
         make \
         ninja-build \
         nlohmann-json3-dev \
@@ -164,6 +165,7 @@ ENV absl_SOURCE=BUNDLED \
     ARROW_SUBSTRAIT=ON \
     ARROW_USE_ASAN=OFF \
     ARROW_USE_CCACHE=ON \
+    ARROW_USE_LLD=ON \
     ARROW_USE_UBSAN=OFF \
     ARROW_WITH_BROTLI=ON \
     ARROW_WITH_BZ2=ON \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index e8416c1378a9a..eb189841cd344 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -65,6 +65,7 @@ RUN latest_system_llvm=14 && \
 RUN apt-get update -y -q && \
     apt-get install -y -q --no-install-recommends \
         autoconf \
+        bzip2 \
         ca-certificates \
         ccache \
         cmake \
@@ -115,10 +116,20 @@ RUN apt-get update -y -q && \
         rapidjson-dev \
         rsync \
         tzdata \
-        wget && \
+        wget \
+        xz-utils && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists*
 
+# install emscripten using EMSDK
+ARG emscripten_version="3.1.45"
+RUN cd ~ && git clone https://github.com/emscripten-core/emsdk.git && \
+    cd emsdk && \
+    ./emsdk install ${emscripten_version} && \
+    ./emsdk activate ${emscripten_version} && \
+    echo "Installed emsdk to:" ~/emsdk
+
+
 ARG gcc_version=""
 RUN if [ "${gcc_version}" = "" ]; then \
@@ -151,6 +162,9 @@ RUN if [ "${gcc_version}" = "" ]; then \
       update-alternatives --set c++ /usr/bin/g++; \
     fi
 
+# make sure zlib is cached in the EMSDK folder
+RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib
+
 COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index 1e09924a5e576..e28ceae8801f0 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -92,112 +92,133 @@ esac
 
 mkdir -p ${build_dir}
 pushd ${build_dir}
 
-cmake \
-  -Dabsl_SOURCE=${absl_SOURCE:-} \
-  -DARROW_ACERO=${ARROW_ACERO:-OFF} \
-  -DARROW_AZURE=${ARROW_AZURE:-OFF} \
-  -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
-  -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \
-  -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
-  -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
-  -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
-  -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
-  -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
-  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
-  -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
-  -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
-  -DARROW_CSV=${ARROW_CSV:-ON} \
-  -DARROW_CUDA=${ARROW_CUDA:-OFF} \
-  -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
-  -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \
-  -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \
-  -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \
-  -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \
-  -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \
-  -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
-  -DARROW_DATASET=${ARROW_DATASET:-OFF} \
-  -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-  -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
-  -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
-  -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
-  -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
-  -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
-  -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \
-  -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
-  -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \
-  -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
-  -DARROW_GCS=${ARROW_GCS:-OFF} \
-  -DARROW_HDFS=${ARROW_HDFS:-ON} \
-  -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
-  -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
-  -DARROW_JSON=${ARROW_JSON:-ON} \
-  -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
-  -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
-  -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
-  -DARROW_ORC=${ARROW_ORC:-OFF} \
-  -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
-  -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
-  -DARROW_S3=${ARROW_S3:-OFF} \
-  -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
-  -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
-  -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
-  -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
-  -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
-  -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
-  -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
-  -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
-  -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
-  -DARROW_USE_MOLD=${ARROW_USE_MOLD:-OFF} \
-  -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
-  -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
-  -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
-  -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
-  -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
-  -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
-  -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
-  -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
-  -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \
-  -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \
-  -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
-  -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \
-  -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
-  -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
-  -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
-  -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
-  -DAzure_SOURCE=${Azure_SOURCE:-} \
-  -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
-  -DBOOST_SOURCE=${BOOST_SOURCE:-} \
-  -DBrotli_SOURCE=${Brotli_SOURCE:-} \
-  -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
-  -Dc-ares_SOURCE=${cares_SOURCE:-} \
-  -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
-  -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
-  -DCMAKE_C_FLAGS="${CFLAGS:-}" \
-  -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
-  -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
-  -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
-  -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
-  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
-  -Dgflags_SOURCE=${gflags_SOURCE:-} \
-  -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
-  -DgRPC_SOURCE=${gRPC_SOURCE:-} \
-  -DGTest_SOURCE=${GTest_SOURCE:-} \
-  -Dlz4_SOURCE=${lz4_SOURCE:-} \
-  -DORC_SOURCE=${ORC_SOURCE:-} \
-  -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
-  -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
-  -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
-  -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
-  -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
-  -Dre2_SOURCE=${re2_SOURCE:-} \
-  -DSnappy_SOURCE=${Snappy_SOURCE:-} \
-  -DThrift_SOURCE=${Thrift_SOURCE:-} \
-  -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
-  -Dzstd_SOURCE=${zstd_SOURCE:-} \
-  -Dxsimd_SOURCE=${xsimd_SOURCE:-} \
-  -G "${CMAKE_GENERATOR:-Ninja}" \
-  ${ARROW_CMAKE_ARGS} \
-  ${source_dir}
+if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
+  if [ "${UBUNTU}" = "20.04" ]; then
+    echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04"
+    exit -1
+  fi
+  n_jobs=2 # Emscripten build fails on docker unless this is set really low
+  source ~/emsdk/emsdk_env.sh
+  emcmake cmake \
+    --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \
+    -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
+    -DCMAKE_C_FLAGS="${CFLAGS:-}" \
+    -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
+    -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
+    -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
+    -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
+    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
+    ${ARROW_CMAKE_ARGS} \
+    ${source_dir}
+else
+  cmake \
+    -Dabsl_SOURCE=${absl_SOURCE:-} \
+    -DARROW_ACERO=${ARROW_ACERO:-OFF} \
+    -DARROW_AZURE=${ARROW_AZURE:-OFF} \
+    -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
+    -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \
+    -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
+    -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
+    -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
+    -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
+    -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
+    -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
+    -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
+    -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
+    -DARROW_CSV=${ARROW_CSV:-ON} \
+    -DARROW_CUDA=${ARROW_CUDA:-OFF} \
+    -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
+    -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \
+    -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \
+    -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \
+    -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \
+    -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \
+    -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
+    -DARROW_DATASET=${ARROW_DATASET:-OFF} \
+    -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
+    -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
+    -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
+    -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
+    -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
+    -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
+    -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \
+    -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
+    -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \
+    -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
+    -DARROW_GCS=${ARROW_GCS:-OFF} \
+    -DARROW_HDFS=${ARROW_HDFS:-ON} \
+    -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
+    -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
+    -DARROW_JSON=${ARROW_JSON:-ON} \
+    -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
+    -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
+    -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
+    -DARROW_ORC=${ARROW_ORC:-OFF} \
+    -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
+    -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
+    -DARROW_S3=${ARROW_S3:-OFF} \
+    -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
+    -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
+    -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
+    -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
+    -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
+    -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
+    -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
+    -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
+    -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
+    -DARROW_USE_LLD=${ARROW_USE_LLD:-OFF} \
+    -DARROW_USE_MOLD=${ARROW_USE_MOLD:-OFF} \
+    -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
+    -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
+    -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
+    -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
+    -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
+    -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
+    -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
+    -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
+    -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \
+    -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \
+    -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
+    -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \
+    -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
+    -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
+    -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
+    -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
+    -DAzure_SOURCE=${Azure_SOURCE:-} \
+    -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
+    -DBOOST_SOURCE=${BOOST_SOURCE:-} \
+    -DBrotli_SOURCE=${Brotli_SOURCE:-} \
+    -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
+    -Dc-ares_SOURCE=${cares_SOURCE:-} \
+    -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
+    -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
+    -DCMAKE_C_FLAGS="${CFLAGS:-}" \
+    -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
+    -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
+    -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
+    -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
+    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
+    -Dgflags_SOURCE=${gflags_SOURCE:-} \
+    -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
+    -DgRPC_SOURCE=${gRPC_SOURCE:-} \
+    -DGTest_SOURCE=${GTest_SOURCE:-} \
+    -Dlz4_SOURCE=${lz4_SOURCE:-} \
+    -DORC_SOURCE=${ORC_SOURCE:-} \
+    -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
+    -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
+    -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
+    -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
+    -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
+    -Dre2_SOURCE=${re2_SOURCE:-} \
+    -DSnappy_SOURCE=${Snappy_SOURCE:-} \
+    -DThrift_SOURCE=${Thrift_SOURCE:-} \
+    -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
+    -Dzstd_SOURCE=${zstd_SOURCE:-} \
+    -Dxsimd_SOURCE=${xsimd_SOURCE:-} \
+    -G "${CMAKE_GENERATOR:-Ninja}" \
+    ${ARROW_CMAKE_ARGS} \
+    ${source_dir}
+fi
 
 export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$[${n_jobs} + 1]}
 time cmake --build . --target install
diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh
index f388825fd0a98..2c640f2c1fb6a 100755
--- a/ci/scripts/cpp_test.sh
+++ b/ci/scripts/cpp_test.sh
@@ -80,6 +80,10 @@ case "$(uname)" in
     ;;
 esac
 
+if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
+  n_jobs=1 # avoid spurious fails on emscripten due to loading too many big executables
+fi
+
 pushd ${build_dir}
 
 if [ -z "${PYTHON}" ] && ! which python > /dev/null 2>&1; then
diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh
old mode 100644
new mode 100755
diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json
index 9d99b3b2a79e0..13d1241990c31 100644
--- a/cpp/CMakePresets.json
+++ b/cpp/CMakePresets.json
@@ -46,6 +46,32 @@
         "CMAKE_BUILD_TYPE": "RelWithDebInfo"
       }
     },
+    {
+      "name": "features-emscripten",
+      "hidden": true,
+      "cacheVariables": {
+        "ARROW_ACERO": "ON",
+        "ARROW_BUILD_SHARED": "OFF",
+        "ARROW_BUILD_STATIC": "ON",
+        "ARROW_CUDA": "OFF",
+        "ARROW_DEPENDENCY_SOURCE": "BUNDLED",
+        "ARROW_DEPENDENCY_USE_SHARED": "OFF",
+        "ARROW_ENABLE_THREADING": "OFF",
+        "ARROW_FLIGHT": "OFF",
+        "ARROW_IPC": "ON",
+        "ARROW_JEMALLOC": "OFF",
+        "ARROW_MIMALLOC": "OFF",
+        "ARROW_ORC": "ON",
+        "ARROW_RUNTIME_SIMD_LEVEL": "NONE",
+        "ARROW_S3": "OFF",
+        "ARROW_SIMD_LEVEL": "NONE",
+        "ARROW_SUBSTRAIT": "ON",
+        "ARROW_WITH_BROTLI": "ON",
+        "ARROW_WITH_OPENTELEMETRY": "OFF",
+        "ARROW_WITH_SNAPPY": "ON",
+        "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN"
+      }
+    },
     {
       "name": "features-minimal",
       "hidden": true,
@@ -341,6 +367,24 @@
       "displayName": "Release build with CUDA integration",
       "cacheVariables": {}
     },
+    {
+      "name": "ninja-debug-emscripten",
+      "inherits": [
+        "features-emscripten",
+        "base-debug"
+      ],
+      "displayName": "Debug build which builds an Emscripten library",
+      "cacheVariables": {}
+    },
+    {
+      "name": "ninja-release-emscripten",
+      "inherits": [
+        "features-emscripten",
+        "base-release"
+      ],
+      "displayName": "Release build which builds an Emscripten library",
+      "cacheVariables": {}
+    },
     {
       "name": "ninja-release-flight",
       "inherits": [
@@ -447,4 +491,4 @@
     }
   }
 ]
-}
+}
\ No newline at end of file
diff --git a/cpp/build-support/emscripten-test-init.js b/cpp/build-support/emscripten-test-init.js
new file mode 100644
index 0000000000000..bbb542a29f021
--- /dev/null
+++ b/cpp/build-support/emscripten-test-init.js
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+var Module = {
+};
+
+// make sure tests can access the current parquet test data files
+Module.preRun = () => {ENV.PARQUET_TEST_DATA = process.env.PARQUET_TEST_DATA;
+                       ENV.ARROW_TEST_DATA = process.env.ARROW_TEST_DATA;
+};
\ No newline at end of file
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 7a45e9cca59de..e7523add27223 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -760,8 +760,8 @@ function(ADD_TEST_CASE REL_TEST_NAME)
              valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \
              --num-callers=500 --leak-check=full --leak-check-heuristics=stdstring \
              --error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}")
-  elseif(WIN32)
-    add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS})
+  elseif(WIN32 OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME} ${ARG_TEST_ARGUMENTS})
   else()
     add_test(${TEST_NAME}
              ${BUILD_SUPPORT_DIR}/run-test.sh
diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index 5b8bcb3ac6965..dc0e5da63adb7 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -170,6 +170,8 @@ takes precedence over ccache if a storage backend is configured" ON)
 
   define_option(ARROW_USE_LD_GOLD "Use ld.gold for linking on Linux (if available)" OFF)
 
+  define_option(ARROW_USE_LLD "Use the LLVM lld for linking (if available)" OFF)
+
   define_option(ARROW_USE_MOLD "Use mold for linking on Linux (if available)" OFF)
 
   define_option(ARROW_USE_PRECOMPILED_HEADERS "Use precompiled headers when compiling"
diff --git a/cpp/cmake_modules/FindorcAlt.cmake b/cpp/cmake_modules/FindorcAlt.cmake
index dc3b978cf4037..289416678ad39 100644
--- a/cpp/cmake_modules/FindorcAlt.cmake
+++ b/cpp/cmake_modules/FindorcAlt.cmake
@@ -29,6 +29,7 @@ endif()
 
 find_package(orc ${find_package_args})
 if(orc_FOUND)
   set(orcAlt_FOUND TRUE)
+  set(orcAlt_VERSION ${orc_VERSION})
   return()
 endif()
 
@@ -51,8 +52,17 @@ else()
             NAMES orc/orc-config.hh
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
 endif()
+if(ORC_INCLUDE_DIR)
+  file(READ "${ORC_INCLUDE_DIR}/orc/orc-config.hh" ORC_CONFIG_HH_CONTENT)
+  string(REGEX MATCH "#define ORC_VERSION \"[0-9.]+\"" ORC_VERSION_DEFINITION
+               "${ORC_CONFIG_HH_CONTENT}")
+  string(REGEX MATCH "[0-9.]+" ORC_VERSION "${ORC_VERSION_DEFINITION}")
+endif()
 
-find_package_handle_standard_args(orcAlt REQUIRED_VARS ORC_STATIC_LIB ORC_INCLUDE_DIR)
+find_package_handle_standard_args(
+  orcAlt
+  REQUIRED_VARS ORC_STATIC_LIB ORC_INCLUDE_DIR
+  VERSION_VAR ORC_VERSION)
 
 if(orcAlt_FOUND)
   if(NOT TARGET orc::orc)
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index 90decb4224ec6..d56609c123968 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -24,7 +24,9 @@ include(CheckCXXSourceCompiles)
 
 message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}")
 
 if(NOT DEFINED ARROW_CPU_FLAG)
-  if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64")
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    set(ARROW_CPU_FLAG "emscripten")
+  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64")
     set(ARROW_CPU_FLAG "x86")
   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64")
     set(ARROW_CPU_FLAG "aarch64")
@@ -312,7 +314,12 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation")
-    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32")
+    if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+      # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors
+      set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32")
+    else()
+      set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32")
+    endif()
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand")
@@ -633,19 +640,23 @@ if(NOT WIN32 AND NOT APPLE)
     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
       if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12.1.0")
         set(MOLD_LINKER_FLAGS "-fuse-ld=mold")
-        message(STATUS "Using optional mold linker")
       else()
         message(STATUS "Need GCC 12.1.0 or later to use mold linker: ${CMAKE_CXX_COMPILER_VERSION}"
         )
       endif()
     elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-      set(MOLD_LINKER_FLAGS "--ld-path=${LD_MOLD}")
-      message(STATUS "Using optional mold linker")
+      if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12.0.0")
+        set(MOLD_LINKER_FLAGS "--ld-path=${LD_MOLD}")
+      else()
+        message(STATUS "Need clang 12.0.0 or later to use mold linker: ${CMAKE_CXX_COMPILER_VERSION}"
+        )
+      endif()
     else()
       message(STATUS "Using the default linker because compiler doesn't support mold: ${CMAKE_CXX_COMPILER_ID}"
       )
     endif()
     if(MOLD_LINKER_FLAGS)
+      message(STATUS "Using optional mold linker")
       string(APPEND CMAKE_EXE_LINKER_FLAGS " ${MOLD_LINKER_FLAGS}")
       string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${MOLD_LINKER_FLAGS}")
      string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${MOLD_LINKER_FLAGS}")
@@ -656,6 +667,39 @@ if(NOT WIN32 AND NOT APPLE)
   endif()
 endif()
 
+if(ARROW_USE_LLD)
+  find_program(LD_LLD ld.lld)
+  if(LD_LLD)
+    unset(LLD_LINKER_FLAGS)
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+      if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "9.1.0")
+        set(LLD_LINKER_FLAGS "-fuse-ld=lld")
+      else()
+        message(STATUS "Need GCC 9.1.0 or later to use LLD linker: ${CMAKE_CXX_COMPILER_VERSION}"
+        )
+      endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+      if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12.0.0")
+        set(LLD_LINKER_FLAGS "--ld-path=${LD_LLD}")
+      else()
+        message(STATUS "Need clang 12.0.0 or later to use LLD linker: ${CMAKE_CXX_COMPILER_VERSION}"
+        )
+      endif()
+    else()
+      message(STATUS "Using the default linker because compiler doesn't support LLD: ${CMAKE_CXX_COMPILER_ID}"
+      )
+    endif()
+    if(LLD_LINKER_FLAGS)
+      message(STATUS "Using optional LLVM LLD linker")
+      string(APPEND CMAKE_EXE_LINKER_FLAGS " ${LLD_LINKER_FLAGS}")
+      string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${LLD_LINKER_FLAGS}")
+      string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${LLD_LINKER_FLAGS}")
+    else()
+      message(STATUS "Using the default linker because the LLD isn't supported")
+    endif()
+  endif()
+endif()
+
 # compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE= .')
 # For all builds:
 # For CMAKE_BUILD_TYPE=Debug
@@ -692,17 +736,36 @@ if(NOT MSVC)
 set(C_DEBUG_FLAGS "")
 set(CXX_DEBUG_FLAGS "")
 if(NOT MSVC)
-  if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O")
-    string(APPEND C_DEBUG_FLAGS " -O0")
-  endif()
-  if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
-    string(APPEND CXX_DEBUG_FLAGS " -O0")
-  endif()
-  if(ARROW_GGDB_DEBUG)
-    string(APPEND C_DEBUG_FLAGS " -ggdb")
-    string(APPEND CXX_DEBUG_FLAGS " -ggdb")
-    string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb")
-    string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb")
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    # with -g it uses DWARF debug info, which is really slow to build
+    # on emscripten (and uses tons of memory)
+    string(REPLACE "-g" " " CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
+    string(REPLACE "-g" " " CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
+    string(APPEND C_DEBUG_FLAGS " -g2")
+    string(APPEND CXX_DEBUG_FLAGS " -g2")
+    string(APPEND C_RELWITHDEBINFO_FLAGS " -g2")
+    string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2")
+    # without -O1, emscripten executables are *MASSIVE*. Don't use -O0
+    if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O")
+      string(APPEND C_DEBUG_FLAGS " -O1")
+    endif()
+    if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
+      string(APPEND CXX_DEBUG_FLAGS " -O1")
+    endif()
+  else()
+    if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O")
+      string(APPEND C_DEBUG_FLAGS " -O0")
+    endif()
+    if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
+      string(APPEND CXX_DEBUG_FLAGS " -O0")
+    endif()
+
+    if(ARROW_GGDB_DEBUG)
+      string(APPEND C_DEBUG_FLAGS " -ggdb")
+      string(APPEND CXX_DEBUG_FLAGS " -ggdb")
+      string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb")
+      string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb")
+    endif()
   endif()
 endif()
@@ -733,3 +796,40 @@ if(MSVC)
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}")
   endif()
 endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  # flags are:
+  # 1) We force *everything* to build as position independent
+  # 2) And with support for C++ exceptions
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
+  # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow)
+  set(CMAKE_CXX_FLAGS
+      "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator")
+
+  # flags for creating shared libraries (only used in pyarrow, because
+  # Emscripten builds libarrow as static)
+  # flags are:
+  # 1) Tell it to use JavaScript / WebAssembly 64 bit number support.
+  # 2) Tell it to build with support for C++ exceptions
+  # 3) Skip linker flags error which happens with -soname parameter
+  set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
+  set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
+      "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+  set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS
+      "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+  set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+  if(ARROW_TESTING)
+    # flags for building test executables for use in node
+    if("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
+      set(CMAKE_EXE_LINKER_FLAGS
+          "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
+      )
+    else()
+      set(CMAKE_EXE_LINKER_FLAGS
+          "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
      )
+    endif()
+  else()
+    set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH")
+  endif()
+endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index ad7344b09dd4e..7d54ccccf7c19 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -976,6 +976,23 @@ set(EP_COMMON_CMAKE_ARGS
     -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}
     -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE})
 
+# if building with a toolchain file, pass that through
+if(CMAKE_TOOLCHAIN_FILE)
+  list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
+endif()
+
+# and crosscompiling emulator (for try_run() )
+if(CMAKE_CROSSCOMPILING_EMULATOR)
+  string(REPLACE ";" ${EP_LIST_SEPARATOR} EP_CMAKE_CROSSCOMPILING_EMULATOR
+                 "${CMAKE_CROSSCOMPILING_EMULATOR}")
+  list(APPEND EP_COMMON_CMAKE_ARGS
+       -DCMAKE_CROSSCOMPILING_EMULATOR=${EP_CMAKE_CROSSCOMPILING_EMULATOR})
+endif()
+
+if(CMAKE_PROJECT_INCLUDE)
+  list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_PROJECT_INCLUDE=${CMAKE_PROJECT_INCLUDE})
+endif()
+
 # Enable s/ccache if set by parent.
 if(CMAKE_C_COMPILER_LAUNCHER AND CMAKE_CXX_COMPILER_LAUNCHER)
   list(APPEND EP_COMMON_CMAKE_ARGS
@@ -1349,6 +1366,14 @@ macro(build_snappy)
     set(SNAPPY_PATCH_COMMAND)
   endif()
 
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    # ignore linker flag errors, as Snappy sets
+    # -Werror -Wall, and Emscripten doesn't support -soname
+    list(APPEND SNAPPY_CMAKE_ARGS
+         "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}"
+         "-Wno-error=linkflags")
+  endif()
+
   externalproject_add(snappy_ep
                       ${EP_COMMON_OPTIONS}
                       BUILD_IN_SOURCE 1
@@ -1394,6 +1419,7 @@ macro(build_brotli)
   message(STATUS "Building brotli from source")
   set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install")
   set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include")
+  set(BROTLI_LIB_DIR "${BROTLI_PREFIX}/lib")
   set(BROTLI_STATIC_LIBRARY_ENC
       "${BROTLI_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
   )
@@ -1405,6 +1431,26 @@ macro(build_brotli)
   )
   set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}")
 
+  set(BROTLI_EP_OPTIONS)
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    # "cmake install" is disabled for Brotli on Emscripten, so the
+    # default INSTALL_COMMAND fails. We need to disable the default
+    # INSTALL_COMMAND.
+    list(APPEND
+         BROTLI_EP_OPTIONS
+         INSTALL_COMMAND
+         ${CMAKE_COMMAND}
+         -E
+         true)
+
+    set(BROTLI_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build)
+    set(BROTLI_BUILD_LIBS
+        "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+  endif()
+
   externalproject_add(brotli_ep
                       ${EP_COMMON_OPTIONS}
                       URL ${BROTLI_SOURCE_URL}
@@ -1414,7 +1460,20 @@ macro(build_brotli)
                                        "${BROTLI_STATIC_LIBRARY_COMMON}"
                                        ${BROTLI_BUILD_BYPRODUCTS}
                       CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
-                      STEP_TARGETS headers_copy)
+                      STEP_TARGETS headers_copy ${BROTLI_EP_OPTIONS})
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    # Copy the libraries to our install directory manually.
+    set(BROTLI_BUILD_INCLUDE_DIR
+        ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli)
+    add_custom_command(TARGET brotli_ep
+                       POST_BUILD
+                       COMMAND ${CMAKE_COMMAND} -E copy_if_different
+                               ${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX}
+                               ${BROTLI_LIB_DIR}
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory
+                               ${BROTLI_BUILD_INCLUDE_DIR} ${BROTLI_INCLUDE_DIR}/brotli)
+  endif()
 
   file(MAKE_DIRECTORY "${BROTLI_INCLUDE_DIR}")
@@ -1657,6 +1716,9 @@ macro(build_thrift)
   if(DEFINED BOOST_ROOT)
     list(APPEND THRIFT_CMAKE_ARGS "-DBOOST_ROOT=${BOOST_ROOT}")
   endif()
+  if(DEFINED Boost_INCLUDE_DIR)
+    list(APPEND THRIFT_CMAKE_ARGS "-DBoost_INCLUDE_DIR=${Boost_INCLUDE_DIR}")
+  endif()
   if(DEFINED Boost_NAMESPACE)
     list(APPEND THRIFT_CMAKE_ARGS "-DBoost_NAMESPACE=${Boost_NAMESPACE}")
   endif()
@@ -1798,6 +1860,36 @@ macro(build_protobuf)
   add_dependencies(arrow::protobuf::protoc protobuf_ep)
 
   list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf)
+
+  if(CMAKE_CROSSCOMPILING)
+    # If we are cross compiling, we need to build protoc for the host
+    # system also, as it is used when building Arrow
+    # We do this by calling CMake as a child process
+    # with CXXFLAGS / CFLAGS and CMake flags cleared.
+    set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install")
+    set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc")
+
+    set(PROTOBUF_HOST_CMAKE_ARGS
+        "-DCMAKE_CXX_FLAGS="
+        "-DCMAKE_C_FLAGS="
+        "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}"
+        -Dprotobuf_BUILD_TESTS=OFF
+        -Dprotobuf_DEBUG_POSTFIX=)
+
+    externalproject_add(protobuf_ep_host
+                        ${EP_COMMON_OPTIONS}
+                        CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS}
+                        BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}"
+                        BUILD_IN_SOURCE 1
+                        URL ${PROTOBUF_SOURCE_URL}
+                        URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}")
+
+    add_executable(arrow::protobuf::host_protoc IMPORTED)
+    set_target_properties(arrow::protobuf::host_protoc
+                          PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}")
+
+    add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host)
+  endif()
 endmacro()
 
 if(ARROW_WITH_PROTOBUF)
@@ -1862,7 +1954,11 @@ if(ARROW_WITH_PROTOBUF)
   else()
     set(ARROW_PROTOBUF_LIBPROTOC protobuf::libprotoc)
   endif()
-  if(TARGET arrow::protobuf::protoc)
+  if(TARGET arrow::protobuf::host_protoc)
+    # make sure host protoc is used for compiling protobuf files
+    # during build of e.g. orc
+    set(ARROW_PROTOBUF_PROTOC arrow::protobuf::host_protoc)
+  elseif(TARGET arrow::protobuf::protoc)
     set(ARROW_PROTOBUF_PROTOC arrow::protobuf::protoc)
   else()
     if(NOT TARGET protobuf::protoc)
@@ -2164,8 +2260,15 @@ function(build_gtest)
   if(APPLE)
     string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" " -Wno-ignored-attributes")
   endif()
-  set(BUILD_SHARED_LIBS ON)
-  set(BUILD_STATIC_LIBS OFF)
+  # If we're building static libs for Emscripten, we need to build *everything* as
+  # static libs.
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    set(BUILD_SHARED_LIBS OFF)
+    set(BUILD_STATIC_LIBS ON)
+  else()
+    set(BUILD_SHARED_LIBS ON)
+    set(BUILD_STATIC_LIBS OFF)
+  endif()
   # We need to use "cache" variable to override the default
   # INSTALL_GTEST option by this value. See also:
   # https://cmake.org/cmake/help/latest/policy/CMP0077.html
@@ -2403,37 +2506,58 @@ endif()
 
 macro(build_zlib)
   message(STATUS "Building ZLIB from source")
-  set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
-  if(MSVC)
-    if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
-      set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib)
-    else()
-      set(ZLIB_STATIC_LIB_NAME zlibstatic.lib)
+
+  # ensure zlib is built with -fpic
+  # and make sure that the build finds the version in Emscripten ports
+  # - n.b. the actual linking happens because -sUSE_ZLIB=1 is
+  # set in the compiler variables, but cmake expects
+  # it to exist at configuration time if we aren't building it as
+  # bundled. We need to do this for all packages
+  # not just zlib as some depend on zlib, but we don't rebuild
+  # if it exists already
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    # build zlib using Emscripten ports
+    if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a)
+      execute_process(COMMAND embuilder --pic --force build zlib)
     endif()
+    add_library(ZLIB::ZLIB STATIC IMPORTED)
+    set_property(TARGET ZLIB::ZLIB
+                 PROPERTY IMPORTED_LOCATION
+                          "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a")
+    list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
   else()
-    set(ZLIB_STATIC_LIB_NAME libz.a)
-  endif()
-  set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}")
-  set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}")
+    set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
+    if(MSVC)
+      if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
+        set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib)
+      else()
+        set(ZLIB_STATIC_LIB_NAME zlibstatic.lib)
+      endif()
+    else()
+      set(ZLIB_STATIC_LIB_NAME libz.a)
+    endif()
+    set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}")
+    set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}")
 
-  externalproject_add(zlib_ep
-                      ${EP_COMMON_OPTIONS}
-                      URL ${ZLIB_SOURCE_URL}
-                      URL_HASH "SHA256=${ARROW_ZLIB_BUILD_SHA256_CHECKSUM}"
-                      BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}"
-                      CMAKE_ARGS ${ZLIB_CMAKE_ARGS})
+    externalproject_add(zlib_ep
+                        ${EP_COMMON_OPTIONS}
+                        URL ${ZLIB_SOURCE_URL}
+                        URL_HASH "SHA256=${ARROW_ZLIB_BUILD_SHA256_CHECKSUM}"
+                        BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}"
+                        CMAKE_ARGS ${ZLIB_CMAKE_ARGS})
 
-  file(MAKE_DIRECTORY "${ZLIB_PREFIX}/include")
+    file(MAKE_DIRECTORY "${ZLIB_PREFIX}/include")
 
-  add_library(ZLIB::ZLIB STATIC IMPORTED)
-  set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB})
-  set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include")
-  set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES})
-  target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}")
+    add_library(ZLIB::ZLIB STATIC IMPORTED)
+    set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB})
+    set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include")
+    set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES})
+    target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}")
 
-  add_dependencies(ZLIB::ZLIB zlib_ep)
+    add_dependencies(ZLIB::ZLIB zlib_ep)
+    list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
+  endif()
 
-  list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
   set(ZLIB_VENDORED TRUE)
 endmacro()
@@ -4390,6 +4514,10 @@ macro(build_orc)
                       "-DPROTOBUF_LIBRARY=$"
                       "-DPROTOC_LIBRARY=$"
                       "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
+                      "-DSNAPPY_LIBRARY=$"
+                      "-DLZ4_LIBRARY=$"
+                      "-DLZ4_STATIC_LIBRARY=$"
+                      "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
                       "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
                       "-DZSTD_HOME=${ORC_ZSTD_ROOT}"
                       "-DZSTD_INCLUDE_DIR=$"
@@ -4431,6 +4559,15 @@ macro(build_orc)
     endif()
     target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
   endif()
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9")
+      target_link_libraries(orc::orc INTERFACE stdc++fs)
+    endif()
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8")
+      target_link_libraries(orc::orc INTERFACE c++fs)
+    endif()
+  endif()
 
   add_dependencies(orc::orc orc_ep)
@@ -4440,6 +4577,11 @@ endmacro()
 if(ARROW_ORC)
   resolve_dependency(orc HAVE_ALT TRUE)
   target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
+  if(ORC_VENDORED)
+    set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION})
+  else()
+    set(ARROW_ORC_VERSION ${orcAlt_VERSION})
+  endif()
   message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
   message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
 endif()
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 617bfedabf373..026bb5c77e066 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -899,6 +899,10 @@ if(ARROW_ORC)
       adapters/orc/util.cc)
   foreach(ARROW_ORC_TARGET ${ARROW_ORC_TARGETS})
     target_link_libraries(${ARROW_ORC_TARGET} PRIVATE orc::orc)
+    if(ARROW_ORC_VERSION VERSION_LESS "2.0.0")
+      target_compile_definitions(${ARROW_ORC_TARGET}
+                                 PRIVATE ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK)
+    endif()
   endforeach()
 else()
   set(ARROW_ORC_TARGET_SHARED)
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 127ec49ba990f..98784450b3cce 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -18,13 +18,16 @@
 #include "arrow/adapters/orc/adapter.h"
 
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 
+#ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK
+#include
+#endif
+
 #include "arrow/adapters/orc/util.h"
 #include "arrow/builder.h"
 #include "arrow/io/interfaces.h"
@@ -183,11 +186,9 @@ liborc::RowReaderOptions DefaultRowReaderOptions() {
   return options;
 }
 
+#ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK
 // Proactively check timezone database availability for ORC versions older than 2.0.0
 Status CheckTimeZoneDatabaseAvailability() {
-  if (GetOrcMajorVersion() >= 2) {
-    return Status::OK();
-  }
   auto tz_dir = std::getenv("TZDIR");
   bool is_tzdb_avaiable = tz_dir != nullptr
                               ? std::filesystem::exists(tz_dir)
@@ -200,6 +201,7 @@ Status CheckTimeZoneDatabaseAvailability() {
   }
   return Status::OK();
 }
+#endif
 
 }  // namespace
@@ -559,7 +561,9 @@ ORCFileReader::~ORCFileReader() {}
 
 Result> ORCFileReader::Open(
     const std::shared_ptr& file, MemoryPool* pool) {
+#ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK
   RETURN_NOT_OK(CheckTimeZoneDatabaseAvailability());
+#endif
   auto result = std::unique_ptr(new ORCFileReader());
   RETURN_NOT_OK(result->impl_->Open(file, pool));
   return std::move(result);
@@ -826,7 +830,9 @@ ORCFileWriter::ORCFileWriter() { impl_.reset(new ORCFileWriter::Impl()); }
 
 Result> ORCFileWriter::Open(
     io::OutputStream* output_stream, const WriteOptions& writer_options) {
+#ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK
   RETURN_NOT_OK(CheckTimeZoneDatabaseAvailability());
+#endif
   std::unique_ptr result =
       std::unique_ptr(new ORCFileWriter());
   Status status = result->impl_->Open(output_stream, writer_options);
diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc
index 4ae9e3d6dcbfc..22d6d1fc5ae92 100644
--- a/cpp/src/arrow/array/array_dict_test.cc
+++ b/cpp/src/arrow/array/array_dict_test.cc
@@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) {
   arr = std::make_shared(dict_type, indices, MakeArray(invalid_data));
   ASSERT_RAISES(Invalid, arr->ValidateFull());
 
-#if !defined(__APPLE__) && !defined(ARROW_VALGRIND)
+#if !defined(__APPLE__) && !defined(ARROW_VALGRIND) && !defined(__EMSCRIPTEN__)
   // GH-35712: ASSERT_DEATH would make testing slow on macOS.
   ASSERT_DEATH(
       {
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index bb632e2eb912d..e983b47e39dc4 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -44,6 +44,7 @@
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/bitmap_reader.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
@@ -59,6 +60,7 @@
 using internal::BitmapReader;
 using internal::BitmapUInt64Reader;
 using internal::checked_cast;
 using internal::OptionalBitmapEquals;
+using util::Float16;
 
 // ----------------------------------------------------------------------
 // Public method implementations
@@ -95,6 +97,30 @@ struct FloatingEquality {
   const T epsilon;
 };
 
+// For half-float equality.
+template
+struct FloatingEquality {
+  explicit FloatingEquality(const EqualOptions& options)
+      : epsilon(static_cast(options.atol())) {}
+
+  bool operator()(uint16_t x, uint16_t y) const {
+    Float16 f_x = Float16::FromBits(x);
+    Float16 f_y = Float16::FromBits(y);
+    if (x == y) {
+      return Flags::signed_zeros_equal || (f_x.signbit() == f_y.signbit());
+    }
+    if (Flags::nans_equal && f_x.is_nan() && f_y.is_nan()) {
+      return true;
+    }
+    if (Flags::approximate && (fabs(f_x.ToFloat() - f_y.ToFloat()) <= epsilon)) {
+      return true;
+    }
+    return false;
+  }
+
+  const float epsilon;
+};
+
 template
 struct FloatingEqualityDispatcher {
   const EqualOptions& options;
@@ -259,6 +285,8 @@ class RangeDataEqualsImpl {
   Status Visit(const DoubleType& type) { return CompareFloating(type); }
 
+  Status Visit(const HalfFloatType& type) { return CompareFloating(type); }
+
   // Also matches StringType
   Status Visit(const BinaryType& type) { return CompareBinary(type); }
@@ -863,6 +891,8 @@ class ScalarEqualsVisitor {
   Status Visit(const DoubleScalar& left) { return CompareFloating(left); }
 
+  Status Visit(const HalfFloatScalar& left) { return CompareFloating(left); }
+
   template
   enable_if_t::value, Status> Visit(const T& left) {
     const auto& right = checked_cast(right_);
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
index 8cf5a04addb00..d8c4088759643 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
@@ -19,10 +19,13 @@
 #include "arrow/compute/cast_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/extension_type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/float16.h"
 
 namespace arrow {
 
+using arrow::util::Float16;
 using internal::checked_cast;
 using internal::PrimitiveScalarBase;
 
@@ -47,6 +50,42 @@ struct CastPrimitive {
   }
 };
 
+// Converting floating types to half float.
+template
+struct CastPrimitive> {
+  static void Exec(const ArraySpan& arr, ArraySpan* out) {
+    using InT = typename InType::c_type;
+    const InT* in_values = arr.GetValues(1);
+    uint16_t* out_values = out->GetValues(1);
+    for (int64_t i = 0; i < arr.length; ++i) {
+      *out_values++ = Float16(*in_values++).bits();
+    }
+  }
+};
+
+// Converting from half float to other floating types.
+template <>
+struct CastPrimitive> {
+  static void Exec(const ArraySpan& arr, ArraySpan* out) {
+    const uint16_t* in_values = arr.GetValues(1);
+    float* out_values = out->GetValues(1);
+    for (int64_t i = 0; i < arr.length; ++i) {
+      *out_values++ = Float16::FromBits(*in_values++).ToFloat();
+    }
+  }
+};
+
+template <>
+struct CastPrimitive> {
+  static void Exec(const ArraySpan& arr, ArraySpan* out) {
+    const uint16_t* in_values = arr.GetValues(1);
+    double* out_values = out->GetValues(1);
+    for (int64_t i = 0; i < arr.length; ++i) {
+      *out_values++ = Float16::FromBits(*in_values++).ToDouble();
+    }
+  }
+};
+
 template
 struct CastPrimitive::value>> {
   // memcpy output
@@ -56,6 +95,33 @@ struct CastPrimitive:
   }
 };
 
+// Cast int to half float
+template
+struct CastPrimitive> {
+  static void Exec(const ArraySpan& arr, ArraySpan* out) {
+    using InT = typename InType::c_type;
+    const InT* in_values = arr.GetValues(1);
+    uint16_t* out_values = out->GetValues(1);
+    for (int64_t i = 0; i < arr.length; ++i) {
+      float temp = static_cast(*in_values++);
+      *out_values++ = Float16(temp).bits();
+    }
+  }
+};
+
+// Cast half float to int
+template
+struct CastPrimitive> {
+  static void Exec(const ArraySpan& arr, ArraySpan* out) {
+    using OutT = typename OutType::c_type;
+    const uint16_t* in_values = arr.GetValues(1);
+    OutT* out_values = out->GetValues(1);
+    for (int64_t i = 0; i < arr.length; ++i) {
+      *out_values++ = static_cast(Float16::FromBits(*in_values++).ToFloat());
+    }
+  }
+};
+
 template
 void CastNumberImpl(Type::type out_type, const ArraySpan& input, ArraySpan* out) {
   switch (out_type) {
@@ -79,6 +145,8 @@ void CastNumberImpl(Type::type out_type, const ArraySpan& input, ArraySpan* out)
       return CastPrimitive::Exec(input, out);
     case Type::DOUBLE:
       return CastPrimitive::Exec(input, out);
+    case Type::HALF_FLOAT:
+      return CastPrimitive::Exec(input, out);
     default:
       break;
   }
@@ -109,6 +177,8 @@ void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type,
       return CastNumberImpl(out_type, input, out);
     case Type::DOUBLE:
       return CastNumberImpl(out_type, input, out);
+    case Type::HALF_FLOAT:
+      return CastNumberImpl(out_type, input, out);
     default:
       DCHECK(false);
       break;
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index b054e57f04d12..3df86e7d6936c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -23,6 +23,7 @@
 #include "arrow/compute/kernels/util_internal.h"
 #include "arrow/scalar.h"
 #include "arrow/util/bit_block_counter.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/int_util.h"
 #include "arrow/util/value_parsing.h"
 
@@ -34,6 +35,7 @@
 using internal::IntegersCanFit;
 using internal::OptionalBitBlockCounter;
 using internal::ParseValue;
 using internal::PrimitiveScalarBase;
+using util::Float16;
 
 namespace compute {
 namespace internal {
@@ -56,18 +58,37 @@ Status CastFloatingToFloating(KernelContext*, const ExecSpan& batch, ExecResult*
 
 // ----------------------------------------------------------------------
 // Implement fast safe floating point to integer cast
+//
+template
+struct WasTruncated {
+  static bool Check(OutT out_val, InT in_val) {
+    return static_cast(out_val) != in_val;
+  }
+
+  static bool CheckMaybeNull(OutT out_val, InT in_val, bool is_valid) {
+    return is_valid && static_cast(out_val) != in_val;
+  }
+};
+
+// Half float to int
+template
+struct WasTruncated {
+  using OutT = typename OutType::c_type;
+  static bool Check(OutT out_val, uint16_t in_val) {
+    return static_cast(out_val) != Float16::FromBits(in_val).ToFloat();
+  }
+
+  static bool CheckMaybeNull(OutT out_val, uint16_t in_val, bool is_valid) {
+    return is_valid && static_cast(out_val) != Float16::FromBits(in_val).ToFloat();
+  }
+};
 
 // InType is a floating point type we are planning to cast to integer
 template
 ARROW_DISABLE_UBSAN("float-cast-overflow")
 Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) {
-  auto WasTruncated = [&](OutT out_val, InT in_val) -> bool {
-    return static_cast(out_val) != in_val;
-  };
-  auto WasTruncatedMaybeNull = [&](OutT out_val, InT in_val, bool is_valid) -> bool {
-    return is_valid && static_cast(out_val) != in_val;
-  };
   auto GetErrorMessage = [&](InT val) {
     return Status::Invalid("Float value ", val, " was truncated converting to ",
                            *output.type);
@@ -86,26 +107,28 @@ Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) {
     if (block.popcount == block.length) {
       // Fast path: branchless
       for (int64_t i = 0; i < block.length; ++i) {
-        block_out_of_bounds |= WasTruncated(out_data[i], in_data[i]);
+        block_out_of_bounds |=
+            WasTruncated::Check(out_data[i], in_data[i]);
       }
     } else if (block.popcount > 0) {
       // Indices have nulls, must only boundscheck non-null values
       for (int64_t i = 0; i < block.length; ++i) {
-        block_out_of_bounds |= WasTruncatedMaybeNull(
+        block_out_of_bounds |= WasTruncated::CheckMaybeNull(
             out_data[i], in_data[i], bit_util::GetBit(bitmap, offset_position + i));
       }
     }
     if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
       if (input.GetNullCount() > 0) {
         for (int64_t i = 0; i < block.length; ++i) {
-          if (WasTruncatedMaybeNull(out_data[i], in_data[i],
-                                    bit_util::GetBit(bitmap, offset_position + i))) {
+          if (WasTruncated::CheckMaybeNull(
+                  out_data[i], in_data[i],
+                  bit_util::GetBit(bitmap, offset_position + i))) {
             return GetErrorMessage(in_data[i]);
           }
         }
       } else {
         for (int64_t i = 0; i < block.length; ++i) {
-          if (WasTruncated(out_data[i], in_data[i])) {
+          if (WasTruncated::Check(out_data[i], in_data[i])) {
             return GetErrorMessage(in_data[i]);
           }
         }
@@ -151,6 +174,9 @@ Status CheckFloatToIntTruncation(const ExecValue& input, const ExecResult& outpu
       return CheckFloatToIntTruncationImpl(input.array, *output.array_span());
     case Type::DOUBLE:
       return CheckFloatToIntTruncationImpl(input.array, *output.array_span());
+    case Type::HALF_FLOAT:
+      return CheckFloatToIntTruncationImpl(input.array,
+                                           *output.array_span());
     default:
       break;
   }
@@ -293,6 +319,15 @@ struct CastFunctor<
   }
 };
 
+template <>
+struct CastFunctor> {
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
+    return applicator::ScalarUnaryNotNull>::Exec(ctx, batch,
+                                                                           out);
+  }
+};
+
 // ----------------------------------------------------------------------
 // Decimal to integer
 
@@ -689,6 +724,10 @@ std::shared_ptr GetCastToInteger(std::string name) {
     DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, out_ty, CastFloatingToInteger));
   }
 
+  // Cast from half-float
+  DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty,
+                            CastFloatingToInteger));
+
   // From other numbers to integer
   AddCommonNumberCasts(out_ty, func.get());
 
@@ -715,6 +754,10 @@ std::shared_ptr GetCastToFloating(std::string name) {
     DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, out_ty, CastFloatingToFloating));
   }
 
+  // From half-float to float/double
+  DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty,
+                            CastFloatingToFloating));
+
   // From other numbers to floating point
AddCommonNumberCasts(out_ty, func.get()); @@ -723,6 +766,7 @@ std::shared_ptr GetCastToFloating(std::string name) { CastFunctor::Exec)); DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty, CastFunctor::Exec)); + return func; } @@ -795,6 +839,32 @@ std::shared_ptr GetCastToDecimal256() { return func; } +std::shared_ptr GetCastToHalfFloat() { + // HalfFloat is a bit brain-damaged for now + auto func = std::make_shared("func", Type::HALF_FLOAT); + AddCommonCasts(Type::HALF_FLOAT, float16(), func.get()); + + // Casts from integer to floating point + for (const std::shared_ptr& in_ty : IntTypes()) { + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, + TypeTraits::type_singleton(), + CastIntegerToFloating)); + } + + // Cast from other strings to half float. + for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { + auto exec = GenerateVarBinaryBase(*in_ty); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, + TypeTraits::type_singleton(), exec)); + } + + DCHECK_OK(func.get()->AddKernel(Type::FLOAT, {InputType(Type::FLOAT)}, float16(), + CastFloatingToFloating)); + DCHECK_OK(func.get()->AddKernel(Type::DOUBLE, {InputType(Type::DOUBLE)}, float16(), + CastFloatingToFloating)); + return func; +} + } // namespace std::vector> GetNumericCasts() { @@ -830,13 +900,14 @@ std::vector> GetNumericCasts() { functions.push_back(GetCastToInteger("cast_uint64")); // HalfFloat is a bit brain-damaged for now - auto cast_half_float = - std::make_shared("cast_half_float", Type::HALF_FLOAT); - AddCommonCasts(Type::HALF_FLOAT, float16(), cast_half_float.get()); + auto cast_half_float = GetCastToHalfFloat(); functions.push_back(cast_half_float); - functions.push_back(GetCastToFloating("cast_float")); - functions.push_back(GetCastToFloating("cast_double")); + auto cast_float = GetCastToFloating("cast_float"); + functions.push_back(cast_float); + + auto cast_double = GetCastToFloating("cast_double"); + functions.push_back(cast_double); functions.push_back(GetCastToDecimal128()); functions.push_back(GetCastToDecimal256()); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index a6576e4e4c26f..3a8352a9b870f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -437,6 +437,10 @@ void AddNumberToStringCasts(CastFunction* func) { GenerateNumeric(*in_ty), NullHandling::COMPUTED_NO_PREALLOCATE)); } + + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {float16()}, out_ty, + NumericToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); } template diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a8acf68f66c8b..af62b4da2caa5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -389,7 +389,7 @@ TEST(Cast, ToIntDowncastUnsafe) { } TEST(Cast, FloatingToInt) { - for (auto from : {float32(), float64()}) { + for (auto from : {float16(), float32(), float64()}) { for (auto to : {int32(), int64()}) { // float to int no truncation CheckCast(ArrayFromJSON(from, "[1.0, null, 0.0, -1.0, 5.0]"), @@ -407,6 +407,15 @@ TEST(Cast, FloatingToInt) { } } +TEST(Cast, FloatingToFloating) { + for (auto from : {float16(), float32(), float64()}) { + for (auto to : {float16(), float32(), float64()}) { + CheckCast(ArrayFromJSON(from, "[1.0, 0.0, -1.0, 5.0]"), + ArrayFromJSON(to, "[1.0, 0.0, -1.0, 5.0]")); + } + } +} + TEST(Cast, 
IntToFloating) { for (auto from : {uint32(), int32()}) { std::string two_24 = "[16777216, 16777217]"; @@ -2220,14 +2229,12 @@ TEST(Cast, IntToString) { } TEST(Cast, FloatingToString) { - for (auto string_type : {utf8(), large_utf8()}) { - CheckCast( - ArrayFromJSON(float32(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), - ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); - - CheckCast( - ArrayFromJSON(float64(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), - ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); + for (auto float_type : {float16(), float32(), float64()}) { + for (auto string_type : {utf8(), large_utf8()}) { + CheckCast(ArrayFromJSON(float_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), + ArrayFromJSON(string_type, + R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); + } } } diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 26289a7f787e1..c7dbdef2436c3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1988,6 +1988,11 @@ TYPED_TEST(TestBaseBinaryKernels, ExtractRegexInvalid) { #endif TYPED_TEST(TestStringKernels, Strptime) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Skipping some strptime tests due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20466"; +#endif + std::string input1 = R"(["5/1/2020", null, null, "12/13/1900", null])"; std::string input2 = R"(["5-1-2020", "12/13/1900"])"; std::string input3 = R"(["5/1/2020", "AA/BB/CCCC"])"; @@ -2008,6 +2013,7 @@ TYPED_TEST(TestStringKernels, Strptime) { this->CheckUnary("strptime", input4, unit, output4, &options); options.format = "%m/%d/%Y %%z"; + // emscripten bug https://github.com/emscripten-core/emscripten/issues/20466 this->CheckUnary("strptime", input5, unit, output1, &options); options.error_is_null = false; @@ -2019,6 +2025,11 @@ TYPED_TEST(TestStringKernels, Strptime) { } TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() + << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467"; +#endif + if (!arrow::internal::kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 8dac6525fe2e6..8da8c760ea22b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2143,7 +2143,10 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { #ifdef _WIN32 GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; -#else +#elif defined(__EMSCRIPTEN__) + GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; +#endif + if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; } @@ -2155,10 +2158,12 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { ["01 janvier 1970 00:00:59,123", "18 août 2021 15:11:50,456", null])"; CheckScalarUnary("strftime", timestamp(TimeUnit::MILLI, "UTC"), milliseconds, utf8(), expected, &options); -#endif } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; +#endif auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "nonexistent"); const 
char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds); diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 1f852e84d3d5c..9e32e5437325f 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -58,8 +58,6 @@ std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) { return "avx"; } else if (query_flag(CpuInfo::SSE4_2)) { return "sse4_2"; - } else if (query_flag(CpuInfo::ASIMD)) { - return "neon"; } else { return "none"; } diff --git a/cpp/src/arrow/dataset/dataset_writer.cc b/cpp/src/arrow/dataset/dataset_writer.cc index 34731d19ab3eb..754386275d60c 100644 --- a/cpp/src/arrow/dataset/dataset_writer.cc +++ b/cpp/src/arrow/dataset/dataset_writer.cc @@ -515,7 +515,7 @@ class DatasetWriter::DatasetWriterImpl { std::function finish_callback, uint64_t max_rows_queued) : scheduler_(scheduler), write_tasks_(util::MakeThrottledAsyncTaskGroup( - scheduler_, 1, /*queue=*/nullptr, + scheduler_, /*max_concurrent_cost=*/1, /*queue=*/nullptr, [finish_callback = std::move(finish_callback)] { finish_callback(); return Status::OK(); @@ -541,6 +541,23 @@ class DatasetWriter::DatasetWriterImpl { } } + void ResumeIfNeeded() { + if (!paused_) { + return; + } + bool needs_resume = false; + { + std::lock_guard lg(mutex_); + if (!write_tasks_ || write_tasks_->QueueSize() == 0) { + needs_resume = true; + } + } + if (needs_resume) { + paused_ = false; + resume_callback_(); + } + } + void WriteRecordBatch(std::shared_ptr batch, const std::string& directory, const std::string& prefix) { write_tasks_->AddSimpleTask( @@ -549,11 +566,14 @@ class DatasetWriter::DatasetWriterImpl { WriteAndCheckBackpressure(std::move(batch), directory, prefix); if (!has_room.is_finished()) { // We don't have to worry about sequencing backpressure here since - // task_group_ serves as our sequencer. If batches continue to arrive after - // we pause they will queue up in task_group_ until we free up and call - // Resume + // task_group_ serves as our sequencer. If batches continue to arrive + // after we pause they will queue up in task_group_ until we free up and + // call Resume pause_callback_(); - return has_room.Then([this] { resume_callback_(); }); + paused_ = true; + return has_room.Then([this] { ResumeIfNeeded(); }); + } else { + ResumeIfNeeded(); } return has_room; }, @@ -571,6 +591,9 @@ class DatasetWriter::DatasetWriterImpl { return Future<>::MakeFinished(); }, "DatasetWriter::FinishAll"sv); + // Reset write_tasks_ to signal that we are done adding tasks, this will allow + // us to invoke the finish callback once the tasks wrap up. 
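
// A stripped-down sketch of the idiom used by DatasetWriterImpl above
// (illustrative names, not Arrow's API): the task group invokes its finish
// callback once it is torn down and no more work can be added, so resetting
// the owning pointer is how the writer signals "done adding tasks".
#include <cstdio>
#include <functional>
#include <memory>

struct TaskGroupSketch {
  explicit TaskGroupSketch(std::function<void()> on_finish)
      : on_finish_(std::move(on_finish)) {}
  ~TaskGroupSketch() { on_finish_(); }  // all tasks done, no more producers
  std::function<void()> on_finish_;
};

int main() {
  auto group = std::make_unique<TaskGroupSketch>([] { std::puts("finished"); });
  group.reset();  // signals "done adding tasks"; the callback fires
}
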
+ std::lock_guard lg(mutex_); write_tasks_.reset(); } @@ -660,7 +683,7 @@ class DatasetWriter::DatasetWriterImpl { } util::AsyncTaskScheduler* scheduler_ = nullptr; - std::unique_ptr write_tasks_; + std::unique_ptr write_tasks_; Future<> finish_fut_ = Future<>::Make(); FileSystemDatasetWriteOptions write_options_; DatasetWriterState writer_state_; @@ -670,6 +693,7 @@ class DatasetWriter::DatasetWriterImpl { std::unordered_map> directory_queues_; std::mutex mutex_; + bool paused_ = false; Status err_; }; diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index f90833a88d118..b76c7ebad45db 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -138,6 +138,9 @@ TEST(FileSystemFromUri, LinkedRegisteredFactory) { } TEST(FileSystemFromUri, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif // Since the registrar's definition is in libarrow_filesystem_example.so, // its factory will be registered only after the library is dynamically loaded. std::string path; diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index d06101748dc0c..5faa4d095eb1e 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -405,7 +405,9 @@ class CompressedInputStream::Impl { ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes, pool_)); ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buf->mutable_data())); RETURN_NOT_OK(buf->Resize(bytes_read)); - return buf; + // Using std::move because the some compiler might has issue below: + // https://wg21.cmeerw.net/cwg/issue1579 + return std::move(buf); } const std::shared_ptr& raw() const { return raw_; } diff --git a/cpp/src/arrow/io/compressed_benchmark.cc b/cpp/src/arrow/io/compressed_benchmark.cc index 52a30d8cb0887..ae9ecd57143f3 100644 --- a/cpp/src/arrow/io/compressed_benchmark.cc +++ b/cpp/src/arrow/io/compressed_benchmark.cc @@ -170,6 +170,7 @@ static void CompressedInputStreamNonZeroCopyBufferReturnedByCallee( BufferReadMode::ReturnedByCallee>(state, kCompression); } +#ifdef ARROW_WITH_LZ4 static void CompressedInputArguments(::benchmark::internal::Benchmark* b) { b->ArgNames({"num_bytes", "batch_size"}) ->Args({8 * 1024, 8 * 1024}) @@ -180,7 +181,6 @@ static void CompressedInputArguments(::benchmark::internal::Benchmark* b) { ->Args({1024 * 1024, 1024 * 1024}); } -#ifdef ARROW_WITH_LZ4 // Benchmark LZ4 because it's lightweight, which makes benchmarking focused on the // overhead of the compression input stream. 
BENCHMARK_TEMPLATE(CompressedInputStreamZeroCopyBufferProvidedByCaller, diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 00426f9957b1f..cc3a5187059e9 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -398,8 +398,14 @@ class MemoryMappedFile::MemoryMap ~Region() { if (data_ != nullptr) { +#ifndef __EMSCRIPTEN__ int result = munmap(data(), static_cast(size_)); + // emscripten erroneously reports failures in munmap + // https://github.com/emscripten-core/emscripten/issues/20459 ARROW_CHECK_EQ(result, 0) << "munmap failed"; +#else + munmap(data(), static_cast(size_)); +#endif } } diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index e7e7ba949c9fd..af414891b950e 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -42,6 +42,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" +#include "arrow/util/config.h" #include "arrow/util/future.h" #include "arrow/util/io_util.h" @@ -486,6 +487,10 @@ TEST_F(TestReadableFile, CustomMemoryPool) { } TEST_F(TestReadableFile, ThreadSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + std::string data = "foobar"; { std::ofstream stream; @@ -540,6 +545,9 @@ class TestPipeIO : public ::testing::Test { }; TEST_F(TestPipeIO, TestWrite) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; +#endif std::string data1 = "test", data2 = "data!"; std::shared_ptr file; uint8_t buffer[10]; @@ -570,6 +578,9 @@ TEST_F(TestPipeIO, TestWrite) { } TEST_F(TestPipeIO, ReadableFileFails) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; +#endif // ReadableFile fails on non-seekable fd ASSERT_RAISES(IOError, ReadableFile::Open(pipe_.rfd.fd())); } @@ -1048,6 +1059,10 @@ TEST_F(TestMemoryMappedFile, CastableToFileInterface) { } TEST_F(TestMemoryMappedFile, ThreadSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + std::string data = "foobar"; std::string path = TempFile("ipc-multithreading-test"); CreateFile(path, static_cast(data.size())); diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index ceeabe01677ed..9fd449831c980 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -36,6 +36,7 @@ #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" @@ -52,6 +53,7 @@ namespace rj = arrow::rapidjson; namespace arrow { using internal::ParseValue; +using util::Float16; namespace ipc { namespace internal { @@ -232,9 +234,9 @@ enable_if_physical_signed_integer ConvertNumber(const rj::Value& json // Convert single unsigned integer value template -enable_if_physical_unsigned_integer ConvertNumber(const rj::Value& json_obj, - const DataType& type, - typename T::c_type* out) { +enable_if_unsigned_integer ConvertNumber(const rj::Value& json_obj, + const DataType& type, + typename T::c_type* out) { if (json_obj.IsUint64()) { uint64_t v64 = json_obj.GetUint64(); *out = static_cast(v64); @@ -249,6 +251,30 @@ enable_if_physical_unsigned_integer ConvertNumber(const rj::Value& js } } +// Convert float16/HalfFloatType +template +enable_if_half_float ConvertNumber(const rj::Value& json_obj, + const DataType& type, uint16_t* out) { + if (json_obj.IsDouble()) { + double f64 = 
json_obj.GetDouble(); + *out = Float16(f64).bits(); + return Status::OK(); + } else if (json_obj.IsUint()) { + uint32_t u32t = json_obj.GetUint(); + double f64 = static_cast(u32t); + *out = Float16(f64).bits(); + return Status::OK(); + } else if (json_obj.IsInt()) { + int32_t i32t = json_obj.GetInt(); + double f64 = static_cast(i32t); + *out = Float16(f64).bits(); + return Status::OK(); + } else { + *out = static_cast(0); + return JSONTypeError("unsigned int", json_obj.GetType()); + } +} + // Convert single floating point value template enable_if_physical_floating_point ConvertNumber(const rj::Value& json_obj, diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index ea3a9ae1a14a9..b3f7fc5b3458b 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -44,6 +44,7 @@ #include "arrow/util/bitmap_builders.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/float16.h" #if defined(_MSC_VER) // "warning C4307: '+': integral constant overflow" @@ -51,6 +52,9 @@ #endif namespace arrow { + +using util::Float16; + namespace ipc { namespace internal { namespace json { @@ -185,6 +189,21 @@ class TestIntegers : public ::testing::Test { TYPED_TEST_SUITE_P(TestIntegers); +template +std::vector TestIntegersMutateIfNeeded( + std::vector data) { + return data; +} + +// TODO: This works, but is it the right way to do this? +template <> +std::vector TestIntegersMutateIfNeeded( + std::vector data) { + std::for_each(data.begin(), data.end(), + [](HalfFloatType::c_type& value) { value = Float16(value).bits(); }); + return data; +} + TYPED_TEST_P(TestIntegers, Basics) { using T = TypeParam; using c_type = typename T::c_type; @@ -193,16 +212,17 @@ TYPED_TEST_P(TestIntegers, Basics) { auto type = this->type(); AssertJSONArray(type, "[]", {}); - AssertJSONArray(type, "[4, 0, 5]", {4, 0, 5}); - AssertJSONArray(type, "[4, null, 5]", {true, false, true}, {4, 0, 5}); + AssertJSONArray(type, "[4, 0, 5]", TestIntegersMutateIfNeeded({4, 0, 5})); + AssertJSONArray(type, "[4, null, 5]", {true, false, true}, + TestIntegersMutateIfNeeded({4, 0, 5})); // Test limits const auto min_val = std::numeric_limits::min(); const auto max_val = std::numeric_limits::max(); std::string json_string = JSONArray(0, 1, min_val); - AssertJSONArray(type, json_string, {0, 1, min_val}); + AssertJSONArray(type, json_string, TestIntegersMutateIfNeeded({0, 1, min_val})); json_string = JSONArray(0, 1, max_val); - AssertJSONArray(type, json_string, {0, 1, max_val}); + AssertJSONArray(type, json_string, TestIntegersMutateIfNeeded({0, 1, max_val})); } TYPED_TEST_P(TestIntegers, Errors) { @@ -269,7 +289,12 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers, UInt8Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type); -INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType); +// FIXME: I understand that HalfFloatType is backed by a uint16_t, but does it +// make sense to run this test over it? +// The way ConvertNumber for HalfFloatType is currently written, it allows the +// conversion of floating point notation to a half float, which causes failures +// in this test, one example is asserting 0.0 cannot be parsed as a half float. 
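
// What the half-float ConvertNumber overload above boils down to, as a hedged
// sketch (assumes arrow/util/float16.h; Float16(double) and bits() are the
// members this diff itself uses). JSON integers are widened to double first so
// a single double-to-half path handles every numeric case.
#include <cstdint>
#include <cstdio>
#include "arrow/util/float16.h"

uint16_t JsonNumberToHalfBits(double f64) {
  return arrow::util::Float16(f64).bits();
}

int main() {
  // 1.0 is exactly representable as a half float: bit pattern 0x3C00.
  std::printf("0x%04X\n", static_cast<unsigned>(JsonNumberToHalfBits(1.0)));
}
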
+// INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType); template class TestStrings : public ::testing::Test { diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index c5075299a3e35..ff7838cc39d72 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1046,6 +1046,9 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { }; TEST_F(RecursionLimits, WriteLimit) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "This crashes the Emscripten runtime."; +#endif int32_t metadata_length = -1; int64_t body_length = -1; std::shared_ptr schema; @@ -1078,6 +1081,10 @@ TEST_F(RecursionLimits, ReadLimit) { // Test fails with a structured exception on Windows + Debug #if !defined(_WIN32) || defined(NDEBUG) TEST_F(RecursionLimits, StressLimit) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "This crashes the Emscripten runtime."; +#endif + auto CheckDepth = [this](int recursion_depth, bool* it_works) { int32_t metadata_length = -1; int64_t body_length = -1; diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 7e0eb1d460555..95f601465b440 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -36,11 +36,14 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/type.h" +#include "arrow/util/float16.h" #include "arrow/util/iterator.h" #include "arrow/util/key_value_metadata.h" namespace arrow { +using util::Float16; + class TestRecordBatch : public ::testing::Test {}; TEST_F(TestRecordBatch, Equals) { diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index ed66c9367dc36..8caf4400fe86d 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -305,6 +305,7 @@ struct TypeTraits { using BuilderType = HalfFloatBuilder; using ScalarType = HalfFloatScalar; using TensorType = HalfFloatTensor; + using CType = uint16_t; static constexpr int64_t bytes_required(int64_t elements) { return elements * static_cast(sizeof(uint16_t)); diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc index 2b74313db279b..afb03b67209a6 100644 --- a/cpp/src/arrow/util/async_generator_test.cc +++ b/cpp/src/arrow/util/async_generator_test.cc @@ -399,6 +399,10 @@ TEST(TestAsyncUtil, MapParallelStress) { } TEST(TestAsyncUtil, MapQueuingFailStress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int NTASKS = 10; constexpr int NITEMS = 10; for (bool slow : {true, false}) { @@ -1872,6 +1876,10 @@ TEST(PushGenerator, DanglingProducer) { } TEST(PushGenerator, Stress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + const int NTHREADS = 20; const int NVALUES = 2000; const int NFUTURES = NVALUES + 100; diff --git a/cpp/src/arrow/util/async_util.cc b/cpp/src/arrow/util/async_util.cc index 63e27bfbe5773..fbd45eadac2cd 100644 --- a/cpp/src/arrow/util/async_util.cc +++ b/cpp/src/arrow/util/async_util.cc @@ -118,6 +118,8 @@ class FifoQueue : public ThrottledAsyncTaskScheduler::Queue { void Purge() override { tasks_.clear(); } + std::size_t Size() const override { return tasks_.size(); } + private: std::list> tasks_; }; @@ -332,6 +334,10 @@ class ThrottledAsyncTaskSchedulerImpl void Pause() override { throttle_->Pause(); } void Resume() override { throttle_->Resume(); } + std::size_t QueueSize() override { + std::lock_guard lk(mutex_); + 
return queue_->Size(); + } const util::tracing::Span& span() const override { return target_->span(); } private: @@ -499,6 +505,7 @@ class ThrottledAsyncTaskGroup : public ThrottledAsyncTaskScheduler { : throttle_(std::move(throttle)), task_group_(std::move(task_group)) {} void Pause() override { throttle_->Pause(); } void Resume() override { throttle_->Resume(); } + std::size_t QueueSize() override { return throttle_->QueueSize(); } const util::tracing::Span& span() const override { return task_group_->span(); } bool AddTask(std::unique_ptr task) override { return task_group_->AddTask(std::move(task)); diff --git a/cpp/src/arrow/util/async_util.h b/cpp/src/arrow/util/async_util.h index 7a675da59facd..d9ed63bdbce22 100644 --- a/cpp/src/arrow/util/async_util.h +++ b/cpp/src/arrow/util/async_util.h @@ -226,6 +226,7 @@ class ARROW_EXPORT ThrottledAsyncTaskScheduler : public AsyncTaskScheduler { virtual bool Empty() = 0; /// Purge the queue of all items virtual void Purge() = 0; + virtual std::size_t Size() const = 0; }; class Throttle { @@ -277,6 +278,8 @@ class ARROW_EXPORT ThrottledAsyncTaskScheduler : public AsyncTaskScheduler { /// Allows task to be submitted again. If there is a max_concurrent_cost limit then /// it will still apply. virtual void Resume() = 0; + /// Return the number of tasks queued but not yet submitted + virtual std::size_t QueueSize() = 0; /// Create a throttled view of a scheduler /// diff --git a/cpp/src/arrow/util/async_util_test.cc b/cpp/src/arrow/util/async_util_test.cc index 313ca91912335..1f9aad453e9c4 100644 --- a/cpp/src/arrow/util/async_util_test.cc +++ b/cpp/src/arrow/util/async_util_test.cc @@ -680,6 +680,7 @@ class PriorityQueue : public ThrottledAsyncTaskScheduler::Queue { queue_.pop(); } } + std::size_t Size() const { return queue_.size(); } private: std::priority_queue, diff --git a/cpp/src/arrow/util/atfork_test.cc b/cpp/src/arrow/util/atfork_test.cc index 004e28e19514a..750f4d138793b 100644 --- a/cpp/src/arrow/util/atfork_test.cc +++ b/cpp/src/arrow/util/atfork_test.cc @@ -35,6 +35,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/atfork_internal.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" @@ -109,6 +110,10 @@ class TestAtFork : public ::testing::Test { #ifndef _WIN32 TEST_F(TestAtFork, EmptyHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers = std::make_shared(); RegisterAtFork(handlers); @@ -130,6 +135,10 @@ TEST_F(TestAtFork, EmptyHandlers) { } TEST_F(TestAtFork, SingleThread) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); auto handlers2 = std::make_shared(PushBefore(2), PushParentAfter(12), @@ -188,6 +197,10 @@ TEST_F(TestAtFork, SingleThread) { // https://github.com/google/sanitizers/issues/950. 
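
// The new Queue::Size()/QueueSize() plumbing above is what lets the dataset
// writer ask "is anything still waiting behind the throttle?". A minimal FIFO
// queue sketch in the same shape (illustrative, not Arrow's interface):
#include <cstddef>
#include <cstdio>
#include <list>

template <typename Task>
class FifoQueueSketch {
 public:
  void Push(Task t) { tasks_.push_back(std::move(t)); }
  Task Pop() {
    Task t = std::move(tasks_.front());
    tasks_.pop_front();
    return t;
  }
  bool Empty() const { return tasks_.empty(); }
  void Purge() { tasks_.clear(); }
  // Mirrors the Size() override added to FifoQueue in this diff.
  std::size_t Size() const { return tasks_.size(); }

 private:
  std::list<Task> tasks_;
};

int main() {
  FifoQueueSketch<int> q;
  q.Push(1);
  q.Push(2);
  std::printf("%zu\n", q.Size());  // 2
}
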
TEST_F(TestAtFork, MultipleThreads) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + const int kNumThreads = 5; const int kNumIterations = 40; const int kParentAfterAddend = 10000; @@ -245,6 +258,9 @@ TEST_F(TestAtFork, NestedChild) { #ifdef __APPLE__ GTEST_SKIP() << "Nested fork is not supported on macOS"; #endif +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); @@ -286,6 +302,10 @@ TEST_F(TestAtFork, NestedChild) { #ifdef _WIN32 TEST_F(TestAtFork, NoOp) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); diff --git a/cpp/src/arrow/util/cache_test.cc b/cpp/src/arrow/util/cache_test.cc index 6b71baa369b9b..264bfe68ec5d2 100644 --- a/cpp/src/arrow/util/cache_test.cc +++ b/cpp/src/arrow/util/cache_test.cc @@ -26,6 +26,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/cache_internal.h" +#include "arrow/util/config.h" namespace arrow { namespace internal { @@ -255,6 +256,10 @@ TYPED_TEST(TestMemoizeLru, Basics) { this->TestBasics(); } class TestMemoizeLruThreadSafe : public TestMemoizeLru {}; TEST_F(TestMemoizeLruThreadSafe, Threads) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + using V = IntValue; Callable c; diff --git a/cpp/src/arrow/util/cancel_test.cc b/cpp/src/arrow/util/cancel_test.cc index 45f6cde4f5579..713418f15a0cc 100644 --- a/cpp/src/arrow/util/cancel_test.cc +++ b/cpp/src/arrow/util/cancel_test.cc @@ -232,6 +232,10 @@ class SignalCancelTest : public CancelTest { }; TEST_F(SignalCancelTest, Register) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RegisterHandler(); TriggerSignal(); @@ -239,6 +243,10 @@ TEST_F(SignalCancelTest, Register) { } TEST_F(SignalCancelTest, RegisterUnregister) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + // The signal stop source was set up but no handler was registered, // so the token shouldn't be signalled. 
TriggerSignal(); @@ -261,6 +269,10 @@ TEST_F(SignalCancelTest, RegisterUnregister) { #if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \ defined(THREAD_SANITIZER)) TEST_F(SignalCancelTest, ForkSafetyUnregisteredHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RunInChild([&]() { // Child TriggerSignal(); @@ -284,6 +296,10 @@ TEST_F(SignalCancelTest, ForkSafetyUnregisteredHandlers) { } TEST_F(SignalCancelTest, ForkSafetyRegisteredHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RegisterHandler(); RunInChild([&]() { @@ -307,6 +323,10 @@ TEST_F(SignalCancelTest, ForkSafetyRegisteredHandlers) { #endif TEST_F(CancelTest, ThreadedPollSuccess) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 10; std::vector results(kNumThreads); @@ -339,6 +359,10 @@ TEST_F(CancelTest, ThreadedPollSuccess) { } TEST_F(CancelTest, ThreadedPollCancel) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 10; std::vector results(kNumThreads); diff --git a/cpp/src/arrow/util/counting_semaphore_test.cc b/cpp/src/arrow/util/counting_semaphore_test.cc index a5fa9f6bde891..4de11ce852a03 100644 --- a/cpp/src/arrow/util/counting_semaphore_test.cc +++ b/cpp/src/arrow/util/counting_semaphore_test.cc @@ -22,12 +22,17 @@ #include #include "arrow/testing/gtest_util.h" +#include "arrow/util/config.h" #include "gtest/gtest.h" namespace arrow { namespace util { TEST(CountingSemaphore, Basic) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + CountingSemaphore semaphore; std::atomic acquired{false}; std::atomic started{false}; @@ -50,6 +55,10 @@ TEST(CountingSemaphore, Basic) { } TEST(CountingSemaphore, CloseAborts) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + CountingSemaphore semaphore; std::atomic cleanup{false}; std::thread acquirer([&] { @@ -64,6 +73,10 @@ TEST(CountingSemaphore, CloseAborts) { } TEST(CountingSemaphore, Stress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr uint32_t NTHREADS = 10; CountingSemaphore semaphore; std::vector max_allowed_cases = {1, 3}; diff --git a/cpp/src/arrow/util/formatting.cc b/cpp/src/arrow/util/formatting.cc index c16d42ce5cfe2..c5a7e03f8573a 100644 --- a/cpp/src/arrow/util/formatting.cc +++ b/cpp/src/arrow/util/formatting.cc @@ -18,10 +18,12 @@ #include "arrow/util/formatting.h" #include "arrow/util/config.h" #include "arrow/util/double_conversion.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" namespace arrow { +using util::Float16; using util::double_conversion::DoubleToStringConverter; static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1; @@ -87,5 +89,14 @@ int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size return builder.position(); } +int FloatToStringFormatter::FormatFloat(uint16_t v, char* out_buffer, int out_size) { + DCHECK_GE(out_size, kMinBufferSize); + util::double_conversion::StringBuilder builder(out_buffer, out_size); + bool result = impl_->converter_.ToShortest(Float16::FromBits(v).ToFloat(), &builder); + DCHECK(result); + ARROW_UNUSED(result); + return builder.position(); +} + } // namespace internal } // namespace arrow diff --git 
a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 71bae74629e35..dd9af907ecc37 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -126,8 +126,10 @@ namespace detail { ARROW_EXPORT extern const char digit_pairs[]; // Based on fmtlib's format_int class: -// Write digits from right to left into a stack allocated buffer -inline void FormatOneChar(char c, char** cursor) { *--*cursor = c; } +// Write digits from right to left into a stack allocated buffer. +// \pre *cursor points to the byte after the one that will be written. +// \post *cursor points to the byte that was written. +inline void FormatOneChar(char c, char** cursor) { *(--(*cursor)) = c; } template void FormatOneDigit(Int value, char** cursor) { @@ -268,6 +270,7 @@ class ARROW_EXPORT FloatToStringFormatter { // Returns the number of characters written int FormatFloat(float v, char* out_buffer, int out_size); int FormatFloat(double v, char* out_buffer, int out_size); + int FormatFloat(uint16_t v, char* out_buffer, int out_size); protected: struct Impl; @@ -301,6 +304,12 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter { } }; +template <> +class StringFormatter : public FloatToStringFormatterMixin { + public: + using FloatToStringFormatterMixin::FloatToStringFormatterMixin; +}; + template <> class StringFormatter : public FloatToStringFormatterMixin { public: @@ -319,6 +328,7 @@ class StringFormatter : public FloatToStringFormatterMixin constexpr size_t BufferSizeHH_MM_SS() { + // "23:59:59" ("." "9"+)? return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 + detail::Digits10(Duration::period::den) - 1; } @@ -498,8 +509,9 @@ class StringFormatter { timepoint_days -= days(1); } + // YYYY_MM_DD " " HH_MM_SS "Z"? 
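
// A compact demo of the "write digits right to left" technique that the
// FormatOneChar documentation above describes (plain C++; the buffer size is
// chosen generously for a 32-bit value and is an assumption of this sketch).
#include <cstdint>
#include <cstdio>

int main() {
  char buf[16];
  char* cursor = buf + sizeof(buf);  // one past the last byte we may write
  uint32_t v = 12345;
  do {
    *--cursor = static_cast<char>('0' + v % 10);  // FormatOneChar's *--*cursor
    v /= 10;
  } while (v != 0);
  std::printf("%.*s\n", static_cast<int>(buf + sizeof(buf) - cursor), cursor);
}
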
constexpr size_t buffer_size = - detail::BufferSizeYYYY_MM_DD() + 1 + detail::BufferSizeHH_MM_SS(); + detail::BufferSizeYYYY_MM_DD() + 1 + detail::BufferSizeHH_MM_SS() + 1; std::array buffer; char* cursor = buffer.data() + buffer_size; diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 13f57a495d639..fcbeec347d32a 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -533,6 +533,14 @@ TEST(Formatting, Timestamp) { } } + { + constexpr int64_t kMillisInDay = 24 * 60 * 60 * 1000; + auto ty = timestamp(TimeUnit::MILLI, "+01:00"); + StringFormatter formatter(ty.get()); + AssertFormatting(formatter, -15000 * 365 * kMillisInDay + 1, + "-13021-12-17 00:00:00.001Z"); + } + { auto ty = timestamp(TimeUnit::MILLI, "Pacific/Maruesas"); StringFormatter formatter(ty.get()); diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc index 87891e48efa5e..2ed2b69aed524 100644 --- a/cpp/src/arrow/util/future_test.cc +++ b/cpp/src/arrow/util/future_test.cc @@ -415,6 +415,10 @@ TEST(FutureRefTest, HeadRemoved) { } TEST(FutureStressTest, Callback) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + #ifdef ARROW_VALGRIND const int NITERS = 2; #else @@ -471,6 +475,10 @@ TEST(FutureStressTest, Callback) { } TEST(FutureStressTest, TryAddCallback) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + for (unsigned int n = 0; n < 1; n++) { auto fut = Future<>::Make(); std::atomic callbacks_added(0); @@ -527,6 +535,10 @@ TEST(FutureStressTest, TryAddCallback) { } TEST(FutureStressTest, DeleteAfterWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumTasks = 100; for (int i = 0; i < kNumTasks; i++) { { @@ -1543,6 +1555,10 @@ TEST(FnOnceTest, MoveOnlyDataType) { } TEST(FutureTest, MatcherExamples) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + EXPECT_THAT(Future::MakeFinished(Status::Invalid("arbitrary error")), Finishes(Raises(StatusCode::Invalid))); diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 5928ebcb88959..d48f9eb97d562 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -95,6 +95,7 @@ #include "arrow/result.h" #include "arrow/util/atfork_internal.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "arrow/util/mutex.h" @@ -1485,6 +1486,7 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, } Status MemoryAdviseWillNeed(const std::vector& regions) { +#ifndef __EMSCRIPTEN__ const auto page_size = static_cast(GetPageSize()); DCHECK_GT(page_size, 0); const size_t page_mask = ~(page_size - 1); @@ -1543,6 +1545,9 @@ Status MemoryAdviseWillNeed(const std::vector& regions) { #else return Status::OK(); #endif +#else + return Status::OK(); +#endif } // @@ -2067,7 +2072,9 @@ Status SendSignal(int signum) { } Status SendSignalToThread(int signum, uint64_t thread_id) { -#ifdef _WIN32 +#ifndef ARROW_ENABLE_THREADING + return Status::NotImplemented("Can't send signal with no threads"); +#elif defined(_WIN32) return Status::NotImplemented("Cannot send signal to specific thread on Windows"); #else // Have to use a C-style cast because pthread_t can be a pointer *or* integer type diff --git 
a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index d0569c799561f..73213bf9ce48a 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -40,6 +40,7 @@ #include "arrow/buffer.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/bit_util.h" +#include "arrow/util/config.h" #include "arrow/util/cpu_info.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" @@ -146,8 +147,8 @@ TEST(MemoryAdviseWillNeed, Basics) { ASSERT_OK(MemoryAdviseWillNeed({{addr1, 0}, {addr2 + 1, 0}})); // Should probably fail - // (but on Windows, MemoryAdviseWillNeed can be a no-op) -#ifndef _WIN32 + // (but on Windows or Emscripten, MemoryAdviseWillNeed can be a no-op) +#if !defined(_WIN32) && !defined(__EMSCRIPTEN__) ASSERT_RAISES(IOError, MemoryAdviseWillNeed({{nullptr, std::numeric_limits::max()}})); #endif @@ -368,6 +369,10 @@ TestSelfPipe* TestSelfPipe::instance_; TEST_F(TestSelfPipe, MakeAndShutdown) {} TEST_F(TestSelfPipe, WaitAndSend) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + StartReading(); SleepABit(); AssertPayloadsEventually({}); @@ -380,6 +385,10 @@ TEST_F(TestSelfPipe, WaitAndSend) { } TEST_F(TestSelfPipe, SendAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + self_pipe_->Send(123456789123456789ULL); StartReading(); SleepABit(); @@ -390,6 +399,10 @@ TEST_F(TestSelfPipe, SendAndWait) { } TEST_F(TestSelfPipe, WaitAndShutdown) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + StartReading(); SleepABit(); ASSERT_OK(self_pipe_->Shutdown()); @@ -401,6 +414,9 @@ TEST_F(TestSelfPipe, WaitAndShutdown) { } TEST_F(TestSelfPipe, ShutdownAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif self_pipe_->Send(123456789123456789ULL); ASSERT_OK(self_pipe_->Shutdown()); StartReading(); @@ -413,6 +429,10 @@ TEST_F(TestSelfPipe, ShutdownAndWait) { } TEST_F(TestSelfPipe, WaitAndSendFromSignal) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + signal_received_.store(0); SignalHandlerGuard guard(SIGINT, &HandleSignal); @@ -431,6 +451,10 @@ TEST_F(TestSelfPipe, WaitAndSendFromSignal) { } TEST_F(TestSelfPipe, SendFromSignalAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + signal_received_.store(0); SignalHandlerGuard guard(SIGINT, &HandleSignal); @@ -450,6 +474,10 @@ TEST_F(TestSelfPipe, SendFromSignalAndWait) { #if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \ defined(THREAD_SANITIZER)) TEST_F(TestSelfPipe, ForkSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + self_pipe_->Send(123456789123456789ULL); auto child_pid = fork(); @@ -1025,6 +1053,9 @@ TEST_F(TestSendSignal, Generic) { } TEST_F(TestSendSignal, ToThread) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "SendSignalToThread requires threading"; +#endif #ifdef _WIN32 uint64_t dummy_thread_id = 42; ASSERT_RAISES(NotImplemented, SendSignalToThread(SIGINT, dummy_thread_id)); diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index 1d23e829d74a9..d80828869b33c 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -36,28 +36,74 @@ TypeName& operator=(TypeName&&) = default #endif -#define ARROW_UNUSED(x) (void)(x) -#define ARROW_ARG_UNUSED(x) +// With 
ARROW_PREDICT_FALSE, GCC and clang can be told that a certain branch is +// not likely to be taken (for instance, a CHECK failure), and use that information in +// static analysis. Giving the compiler this information can affect the generated code +// layout in the absence of better information (i.e. -fprofile-arcs). [1] explains how +// this feature can be used to improve code generation. It was written as a positive +// comment to a negative article about the use of these annotations. // -// GCC can be told that a certain branch is not likely to be taken (for -// instance, a CHECK failure), and use that information in static analysis. -// Giving it this information can help it optimize for the common case in -// the absence of better information (ie. -fprofile-arcs). +// ARROW_COMPILER_ASSUME allows the compiler to assume that a given expression is +// true, without evaluating it, and to optimise based on this assumption [2]. If this +// condition is violated at runtime, the behavior is undefined. This can be useful to +// generate both faster and smaller code in compute kernels. // -#if defined(__GNUC__) -#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0)) -#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +// IMPORTANT: Different optimisers are likely to react differently to this annotation! +// It should be used with care when we can prove by some means that the assumption +// is (1) guaranteed to always hold and (2) is useful for optimization [3]. If the +// assumption is pessimistic, it might even block the compiler from decisions that +// could lead to better code [4]. If you have a good intuition for what the compiler +// can do with assumptions [5], you can use this macro to guide it and end up with +// results you would only get with more complex code transformations. +// `clang -S -emit-llvm` can be used to check how the generated code changes with +// your specific use of this macro. +// +// [1] https://lobste.rs/s/uwgtkt/don_t_use_likely_unlikely_attributes#c_xi3wmc +// [2] "Portable assumptions" +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p1774r4.pdf +// [3] "Assertions Are Pessimistic, Assumptions Are Optimistic" +// https://blog.regehr.org/archives/1096 +// [4] https://discourse.llvm.org/t/llvm-assume-blocks-optimization/71609 +// [5] J. Doerfert et al. 2019. "Performance Exploration Through Optimistic Static +// Program Annotations". https://github.com/jdoerfert/PETOSPA/blob/master/ISC19.pdf +#define ARROW_UNUSED(x) (void)(x) +#define ARROW_ARG_UNUSED(x) +#if defined(__GNUC__) // GCC and compatible compilers (clang, Intel ICC) #define ARROW_NORETURN __attribute__((noreturn)) #define ARROW_NOINLINE __attribute__((noinline)) #define ARROW_FORCE_INLINE __attribute__((always_inline)) +#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0)) +#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) #define ARROW_PREFETCH(addr) __builtin_prefetch(addr) -#elif defined(_MSC_VER) +#define ARROW_RESTRICT __restrict +#if defined(__clang__) // clang-specific +#define ARROW_COMPILER_ASSUME(expr) __builtin_assume(expr) +#else // GCC-specific +#if __GNUC__ >= 13 +#define ARROW_COMPILER_ASSUME(expr) __attribute__((assume(expr))) +#else +// GCC does not have a built-in assume intrinsic before GCC 13, so we use an +// if statement and __builtin_unreachable() to achieve the same effect [2]. 
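// (Editor illustration, not part of the macro definition: with this fallback,
//   ARROW_COMPILER_ASSUME(x >= 0);
// expands to
//   if (x >= 0) { } else { __builtin_unreachable(); }
// so the optimizer may treat the negative-x path as dead. For example, a
// hypothetical
//   int abs_sketch(int x) { ARROW_COMPILER_ASSUME(x >= 0); return x < 0 ? -x : x; }
// can be folded down to a plain `return x;`.)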
+// Unlike clang's __builtin_assume and C++23's [[assume(expr)]], using this +// on GCC won't warn about side-effects in the expression, so make sure expr +// is side-effect free when working with GCC versions before 13 (Jan-2024), +// otherwise clang/MSVC builds will fail in CI. +#define ARROW_COMPILER_ASSUME(expr) \ + if (expr) { \ + } else { \ + __builtin_unreachable(); \ + } +#endif // __GNUC__ >= 13 +#endif +#elif defined(_MSC_VER) // MSVC #define ARROW_NORETURN __declspec(noreturn) #define ARROW_NOINLINE __declspec(noinline) #define ARROW_FORCE_INLINE __declspec(forceinline) #define ARROW_PREDICT_FALSE(x) (x) #define ARROW_PREDICT_TRUE(x) (x) #define ARROW_PREFETCH(addr) +#define ARROW_RESTRICT __restrict +#define ARROW_COMPILER_ASSUME(expr) __assume(expr) #else #define ARROW_NORETURN #define ARROW_NOINLINE @@ -65,12 +111,8 @@ #define ARROW_PREDICT_FALSE(x) (x) #define ARROW_PREDICT_TRUE(x) (x) #define ARROW_PREFETCH(addr) -#endif - -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) -#define ARROW_RESTRICT __restrict -#else #define ARROW_RESTRICT +#define ARROW_COMPILER_ASSUME(expr) #endif // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 9f82ad45b0740..bbf2a9a93e692 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -24,6 +24,7 @@ #include #endif +#include "arrow/util/config.h" #include "arrow/util/logging.h" namespace arrow { @@ -35,9 +36,12 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { +#ifdef ARROW_ENABLE_THREADING DCHECK(!locked->impl_->mutex_.try_lock()); +#endif locked->impl_->mutex_.unlock(); - }) {} + }) { +} Mutex::Guard Mutex::TryLock() { DCHECK_NE(impl_, nullptr); diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 01d1ffd767fc9..26984e5f7735d 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -214,7 +214,14 @@ TEST(BitUtil, RoundTripIntValues) { void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; +#ifdef __EMSCRIPTEN__ + // don't make this on the stack as it is + // too big for emscripten + std::vector buffer_vec(static_cast(len)); + uint8_t* buffer = buffer_vec.data(); +#else uint8_t buffer[len]; +#endif EXPECT_LE(expected_len, len); RleEncoder encoder(buffer, len, bit_width); @@ -227,7 +234,7 @@ void ValidateRle(const std::vector& values, int bit_width, if (expected_len != -1) { EXPECT_EQ(encoded_len, expected_len); } - if (expected_encoding != NULL) { + if (expected_encoding != NULL && encoded_len == expected_len) { EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } @@ -256,7 +263,14 @@ void ValidateRle(const std::vector& values, int bit_width, // the returned values are not all the same bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; +#ifdef __EMSCRIPTEN__ + // don't make this on the stack as it is + // too big for emscripten + std::vector buffer_vec(static_cast(len)); + uint8_t* buffer = buffer_vec.data(); +#else uint8_t buffer[len]; +#endif RleEncoder encoder(buffer, len, bit_width); for (size_t i = 0; i < values.size(); ++i) { bool result = encoder.Put(values[i]); diff --git a/cpp/src/arrow/util/value_parsing.cc b/cpp/src/arrow/util/value_parsing.cc index f6a24ac1467f8..e84aac995e35f 100644 --- a/cpp/src/arrow/util/value_parsing.cc +++ 
b/cpp/src/arrow/util/value_parsing.cc @@ -22,8 +22,11 @@ #include #include +#include "arrow/util/float16.h" #include "arrow/vendored/fast_float/fast_float.h" +using arrow::util::Float16; + namespace arrow { namespace internal { @@ -43,6 +46,17 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, double* out return res.ec == std::errc() && res.ptr == s + length; } +// Half float +bool StringToFloat(const char* s, size_t length, char decimal_point, uint16_t* out) { + ::arrow_vendored::fast_float::parse_options options{ + ::arrow_vendored::fast_float::chars_format::general, decimal_point}; + float temp_out; + const auto res = + ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, temp_out, options); + *out = Float16::FromFloat(temp_out).bits(); + return res.ec == std::errc() && res.ptr == s + length; +} + // ---------------------------------------------------------------------- // strptime-like parsing diff --git a/cpp/src/arrow/util/value_parsing.h b/cpp/src/arrow/util/value_parsing.h index b3c711840f3e2..609906052cd20 100644 --- a/cpp/src/arrow/util/value_parsing.h +++ b/cpp/src/arrow/util/value_parsing.h @@ -135,6 +135,9 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, float* out) ARROW_EXPORT bool StringToFloat(const char* s, size_t length, char decimal_point, double* out); +ARROW_EXPORT +bool StringToFloat(const char* s, size_t length, char decimal_point, uint16_t* out); + template <> struct StringConverter { using value_type = float; @@ -163,6 +166,20 @@ struct StringConverter { const char decimal_point; }; +template <> +struct StringConverter { + using value_type = uint16_t; + + explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {} + + bool Convert(const HalfFloatType&, const char* s, size_t length, value_type* out) { + return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out)); + } + + private: + const char decimal_point; +}; + // NOTE: HalfFloatType would require a half<->float conversion library inline uint8_t ParseDecimalDigit(char c) { return static_cast(c - '0'); } diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc index 30c5e6aae74ba..92d727019aaf5 100644 --- a/cpp/src/arrow/util/value_parsing_test.cc +++ b/cpp/src/arrow/util/value_parsing_test.cc @@ -794,6 +794,11 @@ TEST(TimestampParser, StrptimeZoneOffset) { if (!kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Test temporarily disabled due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20467 "; +#endif + std::string format = "%Y-%d-%m %H:%M:%S%z"; auto parser = TimestampParser::MakeStrptime(format); diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 3fb224154c4ec..af489c70a5233 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1316,7 +1316,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, levels_position_ = 0; levels_capacity_ = 0; read_dense_for_nullable_ = read_dense_for_nullable; - uses_values_ = !(descr->physical_type() == Type::BYTE_ARRAY); + // BYTE_ARRAY values are not stored in the `values_` buffer. 
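
// The new uint16_t StringToFloat overload above parses into a float first and
// then narrows to half-float bits. A dependency-light sketch of the same shape
// (std::strtof stands in for the vendored fast_float; the function name is
// illustrative; Float16::FromFloat is the member this diff uses).
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include "arrow/util/float16.h"

bool StringToHalfBitsSketch(const char* s, uint16_t* out) {
  char* end = nullptr;
  float temp = std::strtof(s, &end);
  if (end == s || *end != '\0') return false;  // input not fully consumed
  *out = arrow::util::Float16::FromFloat(temp).bits();
  return true;
}

int main() {
  uint16_t bits;
  if (StringToHalfBitsSketch("1.5", &bits)) {
    std::printf("0x%04X\n", static_cast<unsigned>(bits));  // 0x3E00
  }
}
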
+ uses_values_ = descr->physical_type() != Type::BYTE_ARRAY; if (uses_values_) { values_ = AllocateBuffer(pool); diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 3eed88f08b22a..3da5c64ace5dd 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -55,6 +55,7 @@ namespace bit_util = arrow::bit_util; using arrow::Status; using arrow::VisitNullBitmapInline; using arrow::internal::AddWithOverflow; +using arrow::internal::BitBlockCounter; using arrow::internal::checked_cast; using arrow::internal::MultiplyWithOverflow; using arrow::internal::SafeSignedSubtract; @@ -1173,13 +1174,15 @@ class PlainBooleanDecoder : public DecoderImpl, virtual public BooleanDecoder { private: std::unique_ptr<::arrow::bit_util::BitReader> bit_reader_; + int total_num_values_{0}; }; PlainBooleanDecoder::PlainBooleanDecoder(const ColumnDescriptor* descr) : DecoderImpl(descr, Encoding::PLAIN) {} void PlainBooleanDecoder::SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; + DecoderImpl::SetData(num_values, data, len); + total_num_values_ = num_values; bit_reader_ = std::make_unique(data, len); } @@ -1188,19 +1191,53 @@ int PlainBooleanDecoder::DecodeArrow( typename EncodingTraits::Accumulator* builder) { int values_decoded = num_values - null_count; if (ARROW_PREDICT_FALSE(num_values_ < values_decoded)) { - ParquetException::EofException(); + // A too large `num_values` was requested. + ParquetException::EofException( + "A too large `num_values` was requested in PlainBooleanDecoder: remain " + + std::to_string(num_values_) + ", requested: " + std::to_string(values_decoded)); + } + if (ARROW_PREDICT_FALSE(!bit_reader_->Advance(values_decoded))) { + ParquetException::EofException("PlainDecoder doesn't have enough values in page"); } - PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - - VisitNullBitmapInline( - valid_bits, valid_bits_offset, num_values, null_count, - [&]() { - bool value; - ARROW_IGNORE_EXPR(bit_reader_->GetValue(1, &value)); - builder->UnsafeAppend(value); - }, - [&]() { builder->UnsafeAppendNull(); }); + if (null_count == 0) { + // FastPath: can copy the data directly + PARQUET_THROW_NOT_OK(builder->AppendValues(data_, values_decoded, NULLPTR, + total_num_values_ - num_values_)); + } else { + // Handle nulls by BitBlockCounter + PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); + BitBlockCounter bit_counter(valid_bits, valid_bits_offset, num_values); + int64_t value_position = 0; + int64_t valid_bits_offset_position = valid_bits_offset; + int64_t previous_value_offset = total_num_values_ - num_values_; + while (value_position < num_values) { + auto block = bit_counter.NextWord(); + if (block.AllSet()) { + // GH-40978: We don't have UnsafeAppendValues for booleans currently, + // so using `AppendValues` here. + PARQUET_THROW_NOT_OK( + builder->AppendValues(data_, block.length, NULLPTR, previous_value_offset)); + previous_value_offset += block.length; + } else if (block.NoneSet()) { + // GH-40978: We don't have UnsafeAppendNulls for booleans currently, + // so using `AppendNulls` here. 
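
// The null-aware decode path above follows the usual BitBlockCounter shape:
// bulk-append when a block of validity bits is all set or none set, and fall
// back to a per-bit loop only for mixed blocks. A hand-rolled sketch of that
// three-way structure over byte-sized "blocks" (illustrative only; Arrow's
// blocks are larger and word-aligned):
#include <cstdint>
#include <cstdio>

void VisitBlocksSketch(const uint8_t* valid_bits, int64_t num_values) {
  for (int64_t i = 0; i < num_values; i += 8) {
    int64_t len = num_values - i < 8 ? num_values - i : 8;
    uint8_t byte = valid_bits[i / 8];
    uint8_t mask = static_cast<uint8_t>((1u << len) - 1u);
    if ((byte & mask) == mask) {
      std::printf("block of %lld: all set (bulk append values)\n",
                  static_cast<long long>(len));
    } else if ((byte & mask) == 0) {
      std::printf("block of %lld: none set (bulk append nulls)\n",
                  static_cast<long long>(len));
    } else {
      std::printf("block of %lld: mixed (per-bit loop)\n",
                  static_cast<long long>(len));
    }
  }
}

int main() {
  const uint8_t bits[] = {0xFF, 0x00, 0x0F};
  VisitBlocksSketch(bits, 20);
}
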
+ PARQUET_THROW_NOT_OK(builder->AppendNulls(block.length)); + } else { + for (int64_t i = 0; i < block.length; ++i) { + if (bit_util::GetBit(valid_bits, valid_bits_offset_position + i)) { + bool value = bit_util::GetBit(data_, previous_value_offset); + builder->UnsafeAppend(value); + previous_value_offset += 1; + } else { + builder->UnsafeAppendNull(); + } + } + } + value_position += block.length; + valid_bits_offset_position += block.length; + } + } num_values_ -= values_decoded; return values_decoded; @@ -1214,18 +1251,15 @@ inline int PlainBooleanDecoder::DecodeArrow( int PlainBooleanDecoder::Decode(uint8_t* buffer, int max_values) { max_values = std::min(max_values, num_values_); - bool val; - ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values); - for (int i = 0; i < max_values; ++i) { - if (!bit_reader_->GetValue(1, &val)) { - ParquetException::EofException(); - } - if (val) { - bit_writer.Set(); - } - bit_writer.Next(); + if (ARROW_PREDICT_FALSE(!bit_reader_->Advance(max_values))) { + ParquetException::EofException(); } - bit_writer.Finish(); + // Copy the data directly + // Parquet's boolean encoding is bit-packed using LSB. So + // we can directly copy the data to the buffer. + ::arrow::internal::CopyBitmap(this->data_, /*offset=*/total_num_values_ - num_values_, + /*length=*/max_values, /*dest=*/buffer, + /*dest_offset=*/0); num_values_ -= max_values; return max_values; } @@ -1692,7 +1726,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { } protected: - Status IndexInBounds(int32_t index) { + Status IndexInBounds(int32_t index) const { if (ARROW_PREDICT_TRUE(0 <= index && index < dictionary_length_)) { return Status::OK(); } @@ -3110,27 +3144,60 @@ class RleBooleanDecoder : public DecoderImpl, virtual public BooleanDecoder { int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset, typename EncodingTraits::Accumulator* out) override { - if (null_count != 0) { - // TODO(ARROW-34660): implement DecodeArrow with null slots. - ParquetException::NYI("RleBoolean DecodeArrow with null slots"); + if (null_count == num_values) { + PARQUET_THROW_NOT_OK(out->AppendNulls(null_count)); + return 0; } constexpr int kBatchSize = 1024; std::array values; - int sum_decode_count = 0; - do { - int current_batch = std::min(kBatchSize, num_values); - int decoded_count = decoder_->GetBatch(values.data(), current_batch); - if (decoded_count == 0) { - break; + const int num_non_null_values = num_values - null_count; + // Remaining non-null boolean values to read from decoder. + // We decode from `decoder_` with maximum 1024 size batches. + int num_remain_non_null_values = num_non_null_values; + int current_index_in_batch = 0; + int current_batch_size = 0; + auto next_boolean_batch = [&]() { + DCHECK_GT(num_remain_non_null_values, 0); + DCHECK_EQ(current_index_in_batch, current_batch_size); + current_batch_size = std::min(num_remain_non_null_values, kBatchSize); + int decoded_count = decoder_->GetBatch(values.data(), current_batch_size); + if (ARROW_PREDICT_FALSE(decoded_count != current_batch_size)) { + // required values is more than values in decoder. 
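
// The next_value()/next_boolean_batch() pair above is a standard batched-pull
// pattern: refill a fixed-size buffer only when it is exhausted. A
// self-contained sketch against a fake source (names illustrative; Arrow's
// batch size is 1024, not 4):
#include <array>
#include <cstdio>

struct FakeSource {
  int remaining = 10;
  // Fills `out` with up to `n` values; returns how many were produced.
  int GetBatch(bool* out, int n) {
    int produced = remaining < n ? remaining : n;
    for (int i = 0; i < produced; ++i) out[i] = (i % 2 == 0);
    remaining -= produced;
    return produced;
  }
};

int main() {
  constexpr int kBatchSize = 4;
  std::array<bool, kBatchSize> values;
  FakeSource src;
  int index = 0, size = 0;
  auto next_value = [&]() -> bool {
    if (index == size) {  // refill only when the current batch is consumed
      size = src.GetBatch(values.data(), kBatchSize);
      index = 0;
    }
    return values[index++];
  };
  for (int i = 0; i < 10; ++i) std::printf("%d", next_value() ? 1 : 0);
  std::printf("\n");
}
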
+ ParquetException::EofException(); } - sum_decode_count += decoded_count; - PARQUET_THROW_NOT_OK(out->Reserve(sum_decode_count)); - for (int i = 0; i < decoded_count; ++i) { - PARQUET_THROW_NOT_OK(out->Append(values[i])); + num_remain_non_null_values -= current_batch_size; + current_index_in_batch = 0; + }; + + // Reserve all values, including nulls, up front + PARQUET_THROW_NOT_OK(out->Reserve(num_values)); + if (null_count == 0) { + // Fast path when there are no nulls. + do { + next_boolean_batch(); + PARQUET_THROW_NOT_OK( + out->AppendValues(values.begin(), values.begin() + current_batch_size)); + num_values -= current_batch_size; + // Setting current_index_in_batch to current_batch_size marks + // the whole batch as consumed. + current_index_in_batch = current_batch_size; + } while (num_values > 0); + return num_non_null_values; + } + auto next_value = [&]() -> bool { + if (current_index_in_batch == current_batch_size) { + next_boolean_batch(); + DCHECK_GT(current_batch_size, 0); } - num_values -= decoded_count; - } while (num_values > 0); - return sum_decode_count; + DCHECK_LT(current_index_in_batch, current_batch_size); + bool value = values[current_index_in_batch]; + ++current_index_in_batch; + return value; + }; + VisitNullBitmapInline( + valid_bits, valid_bits_offset, num_values, null_count, + [&]() { out->UnsafeAppend(next_value()); }, [&]() { out->UnsafeAppendNull(); }); + return num_non_null_values; } int DecodeArrow( diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h index de47bb7deb839..602009189595e 100644 --- a/cpp/src/parquet/encoding.h +++ b/cpp/src/parquet/encoding.h @@ -400,7 +400,9 @@ class BooleanDecoder : virtual public TypedDecoder<BooleanType> { /// \brief Decode and bit-pack values into a buffer /// /// \param[in] buffer destination for decoded values - /// This buffer will contain bit-packed values. + /// This buffer will contain bit-packed values. If + /// max_values is not a multiple of 8, the trailing bits + /// of the last byte will be undefined. /// \param[in] max_values max values to decode. /// \return The number of values decoded. Should be identical to max_values except /// at the end of the current data page.
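A hedged usage sketch of the Decode() contract documented above (not part of this patch): parquet::BooleanDecoder and arrow::bit_util::BytesForBits are real APIs referenced by this diff, while the helper name DecodeToBitmap, the includes, and the mask-then-shrink policy are illustrative assumptions.

    #include <cstdint>
    #include <vector>
    #include "arrow/util/bit_util.h"
    #include "parquet/encoding.h"

    // Decode up to `num_values` booleans into an LSB-ordered bit-packed buffer,
    // then zero the undefined trailing bits of the last byte so that the result
    // can be compared byte-for-byte.
    std::vector<uint8_t> DecodeToBitmap(parquet::BooleanDecoder* decoder, int num_values) {
      std::vector<uint8_t> bitmap(::arrow::bit_util::BytesForBits(num_values), 0);
      // Decode() may return fewer values than requested at the end of a data page.
      const int decoded = decoder->Decode(bitmap.data(), num_values);
      const int trailing = decoded % 8;
      if (trailing != 0) {
        // Keep only the low `trailing` bits of the final byte (LSB bit order).
        bitmap[decoded / 8] &= static_cast<uint8_t>((1u << trailing) - 1);
      }
      bitmap.resize(::arrow::bit_util::BytesForBits(decoded));
      return bitmap;
    }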
diff --git a/cpp/src/parquet/encoding_benchmark.cc b/cpp/src/parquet/encoding_benchmark.cc index 61959b659f633..a858c53e931d8 100644 --- a/cpp/src/parquet/encoding_benchmark.cc +++ b/cpp/src/parquet/encoding_benchmark.cc @@ -66,6 +66,7 @@ static void BM_PlainEncodingBoolean(benchmark::State& state) { typed_encoder->FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); + state.SetItemsProcessed(state.iterations() * state.range(0)); } BENCHMARK(BM_PlainEncodingBoolean)->Range(MIN_RANGE, MAX_RANGE); @@ -86,11 +87,34 @@ static void BM_PlainDecodingBoolean(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); + state.SetItemsProcessed(state.iterations() * state.range(0)); delete[] output; } BENCHMARK(BM_PlainDecodingBoolean)->Range(MIN_RANGE, MAX_RANGE); +static void BM_PlainDecodingBooleanToBitmap(benchmark::State& state) { + std::vector<bool> values(state.range(0), true); + int64_t bitmap_bytes = ::arrow::bit_util::BytesForBits(state.range(0)); + std::vector<uint8_t> output(bitmap_bytes, 0); + auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN); + auto typed_encoder = dynamic_cast<BooleanEncoder*>(encoder.get()); + typed_encoder->Put(values, static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(output.data(), static_cast<int>(values.size())); + } + // Still set `BytesProcessed` at the byte level. + state.SetBytesProcessed(state.iterations() * bitmap_bytes); + state.SetItemsProcessed(state.iterations() * state.range(0)); +} + +BENCHMARK(BM_PlainDecodingBooleanToBitmap)->Range(MIN_RANGE, MAX_RANGE); + static void BM_PlainEncodingInt64(benchmark::State& state) { std::vector<int64_t> values(state.range(0), 64); auto encoder = MakeTypedEncoder<Int64Type>(Encoding::PLAIN); @@ -1097,8 +1121,11 @@ BENCHMARK(BM_DictDecodingByteArray)->Apply(ByteArrayCustomArguments); using ::arrow::BinaryBuilder; using ::arrow::BinaryDictionary32Builder; -class BenchmarkDecodeArrow : public ::benchmark::Fixture { +template <typename ParquetType> +class BenchmarkDecodeArrowBase : public ::benchmark::Fixture { public: + virtual ~BenchmarkDecodeArrowBase() = default; + void SetUp(const ::benchmark::State& state) override { num_values_ = static_cast<int>(state.range()); InitDataInputs(); @@ -1111,37 +1138,18 @@ class BenchmarkDecodeArrow : public ::benchmark::Fixture { values_.clear(); } - void InitDataInputs() { - // Generate a random string dictionary without any nulls so that this dataset can - // be used for benchmarking the DecodeArrowNonNull API - constexpr int repeat_factor = 8; - constexpr int64_t min_length = 2; - constexpr int64_t max_length = 10; - ::arrow::random::RandomArrayGenerator rag(0); - input_array_ = rag.StringWithRepeats(num_values_, num_values_ / repeat_factor, - min_length, max_length, /*null_probability=*/0); - valid_bits_ = input_array_->null_bitmap_data(); - total_size_ = input_array_->data()->buffers[2]->size(); - - values_.reserve(num_values_); - const auto& binary_array = static_cast<const ::arrow::BinaryArray&>(*input_array_); - for (int64_t i = 0; i < binary_array.length(); i++) { - auto view = binary_array.GetView(i); - values_.emplace_back(static_cast<uint32_t>(view.length()), - reinterpret_cast<const uint8_t*>(view.data())); - } - } - + virtual void InitDataInputs() = 0; virtual void DoEncodeArrow() = 0; virtual void DoEncodeLowLevel() = 0; - - virtual std::unique_ptr<TypedDecoder<ByteArrayType>> InitializeDecoder() = 0; + virtual std::unique_ptr<TypedDecoder<ParquetType>>
InitializeDecoder() = 0; + virtual typename EncodingTraits<ParquetType>::Accumulator CreateAccumulator() = 0; void EncodeArrowBenchmark(benchmark::State& state) { for (auto _ : state) { DoEncodeArrow(); } state.SetBytesProcessed(state.iterations() * total_size_); + state.SetItemsProcessed(state.iterations() * num_values_); } void EncodeLowLevelBenchmark(benchmark::State& state) { @@ -1149,26 +1157,27 @@ class BenchmarkDecodeArrow : public ::benchmark::Fixture { DoEncodeLowLevel(); } state.SetBytesProcessed(state.iterations() * total_size_); + state.SetItemsProcessed(state.iterations() * num_values_); } void DecodeArrowDenseBenchmark(benchmark::State& state) { for (auto _ : state) { auto decoder = InitializeDecoder(); - typename EncodingTraits<ByteArrayType>::Accumulator acc; - acc.builder.reset(new BinaryBuilder); + auto acc = CreateAccumulator(); decoder->DecodeArrow(num_values_, 0, valid_bits_, 0, &acc); } state.SetBytesProcessed(state.iterations() * total_size_); + state.SetItemsProcessed(state.iterations() * num_values_); } void DecodeArrowNonNullDenseBenchmark(benchmark::State& state) { for (auto _ : state) { auto decoder = InitializeDecoder(); - typename EncodingTraits<ByteArrayType>::Accumulator acc; - acc.builder.reset(new BinaryBuilder); + auto acc = CreateAccumulator(); decoder->DecodeArrowNonNull(num_values_, &acc); } state.SetBytesProcessed(state.iterations() * total_size_); + state.SetItemsProcessed(state.iterations() * num_values_); } void DecodeArrowDictBenchmark(benchmark::State& state) { @@ -1179,6 +1188,7 @@ class BenchmarkDecodeArrow : public ::benchmark::Fixture { } state.SetBytesProcessed(state.iterations() * total_size_); + state.SetItemsProcessed(state.iterations() * num_values_); } void DecodeArrowNonNullDictBenchmark(benchmark::State& state) { @@ -1189,20 +1199,56 @@ class BenchmarkDecodeArrow : public ::benchmark::Fixture { } state.SetBytesProcessed(state.iterations() * total_size_); + state.SetItemsProcessed(state.iterations() * num_values_); } protected: - int num_values_; + int num_values_{0}; std::shared_ptr<::arrow::Array> input_array_; - std::vector<ByteArray> values_; - uint64_t total_size_; - const uint8_t* valid_bits_; + uint64_t total_size_{0}; + const uint8_t* valid_bits_{nullptr}; std::shared_ptr<Buffer> buffer_; + std::vector<typename ParquetType::c_type> values_; +}; + +class BenchmarkDecodeArrowByteArray : public BenchmarkDecodeArrowBase<ByteArrayType> { + public: + using ByteArrayAccumulator = typename EncodingTraits<ByteArrayType>::Accumulator; + + ByteArrayAccumulator CreateAccumulator() final { + ByteArrayAccumulator acc; + acc.builder = std::make_unique<BinaryBuilder>(default_memory_pool()); + return acc; + } + + void InitDataInputs() final { + // Generate a random string dictionary without any nulls so that this dataset can + // be used for benchmarking the DecodeArrowNonNull API + constexpr int repeat_factor = 8; + constexpr int64_t min_length = 2; + constexpr int64_t max_length = 10; + ::arrow::random::RandomArrayGenerator rag(0); + input_array_ = rag.StringWithRepeats(num_values_, num_values_ / repeat_factor, + min_length, max_length, /*null_probability=*/0); + valid_bits_ = input_array_->null_bitmap_data(); + total_size_ = input_array_->data()->buffers[2]->size(); + + values_.reserve(num_values_); + const auto& binary_array = static_cast<const ::arrow::BinaryArray&>(*input_array_); + for (int64_t i = 0; i < binary_array.length(); i++) { + auto view = binary_array.GetView(i); + values_.emplace_back(static_cast<uint32_t>(view.length()), + reinterpret_cast<const uint8_t*>(view.data())); + } + } + + protected: + std::vector<ByteArray> values_; }; // ---------------------------------------------------------------------- // Benchmark Decoding
from Plain Encoding -class BM_ArrowBinaryPlain : public BenchmarkDecodeArrow { +class BM_ArrowBinaryPlain : public BenchmarkDecodeArrowByteArray { public: void DoEncodeArrow() override { auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN); @@ -1251,7 +1297,7 @@ BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dict) // ---------------------------------------------------------------------- // Benchmark Decoding from Dictionary Encoding -class BM_ArrowBinaryDict : public BenchmarkDecodeArrow { +class BM_ArrowBinaryDict : public BenchmarkDecodeArrowByteArray { public: template <typename PutValuesFunc> void DoEncode(PutValuesFunc&& put_values) { @@ -1319,7 +1365,7 @@ class BM_ArrowBinaryDict : public BenchmarkDecodeArrow { } void TearDown(const ::benchmark::State& state) override { - BenchmarkDecodeArrow::TearDown(state); + BenchmarkDecodeArrowByteArray::TearDown(state); dict_buffer_.reset(); descr_.reset(); } @@ -1327,7 +1373,7 @@ class BM_ArrowBinaryDict : public BenchmarkDecodeArrow { protected: std::unique_ptr<ColumnDescriptor> descr_; std::shared_ptr<Buffer> dict_buffer_; - int num_dict_entries_; + int num_dict_entries_{0}; }; BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeArrow) @@ -1373,4 +1419,121 @@ BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict) BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict) ->Range(MIN_RANGE, MAX_RANGE); +class BenchmarkDecodeArrowBoolean : public BenchmarkDecodeArrowBase<BooleanType> { + public: + void InitDataInputs() final { + // Generate a random boolean array with `null_probability_`. + ::arrow::random::RandomArrayGenerator rag(0); + input_array_ = rag.Boolean(num_values_, /*true_probability=*/0.5, null_probability_); + valid_bits_ = input_array_->null_bitmap_data(); + + // Arrow uses a bitmap representation for boolean arrays, + // so we use this as the "total_size" for the benchmark.
+ total_size_ = ::arrow::bit_util::BytesForBits(num_values_); + + values_.reserve(num_values_); + const auto& boolean_array = static_cast<const ::arrow::BooleanArray&>(*input_array_); + for (int64_t i = 0; i < boolean_array.length(); i++) { + values_.push_back(boolean_array.Value(i)); + } + } + + typename EncodingTraits<BooleanType>::Accumulator CreateAccumulator() final { + return typename EncodingTraits<BooleanType>::Accumulator(); + } + + void DoEncodeLowLevel() final { ParquetException::NYI(); } + + void DecodeArrowWithNullDenseBenchmark(benchmark::State& state); + + protected: + void DoEncodeArrowImpl(Encoding::type encoding) { + auto encoder = MakeTypedEncoder<BooleanType>(encoding); + encoder->Put(*input_array_); + buffer_ = encoder->FlushValues(); + } + + std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoderImpl( + Encoding::type encoding) const { + auto decoder = MakeTypedDecoder<BooleanType>(encoding); + decoder->SetData(num_values_, buffer_->data(), static_cast<int>(buffer_->size())); + return decoder; + } + + protected: + double null_probability_ = 0.0; +}; + +void BenchmarkDecodeArrowBoolean::DecodeArrowWithNullDenseBenchmark( + benchmark::State& state) { + // Set null_probability_ from the second benchmark argument (nulls per ten thousand). + null_probability_ = static_cast<double>(state.range(1)) / 10000; + InitDataInputs(); + this->DoEncodeArrow(); + int num_values_with_nulls = this->num_values_; + + for (auto _ : state) { + auto decoder = this->InitializeDecoder(); + auto acc = this->CreateAccumulator(); + decoder->DecodeArrow( + num_values_with_nulls, + /*null_count=*/static_cast<int>(this->input_array_->null_count()), + this->valid_bits_, 0, &acc); + } + state.SetBytesProcessed(state.iterations() * static_cast<int64_t>(total_size_)); + state.SetItemsProcessed(state.iterations() * state.range(0)); +} + +class BM_DecodeArrowBooleanPlain : public BenchmarkDecodeArrowBoolean { + public: + void DoEncodeArrow() final { DoEncodeArrowImpl(Encoding::PLAIN); } + + std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoder() override { + return InitializeDecoderImpl(Encoding::PLAIN); + } +}; + +class BM_DecodeArrowBooleanRle : public BenchmarkDecodeArrowBoolean { + public: + void DoEncodeArrow() final { DoEncodeArrowImpl(Encoding::RLE); } + + std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoder() override { + return InitializeDecoderImpl(Encoding::RLE); + } +}; + +static void BooleanWithNullCustomArguments(benchmark::internal::Benchmark* b) { + b->ArgsProduct({ + benchmark::CreateRange(MIN_RANGE, MAX_RANGE, /*multi=*/4), + {1, 100, 1000, 5000, 10000}, + }) + ->ArgNames({"num_values", "null_in_ten_thousand"}); +} + +BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrow)(benchmark::State& state) { + DecodeArrowDenseBenchmark(state); +} +BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrow)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrowNonNull) +(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrowNonNull) + ->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrowWithNull) +(benchmark::State& state) { DecodeArrowWithNullDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrowWithNull) + ->Apply(BooleanWithNullCustomArguments); + +BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrow) +(benchmark::State& state) { DecodeArrowDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrow) + ->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrowNonNull) +(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
+BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrowNonNull) + ->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrowWithNull) +(benchmark::State& state) { DecodeArrowWithNullDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrowWithNull) + ->Apply(BooleanWithNullCustomArguments); + } // namespace parquet diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index ea0029f4c7d7f..b91fcb0839cba 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -602,7 +602,7 @@ TEST(PlainEncodingAdHoc, ArrowBinaryDirectPut) { // Check that one can put several Arrow arrays into a given encoder // and decode to the right values (see GH-36939) -TEST(PlainBooleanArrayEncoding, AdHocRoundTrip) { +TEST(BooleanArrayEncoding, AdHocRoundTrip) { std::vector<std::shared_ptr<::arrow::Array>> arrays{ ::arrow::ArrayFromJSON(::arrow::boolean(), R"([])"), ::arrow::ArrayFromJSON(::arrow::boolean(), R"([false, null, true])"), @@ -610,27 +610,173 @@ ::arrow::ArrayFromJSON(::arrow::boolean(), R"([true, null, false])"), }; - auto encoder = MakeTypedEncoder<BooleanType>(Encoding::PLAIN, - /*use_dictionary=*/false); - for (const auto& array : arrays) { - encoder->Put(*array); + for (auto encoding : {Encoding::PLAIN, Encoding::RLE}) { + auto encoder = MakeTypedEncoder<BooleanType>(encoding, + /*use_dictionary=*/false); + for (const auto& array : arrays) { + encoder->Put(*array); + } + auto buffer = encoder->FlushValues(); + auto decoder = MakeTypedDecoder<BooleanType>(encoding); + EXPECT_OK_AND_ASSIGN(auto expected, ::arrow::Concatenate(arrays)); + decoder->SetData(static_cast<int>(expected->length()), buffer->data(), + static_cast<int>(buffer->size())); + + ::arrow::BooleanBuilder builder; + ASSERT_EQ(static_cast<int>(expected->length() - expected->null_count()), + decoder->DecodeArrow(static_cast<int>(expected->length()), + static_cast<int>(expected->null_count()), + expected->null_bitmap_data(), 0, &builder)); + + std::shared_ptr<::arrow::Array> result; + ASSERT_OK(builder.Finish(&result)); + ASSERT_EQ(expected->length(), result->length()); + ::arrow::AssertArraysEqual(*expected, *result, /*verbose=*/true); } - auto buffer = encoder->FlushValues(); - auto decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN); - EXPECT_OK_AND_ASSIGN(auto expected, ::arrow::Concatenate(arrays)); - decoder->SetData(static_cast<int>(expected->length()), buffer->data(), - static_cast<int>(buffer->size())); +} - ::arrow::BooleanBuilder builder; - ASSERT_EQ(static_cast<int>(expected->length() - expected->null_count()), - decoder->DecodeArrow(static_cast<int>(expected->length()), - static_cast<int>(expected->null_count()), - expected->null_bitmap_data(), 0, &builder)); +class TestBooleanArrowDecoding : public ::testing::Test { + public: + // number of values including nulls + constexpr static int kNumValues = 10000; - std::shared_ptr<::arrow::Array> result; - ASSERT_OK(builder.Finish(&result)); - ASSERT_EQ(expected->length(), result->length()); - ::arrow::AssertArraysEqual(*expected, *result, /*verbose=*/true); + void SetUp() override { + null_probabilities_ = {0.0, 0.001, 0.5, 0.999, 1.0}; + read_batch_sizes_ = {1024, 4096, 10000}; + true_probabilities_ = {0.0, 0.001, 0.5, 0.999, 1.0}; + } + void TearDown() override {} + + void InitTestCase(Encoding::type encoding, double null_probability, + double true_probability) { + GenerateInputData(null_probability, true_probability); + SetupEncoderDecoder(encoding); + } + + void GenerateInputData(double null_probability, double true_probability) { + 
::arrow::random::RandomArrayGenerator rag(0); + expected_dense_ = rag.Boolean(kNumValues, true_probability, null_probability); + null_count_ = static_cast<int>(expected_dense_->null_count()); + valid_bits_ = expected_dense_->null_bitmap_data(); + + // Initialize input_data_ for the encoder from the expected_dense_ values + const auto& boolean_array = + checked_cast<const ::arrow::BooleanArray&>(*expected_dense_); + input_data_.resize(boolean_array.length()); + + for (int64_t i = 0; i < boolean_array.length(); ++i) { + input_data_[i] = boolean_array.Value(i); + } + } + + // Set up an encoder/decoder pair for testing with the given boolean encoding + void SetupEncoderDecoder(Encoding::type encoding) { + encoder_ = MakeTypedEncoder<BooleanType>(encoding); + decoder_ = MakeTypedDecoder<BooleanType>(encoding); + const auto* data_ptr = reinterpret_cast<const bool*>(input_data_.data()); + if (valid_bits_ != nullptr) { + ASSERT_NO_THROW(encoder_->PutSpaced(data_ptr, kNumValues, valid_bits_, 0)); + } else { + ASSERT_NO_THROW(encoder_->Put(data_ptr, kNumValues)); + } + buffer_ = encoder_->FlushValues(); + ResetTheDecoder(); + } + + void ResetTheDecoder() { + ASSERT_NE(nullptr, buffer_); + decoder_->SetData(kNumValues, buffer_->data(), static_cast<int>(buffer_->size())); + } + + void CheckDense(int actual_num_values, const ::arrow::Array& chunk) { + ASSERT_EQ(actual_num_values, kNumValues - null_count_); + ASSERT_ARRAYS_EQUAL(chunk, *expected_dense_); + } + + void CheckDecodeArrow(Encoding::type encoding) { + for (double np : null_probabilities_) { + for (double true_prob : true_probabilities_) { + InitTestCase(encoding, np, true_prob); + for (int read_batch_size : this->read_batch_sizes_) { + ResetTheDecoder(); + + int num_values_left = kNumValues; + ::arrow::BooleanBuilder acc; + int actual_num_not_null_values = 0; + while (num_values_left > 0) { + int batch_size = std::min(num_values_left, read_batch_size); + ASSERT_NE(0, batch_size); + // Count the nulls in this batch + int64_t batch_null_count = 0; + if (null_count_ != 0) { + batch_null_count = + batch_size - ::arrow::internal::CountSetBits( + valid_bits_, kNumValues - num_values_left, batch_size); + } + int batch_num_values = + decoder_->DecodeArrow(batch_size, static_cast<int>(batch_null_count), + valid_bits_, kNumValues - num_values_left, &acc); + actual_num_not_null_values += batch_num_values; + num_values_left -= batch_size; + } + std::shared_ptr<::arrow::Array> chunk; + ASSERT_OK(acc.Finish(&chunk)); + CheckDense(actual_num_not_null_values, *chunk); + } + } + } + } + + void CheckDecodeArrowNonNull(Encoding::type encoding) { + // The NonNull path skips cases with null_prob != 0.
+ for (auto true_prob : true_probabilities_) { + InitTestCase(encoding, /*null_probability=*/0, true_prob); + for (int read_batch_size : this->read_batch_sizes_) { + // Reset the decoder + ResetTheDecoder(); + ::arrow::BooleanBuilder acc; + int actual_num_values = 0; + int num_values_left = kNumValues; + while (num_values_left > 0) { + int batch_size = std::min(num_values_left, read_batch_size); + int batch_num_values = decoder_->DecodeArrowNonNull(batch_size, &acc); + actual_num_values += batch_num_values; + num_values_left -= batch_num_values; + } + std::shared_ptr<::arrow::Array> chunk; + ASSERT_OK(acc.Finish(&chunk)); + CheckDense(actual_num_values, *chunk); + } + } + } + + protected: + std::vector<double> null_probabilities_; + std::vector<double> true_probabilities_; + std::vector<int> read_batch_sizes_; + std::shared_ptr<::arrow::Array> expected_dense_; + int null_count_{0}; + std::vector<uint8_t> input_data_; + const uint8_t* valid_bits_ = NULLPTR; + std::unique_ptr<BooleanEncoder> encoder_; + std::unique_ptr<BooleanDecoder> decoder_; + std::shared_ptr<Buffer> buffer_; +}; + +TEST_F(TestBooleanArrowDecoding, CheckDecodeArrowUsingPlain) { + this->CheckDecodeArrow(Encoding::PLAIN); +} + +TEST_F(TestBooleanArrowDecoding, CheckDecodeArrowNonNullPlain) { + this->CheckDecodeArrowNonNull(Encoding::PLAIN); +} + +TEST_F(TestBooleanArrowDecoding, CheckDecodeArrowRle) { + this->CheckDecodeArrow(Encoding::RLE); +} + +TEST_F(TestBooleanArrowDecoding, CheckDecodeArrowNonNullRle) { + this->CheckDecodeArrowNonNull(Encoding::RLE); } template @@ -963,8 +1109,6 @@ TYPED_TEST(EncodingAdHocTyped, ByteStreamSplitArrowDirectPut) { } TYPED_TEST(EncodingAdHocTyped, RleArrowDirectPut) { - // TODO: test with nulls once RleBooleanDecoder::DecodeArrow supports them - this->null_probability_ = 0; for (auto seed : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) { this->Rle(seed); } diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs b/csharp/src/Apache.Arrow/Arrays/StringArray.cs index af77fe1b1a83d..a3ec596adc7ba 100644 --- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs @@ -13,12 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License.
-using Apache.Arrow.Types; using System; using System.Collections; using System.Collections.Generic; using System.Runtime.InteropServices; using System.Text; +using Apache.Arrow.Types; namespace Apache.Arrow { @@ -26,6 +26,8 @@ public class StringArray: BinaryArray, IReadOnlyList<string> { public static readonly Encoding DefaultEncoding = Encoding.UTF8; + private Dictionary<Encoding, string[]> materializedStringStore; + public new class Builder : BuilderBase<StringArray, Builder> { public Builder() : base(StringType.Default) { } @@ -71,16 +73,28 @@ public StringArray(int length, public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + /// <summary> + /// Get the string value at the given index + /// </summary> + /// <param name="index">Input index</param> + /// <param name="encoding">Optional: the string encoding, default is UTF8</param> + /// <returns>The string object at the given index</returns> public string GetString(int index, Encoding encoding = default) { encoding ??= DefaultEncoding; + if (materializedStringStore != null && materializedStringStore.TryGetValue(encoding, out string[] materializedStrings)) + { + return materializedStrings[index]; + } + ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull); if (isNull) { return null; } + if (bytes.Length == 0) { return string.Empty; @@ -93,6 +107,50 @@ public string GetString(int index, Encoding encoding = default) } } + /// <summary> + /// Materialize the array for the given encoding to accelerate the string access + /// </summary> + /// <param name="encoding">Optional: the string encoding, default is UTF8</param> + /// <remarks>This method is not thread safe when it is called in parallel with <see cref="GetString(int, Encoding)"/> or <see cref="IsMaterialized(Encoding)"/>.</remarks> + public void Materialize(Encoding encoding = default) + { + encoding ??= DefaultEncoding; + + if (IsMaterialized(encoding)) + { + return; + } + + if (materializedStringStore == null) + { + materializedStringStore = new Dictionary<Encoding, string[]>(); + } + + var stringStore = new string[Length]; + for (int i = 0; i < Length; i++) + { + stringStore[i] = GetString(i, encoding); + } + + materializedStringStore[encoding] = stringStore; + } + + /// <summary> + /// Check if the array has been materialized for the given encoding + /// </summary> + /// <param name="encoding">Optional: the string encoding, default is UTF8</param> + /// <returns>True or false depending on whether the array has been materialized</returns> + public bool IsMaterialized(Encoding encoding = default) + { + if (materializedStringStore == null) + { + return false; + } + + encoding ??= DefaultEncoding; + return materializedStringStore.ContainsKey(encoding); + } + int IReadOnlyCollection<string>.Count => Length; string IReadOnlyList<string>.this[int index] => GetString(index); diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index fbb2be661fc5d..abe02dcbb591f 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -17,6 +17,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.Runtime.InteropServices; using Apache.Arrow.Memory; using Apache.Arrow.Types; @@ -36,7 +37,7 @@ public static class CArrowArrayImporter /// Typically, you will allocate an uninitialized CArrowArray pointer, /// pass that to an external function, and then use this method to import /// the result. - /// + /// /// /// CArrowArray* importedPtr = CArrowArray.Create(); /// foreign_export_function(importedPtr); @@ -71,7 +72,7 @@ public static unsafe IArrowArray ImportArray(CArrowArray* ptr, IArrowType type) /// Typically, you will allocate an uninitialized CArrowArray pointer, /// pass that to an external function, and then use this method to import /// the result.
- /// + /// /// /// CArrowArray* importedPtr = CArrowArray.Create(); /// foreign_export_function(importedPtr); @@ -256,6 +257,19 @@ private ArrowBuffer ImportValidityBuffer(CArrowArray* cArray) return (cArray->buffers[0] == null) ? ArrowBuffer.Empty : new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, validityLength)); } + private ArrowBuffer ImportCArrayBuffer(CArrowArray* cArray, int i, int lengthBytes) + { + if (lengthBytes > 0) + { + Debug.Assert(cArray->buffers[i] != null); + return new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[i], 0, lengthBytes)); + } + else + { + return ArrowBuffer.Empty; + } + } + private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 3) @@ -266,12 +280,13 @@ private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray) int length = checked((int)cArray->length); int offsetsLength = (length + 1) * 4; int* offsets = (int*)cArray->buffers[1]; + Debug.Assert(offsets != null); int valuesLength = offsets[length]; ArrowBuffer[] buffers = new ArrowBuffer[3]; buffers[0] = ImportValidityBuffer(cArray); - buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); - buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, valuesLength)); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); + buffers[2] = ImportCArrayBuffer(cArray, 2, valuesLength); return buffers; } @@ -289,10 +304,10 @@ private ArrowBuffer[] ImportByteArrayViewBuffers(CArrowArray* cArray) long* bufferLengths = (long*)cArray->buffers[cArray->n_buffers - 1]; ArrowBuffer[] buffers = new ArrowBuffer[cArray->n_buffers - 1]; buffers[0] = ImportValidityBuffer(cArray); - buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, viewsLength)); + buffers[1] = ImportCArrayBuffer(cArray, 1, viewsLength); for (int i = 2; i < buffers.Length; i++) { - buffers[i] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[i], 0, checked((int)bufferLengths[i - 2]))); + buffers[i] = ImportCArrayBuffer(cArray, i, checked((int)bufferLengths[i - 2])); } return buffers; @@ -310,7 +325,7 @@ private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) ArrowBuffer[] buffers = new ArrowBuffer[2]; buffers[0] = ImportValidityBuffer(cArray); - buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); return buffers; } @@ -327,8 +342,8 @@ private ArrowBuffer[] ImportListViewBuffers(CArrowArray* cArray) ArrowBuffer[] buffers = new ArrowBuffer[3]; buffers[0] = ImportValidityBuffer(cArray); - buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); - buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, offsetsLength)); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); + buffers[2] = ImportCArrayBuffer(cArray, 2, offsetsLength); return buffers; } @@ -356,8 +371,8 @@ private ArrowBuffer[] ImportDenseUnionBuffers(CArrowArray* cArray) int offsetsLength = length * 4; ArrowBuffer[] buffers = new ArrowBuffer[2]; - buffers[0] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, length)); - buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); + buffers[0] = ImportCArrayBuffer(cArray, 0, length); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); return buffers; } @@ -370,7 +385,7 @@ private ArrowBuffer[] ImportSparseUnionBuffers(CArrowArray* cArray) } ArrowBuffer[] buffers = new ArrowBuffer[1]; - buffers[0] = new 
ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, checked((int)cArray->length))); + buffers[0] = ImportCArrayBuffer(cArray, 0, checked((int)cArray->length)); return buffers; } @@ -392,7 +407,7 @@ private ArrowBuffer[] ImportFixedWidthBuffers(CArrowArray* cArray, int bitWidth) ArrowBuffer[] buffers = new ArrowBuffer[2]; buffers[0] = ImportValidityBuffer(cArray); - buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, valuesLength)); + buffers[1] = ImportCArrayBuffer(cArray, 1, valuesLength); return buffers; } diff --git a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj index df76fd4a7b45f..d44b7488e3b17 100644 --- a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj +++ b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj @@ -6,8 +6,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj index 3aaebb103f9da..d7a2042a4581a 100644 --- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj +++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs b/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs index 0fd3d3d105a70..b19731535a29d 100644 --- a/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/StringArrayTests.cs @@ -49,6 +49,37 @@ public void ReturnsAppendedValue(string firstValue, string secondValue) // Assert Assert.Equal(firstValue, retrievedValue); } + + [Theory] + [InlineData(null, null)] + [InlineData(null, "")] + [InlineData(null, "value")] + [InlineData("", null)] + [InlineData("", "")] + [InlineData("", "value")] + [InlineData("value", null)] + [InlineData("value", "")] + [InlineData("value", "value")] + public void ReturnsAppendedValueMaterialize(string firstValue, string secondValue) + { + // Arrange + // Create an array with two elements. The second element being null, + // empty, or non-empty may influence the underlying BinaryArray + // storage such that retrieving an empty first element could result + // in an empty span or a 0-length span backed by storage. 
+ var array = new StringArray.Builder() + .Append(firstValue) + .Append(secondValue) + .Build(); + + // Act + array.Materialize(); + var retrievedValue = array.GetString(0); + + // Assert + Assert.True(array.IsMaterialized()); + Assert.Equal(firstValue, retrievedValue); + } } } } diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 32921afb2e61b..5fa41e28a3208 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -44,6 +44,7 @@ @click.group(context_settings={"help_option_names": ["-h", "--help"]}) @click.option("--debug", type=BOOL, is_flag=True, default=False, + envvar='ARCHERY_DEBUG', help="Increase logging with debugging output.") @click.option("--pdb", type=BOOL, is_flag=True, default=False, help="Invoke pdb on uncaught exception.") diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index 20d9a16138bac..7053db2afccff 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -46,10 +46,19 @@ def _execute(self, *args, **kwargs): callback=validate_arrow_sources, help="Specify Arrow source directory.") @click.option('--dry-run/--execute', default=False, - help="Display the docker-compose commands instead of executing " - "them.") + help="Display the docker commands instead of executing them.") +@click.option('--using-docker-cli', default=False, is_flag=True, + envvar='ARCHERY_USE_DOCKER_CLI', + help="Use docker CLI directly for building instead of calling " + "docker-compose. This may help to reuse cached layers.") +@click.option('--using-docker-buildx', default=False, is_flag=True, + envvar='ARCHERY_USE_DOCKER_BUILDX', + help="Use buildx with docker CLI directly for building instead " + "of calling docker-compose or the plain docker build " + "command. This option makes the build cache reusable " + "across hosts.") @click.pass_context -def docker(ctx, src, dry_run): +def docker(ctx, src, dry_run, using_docker_cli, using_docker_buildx): """ Interact with docker-compose based builds. """ @@ -64,8 +73,13 @@ def docker(ctx, src, dry_run): # take the docker-compose parameters like PYTHON, PANDAS, UBUNTU from the # environment variables to keep the usage similar to docker-compose + using_docker_cli |= using_docker_buildx compose = DockerCompose(config_path, params=os.environ, - debug=ctx.obj.get('debug', False)) + using_docker=using_docker_cli, + using_buildx=using_docker_buildx, + debug=ctx.obj.get('debug', False), + compose_bin=("docker compose" if using_docker_cli + else "docker-compose")) if dry_run: _mock_compose_calls(compose) ctx.obj['compose'] = compose @@ -83,24 +97,19 @@ def check_config(obj): @docker.command('pull') @click.argument('image') -@click.option('--using-docker-cli', default=False, is_flag=True, - envvar='ARCHERY_USE_DOCKER_CLI', - help="Use docker CLI directly for pulling instead of calling " - "docker-compose. This may help to reuse cached layers.") @click.option('--pull-leaf/--no-leaf', default=True, help="Whether to pull leaf images too.") @click.option('--ignore-pull-failures/--no-ignore-pull-failures', default=True, help="Whether to ignore pull failures.") @click.pass_obj -def docker_pull(obj, image, *, using_docker_cli, pull_leaf, - ignore_pull_failures): +def docker_pull(obj, image, *, pull_leaf, ignore_pull_failures): """ Execute docker-compose pull. 
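For example, to pull an image together with its ancestor images
(an illustrative invocation; ubuntu-cpp is one of the images
defined in docker-compose.yml):

    archery docker pull ubuntu-cpp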
""" compose = obj['compose'] try: - compose.pull(image, pull_leaf=pull_leaf, using_docker=using_docker_cli, + compose.pull(image, pull_leaf=pull_leaf, ignore_pull_failures=ignore_pull_failures) except UndefinedImage as e: raise click.ClickException( @@ -115,16 +124,6 @@ def docker_pull(obj, image, *, using_docker_cli, pull_leaf, @click.argument('image') @click.option('--force-pull/--no-pull', default=True, help="Whether to force pull the image and its ancestor images") -@click.option('--using-docker-cli', default=False, is_flag=True, - envvar='ARCHERY_USE_DOCKER_CLI', - help="Use docker CLI directly for building instead of calling " - "docker-compose. This may help to reuse cached layers.") -@click.option('--using-docker-buildx', default=False, is_flag=True, - envvar='ARCHERY_USE_DOCKER_BUILDX', - help="Use buildx with docker CLI directly for building instead " - "of calling docker-compose or the plain docker build " - "command. This option makes the build cache reusable " - "across hosts.") @click.option('--use-cache/--no-cache', default=True, help="Whether to use cache when building the image and its " "ancestor images") @@ -133,22 +132,17 @@ def docker_pull(obj, image, *, using_docker_cli, pull_leaf, "passed as the argument. To disable caching for both the " "image and its ancestors use --no-cache option.") @click.pass_obj -def docker_build(obj, image, *, force_pull, using_docker_cli, - using_docker_buildx, use_cache, use_leaf_cache): +def docker_build(obj, image, *, force_pull, use_cache, use_leaf_cache): """ Execute docker-compose builds. """ compose = obj['compose'] - using_docker_cli |= using_docker_buildx try: if force_pull: - compose.pull(image, pull_leaf=use_leaf_cache, - using_docker=using_docker_cli) + compose.pull(image, pull_leaf=use_leaf_cache) compose.build(image, use_cache=use_cache, use_leaf_cache=use_leaf_cache, - using_docker=using_docker_cli, - using_buildx=using_docker_buildx, pull_parents=force_pull) except UndefinedImage as e: raise click.ClickException( @@ -172,16 +166,6 @@ def docker_build(obj, image, *, force_pull, using_docker_cli, help="Whether to force build the image and its ancestor images") @click.option('--build-only', default=False, is_flag=True, help="Pull and/or build the image, but do not run it") -@click.option('--using-docker-cli', default=False, is_flag=True, - envvar='ARCHERY_USE_DOCKER_CLI', - help="Use docker CLI directly for building instead of calling " - "docker-compose. This may help to reuse cached layers.") -@click.option('--using-docker-buildx', default=False, is_flag=True, - envvar='ARCHERY_USE_DOCKER_BUILDX', - help="Use buildx with docker CLI directly for building instead " - "of calling docker-compose or the plain docker build " - "command. This option makes the build cache reusable " - "across hosts.") @click.option('--use-cache/--no-cache', default=True, help="Whether to use cache when building the image and its " "ancestor images") @@ -191,7 +175,7 @@ def docker_build(obj, image, *, force_pull, using_docker_cli, "image and its ancestors use --no-cache option.") @click.option('--resource-limit', default=None, help="A CPU/memory limit preset to mimic CI environments like " - "GitHub Actions. Implies --using-docker-cli. Note that " + "GitHub Actions. Mandates --using-docker-cli. 
Note that " "exporting ARCHERY_DOCKER_BIN=\"sudo docker\" is likely " "required, unless Docker is configured with cgroups v2 " "(else Docker will silently ignore the limits).") @@ -199,8 +183,8 @@ def docker_build(obj, image, *, force_pull, using_docker_cli, help="Set volume within the container") @click.pass_obj def docker_run(obj, image, command, *, env, user, force_pull, force_build, - build_only, using_docker_cli, using_docker_buildx, use_cache, - use_leaf_cache, resource_limit, volume): + build_only, use_cache, use_leaf_cache, resource_limit, + volume): """ Execute docker-compose builds. @@ -234,18 +218,14 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, archery docker run ubuntu-cpp bash """ compose = obj['compose'] - using_docker_cli |= using_docker_buildx env = dict(kv.split('=', 1) for kv in env) try: if force_pull: - compose.pull(image, pull_leaf=use_leaf_cache, - using_docker=using_docker_cli) + compose.pull(image, pull_leaf=use_leaf_cache) if force_build: compose.build(image, use_cache=use_cache, - use_leaf_cache=use_leaf_cache, - using_docker=using_docker_cli, - using_buildx=using_docker_buildx) + use_leaf_cache=use_leaf_cache) if build_only: return compose.run( @@ -253,7 +233,6 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, command=command, env=env, user=user, - using_docker=using_docker_cli, resource_limit=resource_limit, volumes=volume ) @@ -273,15 +252,11 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, @click.option('--password', '-p', required=False, envvar='ARCHERY_DOCKER_PASSWORD', help='Docker repository password') -@click.option('--using-docker-cli', default=False, is_flag=True, - help="Use docker CLI directly for building instead of calling " - "docker-compose. 
This may help to reuse cached layers.") @click.pass_obj -def docker_compose_push(obj, image, user, password, using_docker_cli): +def docker_compose_push(obj, image, user, password): """Push the generated docker-compose image.""" compose = obj['compose'] - compose.push(image, user=user, password=password, - using_docker=using_docker_cli) + compose.push(image, user=user, password=password) @docker.command('images') diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index 184d9808759b8..0b49111dd6944 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -58,12 +58,21 @@ class UndefinedImage(Exception): class ComposeConfig: - def __init__(self, config_path, dotenv_path, compose_bin, params=None): + def __init__(self, config_path, dotenv_path, compose_bin, + using_docker=False, using_buildx=False, + params=None, debug=False): + self.using_docker = using_docker + self.using_buildx = using_buildx + self.debug = debug config_path = _ensure_path(config_path) if dotenv_path: dotenv_path = _ensure_path(dotenv_path) else: dotenv_path = config_path.parent / '.env' + if self.debug: + # Log docker version + Docker().run('version') + self._read_env(dotenv_path, params) self._read_config(config_path, compose_bin) @@ -122,8 +131,13 @@ def _read_config(self, config_path, compose_bin): ) # trigger docker-compose's own validation - compose = Command('docker-compose') - args = ['--file', str(config_path), 'config'] + if self.using_docker: + compose = Docker() + args = ['compose'] + else: + compose = Command('docker-compose') + args = [] + args += ['--file', str(config_path), 'config'] result = compose.run(*args, env=self.env, check=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE) @@ -164,12 +178,13 @@ def __init__(self, docker_bin=None): class DockerCompose(Command): def __init__(self, config_path, dotenv_path=None, compose_bin=None, - params=None, debug=False): + using_docker=False, using_buildx=False, params=None, + debug=False): compose_bin = default_bin(compose_bin, 'docker-compose') self.config = ComposeConfig(config_path, dotenv_path, compose_bin, - params) + params=params, using_docker=using_docker, + using_buildx=using_buildx, debug=debug) self.bin = compose_bin - self.debug = debug self.pull_memory = set() def clear_pull_memory(self): @@ -215,14 +230,13 @@ def _execute_docker(self, *args, **kwargs): ) ) - def pull(self, service_name, pull_leaf=True, using_docker=False, - ignore_pull_failures=True): + def pull(self, service_name, pull_leaf=True, ignore_pull_failures=True): def _pull(service): args = ['pull'] if service['image'] in self.pull_memory: return - if using_docker: + if self.config.using_docker: try: self._execute_docker(*args, service['image']) except Exception as e: @@ -245,7 +259,7 @@ def _pull(service): _pull(service) def build(self, service_name, use_cache=True, use_leaf_cache=True, - using_docker=False, using_buildx=False, pull_parents=True): + pull_parents=True): def _build(service, use_cache): if 'build' not in service: # nothing to do @@ -273,7 +287,7 @@ def _build(service, use_cache): if self.config.env.get('BUILDKIT_INLINE_CACHE') == '1': args.extend(['--build-arg', 'BUILDKIT_INLINE_CACHE=1']) - if using_buildx: + if self.config.using_buildx: for k, v in service['build'].get('args', {}).items(): args.extend(['--build-arg', '{}={}'.format(k, v)]) @@ -295,9 +309,9 @@ def _build(service, use_cache): service['build'].get('context', '.') ]) self._execute_docker("buildx", "build", *args) - elif using_docker: + 
elif self.config.using_docker: # better for caching - if self.debug: + if self.config.debug: args.append("--progress=plain") for k, v in service['build'].get('args', {}).items(): args.extend(['--build-arg', '{}={}'.format(k, v)]) @@ -310,7 +324,7 @@ def _build(service, use_cache): ]) self._execute_docker("build", *args) else: - if self.debug: + if self.config.debug: args.append("--progress=plain") self._execute_compose("build", *args, service['name']) @@ -322,7 +336,7 @@ def _build(service, use_cache): _build(service, use_cache=use_cache and use_leaf_cache) def run(self, service_name, command=None, *, env=None, volumes=None, - user=None, using_docker=False, resource_limit=None): + user=None, resource_limit=None): service = self.config.get(service_name) args = [] @@ -337,7 +351,7 @@ def run(self, service_name, command=None, *, env=None, volumes=None, for volume in volumes: args.extend(['--volume', volume]) - if using_docker or service['need_gpu'] or resource_limit: + if self.config.using_docker or service['need_gpu'] or resource_limit: # use gpus, requires docker>=19.03 if service['need_gpu']: args.extend(['--gpus', 'all']) @@ -388,6 +402,10 @@ def run(self, service_name, command=None, *, env=None, volumes=None, # on the docker-compose yaml file. if isinstance(cmd, list): cmd = shlex.join(cmd) + # Match behaviour from docker compose + # to interpolate environment variables + # https://docs.docker.com/compose/compose-file/12-interpolation/ + cmd = cmd.replace("$$", "$") args.extend(shlex.split(cmd)) # execute as a plain docker cli command @@ -399,9 +417,9 @@ def run(self, service_name, command=None, *, env=None, volumes=None, args.append(command) self._execute_compose('run', '--rm', *args) - def push(self, service_name, user=None, password=None, using_docker=False): + def push(self, service_name, user=None, password=None): def _push(service): - if using_docker: + if self.config.using_docker: return self._execute_docker('push', service['image']) else: return self._execute_compose('push', service['name']) diff --git a/dev/archery/archery/docker/tests/test_docker_cli.py b/dev/archery/archery/docker/tests/test_docker_cli.py index ab39c7b9dbb4a..c117a3edfff65 100644 --- a/dev/archery/archery/docker/tests/test_docker_cli.py +++ b/dev/archery/archery/docker/tests/test_docker_cli.py @@ -33,14 +33,12 @@ def test_docker_run_with_custom_command(run, build, pull): assert result.exit_code == 0 pull.assert_called_once_with( - "ubuntu-cpp", pull_leaf=True, using_docker=False + "ubuntu-cpp", pull_leaf=True, ) build.assert_called_once_with( "ubuntu-cpp", use_cache=True, use_leaf_cache=True, - using_docker=False, - using_buildx=False ) run.assert_called_once_with( "ubuntu-cpp", @@ -48,7 +46,6 @@ def test_docker_run_with_custom_command(run, build, pull): env={}, resource_limit=None, user=None, - using_docker=False, volumes=(), ) @@ -75,14 +72,12 @@ def test_docker_run_options(run, build, pull): result = CliRunner().invoke(docker, args) assert result.exit_code == 0 pull.assert_called_once_with( - "ubuntu-cpp", pull_leaf=True, using_docker=False + "ubuntu-cpp", pull_leaf=True, ) build.assert_called_once_with( "ubuntu-cpp", use_cache=True, use_leaf_cache=True, - using_docker=False, - using_buildx=False ) run.assert_called_once_with( "ubuntu-cpp", @@ -90,7 +85,6 @@ def test_docker_run_options(run, build, pull): env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"}, resource_limit=None, user="root", - using_docker=False, volumes=( "./build:/build", "./ccache:/ccache:delegated", @@ -126,7 +120,6 @@ def 
test_docker_limit_options(run): env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"}, resource_limit="github", user="root", - using_docker=False, volumes=( "./build:/build", "./ccache:/ccache:delegated", @@ -145,7 +138,6 @@ def test_docker_run_without_pulling_or_building(run): env={}, resource_limit=None, user=None, - using_docker=False, volumes=(), ) @@ -157,14 +149,12 @@ def test_docker_run_only_pulling_and_building(build, pull): result = CliRunner().invoke(docker, args) assert result.exit_code == 0 pull.assert_called_once_with( - "ubuntu-cpp", pull_leaf=True, using_docker=False + "ubuntu-cpp", pull_leaf=True, ) build.assert_called_once_with( "ubuntu-cpp", use_cache=True, use_leaf_cache=True, - using_docker=False, - using_buildx=False ) @@ -187,8 +177,6 @@ def test_docker_run_without_build_cache(run, build): "ubuntu-cpp", use_cache=False, use_leaf_cache=False, - using_docker=False, - using_buildx=False ) run.assert_called_once_with( "ubuntu-cpp", @@ -196,6 +184,5 @@ def test_docker_run_without_build_cache(run, build): env={}, resource_limit=None, user="me", - using_docker=False, volumes=(), ) diff --git a/dev/archery/requirements-test.txt b/dev/archery/requirements-test.txt index 208ec64cdf026..e3e62a993c2a2 100644 --- a/dev/archery/requirements-test.txt +++ b/dev/archery/requirements-test.txt @@ -1,2 +1,3 @@ +docker-compose pytest responses diff --git a/dev/tasks/docker-tests/azure.linux.yml b/dev/tasks/docker-tests/azure.linux.yml index be03957e925ed..b66bfbdfe940a 100644 --- a/dev/tasks/docker-tests/azure.linux.yml +++ b/dev/tasks/docker-tests/azure.linux.yml @@ -46,7 +46,7 @@ jobs: displayName: Setup Archery - script: | - archery docker run \ + archery --debug docker --using-docker-cli run \ -e ARROW_DOCS_VERSION="{{ arrow.no_rc_version }}" \ -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ {{ flags|default("") }} \ diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 03cbcc7c98fcc..0437ee7864979 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -30,6 +30,7 @@ jobs: ARCH: {{ '${{ matrix.platform.archery_arch }}' }} ARCH_ALIAS: {{ '${{ matrix.platform.archery_arch_alias }}' }} ARCH_SHORT: {{ '${{ matrix.platform.archery_arch_short }}' }} + ARCHERY_USE_DOCKER_CLI: {{ "${{matrix.platform.archery_use_docker_cli || '1'}}" }} strategy: fail-fast: false matrix: @@ -44,6 +45,7 @@ jobs: archery_arch: "arm64v8" archery_arch_alias: "aarch64" archery_arch_short: "arm64" + archery_use_docker_cli: "0" steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_free_space()|indent }} diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml index 6de3edfce07e1..9e24835b8b627 100644 --- a/dev/tasks/linux-packages/github.linux.yml +++ b/dev/tasks/linux-packages/github.linux.yml @@ -29,6 +29,7 @@ jobs: {% endif %} env: ARCHITECTURE: {{ architecture }} + ARCHERY_USE_DOCKER_CLI: {{ '0' if architecture == 'arm64' else '1' }} steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_login_dockerhub()|indent }} diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index bcafe53066ef8..f55a7f9481e56 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -23,6 +23,10 @@ on: push: branches: - "*-github-*" + +env: + ARCHERY_DEBUG: 1 + ARCHERY_USE_DOCKER_CLI: 1 {% endmacro %} {%- macro github_checkout_arrow(fetch_depth=1, submodules="recursive", action_v="4") -%} diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml 
index 41b18684cee10..0ff3c56b695eb 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -31,8 +31,10 @@ jobs: # archery uses these environment variables {% if arch == "amd64" %} ARCH: amd64 + ARCHERY_USE_DOCKER_CLI: 1 {% else %} ARCH: arm64v8 + ARCHERY_USE_DOCKER_CLI: 0 {% endif %} PYTHON: "{{ python_version }}" diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 5e1ef8d13b988..f98c0a2b48caa 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -80,6 +80,7 @@ groups: java: - "*java*" + - test-*spark* python: - test-*python* @@ -1090,6 +1091,7 @@ tasks: template: docker-tests/github.linux.yml params: env: + ARCHERY_USE_DOCKER_CLI: 0 UBUNTU: 20.04 flags: -e ARROW_SKYHOOK=ON image: ubuntu-cpp @@ -1164,6 +1166,14 @@ tasks: flags: "-e ARROW_CSV=ON -e ARROW_PARQUET=ON" image: ubuntu-cpp-minimal + test-ubuntu-22.04-cpp-emscripten: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 22.04 + image: ubuntu-cpp-emscripten + {% for python_version in ["3.8", "3.9", "3.10", "3.11", "3.12"] %} test-conda-python-{{ python_version }}: ci: github @@ -1456,12 +1466,16 @@ tasks: ci: github template: docker-tests/github.cuda.yml params: + env: + ARCHERY_USE_DOCKER_CLI: 0 image: ubuntu-cuda-cpp test-cuda-python: ci: github template: docker-tests/github.cuda.yml params: + env: + ARCHERY_USE_DOCKER_CLI: 0 image: ubuntu-cuda-python ############################## Fuzz tests ################################# @@ -1522,8 +1536,9 @@ tasks: template: docker-tests/github.linux.yml params: env: - PYTHON: "3.10" + ARCHERY_USE_DOCKER_CLI: 0 HDFS: "{{ hdfs_version }}" + PYTHON: "3.10" image: conda-python-hdfs {% endfor %} diff --git a/docker-compose.yml b/docker-compose.yml index 9b0610fe553b5..46717557bc337 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -157,6 +157,7 @@ x-hierarchy: - ubuntu-csharp - ubuntu-cpp-sanitizer - ubuntu-cpp-thread-sanitizer + - ubuntu-cpp-emscripten - ubuntu-r-sanitizer - ubuntu-r-valgrind - ubuntu-swift @@ -652,6 +653,31 @@ services: ARROW_USE_TSAN: "ON" command: *cpp-command + ubuntu-cpp-emscripten: + # Usage: + # docker-compose build ubuntu-cpp-emscripten + # docker-compose run --rm ubuntu-cpp-emscripten + # Parameters: + # ARCH: amd64, arm64v8, ... + # UBUNTU: 22.04 + image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp + build: + context: . + dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile + cache_from: + - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp + args: + arch: ${ARCH} + clang_tools: ${CLANG_TOOLS} + llvm: ${LLVM} + shm_size: *shm-size + volumes: *ubuntu-volumes + environment: + <<: [*common, *ccache, *sccache, *cpp] + ARROW_EMSCRIPTEN: "ON" + UBUNTU: + command: *cpp-command + fedora-cpp: # Usage: # docker-compose build fedora-cpp diff --git a/docs/source/conf.py b/docs/source/conf.py index ad8fa798d6aac..05340dc923c89 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -346,9 +346,9 @@ "icon": "fa-brands fa-square-github", }, { - "name": "Twitter", + "name": "X", "url": "https://twitter.com/ApacheArrow", - "icon": "fa-brands fa-square-twitter", + "icon": "fa-brands fa-square-x-twitter", }, ], "show_version_warning_banner": True, diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst new file mode 100644 index 0000000000000..b4c563aae1a3b --- /dev/null +++ b/docs/source/developers/cpp/emscripten.rst @@ -0,0 +1,99 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. 
See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + + +.. highlight:: console + +.. _developers-cpp-emscripten: + +=============================================== +Cross compiling for WebAssembly with Emscripten +=============================================== + +Prerequisites +------------- +You need CMake and compilers etc. installed as described in the normal build instructions. Before building with Emscripten, you also need to install Emscripten and +activate it using the commands below (see https://emscripten.org/docs/getting_started/downloads.html for details). + +.. code:: shell + + git clone https://github.com/emscripten-core/emsdk.git + cd emsdk + # replace <version> with the desired EMSDK version. + # e.g. for Pyodide 0.24, you need EMSDK version 3.1.45 + ./emsdk install <version> + ./emsdk activate <version> + source ./emsdk_env.sh + +If you want to build PyArrow for `Pyodide <https://pyodide.org>`_, you +need ``pyodide-build`` installed via ``pip``, and to be running with the +same version of Python that Pyodide is built for, along with the same +versions of emsdk tools. + +.. code:: shell + + # install Pyodide build tools. + # e.g. for version 0.24 of Pyodide: + pip install pyodide-build==0.24 + +Then build with the ``ninja-release-emscripten`` CMake preset, +like below: + +.. code:: shell + + emcmake cmake --preset "ninja-release-emscripten" + ninja install + +This will install a static library build of ``libarrow`` into the +Emscripten sysroot cache, meaning you can build things that depend on it +and they will find ``libarrow``. + +For example, if you want to build for Pyodide, run the commands above, then +go to ``arrow/python`` and run + +.. code:: shell + + pyodide build + +It should make a wheel targeting the currently enabled version of +Pyodide (i.e. the version corresponding to the currently installed +``pyodide-build``) in the ``dist`` subdirectory. + + +Manual Build +------------ + +If you want to manually build for Emscripten, take a look at the +``CMakePresets.json`` file in the ``arrow/cpp`` directory for a list of things +you will need to override. In particular you will need the following +(a combined sketch follows the list): + +#. Build dependencies set to ``BUNDLED``, so the build uses properly cross + compiled build dependencies. + +#. ``CMAKE_TOOLCHAIN_FILE`` set by using ``emcmake cmake`` instead of just ``cmake``. + +#. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` + for builds targeting single threaded Emscripten environments such as + Pyodide. + +#. ``ARROW_FLIGHT`` and anything else that uses the network probably won't + work. + +#. ``ARROW_JEMALLOC`` and ``ARROW_MIMALLOC`` again probably need to be + ``OFF``. + +#. ``ARROW_BUILD_STATIC`` set to ``ON`` and ``ARROW_BUILD_SHARED`` set to + ``OFF`` is most likely to work.
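+For reference, putting these overrides together, a manual configure step
+might look like the following sketch (illustrative only; this exact
+command is not one of the presets, so adjust the source path and option
+set to your environment):
+
+.. code:: shell
+
+   emcmake cmake ../cpp \
+     -DARROW_DEPENDENCY_SOURCE=BUNDLED \
+     -DARROW_ENABLE_THREADING=OFF \
+     -DARROW_FLIGHT=OFF \
+     -DARROW_JEMALLOC=OFF \
+     -DARROW_MIMALLOC=OFF \
+     -DARROW_BUILD_STATIC=ON \
+     -DARROW_BUILD_SHARED=OFF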
diff --git a/docs/source/developers/cpp/index.rst b/docs/source/developers/cpp/index.rst index 36c9778bea1b0..603e1607dc543 100644 --- a/docs/source/developers/cpp/index.rst +++ b/docs/source/developers/cpp/index.rst @@ -27,5 +27,6 @@ C++ Development building development windows + emscripten conventions fuzzing diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index 7ac1a4990f37d..a551edc36c477 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -29,8 +29,8 @@ Java modules are regularly built and tested on macOS and Linux distributions. Java Compatibility ================== -Java modules are compatible with JDK 8 and above. -Currently, JDK 8, 11, 17, and 21 are tested in CI. +Java modules are compatible with JDK 8 and above. Currently, JDK versions +8, 11, 17, and 21 are tested in CI. The latest JDK is also tested in CI. When using Java 9 or later, some JDK internals must be exposed by adding ``--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED`` to the ``java`` command: @@ -61,7 +61,7 @@ Modifying the command above for Flight: $ env _JAVA_OPTIONS="--add-reads=org.apache.arrow.flight.core=ALL-UNNAMED --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED" java -jar ... Otherwise, you may see errors like ``java.lang.IllegalAccessError: superclass access check failed: class -org.apache.arrow.flight.ArrowMessage$ArrowBufRetainingCompositeByteBuf (in module org.apache.arrow.flight.core) +org.apache.arrow.flight.ArrowMessage$ArrowBufRetainingCompositeByteBuf (in module org.apache.arrow.flight.core) cannot access class io.netty.buffer.CompositeByteBuf (in unnamed module ...) because module org.apache.arrow.flight.core does not read unnamed module ... @@ -74,8 +74,8 @@ Modifying the command above for arrow-memory: # Indirectly via environment variables $ env _JAVA_OPTIONS="--add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED" java -jar ... 
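If one application needs both sets of flags, for example Flight together with
the Dataset module, the options shown above can simply be combined on a single
command line. A sketch, where ``app.jar`` is a hypothetical application jar:

.. code:: shell

   # Sketch: combines the Flight --add-reads flag with the --add-opens
   # flags shown above; app.jar stands in for your application.
   java \
     --add-reads=org.apache.arrow.flight.core=ALL-UNNAMED \
     --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED \
     -jar app.jar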
-Otherwise you may see errors such as ``java.lang.RuntimeException: java.lang.reflect.InaccessibleObjectException: -Unable to make static void java.nio.Bits.reserveMemory(long,long) accessible: module +Otherwise you may see errors such as ``java.lang.RuntimeException: java.lang.reflect.InaccessibleObjectException: +Unable to make static void java.nio.Bits.reserveMemory(long,long) accessible: module java.base does not "opens java.nio" to module org.apache.arrow.dataset`` If using Maven and Surefire for unit testing, :ref:`this argument must diff --git a/docs/source/status.rst b/docs/source/status.rst index 9af2fd1921e22..dc60b311a1489 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -40,7 +40,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓ | | +| Float16 | ✓ | ✓ (1) | ✓ | ✓ | ✓ (2)| ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -104,22 +104,31 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift | | (special) | | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+=======+ -| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Run-End Encoded | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ ++-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +| Canonical | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift | +| Extension types | | | | | | | | | ++=======================+=======+=======+=======+============+=======+=======+=======+=======+ +| Fixed shape tensor | ✓ | | | | | | | | ++-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +| Variable shape tensor | | | | | | | | | ++-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+ + Notes: -* \(1) Casting to/from Float16 in C++ is not supported. -* \(2) Casting to/from Float16 in Java is not supported. -* \(3) Float16 support in C# is only available when targeting .NET 6+. -* \(4) Nested dictionaries not supported +* \(1) Casting to/from Float16 in Java is not supported. +* \(2) Float16 support in C# is only available when targeting .NET 6+. +* \(3) Nested dictionaries not supported .. seealso:: - The :ref:`format_columnar` specification. + The :ref:`format_columnar` and the + :ref:`format_canonical_extensions` specification. 
IPC Format diff --git a/go/go.mod b/go/go.mod index 9975ecfc69d34..af7c937800ff1 100644 --- a/go/go.mod +++ b/go/go.mod @@ -35,9 +35,9 @@ require ( github.com/stretchr/testify v1.9.0 github.com/zeebo/xxh3 v1.0.2 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 - golang.org/x/sync v0.6.0 - golang.org/x/sys v0.18.0 - golang.org/x/tools v0.19.0 + golang.org/x/sync v0.7.0 + golang.org/x/sys v0.19.0 + golang.org/x/tools v0.20.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.62.1 @@ -75,8 +75,8 @@ require ( github.com/tidwall/gjson v1.14.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect - golang.org/x/mod v0.16.0 // indirect - golang.org/x/net v0.22.0 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.24.0 // indirect golang.org/x/text v0.14.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go/go.sum b/go/go.sum index 462c43021a29e..9d42ed49ab57c 100644 --- a/go/go.sum +++ b/go/go.sum @@ -113,25 +113,25 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= -golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= -golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= -golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.14.0 
h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= -golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= +golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= +golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 830caf8a28246..9ea6393f0f6df 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -164,7 +164,7 @@ issues in the arrow-tools tests looking up FlatBuffer dependencies. --> - 3.5.2 + 3.2.4 shade-main diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java index d873f7d2828d0..dc545c131828a 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java @@ -42,6 +42,7 @@ import org.apache.arrow.flight.auth2.Auth2Constants; import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; import org.apache.arrow.flight.auth2.ServerCallHeaderAuthMiddleware; +import org.apache.arrow.flight.grpc.ServerBackpressureThresholdInterceptor; import org.apache.arrow.flight.grpc.ServerInterceptorAdapter; import org.apache.arrow.flight.grpc.ServerInterceptorAdapter.KeyFactory; import org.apache.arrow.memory.BufferAllocator; @@ -79,6 +80,9 @@ public class FlightServer implements AutoCloseable { /** The maximum size of an individual gRPC message. This effectively disables the limit. */ static final int MAX_GRPC_MESSAGE_SIZE = Integer.MAX_VALUE; + /** The default number of bytes that can be queued on an output stream before blocking. */ + public static final int DEFAULT_BACKPRESSURE_THRESHOLD = 10 * 1024 * 1024; // 10MB + /** Create a new instance from a gRPC server. For internal use only. */ private FlightServer(Location location, Server server, ExecutorService grpcExecutor) { this.location = location; @@ -179,6 +183,7 @@ public static final class Builder { private CallHeaderAuthenticator headerAuthenticator = CallHeaderAuthenticator.NO_OP; private ExecutorService executor = null; private int maxInboundMessageSize = MAX_GRPC_MESSAGE_SIZE; + private int backpressureThreshold = DEFAULT_BACKPRESSURE_THRESHOLD; private InputStream certChain; private InputStream key; private InputStream mTlsCACert; @@ -300,6 +305,7 @@ public FlightServer build() { .addService( ServerInterceptors.intercept( flightService, + new ServerBackpressureThresholdInterceptor(backpressureThreshold), new ServerAuthInterceptor(authHandler))); // Allow hooking into the gRPC builder. This is not guaranteed to be available on all Arrow versions or @@ -336,6 +342,15 @@ public Builder maxInboundMessageSize(int maxMessageSize) { return this; } + /** + * Set the number of bytes that may be queued on a server output stream before writes are blocked. 
+   */
+  public Builder backpressureThreshold(int backpressureThreshold) {
+    Preconditions.checkArgument(backpressureThreshold > 0);
+    this.backpressureThreshold = backpressureThreshold;
+    return this;
+  }
+
   /**
    * A small utility function to ensure that InputStream attributes are
    * closed if they are not null
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerBackpressureThresholdInterceptor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerBackpressureThresholdInterceptor.java
new file mode 100644
index 0000000000000..bd42fbc8ad6a4
--- /dev/null
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerBackpressureThresholdInterceptor.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import io.grpc.Metadata;
+import io.grpc.ServerCall;
+import io.grpc.ServerCallHandler;
+import io.grpc.ServerInterceptor;
+
+/**
+ * An interceptor for specifying the number of bytes that can be queued before a call with an output stream
+ * gets blocked by backpressure.
+ */
+public class ServerBackpressureThresholdInterceptor implements ServerInterceptor {
+
+  private final int numBytes;
+
+  public ServerBackpressureThresholdInterceptor(int numBytes) {
+    this.numBytes = numBytes;
+  }
+
+  @Override
+  public <ReqT, RespT> ServerCall.Listener<ReqT> interceptCall(ServerCall<ReqT, RespT> call, Metadata headers,
+      ServerCallHandler<ReqT, RespT> next) {
+    call.setOnReadyThreshold(numBytes);
+    return next.startCall(call, headers);
+  }
+}
diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java
index d68b8070e2bb7..4af3e55ee11c0 100644
--- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java
+++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java
@@ -49,6 +49,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -197,31 +198,36 @@ public boolean isReadOnly() throws SQLException {
   @Override
   public String getSQLKeywords() throws SQLException {
     return convertListSqlInfoToString(
-        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_KEYWORDS, List.class));
+        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_KEYWORDS, List.class))
+        .orElse("");
   }
 
   @Override
   public String getNumericFunctions() throws SQLException {
     return convertListSqlInfoToString(
-        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_NUMERIC_FUNCTIONS, List.class));
+        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_NUMERIC_FUNCTIONS, List.class))
+        .orElse("");
   }
 
   @Override
   public String getStringFunctions() throws SQLException {
     return convertListSqlInfoToString(
-        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_STRING_FUNCTIONS, List.class));
+        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_STRING_FUNCTIONS, List.class))
+        .orElse("");
   }
 
   @Override
   public String getSystemFunctions() throws SQLException {
     return convertListSqlInfoToString(
-        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SYSTEM_FUNCTIONS, List.class));
+        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SYSTEM_FUNCTIONS, List.class))
+        .orElse("");
   }
 
   @Override
   public String getTimeDateFunctions() throws SQLException {
     return convertListSqlInfoToString(
-        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_DATETIME_FUNCTIONS, List.class));
+        getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_DATETIME_FUNCTIONS, List.class))
+        .orElse("");
   }
 
   @Override
@@ -753,8 +759,12 @@ private <T> T getSqlInfoAndCacheIfCacheIsEmpty(final SqlInfo sqlInfoCommand,
     return desiredType.cast(cachedSqlInfo.get(sqlInfoCommand));
   }
 
-  private String convertListSqlInfoToString(final List<?> sqlInfoList) {
-    return sqlInfoList.stream().map(Object::toString).collect(Collectors.joining(", "));
+  private Optional<String> convertListSqlInfoToString(final List<?> sqlInfoList) {
+    if (sqlInfoList == null) {
+      return Optional.empty();
+    } else {
+      return Optional.of(sqlInfoList.stream().map(Object::toString).collect(Collectors.joining(", ")));
+    }
   }
 
   private boolean getSqlInfoEnumOptionAndCacheIfCacheIsEmpty(
diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java
index 0d930f4c44e1f..51334c77486bf 100644
---
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java @@ -95,9 +95,14 @@ public class ArrowDatabaseMetadataTest { public static final boolean EXPECTED_MAX_ROW_SIZE_INCLUDES_BLOBS = false; private static final MockFlightSqlProducer FLIGHT_SQL_PRODUCER = new MockFlightSqlProducer(); + private static final MockFlightSqlProducer FLIGHT_SQL_PRODUCER_EMPTY_SQLINFO = + new MockFlightSqlProducer(); @ClassRule public static final FlightServerTestRule FLIGHT_SERVER_TEST_RULE = FlightServerTestRule .createStandardTestRule(FLIGHT_SQL_PRODUCER); + @ClassRule + public static final FlightServerTestRule FLIGHT_SERVER_EMPTY_SQLINFO_TEST_RULE = + FlightServerTestRule.createStandardTestRule(FLIGHT_SQL_PRODUCER_EMPTY_SQLINFO); private static final int ROW_COUNT = 10; private static final List> EXPECTED_GET_CATALOGS_RESULTS = range(0, ROW_COUNT) @@ -604,7 +609,7 @@ public static void setUpBeforeClass() throws SQLException { @AfterClass public static void tearDown() throws Exception { - AutoCloseables.close(connection, FLIGHT_SQL_PRODUCER); + AutoCloseables.close(connection, FLIGHT_SQL_PRODUCER, FLIGHT_SQL_PRODUCER_EMPTY_SQLINFO); } @@ -1420,4 +1425,16 @@ public void testSqlToRegexLike() { Assert.assertEquals("\\*", ArrowDatabaseMetadata.sqlToRegexLike("*")); Assert.assertEquals("T\\*E.S.*T", ArrowDatabaseMetadata.sqlToRegexLike("T*E_S%T")); } + + @Test + public void testEmptySqlInfo() throws Exception { + try (final Connection testConnection = FLIGHT_SERVER_EMPTY_SQLINFO_TEST_RULE.getConnection(false)) { + final DatabaseMetaData metaData = testConnection.getMetaData(); + collector.checkThat(metaData.getSQLKeywords(), is("")); + collector.checkThat(metaData.getNumericFunctions(), is("")); + collector.checkThat(metaData.getStringFunctions(), is("")); + collector.checkThat(metaData.getSystemFunctions(), is("")); + collector.checkThat(metaData.getTimeDateFunctions(), is("")); + } + } } diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java index 07fdc3f784e43..a6da36bb35aa7 100644 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java +++ b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java @@ -69,9 +69,12 @@ public void testMemoryUsage() { break; } } - assertTrue("Log messages are:\n" + + synchronized (memoryLogsAppender.list) { + assertTrue("Log messages are:\n" + memoryLogsAppender.list.stream().map(ILoggingEvent::toString).collect(Collectors.joining("\n")), - result); + result); + } + } finally { memoryLogsAppender.stop(); logger.detachAppender(memoryLogsAppender); diff --git a/java/pom.xml b/java/pom.xml index 8e9ddd5480ea8..9892061677d09 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -34,7 +34,7 @@ 2.0.11 33.0.0-jre 4.1.108.Final - 1.62.2 + 1.63.0 3.23.1 2.17.0 3.4.0 @@ -46,7 +46,7 @@ 9+181-r4173-1 2.24.0 3.12.1 - 5.5.0 + 5.11.0 5.2.0 3.42.0 @@ -449,7 +449,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.5.2 + 3.5.1 maven-surefire-plugin diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 20af3dbd38443..436ffd15b297d 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -179,7 +179,7 @@ issues in the arrow-tools tests looking up FlatBuffer dependencies. 
--> - 3.5.2 + 3.2.4 package diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java index c23caf3bb5a03..42e96f7aca335 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java @@ -124,7 +124,7 @@ public class DenseUnionVector extends AbstractContainerVector implements FieldVe ArrowType.Struct.INSTANCE, /*dictionary*/ null, /*metadata*/ null); public static DenseUnionVector empty(String name, BufferAllocator allocator) { - FieldType fieldType = FieldType.nullable(new ArrowType.Union( + FieldType fieldType = FieldType.notNullable(new ArrowType.Union( UnionMode.Dense, null)); return new DenseUnionVector(name, allocator, fieldType, null); } @@ -676,10 +676,12 @@ public void splitAndTransfer(int startIndex, int length) { for (int i = startIndex; i < startIndex + length; i++) { byte typeId = typeBuffer.getByte(i); - to.offsetBuffer.setInt((long) (i - startIndex) * OFFSET_WIDTH, typeCounts[typeId]); - typeCounts[typeId] += 1; - if (typeStarts[typeId] == -1) { - typeStarts[typeId] = offsetBuffer.getInt((long) i * OFFSET_WIDTH); + if (typeId >= 0) { + to.offsetBuffer.setInt((long) (i - startIndex) * OFFSET_WIDTH, typeCounts[typeId]); + typeCounts[typeId] += 1; + if (typeStarts[typeId] == -1) { + typeStarts[typeId] = offsetBuffer.getInt((long) i * OFFSET_WIDTH); + } } } @@ -908,6 +910,14 @@ private int getTypeBufferValueCapacity() { return (int) typeBuffer.capacity() / TYPE_WIDTH; } + public void setOffset(int index, int offset) { + while (index >= getOffsetBufferValueCapacity()) { + reallocOffsetBuffer(); + } + + offsetBuffer.setInt((long) index * OFFSET_WIDTH, offset); + } + private long getOffsetBufferValueCapacity() { return offsetBuffer.capacity() / OFFSET_WIDTH; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java index 34c9e73a0b072..2ef6e4bd8b374 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java @@ -763,16 +763,14 @@ public void transferTo(BaseLargeVariableWidthVector target) { */ public void splitAndTransferTo(int startIndex, int length, BaseLargeVariableWidthVector target) { - Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, - "Invalid startIndex: %s", startIndex); - Preconditions.checkArgument(startIndex + length <= valueCount, - "Invalid length: %s", length); + Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); compareTypes(target, "splitAndTransferTo"); target.clear(); - splitAndTransferValidityBuffer(startIndex, length, target); - splitAndTransferOffsetBuffer(startIndex, length, target); - target.setLastSet(length - 1); if (length > 0) { + splitAndTransferValidityBuffer(startIndex, length, target); + splitAndTransferOffsetBuffer(startIndex, length, target); + target.setLastSet(length - 1); target.setValueCount(length); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 6b82dd7729a6c..d533629cdd44e 100644 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -808,10 +808,10 @@ public void splitAndTransferTo(int startIndex, int length, "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); compareTypes(target, "splitAndTransferTo"); target.clear(); - splitAndTransferValidityBuffer(startIndex, length, target); - splitAndTransferOffsetBuffer(startIndex, length, target); - target.setLastSet(length - 1); if (length > 0) { + splitAndTransferValidityBuffer(startIndex, length, target); + splitAndTransferOffsetBuffer(startIndex, length, target); + target.setLastSet(length - 1); target.setValueCount(length); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 91275ae73d2c3..220f177659038 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -552,21 +552,23 @@ public void transfer() { public void splitAndTransfer(int startIndex, int length) { Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); - final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); - final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; to.clear(); - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); + if (length > 0) { + final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); + final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); + /* splitAndTransfer offset buffer */ + for (int i = 0; i < length + 1; i++) { + final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); + } + /* splitAndTransfer validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + /* splitAndTransfer data buffer */ + dataTransferPair.splitAndTransfer(startPoint, sliceLength); + to.lastSet = length - 1; + to.setValueCount(length); } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer(startPoint, sliceLength); - to.lastSet = length - 1; - to.setValueCount(length); } /* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java index 9ad71f6fe8847..73bc2ecb12714 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java @@ -105,9 +105,6 @@ public long align() throws IOException { */ public long write(ByteBuffer buffer) throws IOException { long length = buffer.remaining(); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Writing buffer with size: {}", length); - 
} while (buffer.hasRemaining()) { out.write(buffer); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java index 8fd33eb5a8432..0621fd4527520 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java @@ -99,6 +99,44 @@ public void testDenseUnionVector() throws Exception { } } + @Test + public void testSetOffset() { + try (DenseUnionVector duv = DenseUnionVector.empty("foo", allocator)) { + duv.allocateNew(); + byte i32TypeId = duv.registerNewTypeId(Field.notNullable("i32", MinorType.INT.getType())); + byte f64TypeId = duv.registerNewTypeId(Field.notNullable("f64", MinorType.FLOAT8.getType())); + + IntVector i32Vector = ((IntVector) duv.addVector(i32TypeId, new IntVector("i32", allocator))); + Float8Vector f64Vector = ((Float8Vector) duv.addVector(f64TypeId, new Float8Vector("f64", allocator))); + + i32Vector.allocateNew(3); + f64Vector.allocateNew(1); + + duv.setTypeId(0, i32TypeId); + duv.setOffset(0, 0); + i32Vector.set(0, 42); + + duv.setTypeId(1, i32TypeId); + duv.setOffset(1, 1); + i32Vector.set(1, 43); + + duv.setTypeId(2, f64TypeId); + duv.setOffset(2, 0); + f64Vector.set(0, 3.14); + + duv.setTypeId(3, i32TypeId); + duv.setOffset(3, 2); + i32Vector.set(2, 44); + + duv.setValueCount(4); + + assertEquals(42, duv.getObject(0)); + assertEquals(43, duv.getObject(1)); + assertEquals(3.14, duv.getObject(2)); + assertEquals(44, duv.getObject(3)); + } + } + @Test public void testTransfer() throws Exception { try (DenseUnionVector srcVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { @@ -325,6 +363,34 @@ public void testSplitAndTransferWithMixedVectors() throws Exception { } } + @Test + public void testSplitAndTransferDuvInStruct() { + try (StructVector struct = StructVector.empty("struct", allocator)) { + DenseUnionVector duv = struct.addOrGet("duv", + FieldType.notNullable(MinorType.DENSEUNION.getType()), + DenseUnionVector.class); + byte i32TypeId = duv.registerNewTypeId(Field.notNullable("i32", MinorType.INT.getType())); + duv.addVector(i32TypeId, new IntVector("i32", allocator)); + + struct.setIndexDefined(0); + duv.setTypeId(0, i32TypeId); + duv.setSafe(0, newIntHolder(42)); + + struct.setNull(1); + struct.setValueCount(2); + + try (StructVector dest = StructVector.empty("dest", allocator)) { + TransferPair pair = struct.makeTransferPair(dest); + pair.splitAndTransfer(0, 2); + + assertEquals(2, dest.getValueCount()); + assertFalse(dest.isNull(0)); + assertEquals(42, dest.getObject(0).get("duv")); + assertTrue(dest.isNull(1)); + } + } + } + @Test public void testGetFieldTypeInfo() throws Exception { Map metadata = new HashMap<>(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java index 62d09da86d652..06b27a9eba156 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java @@ -166,7 +166,7 @@ public void testInvalidStartIndex() { IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); - assertEquals("Invalid startIndex: 500", e.getMessage()); + assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); } } @@ -185,7 +185,7 @@ public void 
testInvalidLength() { IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); - assertEquals("Invalid length: 1000", e.getMessage()); + assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index 3580a321f01c9..396f5665e0382 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -28,6 +28,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; @@ -49,7 +50,7 @@ public class TestSplitAndTransfer { public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - + @After public void terminate() throws Exception { allocator.close(); @@ -65,7 +66,116 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S } vector.setValueCount(valueCount); } - + + private void populateIntVector(final IntVector vector, int valueCount) { + for (int i = 0; i < valueCount; i++) { + vector.set(i, i); + } + vector.setValueCount(valueCount); + } + + private void populateDenseUnionVector(final DenseUnionVector vector, int valueCount) { + VarCharVector varCharVector = vector.addOrGet("varchar", FieldType.nullable(new ArrowType.Utf8()), + VarCharVector.class); + BigIntVector intVector = vector.addOrGet("int", + FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + + for (int i = 0; i < valueCount; i++) { + vector.setTypeId(i, (byte) (i % 2)); + if (i % 2 == 0) { + final String s = String.format("%010d", i); + varCharVector.setSafe(i / 2, s.getBytes(StandardCharsets.UTF_8)); + } else { + intVector.setSafe(i / 2, i); + } + } + vector.setValueCount(valueCount); + } + + @Test + public void testWithEmptyVector() { + // MapVector use TransferImpl from ListVector + ListVector listVector = ListVector.empty("", allocator); + TransferPair transferPair = listVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // BaseFixedWidthVector + IntVector intVector = new IntVector("", allocator); + transferPair = intVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // BaseVariableWidthVector + VarCharVector varCharVector = new VarCharVector("", allocator); + transferPair = varCharVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // BaseLargeVariableWidthVector + LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator); + transferPair = largeVarCharVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // StructVector + StructVector structVector = StructVector.empty("", allocator); + transferPair = structVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // FixedSizeListVector + FixedSizeListVector fixedSizeListVector = FixedSizeListVector.empty("", 0, 
allocator); + transferPair = fixedSizeListVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // FixedSizeBinaryVector + FixedSizeBinaryVector fixedSizeBinaryVector = new FixedSizeBinaryVector("", allocator, 4); + transferPair = fixedSizeBinaryVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // UnionVector + UnionVector unionVector = UnionVector.empty("", allocator); + transferPair = unionVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + // DenseUnionVector + DenseUnionVector duv = DenseUnionVector.empty("", allocator); + transferPair = duv.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); + + // non empty from vector + + // BaseFixedWidthVector + IntVector fromIntVector = new IntVector("", allocator); + fromIntVector.allocateNew(100); + populateIntVector(fromIntVector, 100); + transferPair = fromIntVector.getTransferPair(allocator); + IntVector toIntVector = (IntVector) transferPair.getTo(); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, toIntVector.getValueCount()); + + transferPair.splitAndTransfer(50, 0); + assertEquals(0, toIntVector.getValueCount()); + + transferPair.splitAndTransfer(100, 0); + assertEquals(0, toIntVector.getValueCount()); + fromIntVector.clear(); + toIntVector.clear(); + + // DenseUnionVector + DenseUnionVector fromDuv = DenseUnionVector.empty("", allocator); + populateDenseUnionVector(fromDuv, 100); + transferPair = fromDuv.getTransferPair(allocator); + DenseUnionVector toDUV = (DenseUnionVector) transferPair.getTo(); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, toDUV.getValueCount()); + + transferPair.splitAndTransfer(50, 0); + assertEquals(0, toDUV.getValueCount()); + + transferPair.splitAndTransfer(100, 0); + assertEquals(0, toDUV.getValueCount()); + fromDuv.clear(); + toDUV.clear(); + } + @Test /* VarCharVector */ public void test() throws Exception { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -73,13 +183,13 @@ public void test() throws Exception { final int valueCount = 500; final String[] compareArray = new String[valueCount]; - + populateVarcharVector(varCharVector, valueCount, compareArray); - + final TransferPair tp = varCharVector.getTransferPair(allocator); final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; - + for (final int[] startLength : startLengths) { final int start = startLength[0]; final int length = startLength[1]; @@ -429,5 +539,4 @@ public void testMapVectorZeroStartIndexAndLength() { newMapVector.clear(); } } - } diff --git a/js/package.json b/js/package.json index bb70fd0a395b0..773cf23e35b36 100644 --- a/js/package.json +++ b/js/package.json @@ -11,7 +11,7 @@ "build": "cross-env NODE_NO_WARNINGS=1 gulp build", "clean": "cross-env NODE_NO_WARNINGS=1 gulp clean", "debug": "cross-env NODE_NO_WARNINGS=1 gulp debug", - "perf": "perf/index.ts", + "perf": "node --no-warnings --loader ts-node/esm/transpile-only perf/index.ts", "test:integration": "bin/integration.ts --mode validate", "release": "./npm-release.sh", "clean:all": "yarn clean && yarn clean:testdata", diff --git a/js/src/data.ts b/js/src/data.ts index 6f8792508858b..45fcc35d37676 100644 --- a/js/src/data.ts +++ 
b/js/src/data.ts @@ -109,7 +109,10 @@ export class Data { let nullCount = this._nullCount; let nullBitmap: Uint8Array | undefined; if (nullCount <= kUnknownNullCount && (nullBitmap = this.nullBitmap)) { - this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length); + this._nullCount = nullCount = nullBitmap.length === 0 ? + // no null bitmap, so all values are valid + 0 : + this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length); } return nullCount; } @@ -177,16 +180,16 @@ export class Data { // if we have a nullBitmap, truncate + slice and set it over the pre-filled 1s if (this.nullCount > 0) { nullBitmap.set(truncateBitmap(offset, length, this.nullBitmap), 0); + Object.assign(this, { nullBitmap }); + } else { + Object.assign(this, { nullBitmap, _nullCount: 0 }); } - Object.assign(this, { nullBitmap, _nullCount: -1 }); } const byte = nullBitmap[byteOffset]; prev = (byte & mask) !== 0; - value ? - (nullBitmap[byteOffset] = byte | mask) : - (nullBitmap[byteOffset] = byte & ~mask); + nullBitmap[byteOffset] = value ? (byte | mask) : (byte & ~mask); } if (prev !== !!value) { diff --git a/js/src/factories.ts b/js/src/factories.ts index aa54498c50bc0..657ae1b95ab92 100644 --- a/js/src/factories.ts +++ b/js/src/factories.ts @@ -65,7 +65,7 @@ export function makeBuilder(option export function vectorFromArray(values: readonly (null | undefined)[], type?: dtypes.Null): Vector; export function vectorFromArray(values: readonly (null | undefined | boolean)[], type?: dtypes.Bool): Vector; export function vectorFromArray = dtypes.Dictionary>(values: readonly (null | undefined | string)[], type?: T): Vector; -export function vectorFromArray(values: readonly (null | undefined | Date)[], type?: T): Vector; +export function vectorFromArray(values: readonly (null | undefined | Date)[], type?: T): Vector; export function vectorFromArray(values: readonly (null | undefined | number)[], type: T): Vector; export function vectorFromArray(values: readonly (null | undefined | bigint)[], type?: T): Vector; export function vectorFromArray(values: readonly (null | undefined | number)[], type?: T): Vector; @@ -145,7 +145,7 @@ function inferType(value: readonly unknown[]): dtypes.DataType { } else if (booleansCount + nullsCount === value.length) { return new dtypes.Bool; } else if (datesCount + nullsCount === value.length) { - return new dtypes.DateMillisecond; + return new dtypes.TimestampMillisecond; } else if (arraysCount + nullsCount === value.length) { const array = value as Array[]; const childType = inferType(array[array.findIndex((ary) => ary != null)]); diff --git a/js/src/type.ts b/js/src/type.ts index ae3aefa025999..a42552d65ad27 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -349,7 +349,19 @@ export class Date_ extends DataType { /** @ignore */ export class DateDay extends Date_ { constructor() { super(DateUnit.DAY); } } -/** @ignore */ +/** + * A signed 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) in milliseconds. + * According to the specification, this should be treated as the number of days, in milliseconds, since the UNIX epoch. + * Therefore, values must be evenly divisible by `86_400_000` (the number of milliseconds in a standard day). + * + * Practically, validation that values of this type are evenly divisible by `86_400_000` is not enforced by this library + * for performance and usability reasons. + * + * Users should prefer to use {@link DateDay} to cleanly represent the number of days. 
For JS dates, + * {@link TimestampMillisecond} is the preferred type. + * + * @ignore + */ export class DateMillisecond extends Date_ { constructor() { super(DateUnit.MILLISECOND); } } /** @ignore */ diff --git a/js/src/vector.ts b/js/src/vector.ts index a7c103bc326ee..1b0d9a05796f0 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -445,7 +445,7 @@ export function makeVector(init: any) { if (init instanceof DataView) { init = new Uint8Array(init.buffer); } - const props = { offset: 0, length: init.length, nullCount: 0, data: init }; + const props = { offset: 0, length: init.length, nullCount: -1, data: init }; if (init instanceof Int8Array) { return new Vector([makeData({ ...props, type: new dtypes.Int8 })]); } if (init instanceof Int16Array) { return new Vector([makeData({ ...props, type: new dtypes.Int16 })]); } if (init instanceof Int32Array) { return new Vector([makeData({ ...props, type: new dtypes.Int32 })]); } diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index f8b4c1c7976d2..e5cd49933eac5 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -15,10 +15,19 @@ // specific language governing permissions and limitations // under the License. -import { DateDay, DateMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; +import { DateDay, DateMillisecond, TimestampMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; + +describe(`TimestampVector`, () => { + test(`Dates are stored in TimestampMillisecond`, () => { + const date = new Date('2023-02-01T12:34:56Z'); + const vec = vectorFromArray([date]); + expect(vec.type).toBeInstanceOf(TimestampMillisecond); + expect(vec.get(0)).toBe(date.valueOf()); + }); +}); describe(`DateVector`, () => { - it('returns days since the epoch as correct JS Dates', () => { + test(`returns days since the epoch as correct JS Dates`, () => { const table = new Table(RecordBatchReader.from(test_data)); const expectedMillis = expectedMillis32(); const date32 = table.getChildAt(0)!; @@ -28,7 +37,7 @@ describe(`DateVector`, () => { } }); - it('returns millisecond longs since the epoch as correct JS Dates', () => { + test(`returns millisecond longs since the epoch as correct JS Dates`, () => { const table = new Table(RecordBatchReader.from(test_data)); const expectedMillis = expectedMillis64(); const date64 = table.getChildAt(1)!; @@ -38,9 +47,9 @@ describe(`DateVector`, () => { } }); - it('returns the same date that was in the vector', () => { + test(`returns the same date that was in the vector`, () => { const dates = [new Date(1950, 1, 0)]; - const vec = vectorFromArray(dates); + const vec = vectorFromArray(dates, new DateMillisecond()); for (const date of vec) { expect(date).toEqual(dates.shift()); } diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index bfcf0d8547861..a10d7c757ca17 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -16,7 +16,7 @@ // under the License. 
import { - Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Timestamp, TimeUnit, Utf8, LargeUtf8, util, Vector, vectorFromArray + Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Timestamp, TimeUnit, Utf8, LargeUtf8, util, Vector, vectorFromArray, makeData } from 'apache-arrow'; describe(`makeVectorFromArray`, () => { @@ -33,6 +33,47 @@ describe(`makeVectorFromArray`, () => { }); }); +describe(`basic vector methods`, () => { + test(`not nullable`, () => { + const vector = makeVector([makeData({ data: new Int32Array([1, 2, 3]), nullCount: -1, type: new Int32() })]); + expect(vector.nullable).toBe(false); + expect(vector.nullCount).toBe(0); + }); + + test(`nullable`, () => { + const vector = makeVector([makeData({ data: new Int32Array([1, 2, 3]), nullCount: 0, type: new Int32() })]); + expect(vector.nullable).toBe(true); + expect(vector.nullCount).toBe(0); + expect(vector.isValid(0)).toBe(true); + + // set a value to null + vector.set(0, null); + expect(vector.nullable).toBe(true); + expect(vector.nullCount).toBe(1); + expect(vector.isValid(0)).toBe(false); + + // set the same value to null which should not change anything + vector.set(0, null); + expect(vector.nullable).toBe(true); + expect(vector.nullCount).toBe(1); + + // set a different value to null + vector.set(1, null); + expect(vector.nullable).toBe(true); + expect(vector.nullCount).toBe(2); + + // set first value to non-null + vector.set(0, 1); + expect(vector.nullable).toBe(true); + expect(vector.nullCount).toBe(1); + + // set last null to non-null + vector.set(1, 2); + expect(vector.nullable).toBe(true); + expect(vector.nullCount).toBe(0); + }); +}); + describe(`StructVector`, () => { test(`makeVectorFromArray`, () => { const values: { a?: number; b?: string | null; c?: boolean | null }[] = [ @@ -108,7 +149,6 @@ describe(`DateVector`, () => { }); describe(`DictionaryVector`, () => { - const dictionary = ['foo', 'bar', 'baz']; const extras = ['abc', '123']; // values to search for that should NOT be found const dictionary_vec = vectorFromArray(dictionary, new Utf8).memoize(); @@ -117,7 +157,6 @@ describe(`DictionaryVector`, () => { const validity = Array.from({ length: indices.length }, () => Math.random() > 0.2); describe(`index with nullCount == 0`, () => { - const values = indices.map((d) => dictionary[d]); const vector = makeVector({ data: indices, @@ -133,7 +172,6 @@ describe(`DictionaryVector`, () => { }); describe(`index with nullCount > 0`, () => { - const nullBitmap = util.packBools(validity); const nullCount = validity.reduce((acc, d) => acc + (d ? 0 : 1), 0); const values = indices.map((d, i) => validity[i] ? 
dictionary[d] : null);
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index 58ef6145cf7d1..a55e889ba8246 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -198,6 +198,10 @@ cdef class ParquetFileFormat(FileFormat):
         -------
         pyarrow.dataset.FileWriteOptions
         """
+        # Safeguard from calling make_write_options as a static class method
+        if not isinstance(self, ParquetFileFormat):
+            raise TypeError("make_write_options() should be called on "
+                            "an instance of ParquetFileFormat")
         opts = FileFormat.make_write_options(self)
         (<ParquetFileWriteOptions> opts).update(**kwargs)
         return opts
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 59d2e91ef6c70..45fd29ad3b3f3 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -561,34 +561,7 @@ def _normalize_slice(object arrow_obj, slice key):
         Py_ssize_t start, stop, step
         Py_ssize_t n = len(arrow_obj)
 
-    step = key.step or 1
-
-    if key.start is None:
-        if step < 0:
-            start = n - 1
-        else:
-            start = 0
-    elif key.start < 0:
-        start = key.start + n
-        if start < 0:
-            start = 0
-    elif key.start >= n:
-        start = n
-    else:
-        start = key.start
-
-    if step < 0 and (key.stop is None or key.stop < -n):
-        stop = -1
-    elif key.stop is None:
-        stop = n
-    elif key.stop < 0:
-        stop = key.stop + n
-        if stop < 0:  # step > 0 in this case.
-            stop = 0
-    elif key.stop >= n:
-        stop = n
-    else:
-        stop = key.stop
+    start, stop, step = key.indices(n)
 
     if step != 1:
         indices = np.arange(start, stop, step)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 60794ebef3281..472a6c5dce750 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -486,6 +486,7 @@ def test_array_slice_negative_step():
         slice(None, None, 2),
         slice(0, 10, 2),
         slice(15, -25, -1),  # GH-38768
+        slice(-22, -22, -1),  # GH-40642
     ]
 
     for case in cases:
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 3d77214c174c5..6bba7240c05df 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -5630,3 +5630,16 @@ def test_checksum_write_dataset_read_dataset_to_table(tempdir):
             corrupted_dir_path, format=pq_read_format_crc
         ).to_table()
+
+
+def test_make_write_options_error():
+    # GH-39440
+    msg = ("make_write_options\\(\\) should be called on an "
+           "instance of ParquetFileFormat")
+    with pytest.raises(TypeError, match=msg):
+        pa.dataset.ParquetFileFormat.make_write_options(43)
+
+    pformat = pa.dataset.ParquetFileFormat()
+    msg = "make_write_options\\(\\) takes exactly 0 positional arguments"
+    with pytest.raises(TypeError, match=msg):
+        pformat.make_write_options(43)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 90b9bd8b8c453..3678b4e57a9a8 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2571,7 +2571,7 @@ def test_list_view_to_pandas_with_null_values(self, klass):
         )
 
         actual = arr.to_pandas()
-        expected = pd.Series([[1, None], [], None])
+        expected = pd.Series([[1, np.nan], [], None])
 
         tm.assert_series_equal(actual, expected)
 
@@ -2593,7 +2593,7 @@ def test_list_view_to_pandas_multiple_chunks(self, klass):
         arr = pa.chunked_array([arr1, arr2])
 
         actual = arr.to_pandas()
-        expected = pd.Series([[3, 4], [2, 3], [1, 2], [5, 6, 7], [6, 7, None], None])
+        expected = pd.Series([[3, 4], [2, 3], [1, 2], [5, 6, 7], [6, 7, np.nan], None])
 
         tm.assert_series_equal(actual, expected)
 
diff --git a/r/DESCRIPTION
b/r/DESCRIPTION index 6062a8c4f4689..c9f84e2e794c4 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -43,7 +43,7 @@ Imports: utils, vctrs Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source") -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Config/testthat/edition: 3 Config/build/bootstrap: TRUE Suggests: diff --git a/r/R/arrow-info.R b/r/R/arrow-info.R index 54145b2ee1204..916b6683fbcce 100644 --- a/r/R/arrow-info.R +++ b/r/R/arrow-info.R @@ -139,7 +139,8 @@ arrow_with_json <- function() { some_features_are_off <- function(features) { # `features` is a named logical vector (as in arrow_info()$capabilities) # Let's exclude some less relevant ones - blocklist <- c("lzo", "bz2", "brotli", "substrait") + # jemalloc is only included because it is sometimes disabled in our build process + blocklist <- c("lzo", "bz2", "brotli", "substrait", "jemalloc") # Return TRUE if any of the other features are FALSE !all(features[setdiff(names(features), blocklist)]) } diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 54e237192e080..f6977e626276b 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -182,37 +182,54 @@ configure_tzdb <- function() { .onAttach <- function(libname, pkgname) { # Just to be extra safe, let's wrap this in a try(); # we don't want a failed startup message to prevent the package from loading - try({ - # On macOS only, Check if we are running in under emulation, and warn this will not work - if (on_rosetta()) { - packageStartupMessage( - paste( - "Warning:", - " It appears that you are running R and Arrow in emulation (i.e. you're", - " running an Intel version of R on a non-Intel mac). This configuration is", - " not supported by arrow, you should install a native (arm64) build of R", - " and use arrow with that. See https://cran.r-project.org/bin/macosx/", - "", - sep = "\n" + try( + { + # On macOS only, Check if we are running in under emulation, and warn this will not work + if (on_rosetta()) { + packageStartupMessage( + paste( + "Warning:", + " It appears that you are running R and Arrow in emulation (i.e. you're", + " running an Intel version of R on a non-Intel mac). This configuration is", + " not supported by arrow, you should install a native (arm64) build of R", + " and use arrow with that. See https://cran.r-project.org/bin/macosx/", + "", + sep = "\n" + ) ) - ) - } + } - features <- arrow_info()$capabilities - # That has all of the #ifdef features, plus the compression libs and the - # string libraries (but not the memory allocators, they're added elsewhere) - # - # Let's print a message if some are off - if (some_features_are_off(features)) { - packageStartupMessage( - paste( - "Some features are not enabled in this build of Arrow.", - "Run `arrow_info()` for more information." + features <- arrow_info()$capabilities + # That has all of the #ifdef features, plus the compression libs and the + # string libraries (but not the memory allocators, they're added elsewhere) + # + # Let's print a message if some are off + if (some_features_are_off(features)) { + packageStartupMessage( + paste( + "Some features are not enabled in this build of Arrow.", + "Run `arrow_info()` for more information." + ) ) - ) - } - }, silent = TRUE) + # On macOS binaries from CRAN can be hobbled. They sometimes restrict access to our + # dependency source downloading even though we can build from source on their machines. 
+ # They also refuse to allow libarrow binaries to be downloaded, so instead distribute + # hobbled arrow binaries + # If on macOS, and features are disabled, advise that reinstalling might help + if (identical(tolower(Sys.info()[["sysname"]]), "darwin")) { + packageStartupMessage( + paste0( + "The repository you retrieved Arrow from did not include all of Arrow's features.\n", + "You can install a fully-featured version by running:\n", + "`install.packages('arrow', repos = 'https://apache.r-universe.dev')`." + ) + ) + } + } + }, + silent = TRUE + ) } # Clean up the StopSource that was registered in .onLoad() so that if the diff --git a/r/R/csv.R b/r/R/csv.R index 03540006ca0a2..733547570391d 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -78,8 +78,9 @@ #' `col_names`, and the CSV file has a header row that would otherwise be used #' to identify column names, you'll need to add `skip = 1` to skip that row. #' -#' @param file A character file name or URI, literal data (either a single string or a [raw] vector), -#' an Arrow input stream, or a `FileSystem` with path (`SubTreeFileSystem`). +#' @param file A character file name or URI, connection, literal data (either a +#' single string or a [raw] vector), an Arrow input stream, or a `FileSystem` +#' with path (`SubTreeFileSystem`). #' #' If a file name, a memory-mapped Arrow [InputStream] will be opened and #' closed when finished; compression will be detected from the file extension @@ -894,7 +895,7 @@ readr_to_csv_convert_options <- function(na, #' Write CSV file to disk #' #' @param x `data.frame`, [RecordBatch], or [Table] -#' @param sink A string file path, URI, or [OutputStream], or path in a file +#' @param sink A string file path, connection, URI, or [OutputStream], or path in a file #' system (`SubTreeFileSystem`) #' @param file file name. Specify this or `sink`, not both. #' @param include_header Whether to write an initial header line with column names diff --git a/r/R/feather.R b/r/R/feather.R index 474fc6118e44f..aa08dfdbc96a5 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -29,7 +29,7 @@ #' [write_ipc_file()] can only write V2 files. #' #' @param x `data.frame`, [RecordBatch], or [Table] -#' @param sink A string file path, URI, or [OutputStream], or path in a file +#' @param sink A string file path, connection, URI, or [OutputStream], or path in a file #' system (`SubTreeFileSystem`) #' @param version integer Feather file version, Version 1 or Version 2. Version 2 is the default. #' @param chunk_size For V2 files, the number of rows that each chunk of data diff --git a/r/R/install-arrow.R b/r/R/install-arrow.R index 74d3a96454777..44b876537490c 100644 --- a/r/R/install-arrow.R +++ b/r/R/install-arrow.R @@ -76,22 +76,14 @@ install_arrow <- function(nightly = FALSE, ARROW_R_DEV = verbose, ARROW_USE_PKG_CONFIG = use_system ) - # On the M1, we can't use the usual autobrew, which pulls Intel dependencies - apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform) - # On Rosetta, we have to build without JEMALLOC, so we also can't autobrew - rosetta <- on_rosetta() - if (rosetta) { + # On Rosetta, we have to build without JEMALLOC + if (on_rosetta()) { Sys.setenv(ARROW_JEMALLOC = "OFF") - } - if (apple_m1 || rosetta) { Sys.setenv(FORCE_BUNDLED_BUILD = "true") } opts <- list() - if (apple_m1 || rosetta) { - # Skip binaries (esp. 
diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R
index 37ef0bbaf2126..26a61a790f936 100644
--- a/r/R/ipc-stream.R
+++ b/r/R/ipc-stream.R
@@ -82,8 +82,8 @@ write_to_raw <- function(x, format = c("stream", "file")) {
 #' a "stream" format and a "file" format, known as Feather. `read_ipc_stream()`
 #' and [read_feather()] read those formats, respectively.
 #'
-#' @param file A character file name or URI, `raw` vector, an Arrow input stream,
-#' or a `FileSystem` with path (`SubTreeFileSystem`).
+#' @param file A character file name or URI, connection, `raw` vector, an
+#' Arrow input stream, or a `FileSystem` with path (`SubTreeFileSystem`).
 #' If a file name or URI, an Arrow [InputStream] will be opened and
 #' closed when finished. If an input stream is provided, it will be left
 #' open.
diff --git a/r/R/parquet.R b/r/R/parquet.R
index d92e913cb5db3..0ee6c62601c1d 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -90,7 +90,7 @@ read_parquet <- function(file,
 #' article} for examples of this.
 #'
 #' @param x `data.frame`, [RecordBatch], or [Table]
-#' @param sink A string file path, URI, or [OutputStream], or path in a file
+#' @param sink A string file path, connection, URI, or [OutputStream], or path in a file
 #' system (`SubTreeFileSystem`)
 #' @param chunk_size how many rows of data to write to disk at once. This
 #' directly corresponds to how many rows will be in each row group in
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index 15f672a1fe949..41ec7956fe815 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -18,15 +18,15 @@ Useful links:
 
 }
 \author{
-\strong{Maintainer}: Nic Crane \email{thisisnic@gmail.com}
+\strong{Maintainer}: Jonathan Keane \email{jkeane@gmail.com}
 
 Authors:
 \itemize{
   \item Neal Richardson \email{neal.p.richardson@gmail.com}
   \item Ian Cook \email{ianmcook@gmail.com}
+  \item Nic Crane \email{thisisnic@gmail.com}
   \item Dewey Dunnington \email{dewey@fishandwhistle.net} (\href{https://orcid.org/0000-0002-9415-4582}{ORCID})
   \item Romain François (\href{https://orcid.org/0000-0002-2444-4226}{ORCID})
-  \item Jonathan Keane \email{jkeane@gmail.com}
   \item Dragoș Moldovan-Grünfeld \email{dragos.mold@gmail.com}
   \item Jeroen Ooms \email{jeroen@berkeley.edu}
   \item Jacob Wujciak-Jens \email{jacob@wujciak.de}
diff --git a/r/man/format_schema.Rd b/r/man/format_schema.Rd
new file mode 100644
index 0000000000000..d1c81e3fe7623
--- /dev/null
+++ b/r/man/format_schema.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/schema.R
+\name{format_schema}
+\alias{format_schema}
+\title{Get a string representing a Dataset or RecordBatchReader object's schema}
+\usage{
+format_schema(obj)
+}
+\arguments{
+\item{obj}{a Dataset or RecordBatchReader}
+}
+\value{
+A string containing a formatted representation of the schema of \code{obj}
+}
+\description{
+Get a string representing a Dataset or RecordBatchReader object's schema
+}
+\keyword{internal}
diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd
index b56d445c9e2e3..f946785e4a41e 100644
--- a/r/man/read_delim_arrow.Rd
+++ b/r/man/read_delim_arrow.Rd
@@ -90,8 +90,9 @@ read_tsv_arrow(
 )
 }
 \arguments{
-\item{file}{A character file name or URI, literal data (either a single string or a \link{raw} vector),
-an Arrow input stream, or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+\item{file}{A character file name or URI, connection, literal data (either a
+single string or a \link{raw} vector), an Arrow input stream, or a \code{FileSystem}
+with path (\code{SubTreeFileSystem}).
 
 If a file name, a memory-mapped Arrow \link{InputStream} will be opened and
 closed when finished; compression will be detected from the file extension
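As a usage sketch of the connection support these docs describe (assuming, per the new parameter text, that an already-open binary connection is accepted; not part of the patch):

    tf <- tempfile(fileext = ".csv")
    write.csv(data.frame(x = 1:3, y = c("a", "b", "c")), tf, row.names = FALSE)
    con <- file(tf, open = "rb")  # any readable binary connection
    tbl <- arrow::read_csv_arrow(con)
    close(con)

Note the trade-off the docs spell out: a file path is memory-mapped, while a connection is read through R, so paths remain the faster option when available.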
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index c3b4a54158c7f..95661d9778576 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -10,8 +10,8 @@ read_feather(file, col_select = NULL, as_data_frame = TRUE, mmap = TRUE)
 read_ipc_file(file, col_select = NULL, as_data_frame = TRUE, mmap = TRUE)
 }
 \arguments{
-\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
-or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+\item{file}{A character file name or URI, connection, \code{raw} vector, an
+Arrow input stream, or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
 
 If a file name or URI, an Arrow \link{InputStream} will be opened and
 closed when finished. If an input stream is provided, it will be left open.}
diff --git a/r/man/read_ipc_stream.Rd b/r/man/read_ipc_stream.Rd
index db930b52bde18..49d3949bfcf22 100644
--- a/r/man/read_ipc_stream.Rd
+++ b/r/man/read_ipc_stream.Rd
@@ -7,8 +7,8 @@
 read_ipc_stream(file, as_data_frame = TRUE, ...)
 }
 \arguments{
-\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
-or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+\item{file}{A character file name or URI, connection, \code{raw} vector, an
+Arrow input stream, or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
 
 If a file name or URI, an Arrow \link{InputStream} will be opened and
 closed when finished. If an input stream is provided, it will be left open.}
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 9230a9a017495..f289d3356e559 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -13,8 +13,9 @@ read_json_arrow(
 )
 }
 \arguments{
-\item{file}{A character file name or URI, literal data (either a single string or a \link{raw} vector),
-an Arrow input stream, or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+\item{file}{A character file name or URI, connection, literal data (either a
+single string or a \link{raw} vector), an Arrow input stream, or a \code{FileSystem}
+with path (\code{SubTreeFileSystem}).
 
 If a file name, a memory-mapped Arrow \link{InputStream} will be opened and
 closed when finished; compression will be detected from the file extension
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index 4f1936529531a..95ee4ac5a8ceb 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -14,8 +14,8 @@ read_parquet(
 )
 }
 \arguments{
-\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
-or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+\item{file}{A character file name or URI, connection, \code{raw} vector, an
+Arrow input stream, or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
 
 If a file name or URI, an Arrow \link{InputStream} will be opened and
 closed when finished. If an input stream is provided, it will be left open.}
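The same connection path can be exercised end to end with write_to_raw(), which appears in the ipc-stream.R context above; the rawConnection() round-trip here is illustrative, not from the patch:

    buf <- arrow::write_to_raw(data.frame(x = 1:5))  # IPC stream bytes as a raw vector
    con <- rawConnection(buf, open = "rb")
    df <- arrow::read_ipc_stream(con)
    close(con)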
diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd
index 9fcca49fadc05..9f9fde74cc996 100644
--- a/r/man/write_csv_arrow.Rd
+++ b/r/man/write_csv_arrow.Rd
@@ -19,7 +19,7 @@ write_csv_arrow(
 \arguments{
 \item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
 
-\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+\item{sink}{A string file path, connection, URI, or \link{OutputStream}, or path in a file
 system (\code{SubTreeFileSystem})}
 
 \item{file}{file name. Specify this or \code{sink}, not both.}
diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd
index 0d3a7da3b90b4..823bd2224eac2 100644
--- a/r/man/write_feather.Rd
+++ b/r/man/write_feather.Rd
@@ -25,7 +25,7 @@ write_ipc_file(
 \arguments{
 \item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
 
-\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+\item{sink}{A string file path, connection, URI, or \link{OutputStream}, or path in a file
 system (\code{SubTreeFileSystem})}
 
 \item{version}{integer Feather file version, Version 1 or Version 2. Version 2 is the default.}
diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd
index 094e3ad11a0c8..da9bb6bcacb45 100644
--- a/r/man/write_ipc_stream.Rd
+++ b/r/man/write_ipc_stream.Rd
@@ -9,7 +9,7 @@
 write_ipc_stream(x, sink, ...)
 }
 \arguments{
 \item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
 
-\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+\item{sink}{A string file path, connection, URI, or \link{OutputStream}, or path in a file
 system (\code{SubTreeFileSystem})}
 
 \item{...}{extra parameters passed to \code{write_feather()}.}
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index 480abb12fcf4a..954c692dad2f1 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -22,7 +22,7 @@ write_parquet(
 \arguments{
 \item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
 
-\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+\item{sink}{A string file path, connection, URI, or \link{OutputStream}, or path in a file
 system (\code{SubTreeFileSystem})}
 
 \item{chunk_size}{how many rows of data to write to disk at once. This
diff --git a/r/src/io.cpp b/r/src/io.cpp
index 4d5ee31794ae8..2f36d51dcd7ab 100644
--- a/r/src/io.cpp
+++ b/r/src/io.cpp
@@ -212,11 +212,16 @@ void io___BufferOutputStream__Write(
 class RConnectionFileInterface : public virtual arrow::io::FileInterface {
  public:
   explicit RConnectionFileInterface(cpp11::sexp connection_sexp)
-      : connection_sexp_(connection_sexp), closed_(false) {
+      : connection_sexp_(connection_sexp),
+        closed_(false),
+        seekable_(false),
+        bytes_written_(0),
+        bytes_read_(0) {
     check_closed();
+    seekable_ = check_seekable();
   }
 
-  arrow::Status Close() {
+  arrow::Status Close() override {
     if (closed_) {
       return arrow::Status::OK();
     }
@@ -227,11 +232,21 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface {
         "close() on R connection");
   }
 
-  arrow::Result<int64_t> Tell() const {
+  arrow::Result<int64_t> Tell() const override {
     if (closed()) {
       return arrow::Status::IOError("R connection is closed");
     }
 
+    // R connections use seek() with no additional arguments as a tell()
+    // implementation; however, non-seekable connections will error if you
+    // do this. This heuristic allows Tell() to return a reasonable value
+    // (used by at least the IPC writer).
+    if (!seekable_ && bytes_written_ > 0) {
+      return bytes_written_;
+    } else if (!seekable_) {
+      return bytes_read_;
+    }
+
     return SafeCallIntoR<int64_t>(
         [&]() {
          cpp11::sexp result = cpp11::package("base")["seek"](connection_sexp_);
@@ -240,7 +255,7 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface {
         "tell() on R connection");
   }
 
-  bool closed() const { return closed_; }
+  bool closed() const override { return closed_; }
 
 protected:
   cpp11::sexp connection_sexp_;
@@ -261,13 +276,14 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface {
     return SafeCallIntoR<int64_t>(
         [&] {
           cpp11::function read_bin = cpp11::package("base")["readBin"];
-          cpp11::writable::raws ptype((R_xlen_t)0);
+          cpp11::writable::raws ptype(static_cast<R_xlen_t>(0));
           cpp11::integers n = cpp11::as_sexp(static_cast<int>(nbytes));
 
           cpp11::sexp result = read_bin(connection_sexp_, ptype, n);
 
           int64_t result_size = cpp11::safe[Rf_xlength](result);
           memcpy(out, cpp11::safe[RAW](result), result_size);
+          bytes_read_ += result_size;
           return result_size;
         },
         "readBin() on R connection");
@@ -294,6 +310,7 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface {
           cpp11::function write_bin = cpp11::package("base")["writeBin"];
           write_bin(data_raw, connection_sexp_);
+          bytes_written_ += nbytes;
         },
         "writeBin() on R connection");
   }
@@ -312,6 +329,9 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface {
 
  private:
   bool closed_;
+  bool seekable_;
+  int64_t bytes_written_;
+  int64_t bytes_read_;
 
   bool check_closed() {
     if (closed_) {
@@ -333,6 +353,15 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface {
 
     return closed_;
   }
+
+  bool check_seekable() {
+    auto is_seekable_result = SafeCallIntoR<bool>([&] {
+      cpp11::sexp result = cpp11::package("base")["isSeekable"](connection_sexp_);
+      return cpp11::as_cpp<bool>(result);
+    });
+
+    return is_seekable_result.ok() && *is_seekable_result;
+  }
 };
 
 class RConnectionInputStream : public virtual arrow::io::InputStream,
@@ -341,9 +370,11 @@ class RConnectionInputStream : public virtual arrow::io::InputStream,
   explicit RConnectionInputStream(cpp11::sexp connection_sexp)
       : RConnectionFileInterface(connection_sexp) {}
 
-  arrow::Result<int64_t> Read(int64_t nbytes, void* out) { return ReadBase(nbytes, out); }
+  arrow::Result<int64_t> Read(int64_t nbytes, void* out) override {
+    return ReadBase(nbytes, out);
+  }
 
-  arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) {
+  arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override {
     return ReadBase(nbytes);
   }
 };
@@ -373,13 +404,15 @@ class RConnectionRandomAccessFile : public arrow::io::RandomAccessFile,
     }
   }
 
-  arrow::Result<int64_t> GetSize() { return size_; }
+  arrow::Result<int64_t> GetSize() override { return size_; }
 
-  arrow::Status Seek(int64_t pos) { return SeekBase(pos); }
+  arrow::Status Seek(int64_t pos) override { return SeekBase(pos); }
 
-  arrow::Result<int64_t> Read(int64_t nbytes, void* out) { return ReadBase(nbytes, out); }
+  arrow::Result<int64_t> Read(int64_t nbytes, void* out) override {
+    return ReadBase(nbytes, out);
+  }
 
-  arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) {
+  arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override {
     return ReadBase(nbytes);
  }
 
@@ -393,7 +426,7 @@ class RConnectionOutputStream : public arrow::io::OutputStream,
   explicit RConnectionOutputStream(cpp11::sexp connection_sexp)
       : RConnectionFileInterface(connection_sexp) {}
 
-  arrow::Status Write(const void* data, int64_t nbytes) {
+  arrow::Status Write(const void* data, int64_t nbytes) override {
     return WriteBase(data, nbytes);
   }
 };
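The seekable_/bytes_written_/bytes_read_ bookkeeping above mirrors facts observable from base R; a small sketch (the pipe() example assumes a POSIX `cat` and is illustrative only):

    f <- file(tempfile(), open = "w+b")
    isSeekable(f)  # TRUE: seek(f) with no 'where' argument doubles as a tell()
    seek(f)        # current offset, what the C++ Tell() normally calls
    close(f)

    p <- pipe("cat", open = "wb")
    isSeekable(p)  # FALSE: seek(p) would error, hence the byte-count fallback
    close(p)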
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index 33ab1ecc7aa4d..dd7b6dba7fde0 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -281,7 +281,7 @@ test_that("to_duckdb passing a connection", {
     to_duckdb(con = con_separate, auto_disconnect = FALSE)
 
   # Generates a query like SELECT * FROM arrow_xxx
-  table_four_query <- paste(show_query(table_four), collapse = "\n")
+  table_four_query <- paste(dbplyr::sql_build(table_four), collapse = "\n")
   table_four_name <- stringr::str_extract(table_four_query, "arrow_[0-9]{3}")
 
   expect_false(is.na(table_four_name))
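The switch from show_query() (which is designed for interactive printing) to dbplyr::sql_build() keeps the query introspectable without console output. A sketch of the same pattern outside the test fixture; the connection setup and table name here are hypothetical stand-ins:

    con <- DBI::dbConnect(duckdb::duckdb())
    tb <- dplyr::copy_to(con, data.frame(x = 1:3), name = "arrow_001")
    # Build the query object silently, then flatten it to text for matching
    qry <- paste(dbplyr::sql_build(tb), collapse = "\n")
    stringr::str_extract(qry, "arrow_[0-9]{3}")
    DBI::dbDisconnect(con, shutdown = TRUE)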