diff --git a/.asf.yaml b/.asf.yaml
index a1c6434587703..12438081cfc57 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -21,9 +21,9 @@ github:
   collaborators:
     - anjakefala
     - benibus
-    - davisusanibar
    - jbonofre
    - js8544
+    - laurentgo
    - vibhatha
    - zanmato1984
    - ZhangHuiGui
diff --git a/.env b/.env
index be35921f94c3a..1358aafe824a6 100644
--- a/.env
+++ b/.env
@@ -61,7 +61,7 @@ GCC_VERSION=""
 GO=1.21.8
 STATICCHECK=v0.4.7
 HDFS=3.2.1
-JDK=8
+JDK=11
 KARTOTHEK=latest
 # LLVM 12 and GCC 11 reports -Wmismatched-new-delete.
 LLVM=14
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index e495bfd147de6..03252657feaf1 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -40,7 +40,7 @@
 /matlab/ @kevingurney @kou @sgilmore10
 /python/pyarrow/_flight.pyx @lidavidm
 /python/pyarrow/**/*gandiva* @wjones127
-/r/ @paleolimbot @thisisnic
+/r/ @thisisnic
 /ruby/ @kou
 /swift/ @kou
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 37a1be7d2c00b..beb126eaf9496 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -40,7 +40,7 @@ We prefer to receive contributions in the form of GitHub pull requests. Please
 send pull requests against the [github.com/apache/arrow][4] repository following
 the procedure below.
 
-If you are looking for some ideas on what to contribute, check out the [JIRA
+If you are looking for some ideas on what to contribute, check out the [GitHub
 issues][3] for the Apache Arrow project. Comment on the issue and/or contact
 [dev@arrow.apache.org](https://lists.apache.org/list.html?dev@arrow.apache.org)
 with your questions and ideas.
@@ -53,8 +53,8 @@ To contribute a patch:
 
 1. Break your work into small, single-purpose patches if possible. It’s much
 harder to merge in a large change with a lot of disjoint features.
-2. If one doesn't already exist, create a JIRA for your patch on the [Arrow Project
-JIRA](https://issues.apache.org/jira/browse/ARROW).
+2. If one doesn't already exist, create a GitHub issue for your patch on the [Arrow Project
+GitHub](https://github.com/apache/arrow/issues).
 3. Submit the patch as a GitHub pull request against the main branch. For a tutorial,
 see the GitHub guides on [forking a repo](https://help.github.com/en/articles/fork-a-repo)
 and [sending a pull request](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork).
 So that your pull request syncs with the JIRA issue, prefix your pull request
@@ -68,5 +68,5 @@ Thank you in advance for your contributions!
 [1]: mailto:dev-subscribe@arrow.apache.org
 [2]: https://github.com/apache/arrow/tree/main/format
-[3]: https://issues.apache.org/jira/browse/ARROW
+[3]: https://github.com/apache/arrow/issues
 [4]: https://github.com/apache/arrow
diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index c698baba2c816..87f365b9065c8 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -59,7 +59,7 @@ jobs:
         shell: bash
         run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true
       - name: Setup Python
-        uses: actions/setup-python@v5.1.0
+        uses: actions/setup-python@v5.1.1
         with:
           python-version: '3.9'
       - name: Install pygit2 binary wheel
diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index a34856d2dc81a..1138c0a02f812 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -41,7 +41,7 @@ jobs:
           # fetch the tags for version number generation
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.12
       - name: Install Archery and Crossbow dependencies
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index e539fadb859fe..eff0b0204e6bd 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -246,7 +246,7 @@ jobs:
           $(brew --prefix bash)/bin/bash \
             ci/scripts/install_minio.sh latest ${ARROW_HOME}
       - name: Set up Python
-        uses: actions/setup-python@v5.1.0
+        uses: actions/setup-python@v5.1.1
         with:
           python-version: 3.12
       - name: Install Google Cloud Storage Testbench
@@ -467,7 +467,7 @@ jobs:
             https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
           chmod +x /usr/local/bin/minio.exe
       - name: Set up Python
-        uses: actions/setup-python@v5.1.0
+        uses: actions/setup-python@v5.1.1
         with:
           python-version: 3.9
       - name: Install Google Cloud Storage Testbench
diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml
index e4db9f482e206..6e8548dc960f4 100644
--- a/.github/workflows/csharp.yml
+++ b/.github/workflows/csharp.yml
@@ -49,7 +49,7 @@ jobs:
         dotnet: ['8.0.x']
     steps:
       - name: Install C#
-        uses: actions/setup-dotnet@v4
+        uses: actions/setup-dotnet@v4.0.1
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Checkout Arrow
@@ -77,7 +77,7 @@ jobs:
         dotnet: ['8.0.x']
     steps:
       - name: Install C#
-        uses: actions/setup-dotnet@v4
+        uses: actions/setup-dotnet@v4.0.1
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Checkout Arrow
@@ -104,11 +104,11 @@ jobs:
         dotnet: ['8.0.x']
     steps:
       - name: Install C#
-        uses: actions/setup-dotnet@v4
+        uses: actions/setup-dotnet@v4.0.1
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.12
       - name: Checkout Arrow
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 1ea12b0a4d23d..49568102e11f8 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -46,7 +46,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.12
       - name: Install pre-commit
@@ -105,7 +105,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Install Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: '3.12'
       - name: Install Ruby
@@ -113,14 +113,14 @@ jobs:
         with:
           ruby-version: ruby
       - name: Install .NET
-        uses: actions/setup-dotnet@4d6c8fcf3c8f7a60068d26b594648e99df24cee3 # v4.0.0
+        uses: actions/setup-dotnet@6bd8b7f7774af54e05809fcc5431931b3eb1ddee # v4.0.1
         with:
           dotnet-version: '8.0.x'
       - name: Install Dependencies
         shell: bash
         run: |
           gem install test-unit
-          pip install "cython>=0.29.31" setuptools six pytest jira setuptools-scm
+          pip install "cython>=0.29.31" setuptools pytest jira setuptools-scm
       - name: Run Release Test
         env:
           ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 36a0dc014db8d..b6075746ff40a 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -53,7 +53,7 @@ jobs:
           key: debian-docs-${{ hashFiles('cpp/**') }}
           restore-keys: debian-docs-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml
index 947e2ac21b83c..f66e8473e2516 100644
--- a/.github/workflows/docs_light.yml
+++ b/.github/workflows/docs_light.yml
@@ -59,7 +59,7 @@ jobs:
           key: conda-docs-${{ hashFiles('cpp/**') }}
           restore-keys: conda-docs-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 0d32628859fa0..0d369d252b56c 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -168,8 +168,8 @@ jobs:
           python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
           python3 ci/scripts/go_bench_adapt.py
 
-  build386:
-    name: Go Cross-build for 386
+  build_test_386:
+    name: Go Cross-build and test for 386
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     timeout-minutes: 20
@@ -188,9 +188,12 @@ jobs:
           cache: true
           cache-dependency-path: go/go.sum
       - name: Run build
-        run: |
-          cd go
-          GOARCH=386 go build ./...
+        run: GOARCH=386 go build ./...
+        working-directory: ./go
+      - name: Run test
+        # WIP refactor, only tests in the specified dirs have been fixed
+        run: GOARCH=386 go test ./parquet/file/...
+        working-directory: ./go
 
   docker_cgo:
     name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO
@@ -210,7 +213,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -250,7 +253,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -342,7 +345,7 @@ jobs:
           github.event_name == 'push' &&
           github.repository == 'apache/arrow' &&
           github.ref_name == 'main'
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: '3.10'
       - name: Run Benchmarks
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f53f4aeb505d2..46d422a53ae69 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -90,7 +90,7 @@ jobs:
           key: conda-${{ hashFiles('cpp/**') }}
           restore-keys: conda-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 08dbe7c8068c0..d4211c2c81cb5 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -58,7 +58,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        jdk: [8, 11, 17, 21, 22]
+        jdk: [11, 17, 21, 22]
        maven: [3.9.6]
        image: [java]
     env:
@@ -77,7 +77,7 @@ jobs:
           key: maven-${{ hashFiles('java/**') }}
           restore-keys: maven-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index ea5f8d694a9c6..533da7c36be34 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -71,7 +71,7 @@ jobs:
           key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }}
           restore-keys: java-jni-manylinux-2014-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -111,7 +111,7 @@ jobs:
           key: maven-${{ hashFiles('java/**') }}
           restore-keys: maven-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml
index f40d4ce5b42d6..72afb6dbf1c1d 100644
--- a/.github/workflows/java_nightly.yml
+++ b/.github/workflows/java_nightly.yml
@@ -58,7 +58,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index c11c8254011f6..ad22968a3a68b 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -55,7 +55,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml
index e589610f536b3..7dd06b6aeec09 100644
--- a/.github/workflows/pr_bot.yml
+++ b/.github/workflows/pr_bot.yml
@@ -82,7 +82,7 @@ jobs:
           # fetch the tags for version number generation
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.12
       - name: Install Archery and Crossbow dependencies
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index a568f8346e7fc..daadd971f8ac8 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -102,7 +102,7 @@ jobs:
           key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }}
           restore-keys: ${{ matrix.cache }}-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -172,7 +172,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@v5.1.0
+        uses: actions/setup-python@v5.1.1
         with:
           python-version: '3.11'
       - name: Install Dependencies
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 6bd940f806775..0ff7266860f29 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -62,7 +62,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - cpp_version: "13.0.0"
+          - cpp_version: "15.0.2"
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
@@ -144,7 +144,7 @@ jobs:
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -204,7 +204,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml
index af5382f90834c..1ec071b6bbb5e 100644
--- a/.github/workflows/r_nightly.yml
+++ b/.github/workflows/r_nightly.yml
@@ -60,7 +60,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 6a29ec8e72cab..ca2305a7f9357 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -84,7 +84,7 @@ jobs:
           key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }}
           restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby-
       - name: Setup Python
-        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
+        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9bdd4f487bdec..bf0bcde14622a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,6 +38,7 @@ repos:
         # files: (/Dockerfile|\.dockerfile)$
         files: >-
           (
+            ?^ci/docker/conda-python-emscripten\.dockerfile$|
            ?^ci/docker/python-wheel-windows-test-vs2019\.dockerfile$|
          )
        types: []
diff --git a/c_glib/meson.build b/c_glib/meson.build
index 06aa5b941e77c..214c57747033e 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -35,7 +35,7 @@ project('arrow-glib', 'c', 'cpp',
         #   * 22.04: 0.61.2
         meson_version: '>=0.53.2')
 
-version = '17.0.0-SNAPSHOT'
+version = '18.0.0-SNAPSHOT'
 if version.endswith('-SNAPSHOT')
   version_numbers = version.split('-')[0].split('.')
   version_tag = version.split('-')[1]
diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py
index 7422432251ff1..ba8cb03d15a3e 100755
--- a/c_glib/tool/generate-version-header.py
+++ b/c_glib/tool/generate-version-header.py
@@ -140,6 +140,7 @@ def generate_availability_macros(library: str) -> str:
 
 
 ALL_VERSIONS = [
+    (18, 0),
    (17, 0),
    (16, 0),
    (15, 0),
diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json
index e88d2b8fe30d5..3941edbfec527 100644
--- a/c_glib/vcpkg.json
+++ b/c_glib/vcpkg.json
@@ -1,6 +1,6 @@
 {
   "name": "arrow-glib",
-  "version-string": "17.0.0-SNAPSHOT",
+  "version-string": "18.0.0-SNAPSHOT",
   "dependencies": [
     "glib",
     "gobject-introspection",
diff --git a/ci/docker/centos-7-cpp.dockerfile b/ci/docker/centos-7-cpp.dockerfile
index 8c1893cbbb2ae..1f30eed694e4e 100644
--- a/ci/docker/centos-7-cpp.dockerfile
+++ b/ci/docker/centos-7-cpp.dockerfile
@@ -17,11 +17,25 @@
 
 FROM centos:centos7
 
+# Update mirrors to use vault.centos.org as CentOS 7
+# is EOL since 2024-06-30
+RUN sed -i \
+      -e 's/^mirrorlist/#mirrorlist/' \
+      -e 's/^#baseurl/baseurl/' \
+      -e 's/mirror\.centos\.org/vault.centos.org/' \
+      /etc/yum.repos.d/*.repo
+
 # devtoolset is required for C++17
 RUN \
   yum install -y \
     centos-release-scl \
     epel-release && \
+  sed -i \
+    -e 's/^mirrorlist/#mirrorlist/' \
+    -e 's/^#baseurl/baseurl/' \
+    -e 's/^# baseurl/baseurl/' \
+    -e 's/mirror\.centos\.org/vault.centos.org/' \
+    /etc/yum.repos.d/CentOS-SCLo-scl*.repo && \
  yum install -y \
    cmake3 \
    curl \
diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile
index 30b9cd5199fab..c602490d6b729 100644
--- a/ci/docker/conda-integration.dockerfile
+++ b/ci/docker/conda-integration.dockerfile
@@ -23,7 +23,7 @@ ARG arch=amd64
 ARG maven=3.8.7
 ARG node=16
 ARG yarn=1.22
-ARG jdk=8
+ARG jdk=11
 ARG go=1.21.8
 
 # Install Archery and integration dependencies
@@ -44,8 +44,10 @@ RUN mamba install -q -y \
 
 # Install Rust with only the needed components
 # (rustfmt is needed for tonic-build to compile the protobuf definitions)
+# GH-41637: Version pinned at 1.77 because the glibc for conda-cpp is currently too old
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \
-    $HOME/.cargo/bin/rustup toolchain install stable && \
+    $HOME/.cargo/bin/rustup override set 1.77 && \
+    $HOME/.cargo/bin/rustup toolchain install 1.77 && \
     $HOME/.cargo/bin/rustup component add rustfmt
 
 ENV GOROOT=/opt/go \
diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile
new file mode 100644
index 0000000000000..8ad705c920ba8
--- /dev/null
+++ b/ci/docker/conda-python-emscripten.dockerfile
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch
+ARG python="3.12"
+FROM ${repo}:${arch}-conda-python-${python}
+
+ARG selenium_version="4.15.2"
+ARG pyodide_version="0.26.0"
+ARG chrome_version="latest"
+ARG required_python_min="(3,12)"
+# fail if python version < 3.12
+RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)"
+
+# install selenium and pyodide-build and recent python
+
+# needs to be a login shell so ~/.profile is read
+SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"]
+
+RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \
+    python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version}
+
+# install pyodide dist directory to /pyodide
+RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \
+    wget -q "${pyodide_dist_url}" -O- | tar -xj -C /
+
+# install correct version of emscripten for this pyodide
+COPY ci/scripts/install_emscripten.sh /arrow/ci/scripts/
+RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide
+
+# make sure zlib is cached in the EMSDK folder
+RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib
+
+# install node 20 (needed for async call support)
+# and pthread-stubs for build, and unzip needed for chrome build to work
+RUN conda install nodejs=20 unzip pthread-stubs make -c conda-forge
+
+# install chrome for testing browser based runner
+COPY ci/scripts/install_chromedriver.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_chromedriver.sh "${chrome_version}"
+
+# make the version of make that is installed by conda be available everywhere
+# or else pyodide's isolated build fails to find it
+RUN ln -s "$(type -P make)" /bin/make
+
+ENV ARROW_BUILD_TESTS="OFF" \
+    ARROW_BUILD_TYPE="release" \
+    ARROW_DEPENDENCY_SOURCE="BUNDLED" \
+    ARROW_EMSCRIPTEN="ON"
diff --git a/ci/docker/conda-python-hdfs.dockerfile b/ci/docker/conda-python-hdfs.dockerfile
index fa4fa0d1fb772..4e5e1a402e282 100644
--- a/ci/docker/conda-python-hdfs.dockerfile
+++ b/ci/docker/conda-python-hdfs.dockerfile
@@ -20,7 +20,7 @@ ARG arch=amd64
 ARG python=3.8
 FROM ${repo}:${arch}-conda-python-${python}
 
-ARG jdk=8
+ARG jdk=11
 ARG maven=3.8.7
 RUN mamba install -q -y \
       maven=${maven} \
diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index 866f6f37f8bd9..d95fe58b529f6 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -20,7 +20,7 @@ ARG arch=amd64
 ARG python=3.8
 FROM ${repo}:${arch}-conda-python-${python}
 
-ARG jdk=8
+ARG jdk=11
 ARG maven=3.8.7
 ARG numpy=latest
diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile
index 8ecaa6c3ca784..33d11823094ce 100644
--- a/ci/docker/fedora-39-cpp.dockerfile
+++ b/ci/docker/fedora-39-cpp.dockerfile
@@ -77,8 +77,7 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
 
 # PYARROW_TEST_GANDIVA=OFF: GH-39695: We need to make LLVM symbols visible in
 # Python process explicitly if we use LLVM 17 or later.
-ENV absl_SOURCE=BUNDLED \
-    ARROW_ACERO=ON \
+ENV ARROW_ACERO=ON \
     ARROW_AZURE=OFF \
     ARROW_BUILD_TESTS=ON \
     ARROW_DEPENDENCY_SOURCE=SYSTEM \
diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile
index 8b73c73c1d240..479f4aa598b18 100644
--- a/ci/docker/java-jni-manylinux-201x.dockerfile
+++ b/ci/docker/java-jni-manylinux-201x.dockerfile
@@ -33,7 +33,7 @@ RUN vcpkg install \
         --x-feature=s3
 
 # Install Java
-ARG java=1.8.0
+ARG java=11
 ARG maven=3.9.3
 RUN yum install -y java-$java-openjdk-devel && \
     yum clean all && \
diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index 1c916840e071b..0804f3543c283 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -19,7 +19,7 @@ ARG base
 FROM ${base}
 
 ARG r=4.4
-ARG jdk=8
+ARG jdk=11
 
 ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
 
diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile
index 68d4b27d2ca96..cb39667af1e10 100644
--- a/ci/docker/python-wheel-manylinux.dockerfile
+++ b/ci/docker/python-wheel-manylinux.dockerfile
@@ -25,6 +25,18 @@ ARG manylinux
 ENV MANYLINUX_VERSION=${manylinux}
 
 # Ensure dnf is installed, especially for the manylinux2014 base
+RUN if [ "${MANYLINUX_VERSION}" = "2014" ]; then \
+      sed -i \
+        -e 's/^mirrorlist/#mirrorlist/' \
+        -e 's/^#baseurl/baseurl/' \
+        -e 's/mirror\.centos\.org/vault.centos.org/' \
+        /etc/yum.repos.d/*.repo; \
+      if [ "${arch}" != "amd64" ]; then \
+        sed -i \
+          -e 's,vault\.centos\.org/centos,vault.centos.org/altarch,' \
+          /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo; \
+      fi; \
+    fi
 RUN yum install -y dnf
 
 # Install basic dependencies
diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD
index f6bbc78be710e..ed68faae950b1 100644
--- a/ci/scripts/PKGBUILD
+++ b/ci/scripts/PKGBUILD
@@ -18,7 +18,7 @@
 _realname=arrow
 pkgbase=mingw-w64-${_realname}
 pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
-pkgver=16.1.0.9000
+pkgver=17.0.0.9000
 pkgrel=8000
 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
 arch=("any")
@@ -69,6 +69,12 @@ build() {
   mkdir -p ${cpp_build_dir}
   pushd ${cpp_build_dir}
 
+  # We use static cURL in google-cloud-cpp. If we can use cURL's CMake
+  # package, we don't need to specify CURL_STATICLIB explicitly. But
+  # we don't have cURL's CMake package. We need to use CXXFLAGS
+  # instead of ARROW_CXXFLAGS because ARROW_CXXFLAGS aren't passed to
+  # ExternProjects.
+  export CXXFLAGS="${CXXFLAGS} -DCURL_STATICLIB"
   # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't
   # set the appropriate compiler definition.
   export CPPFLAGS="-DUTF8PROC_STATIC"
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index 3ee7fbd9d19cd..bc2bba915f73a 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -30,7 +30,7 @@ if [ -x "$(command -v git)" ]; then
 fi
 
 # TODO(kszucs): consider to move these to CMake
-if [ ! -z "${CONDA_PREFIX}" ]; then
+if [ ! -z "${CONDA_PREFIX}" ] && [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ]; then
   echo -e "===\n=== Conda environment for build\n==="
   conda list
 
@@ -99,6 +99,10 @@ if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
   fi
   n_jobs=2 # Emscripten build fails on docker unless this is set really low
   source ~/emsdk/emsdk_env.sh
+  export CMAKE_INSTALL_PREFIX=$(em-config CACHE)/sysroot
+  # conda sets LDFLAGS / CFLAGS etc. which break
+  # emcmake so we unset them
+  unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS
   emcmake cmake \
     --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \
     -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
diff --git a/ci/scripts/install_chromedriver.sh b/ci/scripts/install_chromedriver.sh
new file mode 100755
index 0000000000000..9097a20bfc5c9
--- /dev/null
+++ b/ci/scripts/install_chromedriver.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install Chrome and Chromedriver for Selenium
+
+set -e
+
+chrome_version=$1
+
+if [ $chrome_version = "latest" ]; then
+  latest_release_path=LATEST_RELEASE_STABLE
+else
+  latest_release_path=LATEST_RELEASE_${chrome_version}
+fi
+CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}")
+CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb"
+CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip"
+wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
+apt-get update
+apt install -qqy /tmp/google-chrome.deb
+rm -f /tmp/google-chrome.deb
+rm -rf /var/lib/apt/lists/*
+wget --no-verbose -O /tmp/chromedriver-linux64.zip "${CHROMEDRIVER_DOWNLOAD_URL}"
+unzip /tmp/chromedriver-linux64.zip -d /opt/
+rm /tmp/chromedriver-linux64.zip
+ln -fs /opt/chromedriver-linux64/chromedriver /usr/local/bin/chromedriver
+echo "Using Chrome version: $(google-chrome --version)"
+echo "Using Chrome Driver version: $(chromedriver --version)"
diff --git a/ci/scripts/install_emscripten.sh b/ci/scripts/install_emscripten.sh
new file mode 100755
index 0000000000000..4bad7238a6cdd
--- /dev/null
+++ b/ci/scripts/install_emscripten.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# install emscripten sdk version to match pyodide in $2 to directory $1/emsdk
+
+set -e
+
+target_path=$1
+pyodide_path=$2
+
+emscripten_version=$(${pyodide_path}/python -c "import sys;print(*sys._emscripten_info.emscripten_version,sep='.')")
+
+cd ${target_path}
+if [ ! -d emsdk ]; then
+  git clone https://github.com/emscripten-core/emsdk.git
+fi
+cd emsdk
+./emsdk install ${emscripten_version}
+./emsdk activate ${emscripten_version}
+echo "Installed emsdk to: ${target_path}"
\ No newline at end of file
diff --git a/ci/scripts/python_build_emscripten.sh b/ci/scripts/python_build_emscripten.sh
new file mode 100755
index 0000000000000..14e9626202079
--- /dev/null
+++ b/ci/scripts/python_build_emscripten.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+
+
+source ~/emsdk/emsdk_env.sh
+
+source_dir=${arrow_dir}/python
+python_build_dir=${build_dir}/python
+
+rm -rf ${python_build_dir}
+cp -aL ${source_dir} ${python_build_dir}
+
+# conda sets LDFLAGS / CFLAGS etc. which break
+# emcmake so we unset them
+unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS
+
+pushd ${python_build_dir}
+pyodide build
+popd
diff --git a/ci/scripts/python_test_emscripten.sh b/ci/scripts/python_test_emscripten.sh
new file mode 100755
index 0000000000000..4029722568b9b
--- /dev/null
+++ b/ci/scripts/python_test_emscripten.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# run tests against Chrome and node.js as representative
+# WebAssembly platforms (i.e. one browser, one non-browser).
+
+set -ex
+
+build_dir=${1}/python
+pyodide_dist_dir=${2}
+
+cd ${build_dir}
+
+# note: this uses the newest wheel in dist
+pyodide_wheel=$(ls -t dist/pyarrow*.whl | head -1)
+
+echo "-------------- Running emscripten tests in Node ----------------------"
+python scripts/run_emscripten_tests.py ${pyodide_wheel} --dist-dir=${pyodide_dist_dir} --runtime=node
+
+echo "-------------- Running emscripten tests in Chrome --------------------"
+python scripts/run_emscripten_tests.py ${pyodide_wheel} --dist-dir=${pyodide_dist_dir} --runtime=chrome
+
diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch
index 136b719ea72dd..67fb2a4a3ea76 100644
--- a/ci/vcpkg/ports.patch
+++ b/ci/vcpkg/ports.patch
@@ -65,3 +65,56 @@ index 000000000..e839c93a4
 +  }
 +
 +  static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) {
+diff --git a/ports/thrift/portfile.cmake b/ports/thrift/portfile.cmake
+index 1501782..71d2147 100644
+--- a/ports/thrift/portfile.cmake
++++ b/ports/thrift/portfile.cmake
+@@ -12,7 +12,7 @@ vcpkg_find_acquire_program(BISON)
+ vcpkg_from_github(
+     OUT_SOURCE_PATH SOURCE_PATH
+     REPO apache/thrift
+-    REF "${VERSION}"
++    REF "v${VERSION}"
+     SHA512 5e4ee9870b30fe5ba484d39781c435716f7f3903793dc8aae96594ca813b1a5a73363b84719038ca8fa3ab8ef0a419a28410d936ff7b3bbadf36fc085a6883ae
+     HEAD_REF master
+     PATCHES
+diff --git a/ports/thrift/vcpkg.json b/ports/thrift/vcpkg.json
+index 2d5a854..9ff49ec 100644
+--- a/ports/thrift/vcpkg.json
++++ b/ports/thrift/vcpkg.json
+@@ -1,6 +1,7 @@
+ {
+   "name": "thrift",
+   "version": "0.20.0",
++  "port-version": 1,
+   "description": "Apache Thrift is a software project spanning a variety of programming languages and use cases. Our goal is to make reliable, performant communication and data serialization across languages as efficient and seamless as possible.",
+   "homepage": "https://github.com/apache/thrift",
+   "license": "Apache-2.0",
+diff --git a/versions/baseline.json b/versions/baseline.json
+index c6ce736..9ad1d63 100644
+--- a/versions/baseline.json
++++ b/versions/baseline.json
+@@ -8622,7 +8622,7 @@
+   },
+   "thrift": {
+     "baseline": "0.20.0",
+-    "port-version": 0
++    "port-version": 1
+   },
+   "tidy-html5": {
+     "baseline": "5.8.0",
+diff --git a/versions/t-/thrift.json b/versions/t-/thrift.json
+index 3db38c5..7464bde 100644
+--- a/versions/t-/thrift.json
++++ b/versions/t-/thrift.json
+@@ -1,5 +1,10 @@
+ {
+   "versions": [
++    {
++      "git-tree": "13757a6b05741cf3c9c39e3a1dcc5e5cd685e025",
++      "version": "0.20.0",
++      "port-version": 1
++    },
+     {
+       "git-tree": "6855be1ce96497811d4eb0a9879baf6cf1b3610c",
+       "version": "0.20.0",
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 679842c31e0b1..a1e3138da9e0b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -71,7 +71,15 @@ if(POLICY CMP0135)
   cmake_policy(SET CMP0135 NEW)
 endif()
 
-set(ARROW_VERSION "17.0.0-SNAPSHOT")
+# https://cmake.org/cmake/help/latest/policy/CMP0170.html
+#
+# CMP0170 is for enforcing dependency populations by users with
+# FETCHCONTENT_FULLY_DISCONNECTED=ON.
+if(POLICY CMP0170)
+  cmake_policy(SET CMP0170 NEW)
+endif()
+
+set(ARROW_VERSION "18.0.0-SNAPSHOT")
 
 string(REGEX MATCH "^[0-9]+\.[0-9]+\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
 
@@ -681,7 +689,7 @@ endif()
 
 if("${ARROW_TEST_LINKAGE}" STREQUAL "shared")
   if(ARROW_BUILD_TESTS AND NOT ARROW_BUILD_SHARED)
-    message(FATAL_ERROR "If using shared linkage for unit tests, must also \
+    message(FATAL_ERROR "If using ARROW_TEST_LINKAGE=shared, must also \
 pass ARROW_BUILD_SHARED=on")
   endif()
   # Use shared linking for unit tests if it's available
diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json
index cb4cdfc03ac82..8886db0e11017 100644
--- a/cpp/CMakePresets.json
+++ b/cpp/CMakePresets.json
@@ -53,6 +53,7 @@
         "ARROW_ACERO": "ON",
         "ARROW_BUILD_SHARED": "OFF",
         "ARROW_BUILD_STATIC": "ON",
+        "ARROW_CSV": "ON",
        "ARROW_CUDA": "OFF",
        "ARROW_DEPENDENCY_SOURCE": "BUNDLED",
        "ARROW_DEPENDENCY_USE_SHARED": "OFF",
@@ -60,6 +61,7 @@
         "ARROW_FLIGHT": "OFF",
         "ARROW_IPC": "ON",
         "ARROW_JEMALLOC": "OFF",
+        "ARROW_JSON": "ON",
        "ARROW_MIMALLOC": "OFF",
        "ARROW_ORC": "ON",
        "ARROW_RUNTIME_SIMD_LEVEL": "NONE",
diff --git a/cpp/build-support/lint_cpp_cli.py b/cpp/build-support/lint_cpp_cli.py
index a0eb8f0efe6d5..47abd53fe925d 100755
--- a/cpp/build-support/lint_cpp_cli.py
+++ b/cpp/build-support/lint_cpp_cli.py
@@ -31,6 +31,7 @@
 _NULLPTR_REGEX = re.compile(r'.*\bnullptr\b.*')
 _RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*')
 _ASSIGN_OR_RAISE_REGEX = re.compile(r'.*\sASSIGN_OR_RAISE.*')
+_DCHECK_REGEX = re.compile(r'.*\sDCHECK.*')
 
 
 def _paths(paths):
@@ -54,14 +55,12 @@ def lint_file(path):
     (lambda x: re.match(_RETURN_NOT_OK_REGEX, x),
      'Use ARROW_RETURN_NOT_OK in header files', _paths('''\
          arrow/status.h
-         test
-         arrow/util/hash.h
          arrow/python/util''')),
    (lambda x: re.match(_ASSIGN_OR_RAISE_REGEX, x),
-     'Use ARROW_ASSIGN_OR_RAISE in header files', _paths('''\
-         arrow/result_internal.h
-         test
-         '''))
+     'Use ARROW_ASSIGN_OR_RAISE in header files', []),
+    (lambda x: re.match(_DCHECK_REGEX, x),
+     'Use ARROW_DCHECK in header files', _paths('''\
+         arrow/util/logging.h'''))
 ]
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index fe859a0121ca6..5b89a831ff7fe 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2532,6 +2532,7 @@ macro(build_zlib)
       set_property(TARGET ZLIB::ZLIB
                    PROPERTY IMPORTED_LOCATION
                             "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a")
+      target_include_directories(ZLIB::ZLIB INTERFACE "${EMSCRIPTEN_SYSROOT}/include")
       list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
     else()
       set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
@@ -2873,33 +2874,6 @@ endmacro()
 # ----------------------------------------------------------------------
 # Dependencies for Arrow Flight RPC
 
-macro(ensure_absl)
-  if(NOT absl_FOUND)
-    if(${absl_SOURCE} STREQUAL "AUTO")
-      # We can't use resolve_dependency(absl 20211102) to use Abseil
-      # 20211102 or later because Abseil's CMake package uses "EXACT"
-      # version match strategy. Our CMake configuration will work with
-      # Abseil LTS 20211102 or later. So we want to accept Abseil LTS
-      # 20211102 or later. We need to update
-      # ARROW_ABSL_REQUIRED_LTS_VERSIONS list when new Abseil LTS is
-      # released.
-      set(ARROW_ABSL_REQUIRED_LTS_VERSIONS 20230125 20220623 20211102)
-      foreach(_VERSION ${ARROW_ABSL_REQUIRED_LTS_VERSIONS})
-        find_package(absl ${_VERSION})
-        if(absl_FOUND)
-          break()
-        endif()
-      endforeach()
-      # If we can't find Abseil LTS 20211102 or later, we use bundled
-      # Abseil.
-      if(NOT absl_FOUND)
-        set(absl_SOURCE "BUNDLED")
-      endif()
-    endif()
-    resolve_dependency(absl)
-  endif()
-endmacro()
-
 macro(build_absl)
   message(STATUS "Building Abseil-cpp from source")
   set(absl_FOUND TRUE)
@@ -3844,7 +3818,6 @@ macro(build_grpc)
                      TRUE
                      PC_PACKAGE_NAMES
                      libcares)
-  ensure_absl()
 
   message(STATUS "Building gRPC from source")
 
@@ -4134,12 +4107,40 @@ macro(build_grpc)
   endif()
 endmacro()
 
+if(ARROW_WITH_GOOGLE_CLOUD_CPP OR ARROW_WITH_GRPC)
+  set(ARROW_ABSL_REQUIRED_VERSION 20211102)
+  # Google Cloud C++ SDK and gRPC require Google Abseil
+  if(ARROW_WITH_GOOGLE_CLOUD_CPP)
+    set(ARROW_ABSL_CMAKE_PACKAGE_NAME Arrow)
+    set(ARROW_ABSL_PC_PACKAGE_NAME arrow)
+  else()
+    set(ARROW_ABSL_CMAKE_PACKAGE_NAME ArrowFlight)
+    set(ARROW_ABSL_PC_PACKAGE_NAME arrow-flight)
+  endif()
+  resolve_dependency(absl
+                     ARROW_CMAKE_PACKAGE_NAME
+                     ${ARROW_ABSL_CMAKE_PACKAGE_NAME}
+                     ARROW_PC_PACKAGE_NAME
+                     ${ARROW_ABSL_PC_PACKAGE_NAME}
+                     HAVE_ALT
+                     FALSE
+                     FORCE_ANY_NEWER_VERSION
+                     TRUE
+                     REQUIRED_VERSION
+                     ${ARROW_ABSL_REQUIRED_VERSION})
+endif()
+
 if(ARROW_WITH_GRPC)
   if(NOT ARROW_ENABLE_THREADING)
     message(FATAL_ERROR "Can't use gRPC with ARROW_ENABLE_THREADING=OFF")
   endif()
 
   set(ARROW_GRPC_REQUIRED_VERSION "1.30.0")
+  if(absl_SOURCE STREQUAL "BUNDLED" AND NOT gRPC_SOURCE STREQUAL "BUNDLED")
+    # System gRPC can't be used with bundled Abseil
+    message(STATUS "Forcing gRPC_SOURCE to BUNDLED because absl_SOURCE is BUNDLED")
+    set(gRPC_SOURCE "BUNDLED")
+  endif()
   if(NOT Protobuf_SOURCE STREQUAL gRPC_SOURCE)
     # ARROW-15495: Protobuf/gRPC must come from the same source
     message(STATUS "Forcing gRPC_SOURCE to Protobuf_SOURCE (${Protobuf_SOURCE})")
@@ -4225,7 +4226,8 @@ macro(build_nlohmann_json)
   set(NLOHMANN_JSON_INCLUDE_DIR "${NLOHMANN_JSON_PREFIX}/include")
   set(NLOHMANN_JSON_CMAKE_ARGS
       ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
-      -DJSON_BuildTests=OFF)
+      # google-cloud-cpp requires JSON_MultipleHeaders=ON
+      -DJSON_BuildTests=OFF -DJSON_MultipleHeaders=ON)
 
   set(NLOHMANN_JSON_BUILD_BYPRODUCTS ${NLOHMANN_JSON_PREFIX}/include/nlohmann/json.hpp)
@@ -4257,7 +4259,6 @@ macro(build_google_cloud_cpp_storage)
   message(STATUS "Only building the google-cloud-cpp::storage component")
 
   # List of dependencies taken from https://github.com/googleapis/google-cloud-cpp/blob/main/doc/packaging.md
-  ensure_absl()
   build_crc32c_once()
 
   # Curl is required on all platforms, but building it internally might also trip over S3's copy.
@@ -4294,6 +4295,7 @@ macro(build_google_cloud_cpp_storage)
       # We need this to build with OpenSSL 3.0.
       # See also: https://github.com/googleapis/google-cloud-cpp/issues/8544
       -DGOOGLE_CLOUD_CPP_ENABLE_WERROR=OFF
+      -DGOOGLE_CLOUD_CPP_WITH_MOCKS=OFF
       -DOPENSSL_CRYPTO_LIBRARY=${OPENSSL_CRYPTO_LIBRARY}
       -DOPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR}
       -DOPENSSL_SSL_LIBRARY=${OPENSSL_SSL_LIBRARY})
@@ -4380,6 +4382,9 @@ macro(build_google_cloud_cpp_storage)
                                  nlohmann_json::nlohmann_json
                                  OpenSSL::SSL
                                  OpenSSL::Crypto)
+  if(WIN32)
+    target_link_libraries(google-cloud-cpp::rest-internal INTERFACE ws2_32)
+  endif()
 
   add_library(google-cloud-cpp::storage STATIC IMPORTED)
   set_target_properties(google-cloud-cpp::storage
@@ -4485,115 +4490,216 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
 
 # ----------------------------------------------------------------------
 # Apache ORC
 
-macro(build_orc)
+function(build_orc)
   message(STATUS "Building Apache ORC from source")
 
-  set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
-  set(ORC_HOME "${ORC_PREFIX}")
-  set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
-  set(ORC_STATIC_LIB
-      "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29)
+    fetchcontent_declare(orc
+                         ${FC_DECLARE_COMMON_OPTIONS}
+                         URL ${ORC_SOURCE_URL}
+                         URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
+    prepare_fetchcontent()
+
+    set(CMAKE_UNITY_BUILD FALSE)
+
+    set(ORC_PREFER_STATIC_LZ4
+        OFF
+        CACHE BOOL "" FORCE)
+    get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY)
+    set(LZ4_HOME
+        ${LZ4_ROOT}
+        CACHE STRING "" FORCE)
+    set(LZ4_LIBRARY
+        LZ4::lz4
+        CACHE STRING "" FORCE)
+
+    set(ORC_PREFER_STATIC_PROTOBUF
+        OFF
+        CACHE BOOL "" FORCE)
+    get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF}
+                        INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY)
+    set(PROTOBUF_HOME
+        ${Protobuf_ROOT}
+        CACHE STRING "" FORCE)
+    # ORC uses this.
+    target_include_directories(${ARROW_PROTOBUF_LIBPROTOC}
+                               INTERFACE "${PROTOBUF_INCLUDE_DIR}")
+    set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC})
+    set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF})
+    set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC})
+
+    set(ORC_PREFER_STATIC_SNAPPY
+        OFF
+        CACHE BOOL "" FORCE)
+    get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY)
+    set(SNAPPY_HOME
+        ${Snappy_ROOT}
+        CACHE STRING "" FORCE)
+    set(SNAPPY_LIBRARY
+        ${Snappy_TARGET}
+        CACHE STRING "" FORCE)
+
+    set(ORC_PREFER_STATIC_ZLIB
+        OFF
+        CACHE BOOL "" FORCE)
+    get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY)
+    set(ZLIB_HOME
+        ${ZLIB_ROOT}
+        CACHE STRING "" FORCE)
+    set(ZLIB_LIBRARY
+        ZLIB::ZLIB
+        CACHE STRING "" FORCE)
+
+    set(ORC_PREFER_STATIC_ZSTD
+        OFF
+        CACHE BOOL "" FORCE)
+    get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD}
+                        INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY)
+    set(ZSTD_HOME
+        ${ZSTD_ROOT}
+        CACHE STRING "" FORCE)
+    set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD})
+
+    set(BUILD_CPP_TESTS
+        OFF
+        CACHE BOOL "" FORCE)
+    set(BUILD_JAVA
+        OFF
+        CACHE BOOL "" FORCE)
+    set(BUILD_LIBHDFSPP
+        OFF
+        CACHE BOOL "" FORCE)
+    set(BUILD_TOOLS
+        OFF
+        CACHE BOOL "" FORCE)
+    set(INSTALL_VENDORED_LIBS
+        OFF
+        CACHE BOOL "" FORCE)
+    set(STOP_BUILD_ON_WARNING
+        OFF
+        CACHE BOOL "" FORCE)
+
+    # We can remove this with ORC 2.0.2 or later.
+    list(PREPEND CMAKE_MODULE_PATH
+         ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules)
+
+    fetchcontent_makeavailable(orc)
+
+    add_library(orc::orc INTERFACE IMPORTED)
+    target_link_libraries(orc::orc INTERFACE orc)
+    target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include"
+                                                  "${orc_SOURCE_DIR}/c++/include")
+
+    list(APPEND ARROW_BUNDLED_STATIC_LIBS orc)
+  else()
+    set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
+    set(ORC_HOME "${ORC_PREFIX}")
+    set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
+    set(ORC_STATIC_LIB
+        "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
 
-  get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
-                      INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)
+    get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
+                        INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)
 
-  get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
-                      INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)
+    get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
+                        INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)
 
-  get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
+    get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
 
-  get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
+    get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
+    get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
 
-  set(ORC_CMAKE_ARGS
-      ${EP_COMMON_CMAKE_ARGS}
-      "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
-      -DSTOP_BUILD_ON_WARNING=OFF
-      -DBUILD_LIBHDFSPP=OFF
-      -DBUILD_JAVA=OFF
-      -DBUILD_TOOLS=OFF
-      -DBUILD_CPP_TESTS=OFF
-      -DINSTALL_VENDORED_LIBS=OFF
-      "-DLZ4_HOME=${ORC_LZ4_ROOT}"
-      "-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
-      "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
-      "-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
-      "-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
-      "-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
-      "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
-      "-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
-      "-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
-      "-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
-      "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
-      "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
-      "-DZSTD_HOME=${ORC_ZSTD_ROOT}"
-      "-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
-      "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
-  if(ZLIB_ROOT)
-    set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
-  endif()
+    set(ORC_CMAKE_ARGS
+        ${EP_COMMON_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
+        -DSTOP_BUILD_ON_WARNING=OFF
+        -DBUILD_LIBHDFSPP=OFF
+        -DBUILD_JAVA=OFF
+        -DBUILD_TOOLS=OFF
+        -DBUILD_CPP_TESTS=OFF
+        -DINSTALL_VENDORED_LIBS=OFF
+        "-DLZ4_HOME=${ORC_LZ4_ROOT}"
+        "-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
+        "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
+        "-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
+        "-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
+        "-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
+        "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
+        "-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
+        "-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
+        "-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
+        "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
+        "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
+        "-DZSTD_HOME=${ORC_ZSTD_ROOT}"
+        "-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
+        "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
+    if(ZLIB_ROOT)
+      set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
+    endif()
 
-  # Work around CMake bug
-  file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})
+    # Work around CMake bug
+    file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})
 
-  externalproject_add(orc_ep
-                      ${EP_COMMON_OPTIONS}
-                      URL ${ORC_SOURCE_URL}
-                      URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
-                      BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
-                      CMAKE_ARGS ${ORC_CMAKE_ARGS}
-                      DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
-                              ${ARROW_ZSTD_LIBZSTD}
-                              ${Snappy_TARGET}
-                              LZ4::lz4
-                              ZLIB::ZLIB)
-
-  set(ORC_VENDORED 1)
-
-  add_library(orc::orc STATIC IMPORTED)
-  set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
-  target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
-  target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
-                                           ${Snappy_TARGET})
-  # Protobuf generated files may use ABSL_DCHECK*() and
-  # absl::log_internal_check_op is needed for them.
-  if(TARGET absl::log_internal_check_op)
-    target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
-  endif()
-  if(NOT MSVC)
-    if(NOT APPLE AND ARROW_ENABLE_THREADING)
-      target_link_libraries(orc::orc INTERFACE Threads::Threads)
-    endif()
-    target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
-  endif()
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9")
-      target_link_libraries(orc::orc INTERFACE stdc++fs)
+    externalproject_add(orc_ep
+                        ${EP_COMMON_OPTIONS}
+                        URL ${ORC_SOURCE_URL}
+                        URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
+                        BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
+                        CMAKE_ARGS ${ORC_CMAKE_ARGS}
+                        DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
+                                ${ARROW_PROTOBUF_PROTOC}
+                                ${ARROW_ZSTD_LIBZSTD}
+                                ${Snappy_TARGET}
+                                LZ4::lz4
+                                ZLIB::ZLIB)
+    add_library(orc::orc STATIC IMPORTED)
+    set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
+    target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
+    target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
+                                             ${Snappy_TARGET})
+    # Protobuf generated files may use ABSL_DCHECK*() and
+    # absl::log_internal_check_op is needed for them.
+    if(TARGET absl::log_internal_check_op)
+      target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
     endif()
-  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8")
-      target_link_libraries(orc::orc INTERFACE c++fs)
+    if(NOT MSVC)
+      if(NOT APPLE AND ARROW_ENABLE_THREADING)
+        target_link_libraries(orc::orc INTERFACE Threads::Threads)
+      endif()
+      target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
     endif()
+    target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
+    add_dependencies(orc::orc orc_ep)
+    list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
   endif()
-  add_dependencies(orc::orc orc_ep)
-
-  list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
-endmacro()
+  set(ORC_VENDORED
+      TRUE
+      PARENT_SCOPE)
+  set(ARROW_BUNDLED_STATIC_LIBS
+      ${ARROW_BUNDLED_STATIC_LIBS}
+      PARENT_SCOPE)
+endfunction()
 
 if(ARROW_ORC)
   resolve_dependency(orc HAVE_ALT TRUE)
-  target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
   if(ORC_VENDORED)
     set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION})
   else()
+    target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
     set(ARROW_ORC_VERSION ${orcAlt_VERSION})
+    message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
+    message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
   endif()
-  message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
-  message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
 endif()
 
 # ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc
index 848cbdf7506ad..2248362241cd7 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -32,7 +32,7 @@
 
 #include "arrow/acero/exec_plan.h"
 #include "arrow/acero/options.h"
-#include "arrow/acero/unmaterialized_table.h"
+#include "arrow/acero/unmaterialized_table_internal.h"
 #ifndef NDEBUG
 #include "arrow/acero/options_internal.h"
 #endif
diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc
index a71ac79efcc46..2845383cee982 100644
--- a/cpp/src/arrow/acero/sorted_merge_node.cc
+++ b/cpp/src/arrow/acero/sorted_merge_node.cc
@@ -28,7 +28,7 @@
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/time_series_util.h"
-#include "arrow/acero/unmaterialized_table.h"
+#include "arrow/acero/unmaterialized_table_internal.h"
 #include "arrow/acero/util.h"
 #include "arrow/array/builder_base.h"
 #include "arrow/result.h"
diff --git a/cpp/src/arrow/acero/unmaterialized_table.h b/cpp/src/arrow/acero/unmaterialized_table_internal.h
similarity index 100%
rename from cpp/src/arrow/acero/unmaterialized_table.h
rename to cpp/src/arrow/acero/unmaterialized_table_internal.h
diff --git a/cpp/src/arrow/acero/util.h b/cpp/src/arrow/acero/util.h
index 0eb9f4c87e180..ee46e8527422a 100644
--- a/cpp/src/arrow/acero/util.h
+++ b/cpp/src/arrow/acero/util.h
@@ -65,7 +65,7 @@ class ARROW_ACERO_EXPORT AtomicCounter {
 
   // return true if the counter is complete
   bool Increment() {
-    DCHECK_NE(count_.load(), total_.load());
+    ARROW_DCHECK_NE(count_.load(), total_.load());
     int count = count_.fetch_add(1) + 1;
     if (count != total_.load()) return false;
     return DoneOnce();
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index 87e55246c78fe..b4638dd6593d8 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -75,6 +75,31 @@ struct Bitmap {
   bool AllSet() const { return data == nullptr; }
 };
 
+enum class OffsetBufferOpOutcome {
+  kOk,
+  kOffsetOverflow,
+};
+
+Status OffsetOverflowStatus() {
+  return Status::Invalid("offset overflow while concatenating arrays");
+}
+
+#define RETURN_IF_NOT_OK_OUTCOME(outcome)          \
+  switch (outcome) {                               \
+    case OffsetBufferOpOutcome::kOk:               \
+      break;                                       \
+    case OffsetBufferOpOutcome::kOffsetOverflow:   \
+      return OffsetOverflowStatus();               \
+  }
+
+struct ErrorHints {
+  /// \brief Suggested cast to avoid overflow during concatenation.
+  ///
+  /// If the concatenation of offsets overflows, this field might be set to the
+  /// a type that uses larger offsets (e.g. large_utf8, large_list).
+  std::shared_ptr<DataType> suggested_cast;
+};
+
 // Allocate a buffer and concatenate bitmaps into it.
 Status ConcatenateBitmaps(const std::vector<Bitmap>& bitmaps, MemoryPool* pool,
                           std::shared_ptr<Buffer>* out) {
@@ -112,15 +137,16 @@ int64_t SumBufferSizesInBytes(const BufferVector& buffers) {
 
 // Write offsets in src into dst, adjusting them such that first_offset
 // will be the first offset written.
 template <typename Offset>
-Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst,
-                  Range* values_range);
+Result<OffsetBufferOpOutcome> PutOffsets(const Buffer& src, Offset first_offset,
+                                         Offset* dst, Range* values_range);
 
 // Concatenate buffers holding offsets into a single buffer of offsets,
 // also computing the ranges of values spanned by each buffer of offsets.
 template <typename Offset>
-Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool,
-                          std::shared_ptr<Buffer>* out,
-                          std::vector<Range>* values_ranges) {
+Result<OffsetBufferOpOutcome> ConcatenateOffsets(const BufferVector& buffers,
+                                                 MemoryPool* pool,
+                                                 std::shared_ptr<Buffer>* out,
+                                                 std::vector<Range>* values_ranges) {
   values_ranges->resize(buffers.size());
 
   // allocate output buffer
@@ -133,26 +159,30 @@
   for (size_t i = 0; i < buffers.size(); ++i) {
     // the first offset from buffers[i] will be adjusted to values_length
     // (the cumulative length of values spanned by offsets in previous buffers)
-    RETURN_NOT_OK(PutOffsets(*buffers[i], values_length,
-                             out_data + elements_length, &(*values_ranges)[i]));
+    ARROW_ASSIGN_OR_RAISE(auto outcome, PutOffsets(*buffers[i], values_length,
+                                                   out_data + elements_length,
+                                                   &(*values_ranges)[i]));
+    if (ARROW_PREDICT_FALSE(outcome != OffsetBufferOpOutcome::kOk)) {
+      return outcome;
+    }
     elements_length += buffers[i]->size() / sizeof(Offset);
     values_length += static_cast<Offset>((*values_ranges)[i].length);
   }
 
   // the final element in out_data is the length of all values spanned by the offsets
   out_data[out_size_in_bytes / sizeof(Offset)] = values_length;
-  return Status::OK();
+  return OffsetBufferOpOutcome::kOk;
 }
 
 template <typename Offset>
-Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst,
-                  Range* values_range) {
+Result<OffsetBufferOpOutcome> PutOffsets(const Buffer& src, Offset first_offset,
+                                         Offset* dst, Range* values_range) {
   if (src.size() == 0) {
     // It's allowed to have an empty offsets buffer for a 0-length array
     // (see Array::Validate)
     values_range->offset = 0;
     values_range->length = 0;
-    return Status::OK();
+    return OffsetBufferOpOutcome::kOk;
   }
 
   // Get the range of offsets to transfer from src
@@ -162,8 +192,9 @@
   // Compute the range of values which is spanned by this range of offsets
   values_range->offset = src_begin[0];
   values_range->length = *src_end - values_range->offset;
-  if (first_offset > std::numeric_limits<Offset>::max() - values_range->length) {
-    return Status::Invalid("offset overflow while concatenating arrays");
+  if (ARROW_PREDICT_FALSE(first_offset >
+                          std::numeric_limits<Offset>::max() - values_range->length)) {
+    return OffsetBufferOpOutcome::kOffsetOverflow;
   }
 
   // Write offsets into dst, ensuring that the first offset written is
@@ -175,12 +206,14 @@
   std::transform(src_begin, src_end, dst, [displacement](Offset offset) {
     return SafeSignedAdd(offset, displacement);
   });
-  return Status::OK();
+  return OffsetBufferOpOutcome::kOk;
 }
 
 template <typename offset_type>
-Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buffer& src,
-                          offset_type displacement, offset_type* dst);
+Result<OffsetBufferOpOutcome> PutListViewOffsets(const ArrayData& input,
+                                                 offset_type* sizes, const Buffer& src,
+                                                 offset_type displacement,
+                                                 offset_type* dst);
 
 // Concatenate buffers holding list-view offsets into a single buffer of offsets
 //
@@ -198,10 +231,10 @@
 // \param[in] in The child arrays
 // \param[in,out] sizes The concatenated sizes buffer
 template <typename offset_type>
-Status ConcatenateListViewOffsets(const ArrayDataVector& in, offset_type* sizes,
-                                  const BufferVector& offset_buffers,
-                                  const std::vector<Range>& value_ranges,
-                                  MemoryPool* pool, std::shared_ptr<Buffer>* out) {
+Result<OffsetBufferOpOutcome> ConcatenateListViewOffsets(
+    const ArrayDataVector& in, offset_type* sizes, const BufferVector& offset_buffers,
+    const std::vector<Range>& value_ranges, MemoryPool* pool,
+    std::shared_ptr<Buffer>* out) {
   DCHECK_EQ(offset_buffers.size(), value_ranges.size());
 
   // Allocate resulting offsets buffer and initialize it with zeros
@@ -216,26 +249,32 @@
   for (size_t i = 0; i < offset_buffers.size(); ++i) {
     const auto displacement =
         static_cast<offset_type>(num_child_values - value_ranges[i].offset);
-    RETURN_NOT_OK(PutListViewOffsets(*in[i], /*sizes=*/sizes + elements_length,
-                                     /*src=*/*offset_buffers[i], displacement,
-                                     /*dst=*/out_offsets + elements_length));
+    ARROW_ASSIGN_OR_RAISE(auto outcome,
+                          PutListViewOffsets(*in[i], /*sizes=*/sizes + elements_length,
+                                             /*src=*/*offset_buffers[i], displacement,
+                                             /*dst=*/out_offsets + elements_length));
+    if (ARROW_PREDICT_FALSE(outcome != OffsetBufferOpOutcome::kOk)) {
+      return outcome;
+    }
     elements_length += offset_buffers[i]->size() / sizeof(offset_type);
     num_child_values += value_ranges[i].length;
     if (num_child_values > std::numeric_limits<offset_type>::max()) {
-      return Status::Invalid("offset overflow while concatenating arrays");
+      return OffsetBufferOpOutcome::kOffsetOverflow;
     }
   }
   DCHECK_EQ(elements_length,
             static_cast<size_t>(out_size_in_bytes / sizeof(offset_type)));
 
-  return Status::OK();
+  return OffsetBufferOpOutcome::kOk;
 }
 
 template <typename offset_type>
-Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buffer& src,
-                          offset_type displacement, offset_type* dst) {
+Result<OffsetBufferOpOutcome> PutListViewOffsets(const ArrayData& input,
+                                                 offset_type* sizes, const Buffer& src,
+                                                 offset_type displacement,
+                                                 offset_type* dst) {
   if (src.size() == 0) {
-    return Status::OK();
+    return OffsetBufferOpOutcome::kOk;
   }
   const auto& validity_buffer = input.buffers[0];
   if (validity_buffer) {
@@ -291,7 +330,7 @@
       }
     }
   }
-  return Status::OK();
+  return OffsetBufferOpOutcome::kOk;
 }
 
 class ConcatenateImpl {
@@ -316,11 +355,17 @@ class ConcatenateImpl {
     }
   }
 
-  Status Concatenate(std::shared_ptr<ArrayData>* out) && {
+  Status Concatenate(std::shared_ptr<ArrayData>* out, ErrorHints* out_hints) && {
     if (out_->null_count != 0 && internal::may_have_validity_bitmap(out_->type->id())) {
       RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
     }
-    RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
+    auto status = VisitTypeInline(*out_->type, this);
+    if (!status.ok()) {
+      if (out_hints) {
+        out_hints->suggested_cast = std::move(suggested_cast_);
+      }
+      return status;
+    }
     *out = std::move(out_);
     return Status::OK();
   }
@@ -337,11 +382,29 @@
     return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]);
   }
 
-  Status Visit(const BinaryType&) {
+  Status Visit(const BinaryType& input_type) {
     std::vector<Range> value_ranges;
     ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
-    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
-                                              &value_ranges));
+    ARROW_ASSIGN_OR_RAISE(
+        auto outcome, ConcatenateOffsets<int32_t>(index_buffers, pool_,
+                                                  &out_->buffers[1], &value_ranges));
+    switch (outcome) {
+      case OffsetBufferOpOutcome::kOk:
+        break;
+      case OffsetBufferOpOutcome::kOffsetOverflow:
+        switch (input_type.id()) {
+          case Type::BINARY:
+            suggested_cast_ = large_binary();
+            break;
+          case Type::STRING:
+            suggested_cast_ = large_utf8();
+            break;
+          default:
+            DCHECK(false) << "unexpected type id from BinaryType: " << input_type;
+            break;
+        }
+        return OffsetOverflowStatus();
+    }
     ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
     return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
   }
@@ -349,8 +412,10 @@
   Status Visit(const LargeBinaryType&) {
     std::vector<Range> value_ranges;
     ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
-    RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
-                                              &value_ranges));
+    ARROW_ASSIGN_OR_RAISE(
+        auto outcome, ConcatenateOffsets<int64_t>(index_buffers, pool_,
+                                                  &out_->buffers[1], &value_ranges));
+    RETURN_IF_NOT_OK_OUTCOME(outcome);
     ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
     return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
   }
@@ -394,22 +459,44 @@
     return Status::OK();
   }
 
-  Status Visit(const ListType&) {
+  Status Visit(const ListType& input_type) {
     std::vector<Range> value_ranges;
     ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
-    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
-                                              &value_ranges));
+    ARROW_ASSIGN_OR_RAISE(auto offsets_outcome,
+                          ConcatenateOffsets<int32_t>(index_buffers, pool_,
+                                                      &out_->buffers[1], &value_ranges));
+    switch (offsets_outcome) {
+      case OffsetBufferOpOutcome::kOk:
+        break;
+      case OffsetBufferOpOutcome::kOffsetOverflow:
+        suggested_cast_ = large_list(input_type.value_type());
+        return OffsetOverflowStatus();
+    }
     ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
-    return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
+    ErrorHints child_error_hints;
+    auto status = ConcatenateImpl(child_data, pool_)
+                      .Concatenate(&out_->child_data[0], &child_error_hints);
+    if (!status.ok() && child_error_hints.suggested_cast) {
+      suggested_cast_ = list(std::move(child_error_hints.suggested_cast));
+    }
+    return status;
   }
 
   Status Visit(const LargeListType&) {
     std::vector<Range> value_ranges;
     ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
-    RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
-                                              &value_ranges));
+    ARROW_ASSIGN_OR_RAISE(
+        auto outcome, ConcatenateOffsets<int64_t>(index_buffers, pool_,
+                                                  &out_->buffers[1], &value_ranges));
+    RETURN_IF_NOT_OK_OUTCOME(outcome);
     ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
-    return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
+    ErrorHints child_error_hints;
+    auto status = ConcatenateImpl(child_data, pool_)
+                      .Concatenate(&out_->child_data[0], &child_error_hints);
+    if (!status.ok() && child_error_hints.suggested_cast) {
+      suggested_cast_ = large_list(std::move(child_error_hints.suggested_cast));
+    }
+    return status;
   }
 
   template <typename T>
@@ -430,8 +517,17 @@ class ConcatenateImpl {
     }
 
     // Concatenate the values
+    ErrorHints child_error_hints;
     ARROW_ASSIGN_OR_RAISE(ArrayDataVector value_data, ChildData(0, value_ranges));
-    RETURN_NOT_OK(ConcatenateImpl(value_data, pool_).Concatenate(&out_->child_data[0]));
+    auto values_status = ConcatenateImpl(value_data, pool_)
+                             .Concatenate(&out_->child_data[0], &child_error_hints);
+    if (!values_status.ok()) {
+      if (child_error_hints.suggested_cast) {
+        suggested_cast_ = std::make_shared<T>(
+            std::move(child_error_hints.suggested_cast));
+      }
+      return values_status;
+    }
     out_->child_data[0]->type = type.value_type();
 
     // Concatenate the sizes first
@@ -440,22 +536,39 @@
 
     // Concatenate the offsets
     ARROW_ASSIGN_OR_RAISE(auto offset_buffers, Buffers(1, sizeof(offset_type)));
-    RETURN_NOT_OK(ConcatenateListViewOffsets(
-        in_, /*sizes=*/out_->buffers[2]->mutable_data_as<offset_type>(), offset_buffers,
-        value_ranges, pool_,
&out_->buffers[1])); - + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateListViewOffsets( + in_, /*sizes=*/out_->buffers[2]->mutable_data_as(), + offset_buffers, value_ranges, pool_, &out_->buffers[1])); + switch (outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + if constexpr (T::type_id == Type::LIST_VIEW) { + suggested_cast_ = large_list_view(type.value_type()); + } + return OffsetOverflowStatus(); + } return Status::OK(); } - Status Visit(const FixedSizeListType& fixed_size_list) { - ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fixed_size_list.list_size())); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + Status Visit(const FixedSizeListType& fsl_type) { + ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fsl_type.list_size())); + ErrorHints hints; + auto status = + ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0], &hints); + if (!status.ok() && hints.suggested_cast) { + suggested_cast_ = + fixed_size_list(std::move(hints.suggested_cast), fsl_type.list_size()); + } + return status; } Status Visit(const StructType& s) { for (int i = 0; i < s.num_fields(); ++i) { ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i)); - RETURN_NOT_OK(ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } return Status::OK(); } @@ -570,8 +683,8 @@ class ConcatenateImpl { case UnionMode::SPARSE: { for (int i = 0; i < u.num_fields(); i++) { ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i)); - RETURN_NOT_OK( - ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } break; } @@ -581,8 +694,8 @@ class ConcatenateImpl { for (size_t j = 0; j < in_.size(); j++) { child_data[j] = in_[j]->child_data[i]; } - RETURN_NOT_OK( - ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } break; } @@ -666,7 +779,8 @@ class ConcatenateImpl { storage_data[i]->type = e.storage_type(); } std::shared_ptr out_storage; - RETURN_NOT_OK(ConcatenateImpl(storage_data, pool_).Concatenate(&out_storage)); + RETURN_NOT_OK(ConcatenateImpl(storage_data, pool_) + .Concatenate(&out_storage, /*hints=*/nullptr)); out_storage->type = in_[0]->type; out_ = std::move(out_storage); return Status::OK(); @@ -797,11 +911,18 @@ class ConcatenateImpl { const ArrayDataVector& in_; MemoryPool* pool_; std::shared_ptr out_; + std::shared_ptr suggested_cast_; }; } // namespace -Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool) { +namespace internal { + +Result> Concatenate( + const ArrayVector& arrays, MemoryPool* pool, + std::shared_ptr* out_suggested_cast) { + DCHECK(out_suggested_cast); + *out_suggested_cast = nullptr; if (arrays.size() == 0) { return Status::Invalid("Must pass at least one array"); } @@ -818,8 +939,31 @@ Result> Concatenate(const ArrayVector& arrays, MemoryPool } std::shared_ptr out_data; - RETURN_NOT_OK(ConcatenateImpl(data, pool).Concatenate(&out_data)); + ErrorHints hints; + auto status = ConcatenateImpl(data, pool).Concatenate(&out_data, &hints); + if (!status.ok()) { + if (hints.suggested_cast) { + DCHECK(status.IsInvalid()); + *out_suggested_cast = std::move(hints.suggested_cast); + } + return status; + } return 
MakeArray(std::move(out_data)); } +} // namespace internal + +Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool) { + std::shared_ptr suggested_cast; + auto result = internal::Concatenate(arrays, pool, &suggested_cast); + if (!result.ok() && suggested_cast && arrays.size() > 0) { + DCHECK(result.status().IsInvalid()); + return Status::Invalid(result.status().message(), ", consider casting input from `", + *arrays[0]->type(), "` to `", *suggested_cast, "` first."); + } + return result; +} + +#undef RETURN_IF_NOT_OK_OUTCOME + } // namespace arrow diff --git a/cpp/src/arrow/array/concatenate.h b/cpp/src/arrow/array/concatenate.h index e7597aad812c4..aada5624d63a3 100644 --- a/cpp/src/arrow/array/concatenate.h +++ b/cpp/src/arrow/array/concatenate.h @@ -24,6 +24,22 @@ #include "arrow/util/visibility.h" namespace arrow { +namespace internal { + +/// \brief Concatenate arrays +/// +/// \param[in] arrays a vector of arrays to be concatenated +/// \param[in] pool memory to store the result will be allocated from this memory pool +/// \param[out] out_suggested_cast if a non-OK Result is returned, the function might set +/// out_suggested_cast to a cast suggestion that would allow concatenating the arrays +/// without overflow of offsets (e.g. string to large_string) +/// +/// \return the concatenated array +ARROW_EXPORT +Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool, + std::shared_ptr* out_suggested_cast); + +} // namespace internal /// \brief Concatenate arrays /// diff --git a/cpp/src/arrow/array/concatenate_test.cc b/cpp/src/arrow/array/concatenate_test.cc index af595e897f9ee..aea5311575299 100644 --- a/cpp/src/arrow/array/concatenate_test.cc +++ b/cpp/src/arrow/array/concatenate_test.cc @@ -29,6 +29,7 @@ #include #include +#include #include #include "arrow/array.h" @@ -42,6 +43,7 @@ #include "arrow/testing/util.h" #include "arrow/type.h" #include "arrow/util/list_util.h" +#include "arrow/util/unreachable.h" namespace arrow { @@ -661,14 +663,103 @@ TEST_F(ConcatenateTest, ExtensionType) { }); } +std::shared_ptr LargeVersionOfType(const std::shared_ptr& type) { + switch (type->id()) { + case Type::BINARY: + return large_binary(); + case Type::STRING: + return large_utf8(); + case Type::LIST: + return large_list(static_cast(*type).value_type()); + case Type::LIST_VIEW: + return large_list_view(static_cast(*type).value_type()); + case Type::LARGE_BINARY: + case Type::LARGE_STRING: + case Type::LARGE_LIST: + case Type::LARGE_LIST_VIEW: + return type; + default: + Unreachable(); + } +} + +std::shared_ptr fixed_size_list_of_1(std::shared_ptr type) { + return fixed_size_list(std::move(type), 1); +} + TEST_F(ConcatenateTest, OffsetOverflow) { - auto fake_long = ArrayFromJSON(utf8(), "[\"\"]"); - fake_long->data()->GetMutableValues(1)[1] = + using TypeFactory = std::shared_ptr (*)(std::shared_ptr); + static const std::vector kNestedTypeFactories = { + list, large_list, list_view, large_list_view, fixed_size_list_of_1, + }; + + auto* pool = default_memory_pool(); + std::shared_ptr suggested_cast; + for (auto& ty : {binary(), utf8()}) { + auto large_ty = LargeVersionOfType(ty); + + auto fake_long = ArrayFromJSON(ty, "[\"\"]"); + fake_long->data()->GetMutableValues(1)[1] = + std::numeric_limits::max(); + // XXX: since the data fake_long claims to own isn't there, this would + // segfault if Concatenate didn't detect overflow and raise an error. 
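Before the assertions that follow, it is worth seeing how a caller could act on the new suggestion mechanism. A hedged sketch, not code from the patch: the helper name is hypothetical, and it assumes the arrow namespace plus the existing arrow::compute::Cast API:

    #include "arrow/array/concatenate.h"
    #include "arrow/compute/cast.h"

    // Retry a concatenation that failed with an offset overflow, after casting
    // every input to the wider type suggested by internal::Concatenate.
    Result<std::shared_ptr<Array>> ConcatenateWidening(const ArrayVector& arrays,
                                                       MemoryPool* pool) {
      std::shared_ptr<DataType> suggested;
      auto result = internal::Concatenate(arrays, pool, &suggested);
      if (result.ok() || suggested == nullptr) return result;
      ArrayVector widened(arrays.size());
      for (size_t i = 0; i < arrays.size(); ++i) {
        // e.g. utf8 -> large_utf8, list -> large_list: 64-bit offsets give the
        // concatenated children enough room.
        ARROW_ASSIGN_OR_RAISE(Datum wide, compute::Cast(Datum(arrays[i]), suggested));
        widened[i] = wide.make_array();
      }
      return internal::Concatenate(widened, pool, &suggested);
    }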
+ auto concatenate_status = Concatenate({fake_long, fake_long}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + ::testing::StrEq("Invalid: offset overflow while concatenating arrays, " + "consider casting input from `" + + ty->ToString() + "` to `large_" + ty->ToString() + "` first."), + concatenate_status); + + concatenate_status = + internal::Concatenate({fake_long, fake_long}, pool, &suggested_cast); + // The message doesn't contain the suggested cast type when the caller + // asks for it by passing the output parameter. + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::StrEq("Invalid: offset overflow while concatenating arrays"), + concatenate_status); + ASSERT_TRUE(large_ty->Equals(*suggested_cast)); + + // Check that the suggested cast is correct when concatenation + // fails due to the child array being too large. + for (auto factory : kNestedTypeFactories) { + auto nested_ty = factory(ty); + auto expected_suggestion = factory(large_ty); + auto fake_long_list = ArrayFromJSON(nested_ty, "[[\"\"]]"); + fake_long_list->data()->child_data[0] = fake_long->data(); + + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list, fake_long_list}, pool, + &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(*expected_suggestion)); + } + } + + auto list_ty = list(utf8()); + auto fake_long_list = ArrayFromJSON(list_ty, "[[\"Hello\"]]"); + fake_long_list->data()->GetMutableValues<int32_t>(1)[1] = std::numeric_limits<int32_t>::max(); - std::shared_ptr<Array> concatenated; - // XX since the data fake_long claims to own isn't there, this will segfault if - // Concatenate doesn't detect overflow and raise an error. - ASSERT_RAISES(Invalid, Concatenate({fake_long, fake_long}).status()); + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list, fake_long_list}, pool, + &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(LargeVersionOfType(list_ty))); + + auto list_view_ty = list_view(null()); + auto fake_long_list_view = ArrayFromJSON(list_view_ty, "[[], []]"); + { + constexpr int kInt32Max = std::numeric_limits<int32_t>::max(); + auto* values = fake_long_list_view->data()->child_data[0].get(); + auto* mutable_offsets = fake_long_list_view->data()->GetMutableValues<int32_t>(1); + auto* mutable_sizes = fake_long_list_view->data()->GetMutableValues<int32_t>(2); + values->length = 2 * static_cast<int64_t>(kInt32Max); + mutable_offsets[1] = kInt32Max; + mutable_offsets[0] = kInt32Max; + mutable_sizes[0] = kInt32Max; + } + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list_view, fake_long_list_view}, + pool, &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(LargeVersionOfType(list_view_ty))); } TEST_F(ConcatenateTest, DictionaryConcatenateWithEmptyUint16) { diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index 0a8018cd580cf..e20b45897db95 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -92,6 +92,7 @@ add_arrow_test(internals_test key_hash_test.cc row/compare_test.cc row/grouper_test.cc + row/row_test.cc util_internal_test.cc) add_arrow_compute_test(expression_test SOURCES expression_test.cc) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc index a5612643913aa..a293000d56640 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc @@ -510,9 +510,12 @@ void AddCrossUnitCastNoPreallocate(CastFunction* func) { std::shared_ptr<CastFunction> GetDate32Cast() { auto func =
std::make_shared("cast_date32", Type::DATE32); - auto out_ty = date32(); + const auto& out_ty = date32(); AddCommonCasts(Type::DATE32, out_ty, func.get()); + // date32 -> date32 + AddZeroCopyCast(Type::DATE32, date32(), date32(), func.get()); + // int32 -> date32 AddZeroCopyCast(Type::INT32, int32(), date32(), func.get()); @@ -532,9 +535,12 @@ std::shared_ptr GetDate32Cast() { std::shared_ptr GetDate64Cast() { auto func = std::make_shared("cast_date64", Type::DATE64); - auto out_ty = date64(); + const auto& out_ty = date64(); AddCommonCasts(Type::DATE64, out_ty, func.get()); + // date64 -> date64 + AddZeroCopyCast(Type::DATE64, date64(), date64(), func.get()); + // int64 -> date64 AddZeroCopyCast(Type::INT64, int64(), date64(), func.get()); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index f60d8f2e19e98..140789e59665b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1646,6 +1646,10 @@ TEST(Cast, DateToDate) { 86400000, 864000000])"); + // Zero copy + CheckCast(day_32, day_32); + CheckCast(day_64, day_64); + // Multiply promotion CheckCast(day_32, day_64); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index f49e201492c9b..35b1deb3cda58 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -1741,7 +1741,7 @@ const FunctionDoc millisecond_doc{ const FunctionDoc microsecond_doc{ "Extract microsecond values", - ("Millisecond returns number of microseconds since the last full millisecond.\n" + ("Microsecond returns number of microseconds since the last full millisecond.\n" "Null values emit null.\n" "An error is returned if the values have a defined timezone but it\n" "cannot be found in the timezone database."), diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index ad22fa8d365c4..8766ca3baac96 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -870,7 +870,7 @@ const SortOptions* GetDefaultSortOptions() { const FunctionDoc sort_indices_doc( "Return the indices that would sort an array, record batch or table", ("This function computes an array of indices that define a stable sort\n" - "of the input array, record batch or table. By default, nNull values are\n" + "of the input array, record batch or table. By default, null values are\n" "considered greater than any other value and are therefore sorted at the\n" "end of the input. For floating-point types, NaNs are considered greater\n" "than any other non-null value, but smaller than null values.\n" diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc index ec511aa03a6d0..23238a3691c8a 100644 --- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc +++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc @@ -180,6 +180,40 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2( } } +namespace { + +// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we +// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row +// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat +// it as negative offset and gather the data from undesired address. 
To avoid this issue, +// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` +// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those +// intrinsics are safe. + +constexpr uint64_t kTwoGB = 0x80000000ull; + +template +inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) { + int const* normalized_base = base + kTwoGB / sizeof(int); + __m256i normalized_offset = + _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast(kTwoGB / kScale))); + return _mm256_i32gather_epi32(normalized_base, normalized_offset, + static_cast(kScale)); +} + +template +inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base, + __m128i offset) { + arrow::util::int64_for_gather_t const* normalized_base = + base + kTwoGB / sizeof(arrow::util::int64_for_gather_t); + __m128i normalized_offset = + _mm_sub_epi32(offset, _mm_set1_epi32(static_cast(kTwoGB / kScale))); + return _mm256_i32gather_epi64(normalized_base, normalized_offset, + static_cast(kScale)); +} + +} // namespace + template uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( uint32_t offset_within_row, uint32_t num_rows_to_compare, @@ -236,10 +270,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( irow_right = _mm256_loadu_si256(reinterpret_cast(left_to_right_map) + i); } - // TODO: Need to test if this gather is OK when irow_right is larger than - // 0x80000000u. __m256i offset_right = - _mm256_i32gather_epi32((const int*)offsets_right, irow_right, 4); + UnsignedOffsetSafeGather32<4>((int const*)offsets_right, irow_right); offset_right = _mm256_add_epi32(offset_right, _mm256_set1_epi32(offset_within_row)); reinterpret_cast(match_bytevector)[i] = @@ -253,40 +285,6 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( } } -namespace { - -// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we -// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row -// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat -// it as negative offset and gather the data from undesired address. To avoid this issue, -// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` -// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those -// intrinsics are safe. 
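In scalar terms, the normalization performed by the helpers above is a simultaneous translation of base and index that leaves every effective address unchanged. A sketch for one element, assuming the usual two's-complement wrap-around (which the AVX2 path relies on as well):

    #include <cstdint>

    // Model of UnsignedOffsetSafeGather32 with kScale == sizeof(int): the gather
    // index lane is signed, so an element index derived from a byte offset at or
    // above 2 GiB would be sign-extended to a negative value. Shifting the base
    // up by 2 GiB and the index down by the matching element count keeps
    // base + index constant while the index stays in signed range.
    int GatherOne(const int* base, uint32_t elem_index) {
      constexpr uint64_t kTwoGB = 0x80000000ull;
      const int* normalized_base = base + kTwoGB / sizeof(int);
      int32_t normalized_index =
          static_cast<int32_t>(elem_index - kTwoGB / sizeof(int));
      return normalized_base[normalized_index];  // same element as base[elem_index]
    }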
- -constexpr uint64_t kTwoGB = 0x80000000ull; - -template -inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) { - int const* normalized_base = base + kTwoGB / sizeof(int); - __m256i normalized_offset = - _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast(kTwoGB / kScale))); - return _mm256_i32gather_epi32(normalized_base, normalized_offset, - static_cast(kScale)); -} - -template -inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base, - __m128i offset) { - arrow::util::int64_for_gather_t const* normalized_base = - base + kTwoGB / sizeof(arrow::util::int64_for_gather_t); - __m128i normalized_offset = - _mm_sub_epi32(offset, _mm_set1_epi32(static_cast(kTwoGB / kScale))); - return _mm256_i32gather_epi64(normalized_base, normalized_offset, - static_cast(kScale)); -} - -} // namespace - template inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* right_base, __m256i irow_left, __m256i offset_right, diff --git a/cpp/src/arrow/compute/row/compare_test.cc b/cpp/src/arrow/compute/row/compare_test.cc index 662862075c245..22af7e067d855 100644 --- a/cpp/src/arrow/compute/row/compare_test.cc +++ b/cpp/src/arrow/compute/row/compare_test.cc @@ -166,10 +166,9 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { } } -#ifndef ARROW_VALGRIND // Compare columns to rows at offsets over 2GB within a row table. // Certain AVX2 instructions may behave unexpectedly causing troubles like GH-41813. -TEST(KeyCompare, CompareColumnsToRowsLarge) { +TEST(KeyCompare, LARGE_MEMORY_TEST(CompareColumnsToRowsLarge)) { if constexpr (sizeof(void*) == 4) { GTEST_SKIP() << "Test only works on 64-bit platforms"; } @@ -300,7 +299,6 @@ TEST(KeyCompare, CompareColumnsToRowsLarge) { num_rows); } } -#endif // ARROW_VALGRIND } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/row/encode_internal.cc b/cpp/src/arrow/compute/row/encode_internal.cc index 01d552ef8270f..658e0dffcac68 100644 --- a/cpp/src/arrow/compute/row/encode_internal.cc +++ b/cpp/src/arrow/compute/row/encode_internal.cc @@ -17,6 +17,7 @@ #include "arrow/compute/row/encode_internal.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/int_util_overflow.h" namespace arrow { namespace compute { @@ -152,14 +153,21 @@ void RowTableEncoder::PrepareEncodeSelected(int64_t start_row, int64_t num_rows, Status RowTableEncoder::EncodeSelected(RowTableImpl* rows, uint32_t num_selected, const uint16_t* selection) { rows->Clean(); - RETURN_NOT_OK( - rows->AppendEmpty(static_cast(num_selected), static_cast(0))); - - EncoderOffsets::GetRowOffsetsSelected(rows, batch_varbinary_cols_, num_selected, - selection); - RETURN_NOT_OK(rows->AppendEmpty(static_cast(0), - static_cast(rows->offsets()[num_selected]))); + // First AppendEmpty with num_selected rows and zero extra bytes to resize the + // fixed-length buffers (including buffer for offsets). + RETURN_NOT_OK( + rows->AppendEmpty(static_cast(num_selected), + /*num_extra_bytes_to_append=*/static_cast(0))); + // Then populate the offsets of the var-length columns, which will be used as the target + // size of the var-length buffers resizing below. + RETURN_NOT_OK(EncoderOffsets::GetRowOffsetsSelected(rows, batch_varbinary_cols_, + num_selected, selection)); + // Last AppendEmpty with zero rows and zero extra bytes to resize the var-length buffers + // based on the populated offsets. 
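The offsets populated by GetRowOffsetsSelected above are a running prefix sum of padded row lengths, and that accumulation is where the new overflow check lives. A reduced standalone sketch of the guarded sum, using the GCC/Clang builtin where the patch uses arrow::internal::AddWithOverflow:

    #include <cstdint>

    // Prefix-sum row lengths into uint32 offsets (offsets must have
    // num_rows + 1 slots); report failure instead of silently wrapping past
    // UINT32_MAX, mirroring the patched code path.
    bool AccumulateRowOffsets(const uint32_t* lengths, uint32_t num_rows,
                              uint32_t* offsets) {
      uint32_t sum = 0;
      for (uint32_t i = 0; i < num_rows; ++i) {
        offsets[i] = sum;
        uint32_t next;
        if (__builtin_add_overflow(sum, lengths[i], &next)) {
          return false;  // a uint32 offset can no longer address the row data
        }
        sum = next;
      }
      offsets[num_rows] = sum;
      return true;
    }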
+ RETURN_NOT_OK( + rows->AppendEmpty(/*num_rows_to_append=*/static_cast(0), + /*num_extra_bytes_to_append=*/static_cast(0))); for (size_t icol = 0; icol < batch_all_cols_.size(); ++icol) { if (batch_all_cols_[icol].metadata().is_fixed_length) { @@ -660,12 +668,12 @@ void EncoderOffsets::Decode(uint32_t start_row, uint32_t num_rows, } } -void EncoderOffsets::GetRowOffsetsSelected(RowTableImpl* rows, - const std::vector& cols, - uint32_t num_selected, - const uint16_t* selection) { +Status EncoderOffsets::GetRowOffsetsSelected(RowTableImpl* rows, + const std::vector& cols, + uint32_t num_selected, + const uint16_t* selection) { if (rows->metadata().is_fixed_length) { - return; + return Status::OK(); } uint32_t* row_offsets = rows->mutable_offsets(); @@ -706,9 +714,18 @@ void EncoderOffsets::GetRowOffsetsSelected(RowTableImpl* rows, uint32_t length = row_offsets[i]; length += RowTableMetadata::padding_for_alignment(length, row_alignment); row_offsets[i] = sum; - sum += length; + uint32_t sum_maybe_overflow = 0; + if (ARROW_PREDICT_FALSE( + arrow::internal::AddWithOverflow(sum, length, &sum_maybe_overflow))) { + return Status::Invalid( + "Offset overflow detected in EncoderOffsets::GetRowOffsetsSelected for row ", i, + " of length ", length, " bytes, current length in total is ", sum, " bytes"); + } + sum = sum_maybe_overflow; } row_offsets[num_selected] = sum; + + return Status::OK(); } template diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index 2afc150530b9e..0618ddd8e4b96 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ b/cpp/src/arrow/compute/row/encode_internal.h @@ -227,9 +227,9 @@ class EncoderBinaryPair { class EncoderOffsets { public: - static void GetRowOffsetsSelected(RowTableImpl* rows, - const std::vector& cols, - uint32_t num_selected, const uint16_t* selection); + static Status GetRowOffsetsSelected(RowTableImpl* rows, + const std::vector& cols, + uint32_t num_selected, const uint16_t* selection); static void EncodeSelected(RowTableImpl* rows, const std::vector& cols, uint32_t num_selected, const uint16_t* selection); diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc index 469205e9b008d..2365ef5632cce 100644 --- a/cpp/src/arrow/compute/row/row_internal.cc +++ b/cpp/src/arrow/compute/row/row_internal.cc @@ -18,6 +18,7 @@ #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" +#include "arrow/util/int_util_overflow.h" namespace arrow { namespace compute { @@ -246,13 +247,13 @@ int64_t RowTableImpl::size_rows_varying_length(int64_t num_bytes) const { } void RowTableImpl::UpdateBufferPointers() { - buffers_[0] = null_masks_->mutable_data(); + buffers_[0] = null_masks_.get(); if (metadata_.is_fixed_length) { - buffers_[1] = rows_->mutable_data(); + buffers_[1] = rows_.get(); buffers_[2] = nullptr; } else { - buffers_[1] = offsets_->mutable_data(); - buffers_[2] = rows_->mutable_data(); + buffers_[1] = offsets_.get(); + buffers_[2] = rows_.get(); } } @@ -331,7 +332,15 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from, uint16_t row_id = source_row_ids ? 
source_row_ids[i] : i; uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id]; total_length_to_append += length; - to_offsets[num_rows_ + i + 1] = total_length + total_length_to_append; + uint32_t to_offset_maybe_overflow = 0; + if (ARROW_PREDICT_FALSE(arrow::internal::AddWithOverflow( + total_length, total_length_to_append, &to_offset_maybe_overflow))) { + return Status::Invalid( + "Offset overflow detected in RowTableImpl::AppendSelectionFrom for row ", + num_rows_ + i, " of length ", length, " bytes, current length in total is ", + to_offsets[num_rows_ + i], " bytes"); + } + to_offsets[num_rows_ + i + 1] = to_offset_maybe_overflow; } RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(total_length_to_append)); diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h index 3220b7ffe6e40..80409f93d2b96 100644 --- a/cpp/src/arrow/compute/row/row_internal.h +++ b/cpp/src/arrow/compute/row/row_internal.h @@ -189,11 +189,17 @@ class ARROW_EXPORT RowTableImpl { // Accessors into the table's buffers const uint8_t* data(int i) const { ARROW_DCHECK(i >= 0 && i < kMaxBuffers); - return buffers_[i]; + if (ARROW_PREDICT_TRUE(buffers_[i])) { + return buffers_[i]->data(); + } + return NULLPTR; } uint8_t* mutable_data(int i) { ARROW_DCHECK(i >= 0 && i < kMaxBuffers); - return buffers_[i]; + if (ARROW_PREDICT_TRUE(buffers_[i])) { + return buffers_[i]->mutable_data(); + } + return NULLPTR; } const uint32_t* offsets() const { return reinterpret_cast(data(1)); } uint32_t* mutable_offsets() { return reinterpret_cast(mutable_data(1)); } @@ -207,6 +213,12 @@ class ARROW_EXPORT RowTableImpl { /// successive calls bool has_any_nulls(const LightContext* ctx) const; + /// \brief Size of the table's buffers + int64_t buffer_size(int i) const { + ARROW_DCHECK(i >= 0 && i < kMaxBuffers); + return buffers_[i]->size(); + } + private: Status ResizeFixedLengthBuffers(int64_t num_extra_rows); Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes); @@ -236,7 +248,7 @@ class ARROW_EXPORT RowTableImpl { // Stores the fixed-length parts of the rows std::unique_ptr rows_; static constexpr int kMaxBuffers = 3; - uint8_t* buffers_[kMaxBuffers]; + ResizableBuffer* buffers_[kMaxBuffers]; // The number of rows in the table int64_t num_rows_; // The number of rows that can be stored in the table without resizing diff --git a/cpp/src/arrow/compute/row/row_test.cc b/cpp/src/arrow/compute/row/row_test.cc new file mode 100644 index 0000000000000..679ad519a9ef2 --- /dev/null +++ b/cpp/src/arrow/compute/row/row_test.cc @@ -0,0 +1,258 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
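The new test file that this license header opens checks, among other things, that the row table's buffers never hold more than twice the bytes actually needed (modulo a fixed padding). Stated in isolation, the invariant behind those assertions looks like this (a sketch, not code from the file):

    #include <cstdint>

    // With a doubling growth policy, the first power-of-two capacity that fits
    // `needed` bytes satisfies needed <= capacity < 2 * needed (padding aside).
    uint64_t NextCapacity(uint64_t needed) {
      uint64_t capacity = 1;
      while (capacity < needed) capacity *= 2;
      return capacity;
    }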
+ +#include + +#include "arrow/compute/row/encode_internal.h" +#include "arrow/compute/row/row_internal.h" +#include "arrow/testing/generator.h" +#include "arrow/testing/gtest_util.h" + +namespace arrow { +namespace compute { + +namespace { + +Result MakeRowTableFromColumn(const std::shared_ptr& column, + int64_t num_rows, int row_alignment, + int string_alignment) { + DCHECK_GE(column->length(), num_rows); + MemoryPool* pool = default_memory_pool(); + + std::vector column_arrays; + std::vector values{column}; + ExecBatch batch(std::move(values), num_rows); + RETURN_NOT_OK(ColumnArraysFromExecBatch(batch, &column_arrays)); + + std::vector column_metadatas; + RETURN_NOT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas)); + RowTableMetadata table_metadata; + table_metadata.FromColumnMetadataVector(column_metadatas, row_alignment, + string_alignment); + + RowTableImpl row_table; + RETURN_NOT_OK(row_table.Init(pool, table_metadata)); + + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas, row_alignment, string_alignment); + row_encoder.PrepareEncodeSelected(0, num_rows, column_arrays); + + std::vector row_ids(num_rows); + std::iota(row_ids.begin(), row_ids.end(), 0); + + RETURN_NOT_OK(row_encoder.EncodeSelected(&row_table, static_cast(num_rows), + row_ids.data())); + + return row_table; +} + +} // namespace + +// GH-43129: Ensure that the memory consumption of the row table is reasonable, that is, +// with the growth factor of 2, the actual memory usage does not exceed twice the amount +// of memory actually needed. +TEST(RowTableMemoryConsumption, Encode) { + constexpr int64_t num_rows_max = 8192; + constexpr int64_t padding_for_vectors = 64; + + ASSERT_OK_AND_ASSIGN( + auto fixed_length_column, + ::arrow::gen::Constant(std::make_shared(0))->Generate(num_rows_max)); + ASSERT_OK_AND_ASSIGN(auto var_length_column, + ::arrow::gen::Constant(std::make_shared("X")) + ->Generate(num_rows_max)); + + for (int64_t num_rows : {1023, 1024, 1025, 4095, 4096, 4097}) { + // Fixed length column. + { + SCOPED_TRACE("encoding fixed length column of " + std::to_string(num_rows) + + " rows"); + ASSERT_OK_AND_ASSIGN(auto row_table, + MakeRowTableFromColumn(fixed_length_column, num_rows, + uint32()->byte_width(), 0)); + ASSERT_NE(row_table.data(0), NULLPTR); + ASSERT_NE(row_table.data(1), NULLPTR); + ASSERT_EQ(row_table.data(2), NULLPTR); + + int64_t actual_null_mask_size = + num_rows * row_table.metadata().null_masks_bytes_per_row; + ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - padding_for_vectors); + ASSERT_GT(actual_null_mask_size * 2, + row_table.buffer_size(0) - padding_for_vectors); + + int64_t actual_rows_size = num_rows * uint32()->byte_width(); + ASSERT_LE(actual_rows_size, row_table.buffer_size(1) - padding_for_vectors); + ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) - padding_for_vectors); + } + + // Var length column. 
+ { + SCOPED_TRACE("encoding var length column of " + std::to_string(num_rows) + " rows"); + ASSERT_OK_AND_ASSIGN(auto row_table, + MakeRowTableFromColumn(var_length_column, num_rows, 4, 4)); + ASSERT_NE(row_table.data(0), NULLPTR); + ASSERT_NE(row_table.data(1), NULLPTR); + ASSERT_NE(row_table.data(2), NULLPTR); + + int64_t actual_null_mask_size = + num_rows * row_table.metadata().null_masks_bytes_per_row; + ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - padding_for_vectors); + ASSERT_GT(actual_null_mask_size * 2, + row_table.buffer_size(0) - padding_for_vectors); + + int64_t actual_offset_size = num_rows * sizeof(uint32_t); + ASSERT_LE(actual_offset_size, row_table.buffer_size(1) - padding_for_vectors); + ASSERT_GT(actual_offset_size * 2, row_table.buffer_size(1) - padding_for_vectors); + + int64_t actual_rows_size = num_rows * row_table.offsets()[1]; + ASSERT_LE(actual_rows_size, row_table.buffer_size(2) - padding_for_vectors); + ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(2) - padding_for_vectors); + } + } +} + +// GH-43202: Ensure that when offset overflow happens in encoding the row table, an +// explicit error is raised instead of a silent wrong result. +TEST(RowTableOffsetOverflow, LARGE_MEMORY_TEST(Encode)) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // Use 8 512MB var-length rows (occupies 4GB+) to overflow the offset in the row table. + constexpr int64_t num_rows = 8; + constexpr int64_t length_per_binary = 512 * 1024 * 1024; + constexpr int64_t row_alignment = sizeof(uint32_t); + constexpr int64_t var_length_alignment = sizeof(uint32_t); + + MemoryPool* pool = default_memory_pool(); + + // The column to encode. + std::vector columns; + std::vector values; + ASSERT_OK_AND_ASSIGN( + auto value, ::arrow::gen::Constant( + std::make_shared(std::string(length_per_binary, 'X'))) + ->Generate(1)); + values.push_back(std::move(value)); + ExecBatch batch = ExecBatch(std::move(values), 1); + ASSERT_OK(ColumnArraysFromExecBatch(batch, &columns)); + + // The row table. + std::vector column_metadatas; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas)); + RowTableMetadata table_metadata; + table_metadata.FromColumnMetadataVector(column_metadatas, row_alignment, + var_length_alignment); + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata)); + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas, row_alignment, var_length_alignment); + + // The rows to encode. + std::vector row_ids(num_rows, 0); + + // Encoding 7 rows should be fine. + { + row_encoder.PrepareEncodeSelected(0, num_rows - 1, columns); + ASSERT_OK(row_encoder.EncodeSelected(&row_table, static_cast(num_rows - 1), + row_ids.data())); + } + + // Encoding 8 rows should overflow. 
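The arithmetic behind the failure expected in the block below, as a standalone check (not part of the test itself):

    #include <cstdint>

    // Row-table offsets are uint32_t. The binary payload of 8 rows alone spans
    // 8 * 512 MiB = 4 GiB, one byte past what a uint32_t can address; the
    // aligned fixed-length prefix only pushes the total further over.
    static_assert(8ull * 512 * 1024 * 1024 == 4294967296ull, "8 rows span 4 GiB");
    static_assert(8ull * 512 * 1024 * 1024 > UINT32_MAX, "so uint32 offsets overflow");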
+ { + int64_t length_per_row = table_metadata.fixed_length + length_per_binary; + std::stringstream expected_error_message; + expected_error_message << "Invalid: Offset overflow detected in " + "EncoderOffsets::GetRowOffsetsSelected for row " + << num_rows - 1 << " of length " << length_per_row + << " bytes, current length in total is " + << length_per_row * (num_rows - 1) << " bytes"; + row_encoder.PrepareEncodeSelected(0, num_rows, columns); + ASSERT_RAISES_WITH_MESSAGE( + Invalid, expected_error_message.str(), + row_encoder.EncodeSelected(&row_table, static_cast(num_rows), + row_ids.data())); + } +} + +// GH-43202: Ensure that when offset overflow happens in appending to the row table, an +// explicit error is raised instead of a silent wrong result. +TEST(RowTableOffsetOverflow, LARGE_MEMORY_TEST(AppendFrom)) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // Use 8 512MB var-length rows (occupies 4GB+) to overflow the offset in the row table. + constexpr int64_t num_rows = 8; + constexpr int64_t length_per_binary = 512 * 1024 * 1024; + constexpr int64_t num_rows_seed = 1; + constexpr int64_t row_alignment = sizeof(uint32_t); + constexpr int64_t var_length_alignment = sizeof(uint32_t); + + MemoryPool* pool = default_memory_pool(); + + // The column to encode. + std::vector columns; + std::vector values; + ASSERT_OK_AND_ASSIGN( + auto value, ::arrow::gen::Constant( + std::make_shared(std::string(length_per_binary, 'X'))) + ->Generate(num_rows_seed)); + values.push_back(std::move(value)); + ExecBatch batch = ExecBatch(std::move(values), num_rows_seed); + ASSERT_OK(ColumnArraysFromExecBatch(batch, &columns)); + + // The seed row table. + std::vector column_metadatas; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas)); + RowTableMetadata table_metadata; + table_metadata.FromColumnMetadataVector(column_metadatas, row_alignment, + var_length_alignment); + RowTableImpl row_table_seed; + ASSERT_OK(row_table_seed.Init(pool, table_metadata)); + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas, row_alignment, var_length_alignment); + row_encoder.PrepareEncodeSelected(0, num_rows_seed, columns); + std::vector row_ids(num_rows_seed, 0); + ASSERT_OK(row_encoder.EncodeSelected( + &row_table_seed, static_cast(num_rows_seed), row_ids.data())); + + // The target row table. + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata)); + + // Appending the seed 7 times should be fine. + for (int i = 0; i < num_rows - 1; ++i) { + ASSERT_OK(row_table.AppendSelectionFrom(row_table_seed, num_rows_seed, + /*source_row_ids=*/NULLPTR)); + } + + // Appending the seed the 8-th time should overflow. 
+ int64_t length_per_row = table_metadata.fixed_length + length_per_binary; + std::stringstream expected_error_message; + expected_error_message + << "Invalid: Offset overflow detected in RowTableImpl::AppendSelectionFrom for row " + << num_rows - 1 << " of length " << length_per_row + << " bytes, current length in total is " << length_per_row * (num_rows - 1) + << " bytes"; + ASSERT_RAISES_WITH_MESSAGE(Invalid, expected_error_message.str(), + row_table.AppendSelectionFrom(row_table_seed, num_rows_seed, + /*source_row_ids=*/NULLPTR)); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index 0a31a64b7a3a4..dec4bb6e3d465 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -63,6 +63,23 @@ if(ARROW_AZURE) endif() if(ARROW_S3) + set(ARROW_S3_TEST_EXTRA_LINK_LIBS) + # arrow_shared/arrow_static is specified implicitly via + # arrow_testing_shared/arrow_testing_static but we specify + # arrow_shared/arrow_static explicitly here to ensure using libarrow + # before libaws* on link. If we use libaws*.a before libarrow, + # static variables storage of AWS SDK for C++ in libaws*.a may be + # mixed with one in libarrow. + if(ARROW_TEST_LINKAGE STREQUAL "shared") + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_shared) + else() + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static) + endif() + list(APPEND + ARROW_S3_TEST_EXTRA_LINK_LIBS + ${AWSSDK_LINK_LIBRARIES} + Boost::filesystem + Boost::system) add_arrow_test(s3fs_test SOURCES s3fs_test.cc @@ -70,18 +87,17 @@ if(ARROW_S3) EXTRA_LABELS filesystem EXTRA_LINK_LIBS - ${AWSSDK_LINK_LIBRARIES} - Boost::filesystem - Boost::system) + ${ARROW_S3_TEST_EXTRA_LINK_LIBS}) if(TARGET arrow-s3fs-test) set(ARROW_S3FS_TEST_COMPILE_DEFINITIONS) get_target_property(AWS_CPP_SDK_S3_TYPE aws-cpp-sdk-s3 TYPE) - # We need to initialize AWS C++ SDK for direct use (not via + # We need to initialize AWS SDK for C++ for direct use (not via # arrow::fs::S3FileSystem) in arrow-s3fs-test if we use static AWS - # C++ SDK and hide symbols of them. Because AWS C++ SDK has - # internal static variables that aren't shared in libarrow and + # SDK for C++ and hide symbols of them. Because AWS SDK for C++ + # has internal static variables that aren't shared in libarrow and # arrow-s3fs-test. It means that arrow::fs::InitializeS3() doesn't - # initialize AWS C++ SDK that is directly used in arrow-s3fs-test. + # initialize AWS SDK for C++ that is directly used in + # arrow-s3fs-test. if(AWS_CPP_SDK_S3_TYPE STREQUAL "STATIC_LIBRARY" AND CXX_LINKER_SUPPORTS_VERSION_SCRIPT) list(APPEND ARROW_S3FS_TEST_COMPILE_DEFINITIONS "AWS_CPP_SDK_S3_PRIVATE_STATIC") diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 99cee19ed1e78..fd5b2e5be2a3a 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ #include #include #include +#include #include // AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7. 
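The s3fs.cc changes that follow hinge on a single predicate: keep buffering and ship the whole stream as one PutObject request while it stays small and delayed open is allowed, otherwise fall back to a multipart upload. In capsule form (a sketch; constants as introduced by the patch below):

    #include <cstdint>

    constexpr int64_t kPartUploadSize = 10 * 1024 * 1024;  // 10 MiB parts
    constexpr int64_t kMultiPartUploadThresholdSize = kPartUploadSize - 1;

    // Mirrors ObjectOutputStream::ShouldBeMultipartUpload() from the patch:
    // small streams opened with allow_delayed_open take the single-request path.
    bool ShouldBeMultipartUpload(int64_t bytes_written, bool allow_delayed_open) {
      return bytes_written > kMultiPartUploadThresholdSize || !allow_delayed_open;
    }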
@@ -1335,7 +1337,7 @@ struct ObjectMetadataSetter { static std::unordered_map<std::string, Setter> GetSetters() { return {{"ACL", CannedACLSetter()}, {"Cache-Control", StringSetter(&ObjectRequest::SetCacheControl)}, - {"Content-Type", StringSetter(&ObjectRequest::SetContentType)}, + {"Content-Type", ContentTypeSetter()}, {"Content-Language", StringSetter(&ObjectRequest::SetContentLanguage)}, {"Expires", DateTimeSetter(&ObjectRequest::SetExpires)}}; } @@ -1365,6 +1367,16 @@ struct ObjectMetadataSetter { }; } + /** We need a special setter here and cannot use `StringSetter` because, e.g. for + * `PutObjectRequest`, the setter is located in the base class (instead of the + * concrete class). */ + static Setter ContentTypeSetter() { + return [](const std::string& str, ObjectRequest* req) { + req->SetContentType(str); + return Status::OK(); + }; + } + static Result<S3Model::ObjectCannedACL> ParseACL(const std::string& v) { if (v.empty()) { return S3Model::ObjectCannedACL::NOT_SET; } @@ -1583,6 +1595,15 @@ class ObjectInputFile final : public io::RandomAccessFile { // (for rational, see: https://github.com/apache/arrow/issues/34363) static constexpr int64_t kPartUploadSize = 10 * 1024 * 1024; +// Above this threshold, use a multi-part upload instead of a single request upload. Only +// relevant if early validation of writing to the bucket is disabled (see +// `allow_delayed_open`). +static constexpr int64_t kMultiPartUploadThresholdSize = kPartUploadSize - 1; + +static_assert(kMultiPartUploadThresholdSize < kPartUploadSize, + "Multi part upload threshold size must be strictly less than the actual " + "multi part upload part size."); + // An OutputStream that writes to a S3 object class ObjectOutputStream final : public io::OutputStream { protected: @@ -1598,7 +1619,8 @@ class ObjectOutputStream final : public io::OutputStream { path_(path), metadata_(metadata), default_metadata_(options.default_metadata), - background_writes_(options.background_writes) {} + background_writes_(options.background_writes), + allow_delayed_open_(options.allow_delayed_open) {} ~ObjectOutputStream() override { // For compliance with the rest of the IO stack, Close rather than Abort, io::internal::CloseFromDestructor(this); } + template <typename ObjectRequest> + Status SetMetadataInRequest(ObjectRequest* request) { + std::shared_ptr<const KeyValueMetadata> metadata; + + if (metadata_ && metadata_->size() != 0) { + metadata = metadata_; + } else if (default_metadata_ && default_metadata_->size() != 0) { + metadata = default_metadata_; + } + + bool is_content_type_set{false}; + if (metadata) { + RETURN_NOT_OK(SetObjectMetadata(metadata, request)); + + is_content_type_set = metadata->Contains("Content-Type"); + } + + if (!is_content_type_set) { + // If we do not set anything then the SDK will default to application/xml + // which confuses some tools (https://github.com/apache/arrow/issues/11934) + // So we instead default to application/octet-stream which is less misleading + request->SetContentType("application/octet-stream"); + } + + return Status::OK(); + } + std::shared_ptr<ObjectOutputStream> Self() { return std::dynamic_pointer_cast<ObjectOutputStream>(shared_from_this()); } - Status Init() { + Status CreateMultipartUpload() { + DCHECK(ShouldBeMultipartUpload()); + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); // Initiate the multi-part upload S3Model::CreateMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - if (metadata_ && metadata_->size() != 0) { - RETURN_NOT_OK(SetObjectMetadata(metadata_,
&req)); - } else if (default_metadata_ && default_metadata_->size() != 0) { - RETURN_NOT_OK(SetObjectMetadata(default_metadata_, &req)); - } - - // If we do not set anything then the SDK will default to application/xml - // which confuses some tools (https://github.com/apache/arrow/issues/11934) - // So we instead default to application/octet-stream which is less misleading - if (!req.ContentTypeHasBeenSet()) { - req.SetContentType("application/octet-stream"); - } + RETURN_NOT_OK(SetMetadataInRequest(&req)); auto outcome = client_lock.Move()->CreateMultipartUpload(req); if (!outcome.IsSuccess()) { @@ -1637,7 +1677,19 @@ path_.key, "' in bucket '", path_.bucket, "': "), "CreateMultipartUpload", outcome.GetError()); } - upload_id_ = outcome.GetResult().GetUploadId(); + multipart_upload_id_ = outcome.GetResult().GetUploadId(); + + return Status::OK(); + } + + Status Init() { + // If we are allowed to do delayed I/O, we can use a single request to upload the + // data. If not, we use a multi-part upload and initiate it here to + // validate that writing to the bucket is possible. + if (!allow_delayed_open_) { + RETURN_NOT_OK(CreateMultipartUpload()); + } + upload_state_ = std::make_shared<UploadState>(); closed_ = false; return Status::OK(); } @@ -1648,42 +1700,62 @@ return Status::OK(); } - ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); + if (IsMultipartCreated()) { + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); - S3Model::AbortMultipartUploadRequest req; - req.SetBucket(ToAwsString(path_.bucket)); - req.SetKey(ToAwsString(path_.key)); - req.SetUploadId(upload_id_); + S3Model::AbortMultipartUploadRequest req; + req.SetBucket(ToAwsString(path_.bucket)); + req.SetKey(ToAwsString(path_.key)); + req.SetUploadId(multipart_upload_id_); - auto outcome = client_lock.Move()->AbortMultipartUpload(req); - if (!outcome.IsSuccess()) { - return ErrorToStatus( - std::forward_as_tuple("When aborting multiple part upload for key '", path_.key, - "' in bucket '", path_.bucket, "': "), - "AbortMultipartUpload", outcome.GetError()); + auto outcome = client_lock.Move()->AbortMultipartUpload(req); + if (!outcome.IsSuccess()) { + return ErrorToStatus( + std::forward_as_tuple("When aborting multiple part upload for key '", + path_.key, "' in bucket '", path_.bucket, "': "), + "AbortMultipartUpload", outcome.GetError()); + } } + current_part_.reset(); holder_ = nullptr; closed_ = true; + return Status::OK(); } // OutputStream interface + bool ShouldBeMultipartUpload() const { + return pos_ > kMultiPartUploadThresholdSize || !allow_delayed_open_; + } + + bool IsMultipartCreated() const { return !multipart_upload_id_.empty(); } + Status EnsureReadyToFlushFromClose() { - if (current_part_) { - // Upload last part - RETURN_NOT_OK(CommitCurrentPart()); - } + if (ShouldBeMultipartUpload()) { + if (current_part_) { + // Upload last part + RETURN_NOT_OK(CommitCurrentPart()); + } - // S3 mandates at least one part, upload an empty one if necessary - if (part_number_ == 1) { - RETURN_NOT_OK(UploadPart("", 0)); + // S3 mandates at least one part, upload an empty one if necessary + if (part_number_ == 1) { + RETURN_NOT_OK(UploadPart("", 0)); + } + } else { + RETURN_NOT_OK(UploadUsingSingleRequest()); } return Status::OK(); } + Status CleanupAfterClose() { + holder_ = nullptr; + closed_ = true; + return Status::OK(); + } + Status FinishPartUploadAfterFlush() { ARROW_ASSIGN_OR_RAISE(auto client_lock,
holder_->Lock()); @@ -1697,7 +1769,7 @@ class ObjectOutputStream final : public io::OutputStream { S3Model::CompleteMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - req.SetUploadId(upload_id_); + req.SetUploadId(multipart_upload_id_); req.SetMultipartUpload(std::move(completed_upload)); auto outcome = @@ -1709,8 +1781,6 @@ class ObjectOutputStream final : public io::OutputStream { "CompleteMultipartUpload", outcome.GetError()); } - holder_ = nullptr; - closed_ = true; return Status::OK(); } @@ -1720,7 +1790,12 @@ class ObjectOutputStream final : public io::OutputStream { RETURN_NOT_OK(EnsureReadyToFlushFromClose()); RETURN_NOT_OK(Flush()); - return FinishPartUploadAfterFlush(); + + if (IsMultipartCreated()) { + RETURN_NOT_OK(FinishPartUploadAfterFlush()); + } + + return CleanupAfterClose(); } Future<> CloseAsync() override { @@ -1729,8 +1804,12 @@ class ObjectOutputStream final : public io::OutputStream { RETURN_NOT_OK(EnsureReadyToFlushFromClose()); // Wait for in-progress uploads to finish (if async writes are enabled) - return FlushAsync().Then( - [self = Self()]() { return self->FinishPartUploadAfterFlush(); }); + return FlushAsync().Then([self = Self()]() { + if (self->IsMultipartCreated()) { + RETURN_NOT_OK(self->FinishPartUploadAfterFlush()); + } + return self->CleanupAfterClose(); + }); } bool closed() const override { return closed_; } @@ -1776,7 +1855,8 @@ class ObjectOutputStream final : public io::OutputStream { return Status::OK(); } - // Upload current buffer + // Upload current buffer. We're only reaching this point if we have accumulated + // enough data to upload. RETURN_NOT_OK(CommitCurrentPart()); } @@ -1810,40 +1890,73 @@ class ObjectOutputStream final : public io::OutputStream { } // Wait for background writes to finish std::unique_lock lock(upload_state_->mutex); - return upload_state_->pending_parts_completed; + return upload_state_->pending_uploads_completed; } // Upload-related helpers Status CommitCurrentPart() { + if (!IsMultipartCreated()) { + RETURN_NOT_OK(CreateMultipartUpload()); + } + ARROW_ASSIGN_OR_RAISE(auto buf, current_part_->Finish()); current_part_.reset(); current_part_size_ = 0; return UploadPart(buf); } - Status UploadPart(std::shared_ptr buffer) { - return UploadPart(buffer->data(), buffer->size(), buffer); + Status UploadUsingSingleRequest() { + std::shared_ptr buf; + if (current_part_ == nullptr) { + // In case the stream is closed directly after it has been opened without writing + // anything, we'll have to create an empty buffer. 
+ buf = std::make_shared(""); + } else { + ARROW_ASSIGN_OR_RAISE(buf, current_part_->Finish()); + } + + current_part_.reset(); + current_part_size_ = 0; + return UploadUsingSingleRequest(buf); } - Status UploadPart(const void* data, int64_t nbytes, - std::shared_ptr owned_buffer = nullptr) { - S3Model::UploadPartRequest req; + template + using UploadResultCallbackFunction = + std::function, + int32_t part_number, OutcomeType outcome)>; + + static Result TriggerUploadRequest( + const Aws::S3::Model::PutObjectRequest& request, + const std::shared_ptr& holder) { + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder->Lock()); + return client_lock.Move()->PutObject(request); + } + + static Result TriggerUploadRequest( + const Aws::S3::Model::UploadPartRequest& request, + const std::shared_ptr& holder) { + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder->Lock()); + return client_lock.Move()->UploadPart(request); + } + + template + Status Upload( + RequestType&& req, + UploadResultCallbackFunction sync_result_callback, + UploadResultCallbackFunction async_result_callback, + const void* data, int64_t nbytes, std::shared_ptr owned_buffer = nullptr) { req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - req.SetUploadId(upload_id_); - req.SetPartNumber(part_number_); + req.SetBody(std::make_shared(data, nbytes)); req.SetContentLength(nbytes); if (!background_writes_) { req.SetBody(std::make_shared(data, nbytes)); - ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); - auto outcome = client_lock.Move()->UploadPart(req); - if (!outcome.IsSuccess()) { - return UploadPartError(req, outcome); - } else { - AddCompletedPart(upload_state_, part_number_, outcome.GetResult()); - } + + ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder_)); + + RETURN_NOT_OK(sync_result_callback(req, upload_state_, part_number_, outcome)); } else { // If the data isn't owned, make an immutable copy for the lifetime of the closure if (owned_buffer == nullptr) { @@ -1858,19 +1971,18 @@ class ObjectOutputStream final : public io::OutputStream { { std::unique_lock lock(upload_state_->mutex); - if (upload_state_->parts_in_progress++ == 0) { - upload_state_->pending_parts_completed = Future<>::Make(); + if (upload_state_->uploads_in_progress++ == 0) { + upload_state_->pending_uploads_completed = Future<>::Make(); } } // The closure keeps the buffer and the upload state alive auto deferred = [owned_buffer, holder = holder_, req = std::move(req), - state = upload_state_, + state = upload_state_, async_result_callback, part_number = part_number_]() mutable -> Status { - ARROW_ASSIGN_OR_RAISE(auto client_lock, holder->Lock()); - auto outcome = client_lock.Move()->UploadPart(req); - HandleUploadOutcome(state, part_number, req, outcome); - return Status::OK(); + ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder)); + + return async_result_callback(req, state, part_number, outcome); }; RETURN_NOT_OK(SubmitIO(io_context_, std::move(deferred))); } @@ -1880,9 +1992,118 @@ class ObjectOutputStream final : public io::OutputStream { return Status::OK(); } - static void HandleUploadOutcome(const std::shared_ptr& state, - int part_number, const S3Model::UploadPartRequest& req, - const Result& result) { + static Status UploadUsingSingleRequestError( + const Aws::S3::Model::PutObjectRequest& request, + const Aws::S3::Model::PutObjectOutcome& outcome) { + return ErrorToStatus( + std::forward_as_tuple("When uploading object with key '", request.GetKey(), + "' in bucket '", 
request.GetBucket(), "': "),
+        "PutObject", outcome.GetError());
+  }
+
+  Status UploadUsingSingleRequest(std::shared_ptr<Buffer> buffer) {
+    return UploadUsingSingleRequest(buffer->data(), buffer->size(), buffer);
+  }
+
+  Status UploadUsingSingleRequest(const void* data, int64_t nbytes,
+                                  std::shared_ptr<Buffer> owned_buffer = nullptr) {
+    auto sync_result_callback = [](const Aws::S3::Model::PutObjectRequest& request,
+                                   std::shared_ptr<UploadState> state,
+                                   int32_t part_number,
+                                   Aws::S3::Model::PutObjectOutcome outcome) {
+      if (!outcome.IsSuccess()) {
+        return UploadUsingSingleRequestError(request, outcome);
+      }
+      return Status::OK();
+    };
+
+    auto async_result_callback = [](const Aws::S3::Model::PutObjectRequest& request,
+                                    std::shared_ptr<UploadState> state,
+                                    int32_t part_number,
+                                    Aws::S3::Model::PutObjectOutcome outcome) {
+      HandleUploadUsingSingleRequestOutcome(state, request, outcome.GetResult());
+      return Status::OK();
+    };
+
+    Aws::S3::Model::PutObjectRequest req{};
+    RETURN_NOT_OK(SetMetadataInRequest(&req));
+
+    return Upload<Aws::S3::Model::PutObjectRequest, Aws::S3::Model::PutObjectOutcome>(
+        std::move(req), std::move(sync_result_callback),
+        std::move(async_result_callback), data, nbytes, std::move(owned_buffer));
+  }
+
+  Status UploadPart(std::shared_ptr<Buffer> buffer) {
+    return UploadPart(buffer->data(), buffer->size(), buffer);
+  }
+
+  static Status UploadPartError(const Aws::S3::Model::UploadPartRequest& request,
+                                const Aws::S3::Model::UploadPartOutcome& outcome) {
+    return ErrorToStatus(
+        std::forward_as_tuple("When uploading part for key '", request.GetKey(),
+                              "' in bucket '", request.GetBucket(), "': "),
+        "UploadPart", outcome.GetError());
+  }
+
+  Status UploadPart(const void* data, int64_t nbytes,
+                    std::shared_ptr<Buffer> owned_buffer = nullptr) {
+    if (!IsMultipartCreated()) {
+      RETURN_NOT_OK(CreateMultipartUpload());
+    }
+
+    Aws::S3::Model::UploadPartRequest req{};
+    req.SetPartNumber(part_number_);
+    req.SetUploadId(multipart_upload_id_);
+
+    auto sync_result_callback = [](const Aws::S3::Model::UploadPartRequest& request,
+                                   std::shared_ptr<UploadState> state,
+                                   int32_t part_number,
+                                   Aws::S3::Model::UploadPartOutcome outcome) {
+      if (!outcome.IsSuccess()) {
+        return UploadPartError(request, outcome);
+      } else {
+        AddCompletedPart(state, part_number, outcome.GetResult());
+      }
+
+      return Status::OK();
+    };
+
+    auto async_result_callback = [](const Aws::S3::Model::UploadPartRequest& request,
+                                    std::shared_ptr<UploadState> state,
+                                    int32_t part_number,
+                                    Aws::S3::Model::UploadPartOutcome outcome) {
+      HandleUploadPartOutcome(state, part_number, request, outcome.GetResult());
+      return Status::OK();
+    };
+
+    return Upload<Aws::S3::Model::UploadPartRequest, Aws::S3::Model::UploadPartOutcome>(
+        std::move(req), std::move(sync_result_callback),
+        std::move(async_result_callback), data, nbytes, std::move(owned_buffer));
+  }
+
+  static void HandleUploadUsingSingleRequestOutcome(
+      const std::shared_ptr<UploadState>& state, const S3Model::PutObjectRequest& req,
+      const Result<S3Model::PutObjectOutcome>& result) {
+    std::unique_lock<std::mutex> lock(state->mutex);
+    if (!result.ok()) {
+      state->status &= result.status();
+    } else {
+      const auto& outcome = *result;
+      if (!outcome.IsSuccess()) {
+        state->status &= UploadUsingSingleRequestError(req, outcome);
+      }
+    }
+    // GH-41862: avoid potential deadlock if the Future's callback is called
+    // with the mutex taken.
+    auto fut = state->pending_uploads_completed;
+    lock.unlock();
+    fut.MarkFinished(state->status);
+  }
+
+  static void HandleUploadPartOutcome(const std::shared_ptr<UploadState>& state,
+                                      int part_number,
+                                      const S3Model::UploadPartRequest& req,
+                                      const Result<S3Model::UploadPartOutcome>& result) {
     std::unique_lock<std::mutex> lock(state->mutex);
     if (!result.ok()) {
       state->status &= result.status();
@@ -1895,10 +2116,10 @@ class ObjectOutputStream final : public io::OutputStream {
       }
     }
     // Notify completion
-    if (--state->parts_in_progress == 0) {
+    if (--state->uploads_in_progress == 0) {
       // GH-41862: avoid potential deadlock if the Future's callback is called
       // with the mutex taken.
-      auto fut = state->pending_parts_completed;
+      auto fut = state->pending_uploads_completed;
       lock.unlock();
       // State could be mutated concurrently if another thread writes to the
       // stream, but in this case the Flush() call is only advisory anyway.
@@ -1923,14 +2144,6 @@ class ObjectOutputStream final : public io::OutputStream {
     state->completed_parts[slot] = std::move(part);
   }

-  static Status UploadPartError(const S3Model::UploadPartRequest& req,
-                                const S3Model::UploadPartOutcome& outcome) {
-    return ErrorToStatus(
-        std::forward_as_tuple("When uploading part for key '", req.GetKey(),
-                              "' in bucket '", req.GetBucket(), "': "),
-        "UploadPart", outcome.GetError());
-  }
-
  protected:
   std::shared_ptr<S3ClientHolder> holder_;
   const io::IOContext io_context_;
@@ -1938,8 +2151,9 @@ class ObjectOutputStream final : public io::OutputStream {
   const std::shared_ptr<const KeyValueMetadata> metadata_;
   const std::shared_ptr<const KeyValueMetadata> default_metadata_;
   const bool background_writes_;
+  const bool allow_delayed_open_;

-  Aws::String upload_id_;
+  Aws::String multipart_upload_id_;
   bool closed_ = true;
   int64_t pos_ = 0;
   int32_t part_number_ = 1;
@@ -1950,10 +2164,11 @@ class ObjectOutputStream final : public io::OutputStream {
   // in the completion handler.
   struct UploadState {
     std::mutex mutex;
+    // Only populated for multi-part uploads.
    Aws::Vector<S3Model::CompletedPart> completed_parts;
-    int64_t parts_in_progress = 0;
+    int64_t uploads_in_progress = 0;
     Status status;
-    Future<> pending_parts_completed = Future<>::MakeFinished(Status::OK());
+    Future<> pending_uploads_completed = Future<>::MakeFinished(Status::OK());
   };
   std::shared_ptr<UploadState> upload_state_;
 };
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index fbbe9d0b3f42b..85d5ff8fed553 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -177,6 +177,16 @@ struct ARROW_EXPORT S3Options {
   /// to be true to address these scenarios.
   bool check_directory_existence_before_creation = false;

+  /// Whether to allow file-open methods to return before the actual open.
+  ///
+  /// Enabling this may reduce the latency of `OpenInputStream`, `OpenOutputStream`,
+  /// and similar methods, by reducing the number of roundtrips necessary. It may also
+  /// allow usage of more efficient S3 APIs for small files.
+  /// The downside is that failure conditions such as attempting to open a file in a
+  /// non-existing bucket will only be reported when actual I/O is done (at worst,
+  /// when attempting to close the file).
+  bool allow_delayed_open = false;
+
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
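Editor's note: a minimal usage sketch for the new `allow_delayed_open` option. This is not part of the diff; the function name `WriteSmallObject` is hypothetical, and the sketch assumes the S3 subsystem was already initialized (e.g. via `arrow::fs::InitializeS3()`) and that "bucket" exists:

// With allow_delayed_open enabled, OpenOutputStream() can return without a
// roundtrip, small files may be written with a single PutObject request instead
// of a multipart upload, and failures such as a missing bucket are only
// reported once actual I/O happens (at worst on Close()).
arrow::Status WriteSmallObject() {
  arrow::fs::S3Options options = arrow::fs::S3Options::Defaults();
  options.allow_delayed_open = true;
  ARROW_ASSIGN_OR_RAISE(auto fs, arrow::fs::S3FileSystem::Make(options));
  ARROW_ASSIGN_OR_RAISE(auto stream, fs->OpenOutputStream("bucket/small-file"));
  ARROW_RETURN_NOT_OK(stream->Write("tiny payload"));
  return stream->Close();  // delayed failure conditions surface here at the latest
}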
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 5a160a78ceea0..c33fa4f5aac97 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -45,7 +45,9 @@
 #include <aws/core/auth/AWSCredentials.h>
 #include <aws/s3/S3Client.h>
 #include <aws/s3/model/CreateBucketRequest.h>
+#include <aws/s3/model/DeleteObjectsRequest.h>
 #include <aws/s3/model/GetObjectRequest.h>
+#include <aws/s3/model/ListObjectsV2Request.h>
 #include <aws/s3/model/PutObjectRequest.h>
 #include <aws/sts/STSClient.h>
@@ -450,25 +452,8 @@ class TestS3FS : public S3TestMixin {
       req.SetBucket(ToAwsString("empty-bucket"));
       ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
     }
-    {
-      Aws::S3::Model::PutObjectRequest req;
-      req.SetBucket(ToAwsString("bucket"));
-      req.SetKey(ToAwsString("emptydir/"));
-      req.SetBody(std::make_shared<std::stringstream>(""));
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-      // NOTE: no need to create intermediate "directories" somedir/ and
-      // somedir/subdir/
-      req.SetKey(ToAwsString("somedir/subdir/subfile"));
-      req.SetBody(std::make_shared<std::stringstream>("sub data"));
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-      req.SetKey(ToAwsString("somefile"));
-      req.SetBody(std::make_shared<std::stringstream>("some data"));
-      req.SetContentType("x-arrow/test");
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-      req.SetKey(ToAwsString("otherdir/1/2/3/otherfile"));
-      req.SetBody(std::make_shared<std::stringstream>("other data"));
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-    }
+
+    ASSERT_OK(PopulateTestBucket());
   }

   void TearDown() override {
@@ -478,6 +463,72 @@ class TestS3FS : public S3TestMixin {
     S3TestMixin::TearDown();
   }

+  Status PopulateTestBucket() {
+    Aws::S3::Model::PutObjectRequest req;
+    req.SetBucket(ToAwsString("bucket"));
+    req.SetKey(ToAwsString("emptydir/"));
+    req.SetBody(std::make_shared<std::stringstream>(""));
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+    // NOTE: no need to create intermediate "directories" somedir/ and
+    // somedir/subdir/
+    req.SetKey(ToAwsString("somedir/subdir/subfile"));
+    req.SetBody(std::make_shared<std::stringstream>("sub data"));
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+    req.SetKey(ToAwsString("somefile"));
+    req.SetBody(std::make_shared<std::stringstream>("some data"));
+    req.SetContentType("x-arrow/test");
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+    req.SetKey(ToAwsString("otherdir/1/2/3/otherfile"));
+    req.SetBody(std::make_shared<std::stringstream>("other data"));
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+
+    return Status::OK();
+  }
+
+  Status RestoreTestBucket() {
+    // First empty the test bucket, and then re-upload initial test files.
+
+    Aws::S3::Model::Delete delete_object;
+    {
+      // Mostly taken from
+      // https://github.com/awsdocs/aws-doc-sdk-examples/blob/main/cpp/example_code/s3/list_objects.cpp
+      Aws::S3::Model::ListObjectsV2Request req;
+      req.SetBucket(Aws::String{"bucket"});
+
+      Aws::String continuation_token;
+      do {
+        if (!continuation_token.empty()) {
+          req.SetContinuationToken(continuation_token);
+        }
+
+        auto outcome = client_->ListObjectsV2(req);
+
+        if (!outcome.IsSuccess()) {
+          return OutcomeToStatus("ListObjectsV2", outcome);
+        } else {
+          Aws::Vector<Aws::S3::Model::Object> objects = outcome.GetResult().GetContents();
+          for (const auto& object : objects) {
+            delete_object.AddObjects(
+                Aws::S3::Model::ObjectIdentifier().WithKey(object.GetKey()));
+          }
+
+          continuation_token = outcome.GetResult().GetNextContinuationToken();
+        }
+      } while (!continuation_token.empty());
+    }
+
+    {
+      Aws::S3::Model::DeleteObjectsRequest req;
+
+      req.SetDelete(std::move(delete_object));
+      req.SetBucket(Aws::String{"bucket"});
+
+      RETURN_NOT_OK(OutcomeToStatus("DeleteObjects", client_->DeleteObjects(req)));
+    }
+
+    return PopulateTestBucket();
+  }
+
   Result<std::shared_ptr<S3FileSystem>> MakeNewFileSystem(
       io::IOContext io_context = io::default_io_context()) {
     options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key());
@@ -518,11 +569,13 @@ class TestS3FS : public S3TestMixin {
     AssertFileInfo(infos[11], "empty-bucket", FileType::Directory);
   }

-  void TestOpenOutputStream() {
+  void TestOpenOutputStream(bool allow_delayed_open) {
     std::shared_ptr<io::OutputStream> stream;

-    // Nonexistent
-    ASSERT_RAISES(IOError, fs_->OpenOutputStream("nonexistent-bucket/somefile"));
+    if (!allow_delayed_open) {
+      // Nonexistent
+      ASSERT_RAISES(IOError, fs_->OpenOutputStream("nonexistent-bucket/somefile"));
+    }

     // URI
     ASSERT_RAISES(Invalid, fs_->OpenOutputStream("s3:bucket/newfile1"));
@@ -843,8 +896,8 @@ TEST_F(TestS3FS, GetFileInfoGenerator) {

 TEST_F(TestS3FS, GetFileInfoGeneratorStress) {
   // This test is slow because it needs to create a bunch of seed files. However, it is
-  // the only test that stresses listing and deleting when there are more than 1000 files
-  // and paging is required.
+  // the only test that stresses listing and deleting when there are more than 1000
+  // files and paging is required.
   constexpr int32_t kNumDirs = 4;
   constexpr int32_t kNumFilesPerDir = 512;
   FileInfoVector expected_infos;
@@ -1235,50 +1288,83 @@ TEST_F(TestS3FS, OpenInputFile) {
   ASSERT_RAISES(IOError, file->Seek(10));
 }

-TEST_F(TestS3FS, OpenOutputStreamBackgroundWrites) { TestOpenOutputStream(); }
+struct S3OptionsTestParameters {
+  bool background_writes{false};
+  bool allow_delayed_open{false};

-TEST_F(TestS3FS, OpenOutputStreamSyncWrites) {
-  options_.background_writes = false;
-  MakeFileSystem();
-  TestOpenOutputStream();
-}
+  void ApplyToS3Options(S3Options* options) const {
+    options->background_writes = background_writes;
+    options->allow_delayed_open = allow_delayed_open;
+  }

-TEST_F(TestS3FS, OpenOutputStreamAbortBackgroundWrites) { TestOpenOutputStreamAbort(); }
+  static std::vector<S3OptionsTestParameters> GetCartesianProduct() {
+    return {
+        S3OptionsTestParameters{true, false},
+        S3OptionsTestParameters{false, false},
+        S3OptionsTestParameters{true, true},
+        S3OptionsTestParameters{false, true},
+    };
+  }

-TEST_F(TestS3FS, OpenOutputStreamAbortSyncWrites) {
-  options_.background_writes = false;
-  MakeFileSystem();
-  TestOpenOutputStreamAbort();
-}
+  std::string ToString() const {
+    return std::string("background_writes = ") + (background_writes ? "true" : "false") +
+           ", allow_delayed_open = " + (allow_delayed_open ? "true" : "false");
"true" : "false"); + } +}; + +TEST_F(TestS3FS, OpenOutputStream) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); -TEST_F(TestS3FS, OpenOutputStreamDestructorBackgroundWrites) { - TestOpenOutputStreamDestructor(); + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStream(combination.allow_delayed_open); + ASSERT_OK(RestoreTestBucket()); + } } -TEST_F(TestS3FS, OpenOutputStreamDestructorSyncWrite) { - options_.background_writes = false; - MakeFileSystem(); - TestOpenOutputStreamDestructor(); +TEST_F(TestS3FS, OpenOutputStreamAbort) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); + + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStreamAbort(); + ASSERT_OK(RestoreTestBucket()); + } } -TEST_F(TestS3FS, OpenOutputStreamAsyncDestructorBackgroundWrites) { - TestOpenOutputStreamCloseAsyncDestructor(); +TEST_F(TestS3FS, OpenOutputStreamDestructor) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); + + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStreamDestructor(); + ASSERT_OK(RestoreTestBucket()); + } } -TEST_F(TestS3FS, OpenOutputStreamAsyncDestructorSyncWrite) { - options_.background_writes = false; - MakeFileSystem(); - TestOpenOutputStreamCloseAsyncDestructor(); +TEST_F(TestS3FS, OpenOutputStreamAsync) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); + + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStreamCloseAsyncDestructor(); + } } TEST_F(TestS3FS, OpenOutputStreamCloseAsyncFutureDeadlockBackgroundWrites) { TestOpenOutputStreamCloseAsyncFutureDeadlock(); + ASSERT_OK(RestoreTestBucket()); } TEST_F(TestS3FS, OpenOutputStreamCloseAsyncFutureDeadlockSyncWrite) { options_.background_writes = false; MakeFileSystem(); TestOpenOutputStreamCloseAsyncFutureDeadlock(); + ASSERT_OK(RestoreTestBucket()); } TEST_F(TestS3FS, OpenOutputStreamMetadata) { @@ -1396,8 +1482,8 @@ TEST_F(TestS3FS, CustomRetryStrategy) { auto retry_strategy = std::make_shared(); options_.retry_strategy = retry_strategy; MakeFileSystem(); - // Attempt to open file that doesn't exist. Should hit TestRetryStrategy::ShouldRetry() - // 3 times before bubbling back up here. + // Attempt to open file that doesn't exist. Should hit + // TestRetryStrategy::ShouldRetry() 3 times before bubbling back up here. 
   ASSERT_RAISES(IOError, fs_->OpenInputStream("nonexistent-bucket/somefile"));
   ASSERT_EQ(retry_strategy->GetErrorsEncountered().size(), 3);
   for (const auto& error : retry_strategy->GetErrorsEncountered()) {
diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc
index 58a3ba4ab83e5..d0aee8ab9b3d2 100644
--- a/cpp/src/arrow/flight/client.cc
+++ b/cpp/src/arrow/flight/client.cc
@@ -584,8 +584,8 @@ arrow::Result<std::unique_ptr<ResultStream>> FlightClient::DoAction(

 arrow::Result<CancelFlightInfoResult> FlightClient::CancelFlightInfo(
     const FlightCallOptions& options, const CancelFlightInfoRequest& request) {
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kCancelFlightInfo.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kCancelFlightInfo.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result, stream->Next());
   ARROW_ASSIGN_OR_RAISE(auto cancel_result, CancelFlightInfoResult::Deserialize(
@@ -596,8 +596,8 @@ arrow::Result<CancelFlightInfoResult> FlightClient::CancelFlightInfo(

 arrow::Result<FlightEndpoint> FlightClient::RenewFlightEndpoint(
     const FlightCallOptions& options, const RenewFlightEndpointRequest& request) {
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kRenewFlightEndpoint.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kRenewFlightEndpoint.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result, stream->Next());
   ARROW_ASSIGN_OR_RAISE(auto renewed_endpoint,
@@ -716,8 +716,8 @@ arrow::Result<FlightClient::DoExchangeResult> FlightClient::DoExchange(

 ::arrow::Result<SetSessionOptionsResult> FlightClient::SetSessionOptions(
     const FlightCallOptions& options, const SetSessionOptionsRequest& request) {
   RETURN_NOT_OK(CheckOpen());
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kSetSessionOptions.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kSetSessionOptions.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result, stream->Next());
   ARROW_ASSIGN_OR_RAISE(
@@ -730,8 +730,8 @@ ::arrow::Result<SetSessionOptionsResult> FlightClient::SetSessionOptions(

 ::arrow::Result<GetSessionOptionsResult> FlightClient::GetSessionOptions(
     const FlightCallOptions& options, const GetSessionOptionsRequest& request) {
   RETURN_NOT_OK(CheckOpen());
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kGetSessionOptions.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kGetSessionOptions.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result, stream->Next());
   ARROW_ASSIGN_OR_RAISE(
@@ -744,8 +744,8 @@ ::arrow::Result<GetSessionOptionsResult> FlightClient::GetSessionOptions(

 ::arrow::Result<CloseSessionResult> FlightClient::CloseSession(
     const FlightCallOptions& options, const CloseSessionRequest& request) {
   RETURN_NOT_OK(CheckOpen());
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kCloseSession.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kCloseSession.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result,
                        stream->Next());
   ARROW_ASSIGN_OR_RAISE(auto close_session_result,
diff --git a/cpp/src/arrow/flight/flight_internals_test.cc b/cpp/src/arrow/flight/flight_internals_test.cc
index 57f4f3e030420..caab357ef8f4a 100644
--- a/cpp/src/arrow/flight/flight_internals_test.cc
+++ b/cpp/src/arrow/flight/flight_internals_test.cc
@@ -79,8 +79,9 @@ void TestRoundtrip(const std::vector<T>& values,
     ASSERT_OK(internal::ToProto(values[i], &pb_value));

     if constexpr (std::is_same_v<T, FlightInfo>) {
-      ASSERT_OK_AND_ASSIGN(FlightInfo value, internal::FromProto(pb_value));
-      EXPECT_EQ(values[i], value);
+      FlightInfo::Data info_data;
+      ASSERT_OK(internal::FromProto(pb_value, &info_data));
+      EXPECT_EQ(values[i], FlightInfo{std::move(info_data)});
     } else if constexpr (std::is_same_v<T, SchemaResult>) {
       std::string data;
       ASSERT_OK(internal::FromProto(pb_value, &data));
@@ -152,9 +153,11 @@ TEST(FlightTypes, BasicAuth) {
 }

 TEST(FlightTypes, Criteria) {
-  std::vector<Criteria> values = {{""}, {"criteria"}};
-  std::vector<std::string> reprs = {"<Criteria expression=''>",
-                                    "<Criteria expression='criteria'>"};
+  std::vector<Criteria> values = {Criteria{""}, Criteria{"criteria"}};
+  std::vector<std::string> reprs = {
+      "<Criteria expression=''>",
+      "<Criteria expression='criteria'>",
+  };
   ASSERT_NO_FATAL_FAILURE(TestRoundtrip(values, reprs));
 }

@@ -191,14 +194,14 @@ TEST(FlightTypes, FlightEndpoint) {
   Timestamp expiration_time(
       std::chrono::duration_cast<Timestamp::duration>(expiration_time_duration));
   std::vector<FlightEndpoint> values = {
-      {{""}, {}, std::nullopt, {}},
-      {{"foo"}, {}, std::nullopt, {}},
-      {{"bar"}, {}, std::nullopt, {"\xDE\xAD\xBE\xEF"}},
-      {{"foo"}, {}, expiration_time, {}},
-      {{"foo"}, {location1}, std::nullopt, {}},
-      {{"bar"}, {location1}, std::nullopt, {}},
-      {{"foo"}, {location2}, std::nullopt, {}},
-      {{"foo"}, {location1, location2}, std::nullopt, {"\xba\xdd\xca\xfe"}},
+      {Ticket{""}, {}, std::nullopt, {}},
+      {Ticket{"foo"}, {}, std::nullopt, {}},
+      {Ticket{"bar"}, {}, std::nullopt, {"\xDE\xAD\xBE\xEF"}},
+      {Ticket{"foo"}, {}, expiration_time, {}},
+      {Ticket{"foo"}, {location1}, std::nullopt, {}},
+      {Ticket{"bar"}, {location1}, std::nullopt, {}},
+      {Ticket{"foo"}, {location2}, std::nullopt, {}},
+      {Ticket{"foo"}, {location1, location2}, std::nullopt, {"\xba\xdd\xca\xfe"}},
   };
   std::vector<std::string> reprs = {
       "<FlightEndpoint ticket=<Ticket ticket=''> locations=[] "
@@ -299,9 +302,9 @@ TEST(FlightTypes, PollInfo) {

 TEST(FlightTypes, Result) {
   std::vector<Result> values = {
-      {Buffer::FromString("")},
-      {Buffer::FromString("foo")},
-      {Buffer::FromString("bar")},
+      Result{Buffer::FromString("")},
+      Result{Buffer::FromString("foo")},
+      Result{Buffer::FromString("bar")},
   };
   std::vector<std::string> reprs = {
       "<Result body=(0 bytes)>",
@@ -333,9 +336,9 @@ TEST(FlightTypes, SchemaResult) {

 TEST(FlightTypes, Ticket) {
   std::vector<Ticket> values = {
-      {""},
-      {"foo"},
-      {"bar"},
+      Ticket{""},
+      Ticket{"foo"},
+      Ticket{"bar"},
   };
   std::vector<std::string> reprs = {
       "<Ticket ticket=''>",
diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc
index e179f3406d65e..101bb06b21288 100644
--- a/cpp/src/arrow/flight/flight_test.cc
+++ b/cpp/src/arrow/flight/flight_test.cc
@@ -998,7 +998,8 @@ TEST_F(TestFlightClient, ListFlights) {
 }

 TEST_F(TestFlightClient, ListFlightsWithCriteria) {
-  ASSERT_OK_AND_ASSIGN(auto listing, client_->ListFlights(FlightCallOptions(), {"foo"}));
+  ASSERT_OK_AND_ASSIGN(auto listing,
+                       client_->ListFlights(FlightCallOptions{}, Criteria{"foo"}));
   std::unique_ptr<FlightInfo> info;
   ASSERT_OK_AND_ASSIGN(info, listing->Next());
   ASSERT_TRUE(info == nullptr);
diff --git a/cpp/src/arrow/flight/serialization_internal.cc b/cpp/src/arrow/flight/serialization_internal.cc
index 10600d055b3a8..fedfc7d5cd590 100644
--- a/cpp/src/arrow/flight/serialization_internal.cc
+++ b/cpp/src/arrow/flight/serialization_internal.cc
@@ -251,22 +251,28 @@ Status ToProto(const FlightDescriptor& descriptor, pb::FlightDescriptor* pb_desc

 // FlightInfo

-arrow::Result<FlightInfo> FromProto(const pb::FlightInfo& pb_info) {
-  FlightInfo::Data info;
-  RETURN_NOT_OK(FromProto(pb_info.flight_descriptor(), &info.descriptor));
+Status FromProto(const pb::FlightInfo& pb_info, FlightInfo::Data* info) {
+  RETURN_NOT_OK(FromProto(pb_info.flight_descriptor(), &info->descriptor));

-  info.schema = pb_info.schema();
+  info->schema = pb_info.schema();

-  info.endpoints.resize(pb_info.endpoint_size());
+  info->endpoints.resize(pb_info.endpoint_size());
   for (int i = 0; i < pb_info.endpoint_size(); ++i) {
-    RETURN_NOT_OK(FromProto(pb_info.endpoint(i), &info.endpoints[i]));
+    RETURN_NOT_OK(FromProto(pb_info.endpoint(i), &info->endpoints[i]));
   }

-  info.total_records = pb_info.total_records();
-  info.total_bytes = pb_info.total_bytes();
-  info.ordered = pb_info.ordered();
-  info.app_metadata = pb_info.app_metadata();
-  return FlightInfo(std::move(info));
+  info->total_records = pb_info.total_records();
+  info->total_bytes = pb_info.total_bytes();
+  info->ordered = pb_info.ordered();
+  info->app_metadata = pb_info.app_metadata();
+  return Status::OK();
+}
+
+Status FromProto(const pb::FlightInfo& pb_info, std::unique_ptr<FlightInfo>* info) {
+  FlightInfo::Data info_data;
+  RETURN_NOT_OK(FromProto(pb_info, &info_data));
+  *info = std::make_unique<FlightInfo>(std::move(info_data));
+  return Status::OK();
 }

 Status FromProto(const pb::BasicAuth& pb_basic_auth, BasicAuth* basic_auth) {
@@ -315,8 +321,9 @@ Status ToProto(const FlightInfo& info, pb::FlightInfo* pb_info) {

 Status FromProto(const pb::PollInfo& pb_info, PollInfo* info) {
   if (pb_info.has_info()) {
-    ARROW_ASSIGN_OR_RAISE(auto flight_info, FromProto(pb_info.info()));
-    info->info = std::make_unique<FlightInfo>(std::move(flight_info));
+    FlightInfo::Data info_data;
+    RETURN_NOT_OK(FromProto(pb_info.info(), &info_data));
+    info->info = std::make_unique<FlightInfo>(std::move(info_data));
   }
   if (pb_info.has_flight_descriptor()) {
     FlightDescriptor descriptor;
@@ -340,6 +347,13 @@ Status FromProto(const pb::PollInfo& pb_info, PollInfo* info) {
   return Status::OK();
 }

+Status FromProto(const pb::PollInfo& pb_info, std::unique_ptr<PollInfo>* info) {
+  PollInfo poll_info;
+  RETURN_NOT_OK(FromProto(pb_info, &poll_info));
+  *info = std::make_unique<PollInfo>(std::move(poll_info));
+  return Status::OK();
+}
+
 Status ToProto(const PollInfo& info, pb::PollInfo* pb_info) {
   if (info.info) {
     RETURN_NOT_OK(ToProto(*info.info, pb_info->mutable_info()));
@@ -360,8 +374,9 @@ Status ToProto(const PollInfo& info, pb::PollInfo* pb_info) {

 Status FromProto(const pb::CancelFlightInfoRequest& pb_request,
                  CancelFlightInfoRequest* request) {
-  ARROW_ASSIGN_OR_RAISE(FlightInfo info, FromProto(pb_request.info()));
-  request->info = std::make_unique<FlightInfo>(std::move(info));
+  FlightInfo::Data info_data;
+  RETURN_NOT_OK(FromProto(pb_request.info(), &info_data));
+  request->info = std::make_unique<FlightInfo>(std::move(info_data));
   return Status::OK();
 }

diff --git a/cpp/src/arrow/flight/serialization_internal.h b/cpp/src/arrow/flight/serialization_internal.h
index 90dde87d3a5eb..9922cb61ac004 100644
--- a/cpp/src/arrow/flight/serialization_internal.h
+++ b/cpp/src/arrow/flight/serialization_internal.h
@@ -60,8 +60,10 @@ Status FromProto(const pb::FlightDescriptor& pb_descr, FlightDescriptor* descr);
 Status FromProto(const pb::FlightEndpoint& pb_endpoint, FlightEndpoint* endpoint);
 Status FromProto(const pb::RenewFlightEndpointRequest& pb_request,
                  RenewFlightEndpointRequest* request);
-arrow::Result<FlightInfo> FromProto(const pb::FlightInfo& pb_info);
+Status FromProto(const pb::FlightInfo& pb_info, FlightInfo::Data* info);
+Status FromProto(const pb::FlightInfo& pb_info, std::unique_ptr<FlightInfo>* info);
 Status FromProto(const pb::PollInfo& pb_info, PollInfo* info);
+Status FromProto(const pb::PollInfo& pb_info, std::unique_ptr<PollInfo>* info);
 Status FromProto(const pb::CancelFlightInfoRequest& pb_request,
                  CancelFlightInfoRequest* request);
 Status FromProto(const pb::SchemaResult& pb_result, std::string* result);
@@ -92,6 +94,7 @@ Status ToProto(const Result& result, pb::Result* pb_result);
 Status ToProto(const CancelFlightInfoResult& result,
                pb::CancelFlightInfoResult* pb_result);
 Status ToProto(const Criteria& criteria, pb::Criteria* pb_criteria);
+Status ToProto(const Location& location, pb::Location* pb_location);
 Status ToProto(const SchemaResult& result, pb::SchemaResult* pb_result);
 Status ToProto(const Ticket& ticket, pb::Ticket* pb_ticket);
 Status ToProto(const BasicAuth& basic_auth, pb::BasicAuth* pb_basic_auth);
diff --git a/cpp/src/arrow/flight/sql/example/sqlite_server.cc b/cpp/src/arrow/flight/sql/example/sqlite_server.cc
index 20b234e90ad3b..0651e6111c25d 100644
--- a/cpp/src/arrow/flight/sql/example/sqlite_server.cc
+++ b/cpp/src/arrow/flight/sql/example/sqlite_server.cc
@@ -126,7 +126,7 @@ arrow::Result<std::unique_ptr<FlightDataStream>> DoGetSQLiteQuery(

 arrow::Result<std::unique_ptr<FlightInfo>> GetFlightInfoForCommand(
     const FlightDescriptor& descriptor, const std::shared_ptr<Schema>& schema) {
   std::vector<FlightEndpoint> endpoints{
-      FlightEndpoint{{descriptor.cmd}, {}, std::nullopt, ""}};
+      FlightEndpoint{Ticket{descriptor.cmd}, {}, std::nullopt, ""}};
   ARROW_ASSIGN_OR_RAISE(auto result,
                         FlightInfo::Make(*schema, descriptor, endpoints, -1, -1, false))

@@ -389,7 +389,7 @@ class SQLiteFlightSqlServer::Impl {
       const ServerCallContext& context, const GetTables& command,
       const FlightDescriptor& descriptor) {
     std::vector<FlightEndpoint> endpoints{
-        FlightEndpoint{{descriptor.cmd}, {}, std::nullopt, ""}};
+        FlightEndpoint{Ticket{descriptor.cmd}, {}, std::nullopt, ""}};

     bool include_schema = command.include_schema;
     ARROW_LOG(INFO) << "GetTables include_schema=" << include_schema;
diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc
index 63d1f5c5225fa..ac89976690877 100644
--- a/cpp/src/arrow/flight/sql/server.cc
+++ b/cpp/src/arrow/flight/sql/server.cc
@@ -477,13 +477,11 @@ arrow::Result<Result> PackActionResult(ActionBeginTransactionResult result) {
 }

 arrow::Result<Result> PackActionResult(CancelFlightInfoResult result) {
-  ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString());
-  return Result{Buffer::FromString(std::move(serialized))};
+  return result.SerializeToBuffer();
 }

 arrow::Result<Result> PackActionResult(const FlightEndpoint& endpoint) {
-  ARROW_ASSIGN_OR_RAISE(auto serialized, endpoint.SerializeToString());
-  return Result{Buffer::FromString(std::move(serialized))};
+  return endpoint.SerializeToBuffer();
 }

 arrow::Result<Result> PackActionResult(CancelResult result) {
@@ -525,21 +523,6 @@ arrow::Result<Result> PackActionResult(ActionCreatePreparedStatementResult resul
   return PackActionResult(pb_result);
 }

-arrow::Result<Result> PackActionResult(SetSessionOptionsResult result) {
-  ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString());
-  return Result{Buffer::FromString(std::move(serialized))};
-}
-
-arrow::Result<Result> PackActionResult(GetSessionOptionsResult result) {
-  ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString());
-  return Result{Buffer::FromString(std::move(serialized))};
-}
-
-arrow::Result<Result> PackActionResult(CloseSessionResult result) {
-  ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString());
-  return Result{Buffer::FromString(std::move(serialized))};
-}
-
 }  // namespace

 arrow::Result<StatementQueryTicket> StatementQueryTicket::Deserialize(
@@ -908,23 +891,23 @@ Status FlightSqlServerBase::DoAction(const ServerCallContext& context,
       std::string_view body(*action.body);
       ARROW_ASSIGN_OR_RAISE(auto request, SetSessionOptionsRequest::Deserialize(body));
       ARROW_ASSIGN_OR_RAISE(auto result, SetSessionOptions(context, request));
-      ARROW_ASSIGN_OR_RAISE(auto packed_result, PackActionResult(std::move(result)));
+      ARROW_ASSIGN_OR_RAISE(auto packed_result, result.SerializeToBuffer());

-      results.push_back(std::move(packed_result));
+      results.emplace_back(std::move(packed_result));
     } else if (action.type == ActionType::kGetSessionOptions.type) {
       std::string_view body(*action.body);
       ARROW_ASSIGN_OR_RAISE(auto request, GetSessionOptionsRequest::Deserialize(body));
       ARROW_ASSIGN_OR_RAISE(auto result, GetSessionOptions(context, request));
-      ARROW_ASSIGN_OR_RAISE(auto packed_result, PackActionResult(std::move(result)));
+      ARROW_ASSIGN_OR_RAISE(auto packed_result, result.SerializeToBuffer());

-      results.push_back(std::move(packed_result));
+      results.emplace_back(std::move(packed_result));
     } else if (action.type == ActionType::kCloseSession.type) {
       std::string_view body(*action.body);
       ARROW_ASSIGN_OR_RAISE(auto request, CloseSessionRequest::Deserialize(body));
       ARROW_ASSIGN_OR_RAISE(auto result, CloseSession(context, request));
-      ARROW_ASSIGN_OR_RAISE(auto packed_result, PackActionResult(std::move(result)));
+      ARROW_ASSIGN_OR_RAISE(auto packed_result, result.SerializeToBuffer());

-      results.push_back(std::move(packed_result));
+      results.emplace_back(std::move(packed_result));
     } else {
       google::protobuf::Any any;
       if (!any.ParseFromArray(action.body->data(),
                               static_cast<int>(action.body->size()))) {
@@ -1063,7 +1046,7 @@ arrow::Result<std::unique_ptr<FlightInfo>> FlightSqlServerBase::GetFlightInfoSql
   }

   std::vector<FlightEndpoint> endpoints{
-      FlightEndpoint{{descriptor.cmd}, {}, std::nullopt, {}}};
+      FlightEndpoint{Ticket{descriptor.cmd}, {}, std::nullopt, {}}};
   ARROW_ASSIGN_OR_RAISE(
       auto result,
       FlightInfo::Make(*SqlSchema::GetSqlInfoSchema(), descriptor, endpoints, -1, -1,
                        false))
diff --git a/cpp/src/arrow/flight/test_util.cc b/cpp/src/arrow/flight/test_util.cc
index bf2f4c2b4effc..8b4245e74e843 100644
--- a/cpp/src/arrow/flight/test_util.cc
+++ b/cpp/src/arrow/flight/test_util.cc
@@ -604,11 +604,11 @@ std::vector<FlightInfo> ExampleFlightInfo() {
   Location location4 = *Location::ForGrpcTcp("foo4.bar.com", 12345);
   Location location5 = *Location::ForGrpcTcp("foo5.bar.com", 12345);

-  FlightEndpoint endpoint1({{"ticket-ints-1"}, {location1}, std::nullopt, {}});
-  FlightEndpoint endpoint2({{"ticket-ints-2"}, {location2}, std::nullopt, {}});
-  FlightEndpoint endpoint3({{"ticket-cmd"}, {location3}, std::nullopt, {}});
-  FlightEndpoint endpoint4({{"ticket-dicts-1"}, {location4}, std::nullopt, {}});
-  FlightEndpoint endpoint5({{"ticket-floats-1"}, {location5}, std::nullopt, {}});
+  FlightEndpoint endpoint1({Ticket{"ticket-ints-1"}, {location1}, std::nullopt, {}});
+  FlightEndpoint endpoint2({Ticket{"ticket-ints-2"}, {location2}, std::nullopt, {}});
+  FlightEndpoint endpoint3({Ticket{"ticket-cmd"}, {location3}, std::nullopt, {}});
+  FlightEndpoint endpoint4({Ticket{"ticket-dicts-1"}, {location4}, std::nullopt, {}});
+  FlightEndpoint endpoint5({Ticket{"ticket-floats-1"}, {location5}, std::nullopt, {}});

   FlightDescriptor descr1{FlightDescriptor::PATH, "", {"examples", "ints"}};
   FlightDescriptor descr2{FlightDescriptor::CMD, "my_command", {}};
diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
index f799ba761c40d..6d8d40c2ebcf8 100644
--- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
+++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
@@ -648,10 +648,10 @@ class UnaryUnaryAsyncCall : public ::grpc::ClientUnaryReactor, public internal::
   void OnDone(const ::grpc::Status& status) override {
     if (status.ok()) {
-      auto result = internal::FromProto(pb_response);
-      client_status = result.status();
+      FlightInfo::Data info_data;
+      client_status = internal::FromProto(pb_response, &info_data);
       if (client_status.ok()) {
-        listener->OnNext(std::move(result).MoveValueUnsafe());
+        listener->OnNext(FlightInfo{std::move(info_data)});
       }
     }
     Finish(status);
@@ -889,7 +889,8 @@ class GrpcClientImpl : public internal::ClientTransport {
     pb::FlightInfo pb_info;
     while (!options.stop_token.IsStopRequested() && stream->Read(&pb_info)) {
-      ARROW_ASSIGN_OR_RAISE(FlightInfo info_data, internal::FromProto(pb_info));
+      FlightInfo::Data info_data;
+      RETURN_NOT_OK(internal::FromProto(pb_info, &info_data));
       flights.emplace_back(std::move(info_data));
     }
     if (options.stop_token.IsStopRequested()) rpc.context.TryCancel();
@@ -939,7 +940,8 @@ class GrpcClientImpl : public internal::ClientTransport {
         stub_->GetFlightInfo(&rpc.context, pb_descriptor, &pb_response), &rpc.context);
     RETURN_NOT_OK(s);

-    ARROW_ASSIGN_OR_RAISE(auto info_data, internal::FromProto(pb_response));
+    FlightInfo::Data info_data;
+    RETURN_NOT_OK(internal::FromProto(pb_response, &info_data));
     *info = std::make_unique<FlightInfo>(std::move(info_data));
     return Status::OK();
   }
diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_client.cc b/cpp/src/arrow/flight/transport/ucx/ucx_client.cc
index 32c2fd776f32b..946ac2d176203 100644
--- a/cpp/src/arrow/flight/transport/ucx/ucx_client.cc
+++ b/cpp/src/arrow/flight/transport/ucx/ucx_client.cc
@@ -97,7 +97,7 @@ class ClientConnection {
     ucp_worker_params_t worker_params;
     std::memset(&worker_params, 0, sizeof(worker_params));
     worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
-    worker_params.thread_mode = UCS_THREAD_MODE_SERIALIZED;
+    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

     ucp_worker_h ucp_worker;
     status = ucp_worker_create(ucp_context->get(), &worker_params, &ucp_worker);
diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_server.cc b/cpp/src/arrow/flight/transport/ucx/ucx_server.cc
index cb9c8948ccf1e..55ff138348812 100644
--- a/cpp/src/arrow/flight/transport/ucx/ucx_server.cc
+++ b/cpp/src/arrow/flight/transport/ucx/ucx_server.cc
@@ -376,7 +376,7 @@ class UcxServerImpl : public arrow::flight::internal::ServerTransport {
     std::unique_ptr<FlightInfo> info;
     std::string response;
     SERVER_RETURN_NOT_OK(driver, base_->GetFlightInfo(context, descriptor, &info));
-    SERVER_RETURN_NOT_OK(driver, info->SerializeToString().Value(&response));
+    SERVER_RETURN_NOT_OK(driver, info->DoSerializeToString(&response));
     RETURN_NOT_OK(driver->SendFrame(FrameType::kBuffer,
                                     reinterpret_cast<const uint8_t*>(response.data()),
                                     static_cast<int64_t>(response.size())));
@@ -397,7 +397,7 @@ class UcxServerImpl : public arrow::flight::internal::ServerTransport {
     std::unique_ptr<PollInfo> info;
     std::string response;
     SERVER_RETURN_NOT_OK(driver, base_->PollFlightInfo(context, descriptor, &info));
-    SERVER_RETURN_NOT_OK(driver, info->SerializeToString().Value(&response));
+    SERVER_RETURN_NOT_OK(driver, info->DoSerializeToString(&response));
     RETURN_NOT_OK(driver->SendFrame(FrameType::kBuffer,
                                     reinterpret_cast<const uint8_t*>(response.data()),
                                     static_cast<int64_t>(response.size())));
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc
index 39b59f65d9cfb..bb5932a312567 100644
--- a/cpp/src/arrow/flight/types.cc
+++ b/cpp/src/arrow/flight/types.cc
@@ -41,6 +41,60 @@
 namespace arrow {
 namespace flight {

+namespace {
+
+ARROW_NOINLINE
+Status ProtoStringInputTooBig(const char* name) {
+  return Status::Invalid("Serialized ", name, " size should not exceed 2 GiB");
+}
+
+ARROW_NOINLINE
+Status ProtoStringOutputTooBig(const char* name) {
+  return Status::Invalid("Serialized ", name, " exceeded 2 GiB limit");
+}
+
+ARROW_NOINLINE
+Status InvalidProtoString(const char* name) {
+  return Status::Invalid("Not a valid ", name);
+}
+
+// Status-returning ser/de functions that allow reuse of the same output objects
+
+template <class PBType>
+Status ParseFromString(const char* name, std::string_view serialized, PBType* out) {
+  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+    return ProtoStringInputTooBig(name);
+  }
+  if (!out->ParseFromArray(serialized.data(), static_cast<int>(serialized.size()))) {
+    return InvalidProtoString(name);
+  }
+  return Status::OK();
+}
+
+template <class T, class PBType>
+Status SerializeToString(const char* name, const T& in, PBType* out_pb,
+                         std::string* out) {
+  RETURN_NOT_OK(internal::ToProto(in, out_pb));
+  return out_pb->SerializeToString(out) ? Status::OK() : ProtoStringOutputTooBig(name);
+}
+
+// Result-returning ser/de functions (more convenient)
+
+template <class PBType, class T>
+arrow::Status DeserializeProtoString(const char* name, std::string_view serialized,
+                                     T* out) {
+  PBType pb;
+  RETURN_NOT_OK(ParseFromString(name, serialized, &pb));
+  return internal::FromProto(pb, out);
+}
+
+template <class PBType, class T>
+Status SerializeToProtoString(const char* name, const T& in, std::string* out) {
+  PBType pb;
+  return SerializeToString(name, in, &pb, out);
+}
+
+}  // namespace

 const char* kSchemeGrpc = "grpc";
 const char* kSchemeGrpcTcp = "grpc+tcp";
@@ -97,18 +151,57 @@ Status MakeFlightError(FlightStatusCode code, std::string message,
       std::make_shared<FlightStatusDetail>(code, std::move(extra_info)));
 }

-bool FlightDescriptor::Equals(const FlightDescriptor& other) const {
-  if (type != other.type) {
-    return false;
+static std::ostream& operator<<(std::ostream& os, std::vector<std::string> values) {
+  os << '[';
+  std::string sep = "";
+  for (const auto& v : values) {
+    os << sep << std::quoted(v);
+    sep = ", ";
   }
-  switch (type) {
-    case PATH:
-      return path == other.path;
-    case CMD:
-      return cmd == other.cmd;
-    default:
-      return false;
+  os << ']';
+
+  return os;
+}
+
+template <typename T>
+static std::ostream& operator<<(std::ostream& os, std::map<std::string, T> m) {
+  os << '{';
+  std::string sep = "";
+  if constexpr (std::is_convertible_v<T, std::string_view>) {
+    // std::string, char*, std::string_view
+    for (const auto& [k, v] : m) {
+      os << sep << '[' << k << "]: " << std::quoted(v) << '"';
+      sep = ", ";
+    }
+  } else {
+    for (const auto& [k, v] : m) {
+      os << sep << '[' << k << "]: " << v;
+      sep = ", ";
+    }
   }
+  os << '}';
+
+  return os;
+}
+
+//------------------------------------------------------------
+// Wrapper types for Flight RPC protobuf messages
+
+std::string BasicAuth::ToString() const {
+  return arrow::util::StringBuilder("<BasicAuth username='", username,
+                                    "' password=(redacted)>");
+}
+
+bool BasicAuth::Equals(const BasicAuth& other) const {
+  return (username == other.username) && (password == other.password);
+}
+
+arrow::Status BasicAuth::Deserialize(std::string_view serialized, BasicAuth* out) {
+  return DeserializeProtoString<pb::BasicAuth, BasicAuth>("BasicAuth", serialized, out);
+}
+
+arrow::Status BasicAuth::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::BasicAuth>("BasicAuth", *this, out);
+}
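Editor's note: the Status-returning, out-parameter ser/de methods introduced above can be exercised as follows. This sketch is not part of the diff; `RoundTripTicket` is a hypothetical name, and the example is based only on the `Ticket::SerializeToString`/`Ticket::Deserialize` signatures shown in this file:

// Round-trips a Ticket through its protobuf wire format. Output objects are
// reused via out-parameters, which is what this refactoring enables.
arrow::Status RoundTripTicket() {
  arrow::flight::Ticket ticket{"ticket-ints-1"};
  std::string serialized;
  ARROW_RETURN_NOT_OK(ticket.SerializeToString(&serialized));
  arrow::flight::Ticket decoded;
  ARROW_RETURN_NOT_OK(arrow::flight::Ticket::Deserialize(serialized, &decoded));
  return decoded.Equals(ticket) ? arrow::Status::OK()
                                : arrow::Status::Invalid("Ticket round-trip mismatch");
}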

 std::string FlightDescriptor::ToString() const {
@@ -138,124 +231,28 @@ std::string FlightDescriptor::ToString() const {
   return ss.str();
 }

-Status FlightPayload::Validate() const {
-  static constexpr int64_t kInt32Max = std::numeric_limits<int32_t>::max();
-  if (descriptor && descriptor->size() > kInt32Max) {
-    return Status::CapacityError("Descriptor size overflow (>= 2**31)");
-  }
-  if (app_metadata && app_metadata->size() > kInt32Max) {
-    return Status::CapacityError("app_metadata size overflow (>= 2**31)");
-  }
-  if (ipc_message.body_length > kInt32Max) {
-    return Status::Invalid("Cannot send record batches exceeding 2GiB yet");
-  }
-  return Status::OK();
-}
-
-arrow::Result<std::shared_ptr<Schema>> SchemaResult::GetSchema(
-    ipc::DictionaryMemo* dictionary_memo) const {
-  // Create a non-owned Buffer to avoid copying
-  io::BufferReader schema_reader(std::make_shared<Buffer>(raw_schema_));
-  return ipc::ReadSchema(&schema_reader, dictionary_memo);
-}
-
-arrow::Result<std::unique_ptr<SchemaResult>> SchemaResult::Make(const Schema& schema) {
-  std::string schema_in;
-  RETURN_NOT_OK(internal::SchemaToString(schema, &schema_in));
-  return std::make_unique<SchemaResult>(std::move(schema_in));
-}
-
-std::string SchemaResult::ToString() const {
-  return "<SchemaResult raw_schema=(serialized)>";
-}
-
-bool SchemaResult::Equals(const SchemaResult& other) const {
-  return raw_schema_ == other.raw_schema_;
-}
-
-arrow::Result<std::string> SchemaResult::SerializeToString() const {
-  pb::SchemaResult pb_schema_result;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_schema_result));
-
-  std::string out;
-  if (!pb_schema_result.SerializeToString(&out)) {
-    return Status::IOError("Serialized SchemaResult exceeded 2 GiB limit");
-  }
-  return out;
-}
-
-arrow::Result<SchemaResult> SchemaResult::Deserialize(std::string_view serialized) {
-  pb::SchemaResult pb_schema_result;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid("Serialized SchemaResult size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_schema_result.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid SchemaResult");
-  }
-  return SchemaResult{pb_schema_result.schema()};
-}
-
-arrow::Result<std::string> FlightDescriptor::SerializeToString() const {
-  pb::FlightDescriptor pb_descriptor;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_descriptor));
-
-  std::string out;
-  if (!pb_descriptor.SerializeToString(&out)) {
-    return Status::IOError("Serialized FlightDescriptor exceeded 2 GiB limit");
-  }
-  return out;
-}
-
-arrow::Result<FlightDescriptor> FlightDescriptor::Deserialize(
-    std::string_view serialized) {
-  pb::FlightDescriptor pb_descriptor;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid("Serialized FlightDescriptor size should not exceed 2 GiB");
+bool FlightDescriptor::Equals(const FlightDescriptor& other) const {
+  if (type != other.type) {
+    return false;
   }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_descriptor.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid FlightDescriptor");
+  switch (type) {
+    case PATH:
+      return path == other.path;
+    case CMD:
+      return cmd == other.cmd;
+    default:
+      return false;
   }
-  FlightDescriptor out;
-  RETURN_NOT_OK(internal::FromProto(pb_descriptor, &out));
-  return out;
 }

-std::string Ticket::ToString() const {
-  std::stringstream ss;
-  ss << "<Ticket ticket='" << ticket << "'>";
-  return ss.str();
+arrow::Status FlightDescriptor::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::FlightDescriptor>("FlightDescriptor",
+                                                      *this, out);
 }

-bool Ticket::Equals(const Ticket& other) const { return ticket == other.ticket; }
-
-arrow::Result<std::string> Ticket::SerializeToString() const {
-  pb::Ticket pb_ticket;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_ticket));
-
-  std::string out;
-  if (!pb_ticket.SerializeToString(&out)) {
-    return Status::IOError("Serialized Ticket exceeded 2 GiB limit");
-  }
-  return out;
-}
-
-arrow::Result<Ticket> Ticket::Deserialize(std::string_view serialized) {
-  pb::Ticket pb_ticket;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid("Serialized Ticket size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_ticket.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid Ticket");
-  }
-  Ticket out;
-  RETURN_NOT_OK(internal::FromProto(pb_ticket, &out));
-  return out;
+arrow::Status FlightDescriptor::Deserialize(std::string_view serialized,
+                                            FlightDescriptor* out) {
+  return DeserializeProtoString<pb::FlightDescriptor, FlightDescriptor>(
+      "FlightDescriptor", serialized, out);
 }

 arrow::Result<FlightInfo> FlightInfo::Make(const Schema& schema,
@@ -271,7 +268,7 @@ arrow::Result<FlightInfo> FlightInfo::Make(const Schema& schema,
   data.ordered = ordered;
   data.app_metadata = std::move(app_metadata);
   RETURN_NOT_OK(internal::SchemaToString(schema, &data.schema));
-  return FlightInfo(data);
+  return FlightInfo(std::move(data));
 }

 arrow::Result<std::shared_ptr<Schema>> FlightInfo::GetSchema(
@@ -286,30 +283,14 @@ arrow::Result<std::shared_ptr<Schema>> FlightInfo::GetSchema(
   return schema_;
 }

-arrow::Result<std::string> FlightInfo::SerializeToString() const {
-  pb::FlightInfo pb_info;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_info));
-
-  std::string out;
-  if (!pb_info.SerializeToString(&out)) {
-    return Status::IOError("Serialized FlightInfo exceeded 2 GiB limit");
-  }
-  return out;
+arrow::Status FlightInfo::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::FlightInfo>("FlightInfo", *this, out);
 }

-arrow::Result<std::unique_ptr<FlightInfo>> FlightInfo::Deserialize(
-    std::string_view serialized) {
-  pb::FlightInfo pb_info;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid("Serialized FlightInfo size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_info.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid FlightInfo");
-  }
-  ARROW_ASSIGN_OR_RAISE(FlightInfo info, internal::FromProto(pb_info));
-  return std::make_unique<FlightInfo>(std::move(info));
+arrow::Status FlightInfo::Deserialize(std::string_view serialized,
+                                      std::unique_ptr<FlightInfo>* out) {
+  return DeserializeProtoString<pb::FlightInfo, std::unique_ptr<FlightInfo>>(
+      "FlightInfo", serialized, out);
 }

 std::string FlightInfo::ToString() const {
@@ -346,31 +327,14 @@ bool FlightInfo::Equals(const FlightInfo& other) const {
          data_.app_metadata == other.data_.app_metadata;
 }

-arrow::Result<std::string> PollInfo::SerializeToString() const {
-  pb::PollInfo pb_info;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_info));
-
-  std::string out;
-  if (!pb_info.SerializeToString(&out)) {
-    return Status::IOError("Serialized PollInfo exceeded 2 GiB limit");
-  }
-  return out;
+arrow::Status PollInfo::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::PollInfo>("PollInfo", *this, out);
 }

-arrow::Result<std::unique_ptr<PollInfo>> PollInfo::Deserialize(
-    std::string_view serialized) {
-  pb::PollInfo pb_info;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid("Serialized PollInfo size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_info.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid PollInfo");
-  }
-  PollInfo info;
-  RETURN_NOT_OK(internal::FromProto(pb_info, &info));
-  return std::make_unique<PollInfo>(std::move(info));
+arrow::Status PollInfo::Deserialize(std::string_view serialized,
+                                    std::unique_ptr<PollInfo>* out) {
+  return DeserializeProtoString<pb::PollInfo, std::unique_ptr<PollInfo>>("PollInfo",
+                                                                         serialized, out);
 }

 std::string PollInfo::ToString() const {
@@ -447,71 +411,60 @@ bool CancelFlightInfoRequest::Equals(const CancelFlightInfoRequest& other) const
   return info == other.info;
 }

-arrow::Result<std::string> CancelFlightInfoRequest::SerializeToString() const {
-  pb::CancelFlightInfoRequest pb_request;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_request));
-
-  std::string out;
-  if (!pb_request.SerializeToString(&out)) {
-    return Status::IOError("Serialized CancelFlightInfoRequest exceeded 2 GiB limit");
-  }
-  return out;
+arrow::Status CancelFlightInfoRequest::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::CancelFlightInfoRequest>("CancelFlightInfoRequest",
+                                                             *this, out);
 }

-arrow::Result<CancelFlightInfoRequest> CancelFlightInfoRequest::Deserialize(
-    std::string_view serialized) {
-  pb::CancelFlightInfoRequest pb_request;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid(
-        "Serialized CancelFlightInfoRequest size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_request.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid CancelFlightInfoRequest");
-  }
-  CancelFlightInfoRequest out;
-  RETURN_NOT_OK(internal::FromProto(pb_request, &out));
-  return out;
+arrow::Status CancelFlightInfoRequest::Deserialize(std::string_view serialized,
+                                                   CancelFlightInfoRequest* out) {
+  return DeserializeProtoString<pb::CancelFlightInfoRequest, CancelFlightInfoRequest>(
+      "CancelFlightInfoRequest", serialized, out);
 }

-static const char* const SetSessionOptionStatusNames[] = {"Unspecified", "InvalidName",
-                                                          "InvalidValue", "Error"};
-static const char* const CloseSessionStatusNames[] = {"Unspecified", "Closed", "Closing",
-                                                      "NotClosable"};
-
-// Helpers for stringifying maps containing various types
-std::string ToString(const SetSessionOptionErrorValue& error_value) {
-  return SetSessionOptionStatusNames[static_cast<int>(error_value)];
+std::string CancelFlightInfoResult::ToString() const {
+  std::stringstream ss;
+  ss << "<CancelFlightInfoResult status=" << status << ">";
+  return ss.str();
 }

-std::ostream& operator<<(std::ostream& os,
-                         const SetSessionOptionErrorValue& error_value) {
-  os << ToString(error_value);
-  return os;
+bool CancelFlightInfoResult::Equals(const CancelFlightInfoResult& other) const {
+  return status == other.status;
 }

-std::string ToString(const CloseSessionStatus& status) {
-  return CloseSessionStatusNames[static_cast<int>(status)];
+arrow::Status CancelFlightInfoResult::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::CancelFlightInfoResult>("CancelFlightInfoResult",
+                                                            *this, out);
 }

-std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status) {
-  os << ToString(status);
-  return os;
+arrow::Status CancelFlightInfoResult::Deserialize(std::string_view serialized,
+                                                  CancelFlightInfoResult* out) {
+  return DeserializeProtoString<pb::CancelFlightInfoResult, CancelFlightInfoResult>(
+      "CancelFlightInfoResult", serialized, out);
 }

-std::ostream& operator<<(std::ostream& os, std::vector<std::string> values) {
-  os << '[';
-  std::string sep = "";
-  for (const auto& v : values) {
-    os << sep << std::quoted(v);
-    sep = ", ";
+std::ostream&
 operator<<(std::ostream& os, CancelStatus status) {
+  switch (status) {
+    case CancelStatus::kUnspecified:
+      os << "Unspecified";
+      break;
+    case CancelStatus::kCancelled:
+      os << "Cancelled";
+      break;
+    case CancelStatus::kCancelling:
+      os << "Cancelling";
+      break;
+    case CancelStatus::kNotCancellable:
+      os << "NotCancellable";
+      break;
   }
-  os << ']';
-
   return os;
 }

+// Session management messages
+
+// SessionOptionValue
+
 std::ostream& operator<<(std::ostream& os, const SessionOptionValue& v) {
   if (std::holds_alternative<std::monostate>(v)) {
     os << "<empty>";
@@ -530,33 +483,6 @@ std::ostream& operator<<(std::ostream& os, const SessionOptionValue& v) {
   return os;
 }

-std::ostream& operator<<(std::ostream& os, const SetSessionOptionsResult::Error& e) {
-  os << '{' << e.value << '}';
-  return os;
-}
-
-template <typename T>
-std::ostream& operator<<(std::ostream& os, std::map<std::string, T> m) {
-  os << '{';
-  std::string sep = "";
-  if constexpr (std::is_convertible_v<T, std::string_view>) {
-    // std::string, char*, std::string_view
-    for (const auto& [k, v] : m) {
-      os << sep << '[' << k << "]: " << std::quoted(v) << '"';
-      sep = ", ";
-    }
-  } else {
-    for (const auto& [k, v] : m) {
-      os << sep << '[' << k << "]: " << v;
-      sep = ", ";
-    }
-  }
-  os << '}';
-
-  return os;
-}
-
-namespace {
 static bool CompareSessionOptionMaps(const std::map<std::string, SessionOptionValue>& a,
                                      const std::map<std::string, SessionOptionValue>& b) {
   if (a.size() != b.size()) {
@@ -577,15 +503,30 @@ static bool CompareSessionOptionMaps(const std::map<std::string, SessionOpti
   return true;
 }

+// SetSessionOptionErrorValue
+
+std::string ToString(const SetSessionOptionErrorValue& error_value) {
+  static constexpr const char* SetSessionOptionStatusNames[] = {
+      "Unspecified",
+      "InvalidName",
+      "InvalidValue",
+      "Error",
+  };
+  return SetSessionOptionStatusNames[static_cast<int>(error_value)];
+}
+
+std::ostream& operator<<(std::ostream& os,
+                         const SetSessionOptionErrorValue& error_value) {
+  os << ToString(error_value);
+  return os;
+}

 // SetSessionOptionsRequest

 std::string SetSessionOptionsRequest::ToString() const {
   std::stringstream ss;

-  ss << "<SetSessionOptionsRequest session_options=(";
+  ss << "<SetSessionOptionsRequest session_options=" << session_options << ">";

   return ss.str();
 }

+arrow::Status SetSessionOptionsRequest::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::SetSessionOptionsRequest>("SetSessionOptionsRequest",
+                                                              *this, out);
+}

-arrow::Result<SetSessionOptionsRequest> SetSessionOptionsRequest::Deserialize(
-    std::string_view serialized) {
-  // TODO these & SerializeToString should all be factored out to a superclass
-  pb::SetSessionOptionsRequest pb_request;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid(
-        "Serialized SetSessionOptionsRequest size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_request.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid SetSessionOptionsRequest");
-  }
-  SetSessionOptionsRequest out;
-  RETURN_NOT_OK(internal::FromProto(pb_request, &out));
-  return out;
+arrow::Status SetSessionOptionsRequest::Deserialize(std::string_view serialized,
+                                                    SetSessionOptionsRequest* out) {
+  return DeserializeProtoString<pb::SetSessionOptionsRequest, SetSessionOptionsRequest>(
+      "SetSessionOptionsRequest", serialized, out);
 }

 // SetSessionOptionsResult

+std::ostream& operator<<(std::ostream& os, const SetSessionOptionsResult::Error& e) {
+  os << '{' << e.value << '}';
+  return os;
+}
+
 std::string SetSessionOptionsResult::ToString() const {
   std::stringstream ss;

-  ss << "<SetSessionOptionsResult errors=(";
+  ss << "<SetSessionOptionsResult errors=" << errors << ">";

   return ss.str();
 }

+arrow::Status SetSessionOptionsResult::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::SetSessionOptionsResult>("SetSessionOptionsResult",
+                                                             *this, out);
+}

-arrow::Result<SetSessionOptionsResult> SetSessionOptionsResult::Deserialize(
-    std::string_view serialized) {
-  pb::SetSessionOptionsResult pb_result;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid(
-        "Serialized SetSessionOptionsResult size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_result.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid SetSessionOptionsResult");
-  }
-  SetSessionOptionsResult out;
-  RETURN_NOT_OK(internal::FromProto(pb_result, &out));
-  return out;
+arrow::Status SetSessionOptionsResult::Deserialize(std::string_view serialized,
+                                                   SetSessionOptionsResult* out) {
+  return DeserializeProtoString<pb::SetSessionOptionsResult, SetSessionOptionsResult>(
+      "SetSessionOptionsResult", serialized, out);
 }

 // GetSessionOptionsRequest
@@ -677,41 +586,22 @@ bool GetSessionOptionsRequest::Equals(const GetSessionOptionsRequest& other) con
   return true;
 }

-arrow::Result<std::string> GetSessionOptionsRequest::SerializeToString() const {
-  pb::GetSessionOptionsRequest pb_request;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_request));
-
-  std::string out;
-  if (!pb_request.SerializeToString(&out)) {
-    return Status::IOError("Serialized GetSessionOptionsRequest exceeded 2GiB limit");
-  }
-  return out;
+arrow::Status GetSessionOptionsRequest::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::GetSessionOptionsRequest>("GetSessionOptionsRequest",
+                                                              *this, out);
 }

-arrow::Result<GetSessionOptionsRequest> GetSessionOptionsRequest::Deserialize(
-    std::string_view serialized) {
-  pb::GetSessionOptionsRequest pb_request;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid(
-        "Serialized GetSessionOptionsRequest size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_request.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid GetSessionOptionsRequest");
-  }
-  GetSessionOptionsRequest out;
-  RETURN_NOT_OK(internal::FromProto(pb_request, &out));
-  return out;
+arrow::Status GetSessionOptionsRequest::Deserialize(std::string_view serialized,
+                                                    GetSessionOptionsRequest* out) {
+  return DeserializeProtoString<pb::GetSessionOptionsRequest, GetSessionOptionsRequest>(
+      "GetSessionOptionsRequest", serialized, out);
 }

 // GetSessionOptionsResult

 std::string GetSessionOptionsResult::ToString() const {
   std::stringstream ss;

-  ss << "<GetSessionOptionsResult session_options=(";
+  ss << "<GetSessionOptionsResult session_options=" << session_options << ">";

   return ss.str();
 }

+arrow::Status GetSessionOptionsResult::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::GetSessionOptionsResult>("GetSessionOptionsResult",
+                                                             *this, out);
+}

-arrow::Result<GetSessionOptionsResult> GetSessionOptionsResult::Deserialize(
-    std::string_view serialized) {
-  pb::GetSessionOptionsResult pb_result;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-    return Status::Invalid(
-        "Serialized GetSessionOptionsResult size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_result.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid GetSessionOptionsResult");
-  }
-  GetSessionOptionsResult out;
-  RETURN_NOT_OK(internal::FromProto(pb_result, &out));
-  return out;
+arrow::Status GetSessionOptionsResult::Deserialize(std::string_view serialized,
+                                                   GetSessionOptionsResult* out) {
+  return DeserializeProtoString<pb::GetSessionOptionsResult, GetSessionOptionsResult>(
+      "GetSessionOptionsResult", serialized, out);
 }

 // CloseSessionRequest
@@ -753,40 +626,39 @@ std::string CloseSessionRequest::ToString() const {
   return "<CloseSessionRequest>";
 }

-arrow::Result<std::string> CloseSessionRequest::SerializeToString() const {
-  pb::CloseSessionRequest pb_request;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_request));
+arrow::Status CloseSessionRequest::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::CloseSessionRequest>("CloseSessionRequest", *this,
+                                                         out);
+}

-  std::string out;
-  if (!pb_request.SerializeToString(&out)) {
-    return Status::IOError("Serialized CloseSessionRequest exceeded 2GiB limit");
-  }
-  return out;
+arrow::Status CloseSessionRequest::Deserialize(std::string_view serialized,
+                                               CloseSessionRequest* out) {
+  return DeserializeProtoString<pb::CloseSessionRequest, CloseSessionRequest>(
+      "CloseSessionRequest", serialized, out);
 }

-arrow::Result<CloseSessionRequest> CloseSessionRequest::Deserialize(
-    std::string_view serialized) {
-  pb::CloseSessionRequest pb_request;
-  if (serialized.size() >
static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized CloseSessionRequest size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CloseSessionRequest"); - } - CloseSessionRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; +// CloseSessionStatus + +std::string ToString(const CloseSessionStatus& status) { + static constexpr const char* CloseSessionStatusNames[] = { + "Unspecified", + "Closed", + "Closing", + "NotClosable", + }; + return CloseSessionStatusNames[static_cast(status)]; +} + +std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status) { + os << ToString(status); + return os; } // CloseSessionResult std::string CloseSessionResult::ToString() const { std::stringstream ss; - ss << "( + "CloseSessionResult", serialized, out); } -arrow::Result CloseSessionResult::Deserialize( - std::string_view serialized) { - pb::CloseSessionResult pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized CloseSessionResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CloseSessionResult"); - } - CloseSessionResult out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; +// Ticket + +std::string Ticket::ToString() const { + std::stringstream ss; + ss << ""; + return ss.str(); +} + +bool Ticket::Equals(const Ticket& other) const { return ticket == other.ticket; } + +arrow::Status Ticket::SerializeToString(std::string* out) const { + return SerializeToProtoString("Ticket", *this, out); +} + +arrow::Status Ticket::Deserialize(std::string_view serialized, Ticket* out) { + return DeserializeProtoString("Ticket", serialized, out); } Location::Location() { uri_ = std::make_shared(); } @@ -860,7 +733,6 @@ arrow::Result Location::ForScheme(const std::string& scheme, return Location::Parse(uri_string.str()); } -std::string Location::ToString() const { return uri_->ToString(); } std::string Location::scheme() const { std::string scheme = uri_->scheme(); if (scheme.empty()) { @@ -870,6 +742,8 @@ std::string Location::scheme() const { return scheme; } +std::string Location::ToString() const { return uri_->ToString(); } + bool Location::Equals(const Location& other) const { return ToString() == other.ToString(); } @@ -923,30 +797,22 @@ bool FlightEndpoint::Equals(const FlightEndpoint& other) const { return true; } -arrow::Result FlightEndpoint::SerializeToString() const { - pb::FlightEndpoint pb_flight_endpoint; - RETURN_NOT_OK(internal::ToProto(*this, &pb_flight_endpoint)); +arrow::Status Location::SerializeToString(std::string* out) const { + return SerializeToProtoString("Location", *this, out); +} - std::string out; - if (!pb_flight_endpoint.SerializeToString(&out)) { - return Status::IOError("Serialized FlightEndpoint exceeded 2 GiB limit"); - } - return out; +arrow::Status Location::Deserialize(std::string_view serialized, Location* out) { + return DeserializeProtoString("Location", serialized, out); } -arrow::Result FlightEndpoint::Deserialize(std::string_view serialized) { - pb::FlightEndpoint pb_flight_endpoint; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return 
Status::Invalid("Serialized FlightEndpoint size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_flight_endpoint.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid FlightEndpoint"); - } - FlightEndpoint out; - RETURN_NOT_OK(internal::FromProto(pb_flight_endpoint, &out)); - return out; +arrow::Status FlightEndpoint::SerializeToString(std::string* out) const { + return SerializeToProtoString("FlightEndpoint", *this, out); +} + +arrow::Status FlightEndpoint::Deserialize(std::string_view serialized, + FlightEndpoint* out) { + return DeserializeProtoString("FlightEndpoint", + serialized, out); } std::string RenewFlightEndpointRequest::ToString() const { @@ -959,32 +825,30 @@ bool RenewFlightEndpointRequest::Equals(const RenewFlightEndpointRequest& other) return endpoint == other.endpoint; } -arrow::Result RenewFlightEndpointRequest::SerializeToString() const { - pb::RenewFlightEndpointRequest pb_request; - RETURN_NOT_OK(internal::ToProto(*this, &pb_request)); +arrow::Status RenewFlightEndpointRequest::SerializeToString(std::string* out) const { + return SerializeToProtoString( + "RenewFlightEndpointRequest", *this, out); +} - std::string out; - if (!pb_request.SerializeToString(&out)) { - return Status::IOError("Serialized RenewFlightEndpointRequest exceeded 2 GiB limit"); - } - return out; +arrow::Status RenewFlightEndpointRequest::Deserialize(std::string_view serialized, + RenewFlightEndpointRequest* out) { + return DeserializeProtoString("RenewFlightEndpointRequest", + serialized, out); } -arrow::Result RenewFlightEndpointRequest::Deserialize( - std::string_view serialized) { - pb::RenewFlightEndpointRequest pb_request; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized RenewFlightEndpointRequest size should not exceed 2 GiB"); +Status FlightPayload::Validate() const { + static constexpr int64_t kInt32Max = std::numeric_limits::max(); + if (descriptor && descriptor->size() > kInt32Max) { + return Status::CapacityError("Descriptor size overflow (>= 2**31)"); } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid RenewFlightEndpointRequest"); + if (app_metadata && app_metadata->size() > kInt32Max) { + return Status::CapacityError("app_metadata size overflow (>= 2**31)"); + } + if (ipc_message.body_length > kInt32Max) { + return Status::Invalid("Cannot send record batches exceeding 2GiB yet"); } - RenewFlightEndpointRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; + return Status::OK(); } std::string ActionType::ToString() const { @@ -1022,30 +886,13 @@ bool ActionType::Equals(const ActionType& other) const { return type == other.type && description == other.description; } -arrow::Result ActionType::SerializeToString() const { - pb::ActionType pb_action_type; - RETURN_NOT_OK(internal::ToProto(*this, &pb_action_type)); - - std::string out; - if (!pb_action_type.SerializeToString(&out)) { - return Status::IOError("Serialized ActionType exceeded 2 GiB limit"); - } - return out; +arrow::Status ActionType::SerializeToString(std::string* out) const { + return SerializeToProtoString("ActionType", *this, out); } -arrow::Result ActionType::Deserialize(std::string_view serialized) { - pb::ActionType pb_action_type; - if (serialized.size() > 
static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized ActionType size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_action_type.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid ActionType"); - } - ActionType out; - RETURN_NOT_OK(internal::FromProto(pb_action_type, &out)); - return out; +arrow::Status ActionType::Deserialize(std::string_view serialized, ActionType* out) { + return DeserializeProtoString("ActionType", serialized, + out); } std::string Criteria::ToString() const { @@ -1056,30 +903,12 @@ bool Criteria::Equals(const Criteria& other) const { return expression == other.expression; } -arrow::Result Criteria::SerializeToString() const { - pb::Criteria pb_criteria; - RETURN_NOT_OK(internal::ToProto(*this, &pb_criteria)); - - std::string out; - if (!pb_criteria.SerializeToString(&out)) { - return Status::IOError("Serialized Criteria exceeded 2 GiB limit"); - } - return out; +arrow::Status Criteria::SerializeToString(std::string* out) const { + return SerializeToProtoString("Criteria", *this, out); } -arrow::Result Criteria::Deserialize(std::string_view serialized) { - pb::Criteria pb_criteria; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Criteria size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_criteria.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Criteria"); - } - Criteria out; - RETURN_NOT_OK(internal::FromProto(pb_criteria, &out)); - return out; +arrow::Status Criteria::Deserialize(std::string_view serialized, Criteria* out) { + return DeserializeProtoString("Criteria", serialized, out); } std::string Action::ToString() const { @@ -1100,30 +929,12 @@ bool Action::Equals(const Action& other) const { ((body == other.body) || (body && other.body && body->Equals(*other.body))); } -arrow::Result Action::SerializeToString() const { - pb::Action pb_action; - RETURN_NOT_OK(internal::ToProto(*this, &pb_action)); - - std::string out; - if (!pb_action.SerializeToString(&out)) { - return Status::IOError("Serialized Action exceeded 2 GiB limit"); - } - return out; +arrow::Status Action::SerializeToString(std::string* out) const { + return SerializeToProtoString("Action", *this, out); } -arrow::Result Action::Deserialize(std::string_view serialized) { - pb::Action pb_action; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Action size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_action.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Action"); - } - Action out; - RETURN_NOT_OK(internal::FromProto(pb_action, &out)); - return out; +arrow::Status Action::Deserialize(std::string_view serialized, Action* out) { + return DeserializeProtoString("Action", serialized, out); } std::string Result::ToString() const { @@ -1141,89 +952,48 @@ bool Result::Equals(const Result& other) const { return (body == other.body) || (body && other.body && body->Equals(*other.body)); } -arrow::Result Result::SerializeToString() const { - pb::Result pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); - - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError("Serialized Result 
exceeded 2 GiB limit"); - } - return out; +arrow::Status Result::SerializeToString(std::string* out) const { + return SerializeToProtoString("Result", *this, out); } -arrow::Result Result::Deserialize(std::string_view serialized) { - pb::Result pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Result size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Result"); - } - Result out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; +arrow::Status Result::Deserialize(std::string_view serialized, Result* out) { + return DeserializeProtoString("Result", serialized, out); } -std::string CancelFlightInfoResult::ToString() const { - std::stringstream ss; - ss << ""; - return ss.str(); +arrow::Result> SchemaResult::GetSchema( + ipc::DictionaryMemo* dictionary_memo) const { + // Create a non-owned Buffer to avoid copying + io::BufferReader schema_reader(std::make_shared(raw_schema_)); + return ipc::ReadSchema(&schema_reader, dictionary_memo); } -bool CancelFlightInfoResult::Equals(const CancelFlightInfoResult& other) const { - return status == other.status; +arrow::Result> SchemaResult::Make(const Schema& schema) { + std::string schema_in; + RETURN_NOT_OK(internal::SchemaToString(schema, &schema_in)); + return std::make_unique(std::move(schema_in)); } -arrow::Result CancelFlightInfoResult::SerializeToString() const { - pb::CancelFlightInfoResult pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); +std::string SchemaResult::ToString() const { + return ""; +} - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError( - "Serialized ActionCancelFlightInfoResult exceeded 2 GiB limit"); - } - return out; +bool SchemaResult::Equals(const SchemaResult& other) const { + return raw_schema_ == other.raw_schema_; } -arrow::Result CancelFlightInfoResult::Deserialize( - std::string_view serialized) { - pb::CancelFlightInfoResult pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized ActionCancelFlightInfoResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CancelFlightInfoResult"); - } - CancelFlightInfoResult out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; +arrow::Status SchemaResult::SerializeToString(std::string* out) const { + return SerializeToProtoString("SchemaResult", *this, out); } -std::ostream& operator<<(std::ostream& os, CancelStatus status) { - switch (status) { - case CancelStatus::kUnspecified: - os << "Unspecified"; - break; - case CancelStatus::kCancelled: - os << "Cancelled"; - break; - case CancelStatus::kCancelling: - os << "Cancelling"; - break; - case CancelStatus::kNotCancellable: - os << "NotCancellable"; - break; - } - return os; +arrow::Status SchemaResult::Deserialize(std::string_view serialized, SchemaResult* out) { + pb::SchemaResult pb_schema_result; + RETURN_NOT_OK(ParseFromString("SchemaResult", serialized, &pb_schema_result)); + *out = SchemaResult{pb_schema_result.schema()}; + return Status::OK(); } +//------------------------------------------------------------ + Status ResultStream::Drain() { while (true) { 
     ARROW_ASSIGN_OR_RAISE(auto result, Next());
@@ -1311,40 +1081,6 @@ arrow::Result<std::unique_ptr<Result>> SimpleResultStream::Next() {
   return std::make_unique<Result>(std::move(results_[position_++]));
 }

-std::string BasicAuth::ToString() const {
-  return arrow::util::StringBuilder("<BasicAuth username='", username,
-                                    "' password=(redacted)>");
-}
-
-bool BasicAuth::Equals(const BasicAuth& other) const {
-  return (username == other.username) && (password == other.password);
-}
-
-arrow::Result<BasicAuth> BasicAuth::Deserialize(std::string_view serialized) {
-  pb::BasicAuth pb_result;
-  if (serialized.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
-    return Status::Invalid("Serialized BasicAuth size should not exceed 2 GiB");
-  }
-  google::protobuf::io::ArrayInputStream input(serialized.data(),
-                                               static_cast<int>(serialized.size()));
-  if (!pb_result.ParseFromZeroCopyStream(&input)) {
-    return Status::Invalid("Not a valid BasicAuth");
-  }
-  BasicAuth out;
-  RETURN_NOT_OK(internal::FromProto(pb_result, &out));
-  return out;
-}
-
-arrow::Result<std::string> BasicAuth::SerializeToString() const {
-  pb::BasicAuth pb_result;
-  RETURN_NOT_OK(internal::ToProto(*this, &pb_result));
-  std::string out;
-  if (!pb_result.SerializeToString(&out)) {
-    return Status::IOError("Serialized BasicAuth exceeded 2GiB limit");
-  }
-  return out;
-}
-
 //------------------------------------------------------------
 // Error propagation helpers
diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h
index cdf03f21041ee..de93750f75b25 100644
--- a/cpp/src/arrow/flight/types.h
+++ b/cpp/src/arrow/flight/types.h
@@ -31,6 +31,7 @@
 #include <variant>
 #include <vector>
+#include "arrow/buffer.h"
 #include "arrow/flight/type_fwd.h"
 #include "arrow/flight/visibility.h"
 #include "arrow/ipc/options.h"
@@ -60,6 +61,18 @@ class Uri;

 namespace flight {

+ARROW_FLIGHT_EXPORT
+extern const char* kSchemeGrpc;
+ARROW_FLIGHT_EXPORT
+extern const char* kSchemeGrpcTcp;
+ARROW_FLIGHT_EXPORT
+extern const char* kSchemeGrpcUnix;
+ARROW_FLIGHT_EXPORT
+extern const char* kSchemeGrpcTls;
+
+class FlightClient;
+class FlightServerBase;
+
 /// \brief A timestamp compatible with Protocol Buffer's
 /// google.protobuf.Timestamp:
 ///
@@ -159,29 +172,122 @@ struct ARROW_FLIGHT_EXPORT CertKeyPair {
   std::string pem_key;
 };

+namespace internal {
+
+template <typename T>
+struct remove_unique_ptr {
+  using type = T;
+};
+
+template <typename T>
+struct remove_unique_ptr<std::unique_ptr<T>> {
+  using type = T;
+};
+
+// Base CRTP type
+template <class T>
+struct BaseType {
+ protected:
+  using SuperT = BaseType<T>;
+  using SelfT = typename remove_unique_ptr<T>::type;
+
+  const SelfT& self() const { return static_cast<const SelfT&>(*this); }
+  SelfT& self() { return static_cast<SelfT&>(*this); }
+
+ public:
+  BaseType() = default;
+
+  friend bool operator==(const SelfT& left, const SelfT& right) {
+    return left.Equals(right);
+  }
+  friend bool operator!=(const SelfT& left, const SelfT& right) {
+    return !left.Equals(right);
+  }
+
+  /// \brief Serialize this message to its wire-format representation.
+  inline arrow::Result<std::string> SerializeToString() const {
+    std::string out;
+    ARROW_RETURN_NOT_OK(self().SelfT::SerializeToString(&out));
+    return out;
+  }
+
+  inline static arrow::Result<T> Deserialize(std::string_view serialized) {
+    T out;
+    ARROW_RETURN_NOT_OK(SelfT::Deserialize(serialized, &out));
+    return out;
+  }
+
+  inline arrow::Result<std::shared_ptr<Buffer>> SerializeToBuffer() const {
+    std::string out;
+    ARROW_RETURN_NOT_OK(self().SelfT::SerializeToString(&out));
+    return Buffer::FromString(std::move(out));
+  }
+};
+
+}  // namespace internal
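// A sketch of how the CRTP base is consumed (hypothetical ExampleMsg type,
// not part of this patch): the derived struct supplies Equals() plus the
// Status-returning SerializeToString(std::string*) / Deserialize(string_view, T*)
// pair, and BaseType derives the Result-returning overloads, SerializeToBuffer(),
// and operator==/operator!= from them.
struct ExampleMsg : public internal::BaseType<ExampleMsg> {
  std::string payload;

  bool Equals(const ExampleMsg& other) const { return payload == other.payload; }

  using SuperT::Deserialize;
  using SuperT::SerializeToString;

  arrow::Status SerializeToString(std::string* out) const {
    *out = payload;  // stand-in for the real protobuf round trip
    return arrow::Status::OK();
  }
  static arrow::Status Deserialize(std::string_view serialized, ExampleMsg* out) {
    out->payload = std::string(serialized);
    return arrow::Status::OK();
  }
};
// Usage: ExampleMsg m; m.payload = "x";
//        arrow::Result<std::string> wire = m.SerializeToString();
//        arrow::Result<ExampleMsg> back = ExampleMsg::Deserialize(*wire);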
+
+//------------------------------------------------------------
+// Wrapper types for Flight RPC protobuf messages
+
+// A wrapper around arrow.flight.protocol.HandshakeRequest is not defined
+// A wrapper around arrow.flight.protocol.HandshakeResponse is not defined
+
+/// \brief message for simple auth
+struct ARROW_FLIGHT_EXPORT BasicAuth : public internal::BaseType<BasicAuth> {
+  std::string username;
+  std::string password;
+
+  BasicAuth() = default;
+  BasicAuth(std::string username, std::string password)
+      : username(std::move(username)), password(std::move(password)) {}
+
+  std::string ToString() const;
+  bool Equals(const BasicAuth& other) const;
+
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;
+
+  /// \brief Serialize this message to its wire-format representation.
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;
+
+  /// \brief Deserialize this message from its wire-format representation.
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, BasicAuth* out);
+};
+
+// A wrapper around arrow.flight.protocol.Empty is not defined
+
 /// \brief A type of action that can be performed with the DoAction RPC.
-struct ARROW_FLIGHT_EXPORT ActionType {
+struct ARROW_FLIGHT_EXPORT ActionType : public internal::BaseType<ActionType> {
   /// \brief The name of the action.
   std::string type;

   /// \brief A human-readable description of the action.
   std::string description;

+  ActionType() = default;
+
+  ActionType(std::string type, std::string description)
+      : type(std::move(type)), description(std::move(description)) {}
+
   std::string ToString() const;
   bool Equals(const ActionType& other) const;

-  friend bool operator==(const ActionType& left, const ActionType& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const ActionType& left, const ActionType& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
-  static arrow::Result<ActionType> Deserialize(std::string_view serialized);
+  static arrow::Status Deserialize(std::string_view serialized, ActionType* out);

   static const ActionType kCancelFlightInfo;
   static const ActionType kRenewFlightEndpoint;
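// Example of the two call styles on a concrete wrapper (a sketch; error
// handling is minimal). Both spellings are available on every wrapper below.
arrow::Status RoundTripBasicAuth() {
  BasicAuth auth("username", "password");
  // Status-returning spelling, declared by BasicAuth itself:
  std::string wire;
  ARROW_RETURN_NOT_OK(auth.SerializeToString(&wire));
  // Result-returning spelling, inherited from internal::BaseType:
  ARROW_ASSIGN_OR_RAISE(BasicAuth decoded, BasicAuth::Deserialize(wire));
  // operator== comes from BaseType and delegates to Equals().
  return decoded == auth ? arrow::Status::OK()
                         : arrow::Status::Invalid("round trip mismatch");
}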
@@ -191,138 +297,126 @@
 };

 /// \brief Opaque selection criteria for ListFlights RPC
-struct ARROW_FLIGHT_EXPORT Criteria {
+struct ARROW_FLIGHT_EXPORT Criteria : public internal::BaseType<Criteria> {
   /// Opaque criteria expression, dependent on server implementation
   std::string expression;

+  Criteria() = default;
+  Criteria(std::string expression)  // NOLINT runtime/explicit
+      : expression(std::move(expression)) {}
+
   std::string ToString() const;
   bool Equals(const Criteria& other) const;

-  friend bool operator==(const Criteria& left, const Criteria& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const Criteria& left, const Criteria& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<Criteria> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, Criteria* out);
 };

 /// \brief An action to perform with the DoAction RPC
-struct ARROW_FLIGHT_EXPORT Action {
+struct ARROW_FLIGHT_EXPORT Action : public internal::BaseType<Action> {
   /// The action type
   std::string type;

   /// The action content as a Buffer
   std::shared_ptr<Buffer> body;

+  Action() = default;
+  Action(std::string type, std::shared_ptr<Buffer> body)
+      : type(std::move(type)), body(std::move(body)) {}
+
   std::string ToString() const;
   bool Equals(const Action& other) const;

-  friend bool operator==(const Action& left, const Action& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const Action& left, const Action& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<Action> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, Action* out);
 };

 /// \brief Opaque result returned after executing an action
-struct ARROW_FLIGHT_EXPORT Result {
+struct ARROW_FLIGHT_EXPORT Result : public internal::BaseType<Result> {
   std::shared_ptr<Buffer> body;

+  Result() = default;
+  Result(std::shared_ptr<Buffer> body)  // NOLINT runtime/explicit
+      : body(std::move(body)) {}
+
   std::string ToString() const;
   bool Equals(const Result& other) const;

-  friend bool operator==(const Result& left, const Result& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const Result& left, const Result& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<Result> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, Result* out);
 };
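// Example: building a DoAction request body with the new convenience
// constructors (a sketch; the action name is illustrative only).
arrow::Result<Action> MakeHealthCheckAction() {
  Action action("healthcheck", Buffer::FromString("{}"));
  ARROW_ASSIGN_OR_RAISE(std::string wire, action.SerializeToString());
  return Action::Deserialize(wire);  // arrow::Result<Action>, via BaseType
}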
-enum class CancelStatus {
-  /// The cancellation status is unknown. Servers should avoid using
-  /// this value (send a kNotCancellable if the requested FlightInfo
-  /// is not known). Clients can retry the request.
-  kUnspecified = 0,
-  /// The cancellation request is complete. Subsequent requests with
-  /// the same payload may return kCancelled or a kNotCancellable error.
-  kCancelled = 1,
-  /// The cancellation request is in progress. The client may retry
-  /// the cancellation request.
-  kCancelling = 2,
-  // The FlightInfo is not cancellable. The client should not retry the
-  // cancellation request.
-  kNotCancellable = 3,
-};
+/// \brief Schema result returned after a schema request RPC
+struct ARROW_FLIGHT_EXPORT SchemaResult : public internal::BaseType<SchemaResult> {
+ public:
+  SchemaResult() = default;
+  explicit SchemaResult(std::string schema) : raw_schema_(std::move(schema)) {}

-/// \brief The result of the CancelFlightInfo action.
-struct ARROW_FLIGHT_EXPORT CancelFlightInfoResult {
-  CancelStatus status;
+  /// \brief Factory method to construct a SchemaResult.
+  static arrow::Result<std::unique_ptr<SchemaResult>> Make(const Schema& schema);
+
+  /// \brief return schema
+  /// \param[in,out] dictionary_memo for dictionary bookkeeping, will
+  /// be modified
+  /// \return Arrow result with the reconstructed Schema
+  arrow::Result<std::shared_ptr<Schema>> GetSchema(
+      ipc::DictionaryMemo* dictionary_memo) const;
+
+  const std::string& serialized_schema() const { return raw_schema_; }

   std::string ToString() const;
-  bool Equals(const CancelFlightInfoResult& other) const;
+  bool Equals(const SchemaResult& other) const;

-  friend bool operator==(const CancelFlightInfoResult& left,
-                         const CancelFlightInfoResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const CancelFlightInfoResult& left,
-                         const CancelFlightInfoResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<CancelFlightInfoResult> Deserialize(std::string_view serialized);
-};
-
-ARROW_FLIGHT_EXPORT
-std::ostream& operator<<(std::ostream& os, CancelStatus status);
-
-/// \brief message for simple auth
-struct ARROW_FLIGHT_EXPORT BasicAuth {
-  std::string username;
-  std::string password;
-
-  std::string ToString() const;
-  bool Equals(const BasicAuth& other) const;
-
-  friend bool operator==(const BasicAuth& left, const BasicAuth& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const BasicAuth& left, const BasicAuth& right) {
-    return !(left == right);
-  }
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, SchemaResult* out);

-  /// \brief Deserialize this message from its wire-format representation.
- static arrow::Result Deserialize(std::string_view serialized); - /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + private: + std::string raw_schema_; }; /// \brief A request to retrieve or generate a dataset -struct ARROW_FLIGHT_EXPORT FlightDescriptor { +struct ARROW_FLIGHT_EXPORT FlightDescriptor + : public internal::BaseType { enum DescriptorType { UNKNOWN = 0, /// Unused PATH = 1, /// Named path identifying a dataset @@ -330,7 +424,7 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { }; /// The descriptor type - DescriptorType type; + DescriptorType type = UNKNOWN; /// Opaque value used to express a command. Should only be defined when type /// is CMD @@ -340,22 +434,33 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { /// when type is PATH std::vector path; - bool Equals(const FlightDescriptor& other) const; + FlightDescriptor() = default; + + FlightDescriptor(DescriptorType type, std::string cmd, std::vector path) + : type(type), cmd(std::move(cmd)), path(std::move(path)) {} /// \brief Get a human-readable form of this descriptor. std::string ToString() const; + bool Equals(const FlightDescriptor& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Get the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Parse the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, FlightDescriptor* out); // Convenience factory functions @@ -366,69 +471,289 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { static FlightDescriptor Path(const std::vector& p) { return FlightDescriptor{PATH, "", p}; } - - friend bool operator==(const FlightDescriptor& left, const FlightDescriptor& right) { - return left.Equals(right); - } - friend bool operator!=(const FlightDescriptor& left, const FlightDescriptor& right) { - return !(left == right); - } }; -/// \brief Data structure providing an opaque identifier or credential to use -/// when requesting a data stream with the DoGet RPC -struct ARROW_FLIGHT_EXPORT Ticket { - std::string ticket; +/// \brief The access coordinates for retrieval of a dataset, returned by +/// GetFlightInfo +class ARROW_FLIGHT_EXPORT FlightInfo + : public internal::BaseType> { + public: + struct Data { + std::string schema; + FlightDescriptor descriptor; + std::vector endpoints; + int64_t total_records = -1; + int64_t total_bytes = -1; + bool ordered = false; + std::string app_metadata; + }; - std::string ToString() const; - bool Equals(const Ticket& other) const; + explicit FlightInfo(Data data) : data_(std::move(data)), reconstructed_schema_(false) {} - friend bool operator==(const Ticket& left, const Ticket& right) { - return left.Equals(right); - } - friend bool operator!=(const Ticket& left, const Ticket& right) { - return !(left == right); - } + /// \brief Factory method to construct a FlightInfo. 
+ static arrow::Result Make(const Schema& schema, + const FlightDescriptor& descriptor, + const std::vector& endpoints, + int64_t total_records, int64_t total_bytes, + bool ordered = false, + std::string app_metadata = ""); + + /// \brief Deserialize the Arrow schema of the dataset. Populate any + /// dictionary encoded fields into a DictionaryMemo for + /// bookkeeping + /// \param[in,out] dictionary_memo for dictionary bookkeeping, will + /// be modified + /// \return Arrow result with the reconstructed Schema + arrow::Result> GetSchema( + ipc::DictionaryMemo* dictionary_memo) const; + + const std::string& serialized_schema() const { return data_.schema; } + + /// The descriptor associated with this flight, may not be set + const FlightDescriptor& descriptor() const { return data_.descriptor; } + + /// A list of endpoints associated with the flight (dataset). To consume the + /// whole flight, all endpoints must be consumed + const std::vector& endpoints() const { return data_.endpoints; } + + /// The total number of records (rows) in the dataset. If unknown, set to -1 + int64_t total_records() const { return data_.total_records; } + + /// The total number of bytes in the dataset. If unknown, set to -1 + int64_t total_bytes() const { return data_.total_bytes; } + + /// Whether endpoints are in the same order as the data. + bool ordered() const { return data_.ordered; } + + /// Application-defined opaque metadata + const std::string& app_metadata() const { return data_.app_metadata; } + + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Get the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Parse the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - static arrow::Result Deserialize(std::string_view serialized); -}; + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + std::unique_ptr* out); -class FlightClient; -class FlightServerBase; + std::string ToString() const; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpc; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpcTcp; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpcUnix; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpcTls; + /// Compare two FlightInfo for equality. This will compare the + /// serialized schema representations, NOT the logical equality of + /// the schemas. + bool Equals(const FlightInfo& other) const; -/// \brief A host location (a URI) -struct ARROW_FLIGHT_EXPORT Location { + private: + Data data_; + mutable std::shared_ptr schema_; + mutable bool reconstructed_schema_; +}; + +/// \brief The information to process a long-running query. +class ARROW_FLIGHT_EXPORT PollInfo + : public internal::BaseType> { public: - /// \brief Initialize a blank location. - Location(); + /// The currently available results so far. + std::unique_ptr info = NULLPTR; + /// The descriptor the client should use on the next try. If unset, + /// the query is complete. + std::optional descriptor = std::nullopt; + /// Query progress. Must be in [0.0, 1.0] but need not be + /// monotonic or nondecreasing. 
If unknown, do not set. + std::optional progress = std::nullopt; + /// Expiration time for this request. After this passes, the server + /// might not accept the poll descriptor anymore (and the query may + /// be cancelled). This may be updated on a call to PollFlightInfo. + std::optional expiration_time = std::nullopt; - /// \brief Initialize a location by parsing a URI string - static arrow::Result Parse(const std::string& uri_string); + PollInfo() + : info(NULLPTR), + descriptor(std::nullopt), + progress(std::nullopt), + expiration_time(std::nullopt) {} - /// \brief Get the fallback URI. - /// - /// arrow-flight-reuse-connection://? means that a client may attempt to - /// reuse an existing connection to a Flight service to fetch data instead - /// of creating a new connection to one of the other locations listed in a + PollInfo(std::unique_ptr info, std::optional descriptor, + std::optional progress, std::optional expiration_time) + : info(std::move(info)), + descriptor(std::move(descriptor)), + progress(progress), + expiration_time(expiration_time) {} + + PollInfo(const PollInfo& other) + : info(other.info ? std::make_unique(*other.info) : NULLPTR), + descriptor(other.descriptor), + progress(other.progress), + expiration_time(other.expiration_time) {} + PollInfo(PollInfo&& other) noexcept = default; + ~PollInfo() = default; + PollInfo& operator=(const PollInfo& other) { + info = other.info ? std::make_unique(*other.info) : NULLPTR; + descriptor = other.descriptor; + progress = other.progress; + expiration_time = other.expiration_time; + return *this; + } + PollInfo& operator=(PollInfo&& other) = default; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Get the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Parse the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + std::unique_ptr* out); + + std::string ToString() const; + + /// Compare two PollInfo for equality. This will compare the + /// serialized schema representations, NOT the logical equality of + /// the schemas. + bool Equals(const PollInfo& other) const; +}; + +/// \brief The request of the CancelFlightInfoRequest action. +struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest + : public internal::BaseType { + std::unique_ptr info; + + CancelFlightInfoRequest() = default; + CancelFlightInfoRequest(std::unique_ptr info) // NOLINT runtime/explicit + : info(std::move(info)) {} + + std::string ToString() const; + bool Equals(const CancelFlightInfoRequest& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. 
+ static arrow::Status Deserialize(std::string_view serialized, + CancelFlightInfoRequest* out); +}; + +enum class CancelStatus { + /// The cancellation status is unknown. Servers should avoid using + /// this value (send a kNotCancellable if the requested FlightInfo + /// is not known). Clients can retry the request. + kUnspecified = 0, + /// The cancellation request is complete. Subsequent requests with + /// the same payload may return kCancelled or a kNotCancellable error. + kCancelled = 1, + /// The cancellation request is in progress. The client may retry + /// the cancellation request. + kCancelling = 2, + // The FlightInfo is not cancellable. The client should not retry the + // cancellation request. + kNotCancellable = 3, +}; + +/// \brief The result of the CancelFlightInfo action. +struct ARROW_FLIGHT_EXPORT CancelFlightInfoResult + : public internal::BaseType { + CancelStatus status = CancelStatus::kUnspecified; + + CancelFlightInfoResult() = default; + CancelFlightInfoResult(CancelStatus status) // NOLINT runtime/explicit + : status(status) {} + + std::string ToString() const; + bool Equals(const CancelFlightInfoResult& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + CancelFlightInfoResult* out); +}; + +ARROW_FLIGHT_EXPORT +std::ostream& operator<<(std::ostream& os, CancelStatus status); + +/// \brief Data structure providing an opaque identifier or credential to use +/// when requesting a data stream with the DoGet RPC +struct ARROW_FLIGHT_EXPORT Ticket : public internal::BaseType { + std::string ticket; + + Ticket() = default; + Ticket(std::string ticket) // NOLINT runtime/explicit + : ticket(std::move(ticket)) {} + + std::string ToString() const; + bool Equals(const Ticket& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Get the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Parse the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Ticket* out); +}; + +/// \brief A host location (a URI) +struct ARROW_FLIGHT_EXPORT Location : public internal::BaseType { + public: + /// \brief Initialize a blank location. + Location(); + + /// \brief Initialize a location by parsing a URI string + static arrow::Result Parse(const std::string& uri_string); + + /// \brief Get the fallback URI. + /// + /// arrow-flight-reuse-connection://? means that a client may attempt to + /// reuse an existing connection to a Flight service to fetch data instead + /// of creating a new connection to one of the other locations listed in a /// FlightEndpoint response. 
static const Location& ReuseConnection(); @@ -456,20 +781,25 @@ struct ARROW_FLIGHT_EXPORT Location { static arrow::Result ForScheme(const std::string& scheme, const std::string& host, const int port); - /// \brief Get a representation of this URI as a string. - std::string ToString() const; - /// \brief Get the scheme of this URI. std::string scheme() const; + /// \brief Get a representation of this URI as a string. + std::string ToString() const; bool Equals(const Location& other) const; - friend bool operator==(const Location& left, const Location& right) { - return left.Equals(right); - } - friend bool operator!=(const Location& left, const Location& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Location* out); private: friend class FlightClient; @@ -479,7 +809,7 @@ struct ARROW_FLIGHT_EXPORT Location { /// \brief A flight ticket and list of locations where the ticket can be /// redeemed -struct ARROW_FLIGHT_EXPORT FlightEndpoint { +struct ARROW_FLIGHT_EXPORT FlightEndpoint : public internal::BaseType { /// Opaque ticket identify; use with DoGet RPC Ticket ticket; @@ -496,47 +826,60 @@ struct ARROW_FLIGHT_EXPORT FlightEndpoint { /// Opaque Application-defined metadata std::string app_metadata; + FlightEndpoint() = default; + FlightEndpoint(Ticket ticket, std::vector locations, + std::optional expiration_time, std::string app_metadata) + : ticket(std::move(ticket)), + locations(std::move(locations)), + expiration_time(expiration_time), + app_metadata(std::move(app_metadata)) {} + std::string ToString() const; bool Equals(const FlightEndpoint& other) const; - friend bool operator==(const FlightEndpoint& left, const FlightEndpoint& right) { - return left.Equals(right); - } - friend bool operator!=(const FlightEndpoint& left, const FlightEndpoint& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, FlightEndpoint* out); }; /// \brief The request of the RenewFlightEndpoint action. 
-struct ARROW_FLIGHT_EXPORT RenewFlightEndpointRequest { +struct ARROW_FLIGHT_EXPORT RenewFlightEndpointRequest + : public internal::BaseType { FlightEndpoint endpoint; + RenewFlightEndpointRequest() = default; + explicit RenewFlightEndpointRequest(FlightEndpoint endpoint) + : endpoint(std::move(endpoint)) {} + std::string ToString() const; bool Equals(const RenewFlightEndpointRequest& other) const; - friend bool operator==(const RenewFlightEndpointRequest& left, - const RenewFlightEndpointRequest& right) { - return left.Equals(right); - } - friend bool operator!=(const RenewFlightEndpointRequest& left, - const RenewFlightEndpointRequest& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize( - std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + RenewFlightEndpointRequest* out); }; +// FlightData in Flight.proto maps to FlightPayload here. + /// \brief Staging data structure for messages about to be put on the wire /// /// This structure corresponds to FlightData in the protocol. @@ -545,241 +888,57 @@ struct ARROW_FLIGHT_EXPORT FlightPayload { std::shared_ptr app_metadata; ipc::IpcPayload ipc_message; + FlightPayload() = default; + FlightPayload(std::shared_ptr descriptor, std::shared_ptr app_metadata, + ipc::IpcPayload ipc_message) + : descriptor(std::move(descriptor)), + app_metadata(std::move(app_metadata)), + ipc_message(std::move(ipc_message)) {} + /// \brief Check that the payload can be written to the wire. Status Validate() const; }; -/// \brief Schema result returned after a schema request RPC -struct ARROW_FLIGHT_EXPORT SchemaResult { - public: - SchemaResult() = default; - explicit SchemaResult(std::string schema) : raw_schema_(std::move(schema)) {} +// A wrapper around arrow.flight.protocol.PutResult is not defined - /// \brief Factory method to construct a SchemaResult. - static arrow::Result> Make(const Schema& schema); +// Session management messages - /// \brief return schema - /// \param[in,out] dictionary_memo for dictionary bookkeeping, will - /// be modified - /// \return Arrow result with the reconstructed Schema - arrow::Result> GetSchema( - ipc::DictionaryMemo* dictionary_memo) const; - - const std::string& serialized_schema() const { return raw_schema_; } - - std::string ToString() const; - bool Equals(const SchemaResult& other) const; - - friend bool operator==(const SchemaResult& left, const SchemaResult& right) { - return left.Equals(right); - } - friend bool operator!=(const SchemaResult& left, const SchemaResult& right) { - return !(left == right); - } - - /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; - - /// \brief Deserialize this message from its wire-format representation. 
- static arrow::Result Deserialize(std::string_view serialized); - - private: - std::string raw_schema_; -}; - -/// \brief The access coordinates for retrieval of a dataset, returned by -/// GetFlightInfo -class ARROW_FLIGHT_EXPORT FlightInfo { - public: - struct Data { - std::string schema; - FlightDescriptor descriptor; - std::vector endpoints; - int64_t total_records = -1; - int64_t total_bytes = -1; - bool ordered = false; - std::string app_metadata; - }; - - explicit FlightInfo(Data data) : data_(std::move(data)), reconstructed_schema_(false) {} - - /// \brief Factory method to construct a FlightInfo. - static arrow::Result Make(const Schema& schema, - const FlightDescriptor& descriptor, - const std::vector& endpoints, - int64_t total_records, int64_t total_bytes, - bool ordered = false, - std::string app_metadata = ""); - - /// \brief Deserialize the Arrow schema of the dataset. Populate any - /// dictionary encoded fields into a DictionaryMemo for - /// bookkeeping - /// \param[in,out] dictionary_memo for dictionary bookkeeping, will - /// be modified - /// \return Arrow result with the reconstructed Schema - arrow::Result> GetSchema( - ipc::DictionaryMemo* dictionary_memo) const; - - const std::string& serialized_schema() const { return data_.schema; } - - /// The descriptor associated with this flight, may not be set - const FlightDescriptor& descriptor() const { return data_.descriptor; } - - /// A list of endpoints associated with the flight (dataset). To consume the - /// whole flight, all endpoints must be consumed - const std::vector& endpoints() const { return data_.endpoints; } - - /// The total number of records (rows) in the dataset. If unknown, set to -1 - int64_t total_records() const { return data_.total_records; } - - /// The total number of bytes in the dataset. If unknown, set to -1 - int64_t total_bytes() const { return data_.total_bytes; } - - /// Whether endpoints are in the same order as the data. - bool ordered() const { return data_.ordered; } - - /// Application-defined opaque metadata - const std::string& app_metadata() const { return data_.app_metadata; } - - /// \brief Get the wire-format representation of this type. - /// - /// Useful when interoperating with non-Flight systems (e.g. REST - /// services) that may want to return Flight types. - arrow::Result SerializeToString() const; - - /// \brief Parse the wire-format representation of this type. - /// - /// Useful when interoperating with non-Flight systems (e.g. REST - /// services) that may want to return Flight types. - static arrow::Result> Deserialize( - std::string_view serialized); - - std::string ToString() const; - - /// Compare two FlightInfo for equality. This will compare the - /// serialized schema representations, NOT the logical equality of - /// the schemas. - bool Equals(const FlightInfo& other) const; - - friend bool operator==(const FlightInfo& left, const FlightInfo& right) { - return left.Equals(right); - } - friend bool operator!=(const FlightInfo& left, const FlightInfo& right) { - return !(left == right); - } - - private: - Data data_; - mutable std::shared_ptr schema_; - mutable bool reconstructed_schema_; -}; - -/// \brief The information to process a long-running query. -class ARROW_FLIGHT_EXPORT PollInfo { - public: - /// The currently available results so far. - std::unique_ptr info = NULLPTR; - /// The descriptor the client should use on the next try. If unset, - /// the query is complete. - std::optional descriptor = std::nullopt; - /// Query progress. 
Must be in [0.0, 1.0] but need not be - /// monotonic or nondecreasing. If unknown, do not set. - std::optional progress = std::nullopt; - /// Expiration time for this request. After this passes, the server - /// might not accept the poll descriptor anymore (and the query may - /// be cancelled). This may be updated on a call to PollFlightInfo. - std::optional expiration_time = std::nullopt; - - PollInfo() - : info(NULLPTR), - descriptor(std::nullopt), - progress(std::nullopt), - expiration_time(std::nullopt) {} - - explicit PollInfo(std::unique_ptr info, - std::optional descriptor, - std::optional progress, - std::optional expiration_time) - : info(std::move(info)), - descriptor(std::move(descriptor)), - progress(progress), - expiration_time(expiration_time) {} - - // Must not be explicit; to declare one we must declare all ("rule of five") - PollInfo(const PollInfo& other) // NOLINT(runtime/explicit) - : info(other.info ? std::make_unique(*other.info) : NULLPTR), - descriptor(other.descriptor), - progress(other.progress), - expiration_time(other.expiration_time) {} - PollInfo(PollInfo&& other) noexcept = default; // NOLINT(runtime/explicit) - ~PollInfo() = default; - PollInfo& operator=(const PollInfo& other) { - info = other.info ? std::make_unique(*other.info) : NULLPTR; - descriptor = other.descriptor; - progress = other.progress; - expiration_time = other.expiration_time; - return *this; - } - PollInfo& operator=(PollInfo&& other) = default; - - /// \brief Get the wire-format representation of this type. - /// - /// Useful when interoperating with non-Flight systems (e.g. REST - /// services) that may want to return Flight types. - arrow::Result SerializeToString() const; - - /// \brief Parse the wire-format representation of this type. - /// - /// Useful when interoperating with non-Flight systems (e.g. REST - /// services) that may want to return Flight types. - static arrow::Result> Deserialize( - std::string_view serialized); - - std::string ToString() const; - - /// Compare two PollInfo for equality. This will compare the - /// serialized schema representations, NOT the logical equality of - /// the schemas. - bool Equals(const PollInfo& other) const; +/// \brief Variant supporting all possible value types for {Set,Get}SessionOptions +/// +/// By convention, an attempt to set a valueless (std::monostate) SessionOptionValue +/// should attempt to unset or clear the named option value on the server. +using SessionOptionValue = std::variant>; +std::ostream& operator<<(std::ostream& os, const SessionOptionValue& v); - friend bool operator==(const PollInfo& left, const PollInfo& right) { - return left.Equals(right); - } - friend bool operator!=(const PollInfo& left, const PollInfo& right) { - return !(left == right); - } -}; +/// \brief A request to set a set of session options by name/value. +struct ARROW_FLIGHT_EXPORT SetSessionOptionsRequest + : public internal::BaseType { + std::map session_options; -/// \brief The request of the CancelFlightInfoRequest action. 
-struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest { - std::unique_ptr info; + SetSessionOptionsRequest() = default; + explicit SetSessionOptionsRequest( + std::map session_options) + : session_options(std::move(session_options)) {} std::string ToString() const; - bool Equals(const CancelFlightInfoRequest& other) const; + bool Equals(const SetSessionOptionsRequest& other) const; - friend bool operator==(const CancelFlightInfoRequest& left, - const CancelFlightInfoRequest& right) { - return left.Equals(right); - } - friend bool operator!=(const CancelFlightInfoRequest& left, - const CancelFlightInfoRequest& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + SetSessionOptionsRequest* out); }; -/// \brief Variant supporting all possible value types for {Set,Get}SessionOptions -/// -/// By convention, an attempt to set a valueless (std::monostate) SessionOptionValue -/// should attempt to unset or clear the named option value on the server. -using SessionOptionValue = std::variant>; - /// \brief The result of setting a session option. enum class SetSessionOptionErrorValue : int8_t { /// \brief The status of setting the option is unknown. @@ -797,54 +956,9 @@ enum class SetSessionOptionErrorValue : int8_t { std::string ToString(const SetSessionOptionErrorValue& error_value); std::ostream& operator<<(std::ostream& os, const SetSessionOptionErrorValue& error_value); -/// \brief The result of closing a session. -enum class CloseSessionStatus : int8_t { - // \brief The session close status is unknown. - // - // Servers should avoid using this value (send a NOT_FOUND error if the requested - // session is not known). Clients can retry the request. - kUnspecified, - // \brief The session close request is complete. - // - // Subsequent requests with the same session produce a NOT_FOUND error. - kClosed, - // \brief The session close request is in progress. - // - // The client may retry the request. - kClosing, - // \brief The session is not closeable. - // - // The client should not retry the request. - kNotClosable -}; -std::string ToString(const CloseSessionStatus& status); -std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status); - -/// \brief A request to set a set of session options by name/value. -struct ARROW_FLIGHT_EXPORT SetSessionOptionsRequest { - std::map session_options; - - std::string ToString() const; - bool Equals(const SetSessionOptionsRequest& other) const; - - friend bool operator==(const SetSessionOptionsRequest& left, - const SetSessionOptionsRequest& right) { - return left.Equals(right); - } - friend bool operator!=(const SetSessionOptionsRequest& left, - const SetSessionOptionsRequest& right) { - return !(left == right); - } - - /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; - - /// \brief Deserialize this message from its wire-format representation. 
-  static arrow::Result<SetSessionOptionsRequest> Deserialize(std::string_view serialized);
-};
-
 /// \brief The result(s) of setting session option(s).
-struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult {
+struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult
+    : public internal::BaseType<SetSessionOptionsResult> {
   struct Error {
     SetSessionOptionErrorValue value;
@@ -859,113 +973,152 @@ struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult {

   std::map<std::string, Error> errors;

+  SetSessionOptionsResult() = default;
+  SetSessionOptionsResult(std::map<std::string, Error> errors)  // NOLINT runtime/explicit
+      : errors(std::move(errors)) {}
+
   std::string ToString() const;
   bool Equals(const SetSessionOptionsResult& other) const;

-  friend bool operator==(const SetSessionOptionsResult& left,
-                         const SetSessionOptionsResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const SetSessionOptionsResult& left,
-                         const SetSessionOptionsResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<SetSessionOptionsResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   SetSessionOptionsResult* out);
 };

 /// \brief A request to get current session options.
-struct ARROW_FLIGHT_EXPORT GetSessionOptionsRequest {
+struct ARROW_FLIGHT_EXPORT GetSessionOptionsRequest
+    : public internal::BaseType<GetSessionOptionsRequest> {
+  GetSessionOptionsRequest() = default;
+
   std::string ToString() const;
   bool Equals(const GetSessionOptionsRequest& other) const;

-  friend bool operator==(const GetSessionOptionsRequest& left,
-                         const GetSessionOptionsRequest& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const GetSessionOptionsRequest& left,
-                         const GetSessionOptionsRequest& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<GetSessionOptionsRequest> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   GetSessionOptionsRequest* out);
 };
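[Reviewer sketch — not part of the patch] How a caller might consume the per-option errors map above. Options absent from errors were accepted; the body of SetSessionOptionErrorValue is elided from this hunk, so only the declared ToString helper is used here.

    SetSessionOptionsResult result = /* obtained from a SetSessionOptions call */;
    for (const auto& [name, error] : result.errors) {
      // error.value is a SetSessionOptionErrorValue describing why `name` failed
      std::cerr << "option rejected: " << name << ": " << ToString(error.value) << std::endl;
    }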
 /// \brief The current session options.
-struct ARROW_FLIGHT_EXPORT GetSessionOptionsResult {
+struct ARROW_FLIGHT_EXPORT GetSessionOptionsResult
+    : public internal::BaseType<GetSessionOptionsResult> {
   std::map<std::string, SessionOptionValue> session_options;

+  GetSessionOptionsResult() = default;
+  GetSessionOptionsResult(  // NOLINT runtime/explicit
+      std::map<std::string, SessionOptionValue> session_options)
+      : session_options(std::move(session_options)) {}
+
   std::string ToString() const;
   bool Equals(const GetSessionOptionsResult& other) const;

-  friend bool operator==(const GetSessionOptionsResult& left,
-                         const GetSessionOptionsResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const GetSessionOptionsResult& left,
-                         const GetSessionOptionsResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<GetSessionOptionsResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   GetSessionOptionsResult* out);
 };

 /// \brief A request to close the open client session.
-struct ARROW_FLIGHT_EXPORT CloseSessionRequest {
+struct ARROW_FLIGHT_EXPORT CloseSessionRequest
+    : public internal::BaseType<CloseSessionRequest> {
+  CloseSessionRequest() = default;
+
   std::string ToString() const;
   bool Equals(const CloseSessionRequest& other) const;

-  friend bool operator==(const CloseSessionRequest& left,
-                         const CloseSessionRequest& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const CloseSessionRequest& left,
-                         const CloseSessionRequest& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<CloseSessionRequest> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, CloseSessionRequest* out);
+};
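[Reviewer sketch — not part of the patch] The BaseType refactor leaves each message with two serialization surfaces: the Status-returning overloads declared in the structs, plus the Result-returning forms pulled in via using SuperT::...;. A minimal round trip:

    CloseSessionRequest request;

    std::string serialized;
    ARROW_RETURN_NOT_OK(request.SerializeToString(&serialized));     // Status form above

    arrow::Result<std::string> maybe = request.SerializeToString();  // Result form via SuperT

    CloseSessionRequest roundtripped;
    ARROW_RETURN_NOT_OK(CloseSessionRequest::Deserialize(serialized, &roundtripped));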
+
+/// \brief The result of closing a session.
+enum class CloseSessionStatus : int8_t {
+  // \brief The session close status is unknown.
+  //
+  // Servers should avoid using this value (send a NOT_FOUND error if the requested
+  // session is not known). Clients can retry the request.
+  kUnspecified,
+  // \brief The session close request is complete.
+  //
+  // Subsequent requests with the same session produce a NOT_FOUND error.
+  kClosed,
+  // \brief The session close request is in progress.
+  //
+  // The client may retry the request.
+  kClosing,
+  // \brief The session is not closeable.
+  //
+  // The client should not retry the request.
+  kNotClosable
+};
+std::string ToString(const CloseSessionStatus& status);
+std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status);

 /// \brief The result of attempting to close the client session.
-struct ARROW_FLIGHT_EXPORT CloseSessionResult {
+struct ARROW_FLIGHT_EXPORT CloseSessionResult
+    : public internal::BaseType<CloseSessionResult> {
   CloseSessionStatus status;

+  CloseSessionResult() = default;
+  CloseSessionResult(CloseSessionStatus status)  // NOLINT runtime/explicit
+      : status(status) {}
+
   std::string ToString() const;
   bool Equals(const CloseSessionResult& other) const;

-  friend bool operator==(const CloseSessionResult& left,
-                         const CloseSessionResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const CloseSessionResult& left,
-                         const CloseSessionResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<CloseSessionResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, CloseSessionResult* out);
 };

+//------------------------------------------------------------
+
 /// \brief An iterator to FlightInfo instances returned by ListFlights.
 class ARROW_FLIGHT_EXPORT FlightListing {
  public:
diff --git a/cpp/src/arrow/gpu/cuda_test.cc b/cpp/src/arrow/gpu/cuda_test.cc
index 4c450bf389919..4c9b961fa1465 100644
--- a/cpp/src/arrow/gpu/cuda_test.cc
+++ b/cpp/src/arrow/gpu/cuda_test.cc
@@ -679,6 +679,49 @@ TEST_F(TestCudaArrowIpc, BasicWriteRead) {
   CompareBatch(*batch, *cpu_batch);
 }

+TEST_F(TestCudaArrowIpc, WriteIpcString) {
+  auto values = ArrayFromJSON(utf8(), R"(["foo", null, "quux"])");
+  ASSERT_OK_AND_ASSIGN(auto values_device, values->CopyTo(mm_));
+  auto batch = RecordBatch::Make(schema({field("vals", utf8())}), 3,
+                                 {values_device->data()}, DeviceAllocationType::kCUDA);
+
+  ipc::IpcPayload payload;
+  ASSERT_OK(
+      ipc::GetRecordBatchPayload(*batch, ipc::IpcWriteOptions::Defaults(), &payload));
+
+  ASSERT_EQ(values_device->data()->buffers[0]->address(),
+            payload.body_buffers[0]->address());
+  ASSERT_EQ(values_device->data()->buffers[1]->address(),
+            payload.body_buffers[1]->address());
+}
+
+TEST_F(TestCudaArrowIpc, WriteIpcList) {
+  auto values =
+      ArrayFromJSON(list(utf8()), R"([["foo", null], null, ["quux", "bar", "baz"]])");
+  ASSERT_OK_AND_ASSIGN(auto values_device, values->CopyTo(mm_));
+  auto batch = RecordBatch::Make(schema({field("vals", list(utf8()))}), 3,
+                                 {values_device->data()}, DeviceAllocationType::kCUDA);
+
+  ipc::IpcPayload payload;
+  ASSERT_OK(
+      ipc::GetRecordBatchPayload(*batch, ipc::IpcWriteOptions::Defaults(), &payload));
+
+  ASSERT_EQ(values_device->data()->buffers[0]->address(),
+            payload.body_buffers[0]->address());
+}
+
+TEST_F(TestCudaArrowIpc, WriteIpcSlicedRecord) {
+  std::shared_ptr<RecordBatch> batch;
+  ASSERT_OK(ipc::test::MakeListRecordBatch(&batch));
+
+  ASSERT_OK_AND_ASSIGN(auto batch_device, batch->CopyTo(mm_));
+  auto sliced_batch_device = batch_device->Slice(10);
+
+  ipc::IpcPayload payload;
+  ASSERT_NOT_OK(ipc::GetRecordBatchPayload(*sliced_batch_device,
+                                           ipc::IpcWriteOptions::Defaults(), &payload));
+}
+
 TEST_F(TestCudaArrowIpc, DictionaryWriteRead) {
   std::shared_ptr<RecordBatch> batch;
   ASSERT_OK(ipc::test::MakeDictionary(&batch));
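[Reviewer sketch — not part of the patch] For contrast with WriteIpcSlicedRecord above: the same slice on the CPU-resident batch serializes fine, which is what makes the device case a NotImplemented (GH-43029) rather than a general restriction — re-slicing has to recompute null counts from a validity bitmap the CPU cannot read out of device memory.

    auto cpu_sliced = batch->Slice(10);  // same offset, host-resident buffers
    ipc::IpcPayload cpu_payload;
    ASSERT_OK(ipc::GetRecordBatchPayload(*cpu_sliced, ipc::IpcWriteOptions::Defaults(),
                                         &cpu_payload));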
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 549fb34a2e880..f603e60c66555 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -154,6 +154,11 @@ class RecordBatchSerializer {
       return Status::CapacityError("Cannot write arrays larger than 2^31 - 1 in length");
     }

+    if (arr.offset() != 0 && arr.device_type() != DeviceAllocationType::kCPU) {
+      // https://github.com/apache/arrow/issues/43029
+      return Status::NotImplemented("Cannot compute null count for non-cpu sliced array");
+    }
+
     // push back all common elements
     field_nodes_.push_back({arr.length(), arr.null_count(), 0});

@@ -449,14 +454,22 @@ class RecordBatchSerializer {
   template <typename T>
   enable_if_base_binary<typename T::TypeClass, Status> Visit(const T& array) {
+    using offset_type = typename T::offset_type;
+
     std::shared_ptr<Buffer> value_offsets;
     RETURN_NOT_OK(GetZeroBasedValueOffsets(array, &value_offsets));
     auto data = array.value_data();

     int64_t total_data_bytes = 0;
-    if (value_offsets) {
-      total_data_bytes = array.value_offset(array.length()) - array.value_offset(0);
+    if (value_offsets && array.length() > 0) {
+      offset_type last_offset_value;
+      RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
+          value_offsets, array.length() * sizeof(offset_type), sizeof(offset_type),
+          reinterpret_cast<uint8_t*>(&last_offset_value)));
+
+      total_data_bytes = last_offset_value;
     }
+
     if (NeedTruncate(array.offset(), data.get(), total_data_bytes)) {
       // Slice the data buffer to include only the range we need now
       const int64_t start_offset = array.value_offset(0);
@@ -495,8 +508,15 @@ class RecordBatchSerializer {
     offset_type values_offset = 0;
     offset_type values_length = 0;
     if (value_offsets) {
-      values_offset = array.value_offset(0);
-      values_length = array.value_offset(array.length()) - values_offset;
+      RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
+          array.value_offsets(), array.offset() * sizeof(offset_type),
+          sizeof(offset_type), reinterpret_cast<uint8_t*>(&values_offset)));
+      offset_type last_values_offset = 0;
+      RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
+          array.value_offsets(), (array.offset() + array.length()) * sizeof(offset_type),
+          sizeof(offset_type), reinterpret_cast<uint8_t*>(&last_values_offset)));
+
+      values_length = last_values_offset - values_offset;
     }

     if (array.offset() != 0 || values_length < values->length()) {
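[Reviewer sketch — not part of the patch] The primitive doing the work above is MemoryManager::CopyBufferSliceToCPU, which stages a handful of bytes from a possibly device-resident buffer into a CPU scalar instead of dereferencing device memory directly. The access pattern in isolation (offsets_buffer and i are stand-ins):

    int32_t offset_value = 0;
    RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
        offsets_buffer,                               // std::shared_ptr<Buffer>, may live on a GPU
        i * sizeof(int32_t),                          // byte position of element i
        sizeof(int32_t),                              // number of bytes to copy
        reinterpret_cast<uint8_t*>(&offset_value)));  // CPU-side destination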
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 2f8ce3a6fa8c7..1e855311a98ed 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -85,19 +85,17 @@ struct SupportedBackend {
 const std::vector<SupportedBackend>& SupportedBackends() {
   static std::vector<SupportedBackend> backends = {
-  // ARROW-12316: Apple => mimalloc first, then jemalloc
-  //              non-Apple => jemalloc first, then mimalloc
-#if defined(ARROW_JEMALLOC) && !defined(__APPLE__)
-    {"jemalloc", MemoryPoolBackend::Jemalloc},
-#endif
+    // mimalloc is our preferred allocator for several reasons:
+    // 1) it has good performance
+    // 2) it is well-supported on all our main platforms (Linux, macOS, Windows)
+    // 3) it is easy to configure and has a consistent API.
 #ifdef ARROW_MIMALLOC
-  {"mimalloc", MemoryPoolBackend::Mimalloc},
+    {"mimalloc", MemoryPoolBackend::Mimalloc},
 #endif
-#if defined(ARROW_JEMALLOC) && defined(__APPLE__)
-  {"jemalloc", MemoryPoolBackend::Jemalloc},
+#ifdef ARROW_JEMALLOC
+    {"jemalloc", MemoryPoolBackend::Jemalloc},
 #endif
-  {"system", MemoryPoolBackend::System}
-  };
+    {"system", MemoryPoolBackend::System}};
   return backends;
 }
diff --git a/cpp/src/arrow/util/bit_stream_utils.h b/cpp/src/arrow/util/bit_stream_utils_internal.h
similarity index 100%
rename from cpp/src/arrow/util/bit_stream_utils.h
rename to cpp/src/arrow/util/bit_stream_utils_internal.h
diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index e026dfec24065..c7674af57f167 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -43,7 +43,7 @@
 #include "arrow/testing/util.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
 #include "arrow/util/bitmap.h"
 #include "arrow/util/bitmap_generate.h"
 #include "arrow/util/bitmap_ops.h"
diff --git a/cpp/src/arrow/util/decimal_benchmark.cc b/cpp/src/arrow/util/decimal_benchmark.cc
index d505532d71da1..fd77f451d3a05 100644
--- a/cpp/src/arrow/util/decimal_benchmark.cc
+++ b/cpp/src/arrow/util/decimal_benchmark.cc
@@ -77,7 +77,7 @@ static void ToString(benchmark::State& state) {  // NOLINT non-const reference
   state.SetItemsProcessed(state.iterations() * values.size());
 }

-constexpr int32_t kValueSize = 10;
+constexpr int32_t kValueSize = 12;

 static void BinaryCompareOp(benchmark::State& state) {  // NOLINT non-const reference
   std::vector<BasicDecimal128> v1, v2;
@@ -85,6 +85,8 @@ static void BinaryCompareOp(benchmark::State& state) {  // NOLINT non-const refe
     v1.emplace_back(100 + x, 100 + x);
     v2.emplace_back(200 + x, 200 + x);
   }
+  static_assert(kValueSize % 4 == 0,
+                "kValueSize needs to be a multiple of 4 to avoid out-of-bounds accesses");
   for (auto _ : state) {
     for (int x = 0; x < kValueSize; x += 4) {
       auto equal = v1[x] == v2[x];
@@ -93,7 +95,7 @@ static void BinaryCompareOp(benchmark::State& state) {  // NOLINT non-const refe
       benchmark::DoNotOptimize(less_than_or_equal);
       auto greater_than_or_equal1 = v1[x + 2] >= v2[x + 2];
       benchmark::DoNotOptimize(greater_than_or_equal1);
-      auto greater_than_or_equal2 = v1[x + 3] >= v1[x + 3];
+      auto greater_than_or_equal2 = v1[x + 3] >= v2[x + 3];
       benchmark::DoNotOptimize(greater_than_or_equal2);
     }
   }
@@ -106,6 +108,8 @@ static void BinaryCompareOpConstant(
   for (int x = 0; x < kValueSize; x++) {
     v1.emplace_back(100 + x, 100 + x);
   }
+  static_assert(kValueSize % 4 == 0,
+                "kValueSize needs to be a multiple of 4 to avoid out-of-bounds accesses");
   BasicDecimal128 constant(313, 212);
   for (auto _ : state) {
     for (int x = 0; x < kValueSize; x += 4) {
@@ -245,6 +249,8 @@ static void UnaryOp(benchmark::State& state) {  // NOLINT non-const reference
     v.emplace_back(100 + x, 100 + x);
   }

+  static_assert(kValueSize % 2 == 0,
+                "kValueSize needs to be a multiple of 2 to avoid out-of-bounds accesses");
   for (auto _ : state) {
     for (int x = 0; x < kValueSize; x += 2) {
       auto abs = v[x].Abs();
@@ -274,6 +280,8 @@ static void BinaryBitOp(benchmark::State& state) {  // NOLINT non-const referenc
     v2.emplace_back(200 + x, 200 + x);
   }

+  static_assert(kValueSize % 2 == 0,
+                "kValueSize needs to be a multiple of 2 to avoid out-of-bounds accesses");
   for (auto _ : state) {
     for (int x = 0; x < kValueSize; x += 2) {
       benchmark::DoNotOptimize(v1[x] |= v2[x]);
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 2a2175ec0fc72..be73c020c07f8 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -138,14 +138,31 @@ enum class ArrowLogLevel : int {

 #endif  // NDEBUG

+// These are internal-use macros and should not be used in public headers.
+#ifndef DCHECK
 #define DCHECK ARROW_DCHECK
+#endif
+#ifndef DCHECK_OK
 #define DCHECK_OK ARROW_DCHECK_OK
+#endif
+#ifndef DCHECK_EQ
 #define DCHECK_EQ ARROW_DCHECK_EQ
+#endif
+#ifndef DCHECK_NE
 #define DCHECK_NE ARROW_DCHECK_NE
+#endif
+#ifndef DCHECK_LE
 #define DCHECK_LE ARROW_DCHECK_LE
+#endif
+#ifndef DCHECK_LT
 #define DCHECK_LT ARROW_DCHECK_LT
+#endif
+#ifndef DCHECK_GE
 #define DCHECK_GE ARROW_DCHECK_GE
+#endif
+#ifndef DCHECK_GT
 #define DCHECK_GT ARROW_DCHECK_GT
+#endif

 // This code is adapted from
 // https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding_internal.h
similarity index 99%
rename from cpp/src/arrow/util/rle_encoding.h
rename to cpp/src/arrow/util/rle_encoding_internal.h
index e0f5690062a04..4575320659706 100644
--- a/cpp/src/arrow/util/rle_encoding.h
+++ b/cpp/src/arrow/util/rle_encoding_internal.h
@@ -27,7 +27,7 @@

 #include "arrow/util/bit_block_counter.h"
 #include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/macros.h"
diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc
index 26984e5f7735d..0cc0a276a25f4 100644
--- a/cpp/src/arrow/util/rle_encoding_test.cc
+++ b/cpp/src/arrow/util/rle_encoding_test.cc
@@ -28,10 +28,10 @@
 #include "arrow/buffer.h"
 #include "arrow/testing/random.h"
 #include "arrow/type.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/io_util.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"

 namespace arrow {
 namespace util {
diff --git a/cpp/src/arrow/util/tdigest.h b/cpp/src/arrow/util/tdigest.h
index 308df468840eb..ea033ed696d1b 100644
--- a/cpp/src/arrow/util/tdigest.h
+++ b/cpp/src/arrow/util/tdigest.h
@@ -56,7 +56,7 @@ class ARROW_EXPORT TDigest {
   // this function is intensively called and performance critical
   // call it only if you are sure no NAN exists in input data
   void Add(double value) {
-    DCHECK(!std::isnan(value)) << "cannot add NAN";
+    ARROW_DCHECK(!std::isnan(value)) << "cannot add NAN";
     if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) {
       MergeInput();
     }
diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h
index 74b6a2403a2bb..e77d713a44d01 100644
--- a/cpp/src/arrow/util/vector.h
+++ b/cpp/src/arrow/util/vector.h
@@ -31,8 +31,8 @@ namespace internal {

 template <typename T>
 std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
-  DCHECK(!values.empty());
-  DCHECK_LT(index, values.size());
+  ARROW_DCHECK(!values.empty());
+  ARROW_DCHECK_LT(index, values.size());
   std::vector<T> out;
   out.reserve(values.size() - 1);
   for (size_t i = 0; i < index; ++i) {
@@ -47,7 +47,7 @@ std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
 template <typename T>
 std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
                                 T new_element) {
-  DCHECK_LE(index, values.size());
+  ARROW_DCHECK_LE(index, values.size());
   std::vector<T> out;
   out.reserve(values.size() + 1);
   for (size_t i = 0; i < index; ++i) {
@@ -63,7 +63,7 @@ std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
 template <typename T>
 std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
                                     T new_element) {
-  DCHECK_LE(index, values.size());
+  ARROW_DCHECK_LE(index, values.size());
   std::vector<T> out;
   out.reserve(values.size());
   for (size_t i = 0; i < index; ++i) {
diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md
index 0dd663c5e5acc..5a0993b7b4336 100644
--- a/cpp/src/arrow/vendored/datetime/README.md
+++ b/cpp/src/arrow/vendored/datetime/README.md
@@ -17,7 +17,7 @@ copies or substantial portions of the Software.
 Sources for datetime are adapted from Howard Hinnant's date library
 (https://github.com/HowardHinnant/date).

-Sources are taken from changeset cc4685a21e4a4fdae707ad1233c61bbaff241f93
+Sources are taken from changeset 1ead6715dec030d340a316c927c877a3c4e5a00c
 of the above project.

 The following changes are made:
diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h
index fd2569c6de0f6..75e2624296672 100644
--- a/cpp/src/arrow/vendored/datetime/date.h
+++ b/cpp/src/arrow/vendored/datetime/date.h
@@ -4230,7 +4230,7 @@ inline
 std::basic_ostream<CharT, Traits>&
 operator<<(std::basic_ostream<CharT, Traits>& os, const local_time<Duration>& ut)
 {
-    return (os << sys_time<Duration>{ut.time_since_epoch()});
+    return (date::operator<<(os, sys_time<Duration>{ut.time_since_epoch()}));
 }

 namespace detail
@@ -6353,7 +6353,10 @@ read_signed(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
     if (('0' <= c && c <= '9') || c == '-' || c == '+')
     {
         if (c == '-' || c == '+')
+        {
             (void)is.get();
+            --M;
+        }
         auto x = static_cast<int>(read_unsigned(is, std::max(m, 1u), M));
         if (!is.fail())
         {
@@ -6526,7 +6529,14 @@ read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args)
         *e++ = static_cast<CharT>(CharT(u % 10) + CharT{'0'});
         u /= 10;
     } while (u > 0);
+#if defined(__GNUC__) && __GNUC__ >= 11
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
     std::reverse(buf, e);
+#if defined(__GNUC__) && __GNUC__ >= 11
+#pragma GCC diagnostic pop
+#endif
     for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p)
         read(is, *p);
 }
@@ -6592,7 +6602,7 @@ from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
     CONSTDATA int not_a_year = numeric_limits<short>::min();
     CONSTDATA int not_a_2digit_year = 100;
-    CONSTDATA int not_a_century = not_a_year / 100;
+    CONSTDATA int not_a_century = numeric_limits<int>::min();
     CONSTDATA int not_a_month = 0;
     CONSTDATA int not_a_day = 0;
     CONSTDATA int not_a_hour = numeric_limits<int>::min();
@@ -7519,7 +7529,12 @@ from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
                 {
                     auto c = static_cast<char>(Traits::to_char_type(ic));
                     if (c == '-')
+                    {
                         neg = true;
+                        (void)is.get();
+                    }
+                    else if (c == '+')
+                        (void)is.get();
                 }
                 if (modified == CharT{})
                 {
@@ -7735,9 +7750,7 @@ from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
                         year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) +
                                                    weeks{U-1} +
                                                    (weekday{static_cast<unsigned>(wd)} - Sunday);
-                        if (Y == not_a_year)
-                            Y = static_cast<int>(ymd_trial.year());
-                        else if (year{Y} != ymd_trial.year())
+                        if (year{Y} != ymd_trial.year())
                             goto broken;
                         if (m == not_a_month)
                             m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
@@ -7754,9 +7767,7 @@ from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
                         year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) +
                                                    weeks{W-1} +
                                                    (weekday{static_cast<unsigned>(wd)} - Monday);
-                        if (Y == not_a_year)
-                            Y = static_cast<int>(ymd_trial.year());
-                        else if (year{Y} != ymd_trial.year())
+                        if (year{Y} != ymd_trial.year())
                             goto broken;
                         if (m == not_a_month)
                             m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
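[Reviewer sketch — not part of the patch] My reading of the read_signed change above: a parsed sign now counts against the field width. With a width-4 field, the old code consumed five characters of "+2024" (sign plus four digits); decrementing M keeps the total at four.

    unsigned M = 4;                  // field width from the format string
    if (c == '-' || c == '+')
    {
        (void)is.get();              // the sign is consumed...
        --M;                         // ...and charged against the width budget
    }
    auto x = static_cast<int>(read_unsigned(is, std::max(m, 1u), M));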
diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp
index e94c1bc8ae682..44c627775f3d7 100644
--- a/cpp/src/arrow/vendored/datetime/tz.cpp
+++ b/cpp/src/arrow/vendored/datetime/tz.cpp
@@ -96,6 +96,10 @@
 #  define TARGET_OS_SIMULATOR 0
 #endif

+#if defined(ANDROID) || defined(__ANDROID__)
+#include <sys/system_properties.h>
+#endif
+
 #if USE_OS_TZDB
 #  include <dirent.h>
 #endif
@@ -2709,7 +2713,8 @@ operator<<(std::ostream& os, const time_zone& z)
         os.width(8);
         os << s.format_ << "   ";
         os << s.until_year_ << ' ' << s.until_date_;
-        os << "   " << s.until_utc_ << " UTC";
+        os << "   ";
+        date::operator<<(os, s.until_utc_) << " UTC";
         os << "   " << s.until_std_ << " STD";
         os << "   " << s.until_loc_;
         os << "   " << make_time(s.initial_save_);
@@ -2734,8 +2739,7 @@ operator<<(std::ostream& os, const time_zone& z)
 std::ostream&
 operator<<(std::ostream& os, const leap_second& x)
 {
-    using namespace date;
-    return os << x.date_ << "  +";
+    return date::operator<<(os, x.date_) << "  +";
 }

 #if USE_OS_TZDB
@@ -3716,6 +3720,67 @@ get_tzdb()
     return get_tzdb_list().front();
 }

+namespace {
+
+class recursion_limiter
+{
+    unsigned depth_ = 0;
+    unsigned limit_;
+
+    class restore_recursion_depth;
+
+public:
+    recursion_limiter(recursion_limiter const&) = delete;
+    recursion_limiter& operator=(recursion_limiter const&) = delete;
+
+    explicit constexpr recursion_limiter(unsigned limit) noexcept;
+
+    restore_recursion_depth count();
+};
+
+class recursion_limiter::restore_recursion_depth
+{
+    recursion_limiter* rc_;
+
+public:
+    ~restore_recursion_depth();
+    restore_recursion_depth(restore_recursion_depth&&) = default;
+
+    explicit restore_recursion_depth(recursion_limiter* rc) noexcept;
+};
+
+inline
+recursion_limiter::restore_recursion_depth::~restore_recursion_depth()
+{
+    --(rc_->depth_);
+}
+
+inline
+recursion_limiter::restore_recursion_depth::restore_recursion_depth(recursion_limiter* rc)
+    noexcept
+    : rc_{rc}
+{}
+
+inline
+constexpr
+recursion_limiter::recursion_limiter(unsigned limit) noexcept
+    : limit_{limit}
+{
+}
+
+inline
+recursion_limiter::restore_recursion_depth
+recursion_limiter::count()
+{
+    ++depth_;
+    if (depth_ > limit_)
+        throw std::runtime_error("recursion limit of " +
+                                 std::to_string(limit_) + " exceeded");
+    return restore_recursion_depth{this};
+}
+
+}  // unnamed namespace
+
 const time_zone*
 #if HAS_STRING_VIEW
 tzdb::locate_zone(std::string_view tz_name) const
@@ -3723,6 +3788,10 @@ tzdb::locate_zone(std::string_view tz_name) const
 tzdb::locate_zone(const std::string& tz_name) const
 #endif
 {
+    // If a link-to-link chain exceeds this limit, give up
+    thread_local recursion_limiter rc{10};
+    auto restore_count = rc.count();
+
     auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name,
 #if HAS_STRING_VIEW
         [](const time_zone& z, const std::string_view& nm)
@@ -3746,13 +3815,7 @@ tzdb::locate_zone(const std::string& tz_name) const
             });
         if (li != links.end() && li->name() == tz_name)
         {
-            zi = std::lower_bound(zones.begin(), zones.end(), li->target(),
-                [](const time_zone& z, const std::string& nm)
-                {
-                    return z.name() < nm;
-                });
-            if (zi != zones.end() && zi->name() == li->target())
-                return &*zi;
+            return locate_zone(li->target());
         }
 #endif  // !USE_OS_TZDB
     throw std::runtime_error(std::string(tz_name) + " not found in timezone database");
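[Reviewer sketch — not part of the patch] The limiter exists because tzdata links may alias other links, and locate_zone now recurses on li->target(). A standalone rendition of the same RAII counting idea, resolving a toy link table with the same depth cap:

    #include <map>
    #include <stdexcept>
    #include <string>

    std::string resolve(const std::map<std::string, std::string>& links,
                        const std::string& name) {
      thread_local unsigned depth = 0;
      struct Guard { unsigned& d; ~Guard() { --d; } } guard{depth};
      if (++depth > 10) throw std::runtime_error("recursion limit of 10 exceeded");
      auto it = links.find(name);
      return it == links.end() ? name : resolve(links, it->second);  // follow the chain;
    }                                                                // cycles throw at depth 11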
@@ -4038,6 +4101,18 @@ tzdb::current_zone() const
         if (!result.empty())
             return locate_zone(result);
 #endif
+        // Fall through to try other means.
+    }
+    {
+        // On Android, it is not possible to use file based approach either,
+        // we have to ask the value of `persist.sys.timezone` system property
+#if defined(ANDROID) || defined(__ANDROID__)
+        char sys_timezone[PROP_VALUE_MAX];
+        if (__system_property_get("persist.sys.timezone", sys_timezone) > 0)
+        {
+            return locate_zone(sys_timezone);
+        }
+#endif  // defined(ANDROID) || defined(__ANDROID__)
         // Fall through to try other means.
     }
     {
diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h
index 467db6d199793..df6d1a851ac9d 100644
--- a/cpp/src/arrow/vendored/datetime/tz.h
+++ b/cpp/src/arrow/vendored/datetime/tz.h
@@ -239,8 +239,8 @@ nonexistent_local_time::make_msg(local_time<Duration> tp, const local_info& i)
        << i.first.abbrev << " and\n"
        << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' '
        << i.second.abbrev
-       << " which are both equivalent to\n"
-       << i.first.end << " UTC";
+       << " which are both equivalent to\n";
+    date::operator<<(os, i.first.end) << " UTC";
     return os.str();
 }

diff --git a/cpp/src/arrow/vendored/datetime/tz_private.h b/cpp/src/arrow/vendored/datetime/tz_private.h
index 6b7a91493e103..a6bb8fd30a0c7 100644
--- a/cpp/src/arrow/vendored/datetime/tz_private.h
+++ b/cpp/src/arrow/vendored/datetime/tz_private.h
@@ -291,8 +291,7 @@ struct transition
 std::ostream&
 operator<<(std::ostream& os, const transition& t)
 {
-    using date::operator<<;
-    os << t.timepoint << "Z ";
+    date::operator<<(os, t.timepoint) << "Z ";
     if (t.info->offset >= std::chrono::seconds{0})
         os << '+';
     os << make_time(t.info->offset);
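[Reviewer sketch — not part of the patch] Several hunks above (tz.cpp, tz.h, tz_private.h, date.h) switch from unqualified streaming to explicit date::operator<< calls. My understanding, hedged: once C++20's std::chrono stream operators are visible alongside `using namespace date;`, the unqualified form can become ambiguous, while the qualified call names the intended overload and still returns the stream, so it chains:

    // os << tp;                        // `tp` a date::sys_time<Duration>; may be ambiguous under C++20
    date::operator<<(os, tp) << " UTC";  // unambiguous, same output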
diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h
index 5d160bb22ca68..4115df7ffb22b 100644
--- a/cpp/src/gandiva/dex_visitor.h
+++ b/cpp/src/gandiva/dex_visitor.h
@@ -70,7 +70,7 @@ class GANDIVA_EXPORT DexVisitor {

 /// Default implementation with only DCHECK().
 #define VISIT_DCHECK(DEX_CLASS) \
-  void Visit(const DEX_CLASS& dex) override { DCHECK(0); }
+  void Visit(const DEX_CLASS& dex) override { ARROW_DCHECK(0); }

 class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor {
   VISIT_DCHECK(VectorReadValidityDex)
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index 565c3f142502d..3a69500e38bcf 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -67,7 +67,7 @@ class GANDIVA_EXPORT Engine {
   /// Add the function to the list of IR functions that need to be compiled.
   /// Compiling only the functions that are used by the module saves time.
   void AddFunctionToCompile(const std::string& fname) {
-    DCHECK(!module_finalized_);
+    ARROW_DCHECK(!module_finalized_);
     functions_to_compile_.push_back(fname);
   }
diff --git a/cpp/src/gandiva/eval_batch.h b/cpp/src/gandiva/eval_batch.h
index 9644010b721af..feb4cdc975005 100644
--- a/cpp/src/gandiva/eval_batch.h
+++ b/cpp/src/gandiva/eval_batch.h
@@ -53,22 +53,22 @@ class EvalBatch {
   int GetNumBuffers() const { return num_buffers_; }

   const uint8_t* GetBuffer(int idx) const {
-    DCHECK(idx <= num_buffers_);
+    ARROW_DCHECK(idx <= num_buffers_);
     return (buffers_array_.get())[idx];
   }

   uint8_t* GetBuffer(int idx) {
-    DCHECK(idx <= num_buffers_);
+    ARROW_DCHECK(idx <= num_buffers_);
     return (buffers_array_.get())[idx];
   }

   int64_t GetBufferOffset(int idx) const {
-    DCHECK(idx <= num_buffers_);
+    ARROW_DCHECK(idx <= num_buffers_);
     return (buffer_offsets_array_.get())[idx];
   }

   void SetBuffer(int idx, uint8_t* buffer, int64_t offset) {
-    DCHECK(idx <= num_buffers_);
+    ARROW_DCHECK(idx <= num_buffers_);
     (buffers_array_.get())[idx] = buffer;
     (buffer_offsets_array_.get())[idx] = offset;
   }
@@ -80,11 +80,11 @@ class EvalBatch {
   }

   const uint8_t* GetLocalBitMap(int idx) const {
-    DCHECK(idx <= GetNumLocalBitMaps());
+    ARROW_DCHECK(idx <= GetNumLocalBitMaps());
     return local_bitmaps_holder_->GetLocalBitMap(idx);
   }

   uint8_t* GetLocalBitMap(int idx) {
-    DCHECK(idx <= GetNumLocalBitMaps());
+    ARROW_DCHECK(idx <= GetNumLocalBitMaps());
     return local_bitmaps_holder_->GetLocalBitMap(idx);
   }
diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h
index d6f0952713efc..7768a7f7e4bde 100644
--- a/cpp/src/gandiva/llvm_types.h
+++ b/cpp/src/gandiva/llvm_types.h
@@ -97,7 +97,7 @@ class GANDIVA_EXPORT LLVMTypes {
     } else if (type->isFloatingPointTy()) {
       return llvm::ConstantFP::get(type, 0);
     } else {
-      DCHECK(type->isPointerTy());
+      ARROW_DCHECK(type->isPointerTy());
       return llvm::ConstantPointerNull::getNullValue(type);
     }
   }
diff --git a/cpp/src/gandiva/local_bitmaps_holder.h b/cpp/src/gandiva/local_bitmaps_holder.h
index a172fb973c4a5..dc24a32e7cad0 100644
--- a/cpp/src/gandiva/local_bitmaps_holder.h
+++ b/cpp/src/gandiva/local_bitmaps_holder.h
@@ -40,7 +40,7 @@ class LocalBitMapsHolder {
   uint8_t** GetLocalBitMapArray() const { return local_bitmaps_array_.get(); }

   uint8_t* GetLocalBitMap(int idx) const {
-    DCHECK(idx <= GetNumLocalBitMaps());
+    ARROW_DCHECK(idx <= GetNumLocalBitMaps());
     return local_bitmaps_array_.get()[idx];
   }
diff --git a/cpp/src/gandiva/selection_vector_impl.h b/cpp/src/gandiva/selection_vector_impl.h
index dc9724ca86fe2..234298daf5748 100644
--- a/cpp/src/gandiva/selection_vector_impl.h
+++ b/cpp/src/gandiva/selection_vector_impl.h
@@ -60,7 +60,7 @@ class SelectionVectorImpl : public SelectionVector {
   int64_t GetNumSlots() const override { return num_slots_; }

   void SetNumSlots(int64_t num_slots) override {
-    DCHECK_LE(num_slots, max_slots_);
+    ARROW_DCHECK_LE(num_slots, max_slots_);
     num_slots_ = num_slots;
   }
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index dc80f08e72cfe..17574261d891d 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -408,6 +408,7 @@ add_parquet_test(arrow-internals-test SOURCES arrow/path_internal_test.cc

 if(PARQUET_REQUIRE_ENCRYPTION)
   add_parquet_test(encryption-test
                    SOURCES
+                   encryption/encryption_internal_test.cc
                    encryption/write_configurations_test.cc
                    encryption/read_configurations_test.cc
                    encryption/properties_test.cc
diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h
index 909563d013fed..82172f363ba7e 100644
--- a/cpp/src/parquet/bloom_filter.h
+++ b/cpp/src/parquet/bloom_filter.h
@@ -221,7 +221,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
   /// kMaximumBloomFilterBytes, and the return value is always a power of 2
   static uint32_t OptimalNumOfBytes(uint32_t ndv, double fpp) {
     uint32_t optimal_num_of_bits = OptimalNumOfBits(ndv, fpp);
-    DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits));
+    ARROW_DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits));
     return optimal_num_of_bits >> 3;
   }

@@ -233,7 +233,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
   /// @return it always return a value between kMinimumBloomFilterBytes * 8 and
   /// kMaximumBloomFilterBytes * 8, and the return value is always a power of 16
   static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) {
-    DCHECK(fpp > 0.0 && fpp < 1.0);
+    ARROW_DCHECK(fpp > 0.0 && fpp < 1.0);
     const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8));
     uint32_t num_bits;
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 52add8f339fc3..05ee6a16c5448 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -36,14 +36,14 @@
 #include "arrow/array/builder_primitive.h"
 #include "arrow/chunked_array.h"
 #include "arrow/type.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/compression.h"
 #include "arrow/util/crc32.h"
 #include "arrow/util/int_util_overflow.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
 #include "arrow/util/unreachable.h"
 #include "parquet/column_page.h"
 #include "parquet/encoding.h"
@@ -512,10 +512,11 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
     // Decrypt it if we need to
     if (crypto_ctx_.data_decryptor != nullptr) {
       PARQUET_THROW_NOT_OK(decryption_buffer_->Resize(
-          compressed_len - crypto_ctx_.data_decryptor->CiphertextSizeDelta(),
+          crypto_ctx_.data_decryptor->PlaintextLength(compressed_len),
           /*shrink_to_fit=*/false));
       compressed_len = crypto_ctx_.data_decryptor->Decrypt(
-          page_buffer->data(), compressed_len, decryption_buffer_->mutable_data());
+          page_buffer->span_as<uint8_t>(),
+          decryption_buffer_->mutable_span_as<uint8_t>());
       page_buffer = decryption_buffer_;
     }
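[Reviewer sketch — not part of the patch] The sizing contract change above, in isolation; decryptor and buffer are stand-ins for crypto_ctx_.data_decryptor and decryption_buffer_. Callers no longer subtract a delta themselves, so a truncated page now throws instead of resizing to a negative length.

    // Before: int64_t out_len = compressed_len - decryptor->CiphertextSizeDelta();
    int plaintext_len = decryptor->PlaintextLength(compressed_len);  // validates, may throw
    PARQUET_THROW_NOT_OK(buffer->Resize(plaintext_len, /*shrink_to_fit=*/false));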
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index ac1c3ea2e3e20..90e0102b422bb 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -32,7 +32,7 @@
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/checked_cast.h"
@@ -41,7 +41,7 @@
 #include "arrow/util/endian.h"
 #include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
 #include "arrow/util/type_traits.h"
 #include "arrow/visit_array_inline.h"
 #include "parquet/column_page.h"
@@ -303,9 +303,10 @@ class SerializedPageWriter : public PageWriter {
     if (data_encryptor_.get()) {
       UpdateEncryption(encryption::kDictionaryPage);
       PARQUET_THROW_NOT_OK(encryption_buffer_->Resize(
-          data_encryptor_->CiphertextSizeDelta() + output_data_len, false));
-      output_data_len = data_encryptor_->Encrypt(compressed_data->data(), output_data_len,
-                                                 encryption_buffer_->mutable_data());
+          data_encryptor_->CiphertextLength(output_data_len), false));
+      output_data_len =
+          data_encryptor_->Encrypt(compressed_data->span_as<uint8_t>(),
+                                   encryption_buffer_->mutable_span_as<uint8_t>());
       output_data_buffer = encryption_buffer_->data();
     }

@@ -395,11 +396,11 @@ class SerializedPageWriter : public PageWriter {

     if (data_encryptor_.get()) {
       PARQUET_THROW_NOT_OK(encryption_buffer_->Resize(
-          data_encryptor_->CiphertextSizeDelta() + output_data_len, false));
+          data_encryptor_->CiphertextLength(output_data_len), false));
       UpdateEncryption(encryption::kDataPage);
-      output_data_len = data_encryptor_->Encrypt(compressed_data->data(),
-                                                 static_cast<int>(output_data_len),
-                                                 encryption_buffer_->mutable_data());
+      output_data_len =
+          data_encryptor_->Encrypt(compressed_data->span_as<uint8_t>(),
+                                   encryption_buffer_->mutable_span_as<uint8_t>());
       output_data_buffer = encryption_buffer_->data();
     }
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 54e1e000040a1..16a1e249273f6 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -33,7 +33,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_block_counter.h"
 #include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/bitmap_writer.h"
@@ -42,7 +42,7 @@
 #include "arrow/util/hashing.h"
 #include "arrow/util/int_util_overflow.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
 #include "arrow/util/ubsan.h"
 #include "arrow/visit_data_inline.h"
 #include "parquet/exception.h"
@@ -2732,7 +2732,6 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {

-template <typename DType>
 class DeltaLengthByteArrayEncoder : public EncoderImpl,
                                     virtual public TypedEncoder<ByteArrayType> {
  public:
@@ -2783,8 +2782,7 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl,
   DeltaBitPackEncoder<Int32Type> length_encoder_;
 };

-template <typename DType>
-void DeltaLengthByteArrayEncoder<DType>::Put(const ::arrow::Array& values) {
+void DeltaLengthByteArrayEncoder::Put(const ::arrow::Array& values) {
   AssertBaseBinary(values);
   if (::arrow::is_binary_like(values.type_id())) {
     PutBinaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
@@ -2793,8 +2791,7 @@ void DeltaLengthByteArrayEncoder::Put(const ::arrow::Array& values) {
   }
 }

-template <typename DType>
-void DeltaLengthByteArrayEncoder<DType>::Put(const T* src, int num_values) {
+void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) {
   if (num_values == 0) {
     return;
   }
@@ -2823,10 +2820,9 @@ void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) {
   }
 }

-template <typename DType>
-void DeltaLengthByteArrayEncoder<DType>::PutSpaced(const T* src, int num_values,
-                                                   const uint8_t* valid_bits,
-                                                   int64_t valid_bits_offset) {
+void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values,
+                                            const uint8_t* valid_bits,
+                                            int64_t valid_bits_offset) {
   if (valid_bits != NULLPTR) {
     PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
                                                                  this->memory_pool()));
@@ -2839,8 +2835,7 @@ void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values,
   }
 }

-template <typename DType>
-std::shared_ptr<Buffer> DeltaLengthByteArrayEncoder<DType>::FlushValues() {
+std::shared_ptr<Buffer> DeltaLengthByteArrayEncoder::FlushValues() {
   std::shared_ptr<Buffer> encoded_lengths = length_encoder_.FlushValues();

   std::shared_ptr<Buffer> data;
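[Reviewer sketch — not part of the patch] Gloss on the de-templating above: DELTA_LENGTH_BYTE_ARRAY is only ever produced for BYTE_ARRAY (anything else throws in the MakeEncoder hunk below), so the unused template parameter was dropped. Construction through the factory is unchanged; descr is a stand-in ColumnDescriptor*.

    std::unique_ptr<Encoder> encoder =
        MakeEncoder(Type::BYTE_ARRAY, Encoding::DELTA_LENGTH_BYTE_ARRAY,
                    /*use_dictionary=*/false, descr, ::arrow::default_memory_pool());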
@@ -3366,7 +3361,7 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {

   DeltaBitPackEncoder<Int32Type> prefix_length_encoder_;
-  DeltaLengthByteArrayEncoder<ByteArrayType> suffix_encoder_;
+  DeltaLengthByteArrayEncoder suffix_encoder_;
   std::string last_value_;
   const ByteArray empty_;
   std::unique_ptr<ResizableBuffer> buffer_;
@@ -3934,7 +3929,7 @@ std::unique_ptr<Encoder> MakeEncoder(Type::type type_num, Encoding::type encoding,
   } else if (encoding == Encoding::DELTA_LENGTH_BYTE_ARRAY) {
     switch (type_num) {
       case Type::BYTE_ARRAY:
-        return std::make_unique<DeltaLengthByteArrayEncoder<ByteArrayType>>(descr, pool);
+        return std::make_unique<DeltaLengthByteArrayEncoder>(descr, pool);
       default:
         throw ParquetException("DELTA_LENGTH_BYTE_ARRAY only supports BYTE_ARRAY");
     }
diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h
index 8fd7ec8d3d015..1ddef9e8236db 100644
--- a/cpp/src/parquet/encryption/encryption.h
+++ b/cpp/src/parquet/encryption/encryption.h
@@ -89,6 +89,14 @@ inline const uint8_t* str2bytes(const std::string& str) {
   return reinterpret_cast<const uint8_t*>(cbytes);
 }

+inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
+  if (str.empty()) {
+    return {};
+  }
+
+  return {reinterpret_cast<const uint8_t*>(str.data()), str.size()};
+}
+
 class PARQUET_EXPORT ColumnEncryptionProperties {
  public:
  class PARQUET_EXPORT Builder {
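[Reviewer sketch — not part of the patch] str2span adapts the existing std::string key/AAD plumbing to the span-based encryptor API below without copying; note the empty-string case yields an empty span rather than a span over a dangling pointer.

    std::string key_bytes = /* e.g. a 16/24/32-byte key held as std::string */;
    ::arrow::util::span<const uint8_t> key = str2span(key_bytes);
    // key.size() now feeds the explicit length checks in encryption_internal.cc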
diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc
index 465b14793219f..99d1707f4a8d4 100644
--- a/cpp/src/parquet/encryption/encryption_internal.cc
+++ b/cpp/src/parquet/encryption/encryption_internal.cc
@@ -31,6 +31,7 @@
 #include "parquet/encryption/openssl_internal.h"
 #include "parquet/exception.h"

+using ::arrow::util::span;
 using parquet::ParquetException;

 namespace parquet::encryption {
@@ -57,12 +58,12 @@ class AesEncryptor::AesEncryptorImpl {

   ~AesEncryptorImpl() { WipeOut(); }

-  int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
-              int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext);
+  int Encrypt(span<const uint8_t> plaintext, span<const uint8_t> key,
+              span<const uint8_t> aad, span<uint8_t> ciphertext);

-  int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key,
-                          int key_len, const uint8_t* aad, int aad_len,
-                          const uint8_t* nonce, uint8_t* encrypted_footer);
+  int SignedFooterEncrypt(span<const uint8_t> footer, span<const uint8_t> key,
+                          span<const uint8_t> aad, span<const uint8_t> nonce,
+                          span<uint8_t> encrypted_footer);

   void WipeOut() {
     if (nullptr != ctx_) {
       EVP_CIPHER_CTX_free(ctx_);
@@ -70,7 +71,21 @@ class AesEncryptor::AesEncryptorImpl {
     }
   }

-  int ciphertext_size_delta() { return ciphertext_size_delta_; }
+  [[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const {
+    if (plaintext_len < 0) {
+      std::stringstream ss;
+      ss << "Negative plaintext length " << plaintext_len;
+      throw ParquetException(ss.str());
+    } else if (plaintext_len >
+               std::numeric_limits<int32_t>::max() - ciphertext_size_delta_) {
+      std::stringstream ss;
+      ss << "Plaintext length " << plaintext_len << " plus ciphertext size delta "
+         << ciphertext_size_delta_ << " overflows int32";
+      throw ParquetException(ss.str());
+    }
+
+    return static_cast<int32_t>(plaintext_len + ciphertext_size_delta_);
+  }

  private:
   EVP_CIPHER_CTX* ctx_;
@@ -79,12 +94,12 @@ class AesEncryptor::AesEncryptorImpl {
   int ciphertext_size_delta_;
   int length_buffer_length_;

-  int GcmEncrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
-                 int key_len, const uint8_t* nonce, const uint8_t* aad, int aad_len,
-                 uint8_t* ciphertext);
+  int GcmEncrypt(span<const uint8_t> plaintext, span<const uint8_t> key,
+                 span<const uint8_t> nonce, span<const uint8_t> aad,
+                 span<uint8_t> ciphertext);

-  int CtrEncrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
-                 int key_len, const uint8_t* nonce, uint8_t* ciphertext);
+  int CtrEncrypt(span<const uint8_t> plaintext, span<const uint8_t> key,
+                 span<const uint8_t> nonce, span<uint8_t> ciphertext);
 };

 AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int key_len,
@@ -136,12 +151,21 @@ AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int
   }
 }

-int AesEncryptor::AesEncryptorImpl::SignedFooterEncrypt(
-    const uint8_t* footer, int footer_len, const uint8_t* key, int key_len,
-    const uint8_t* aad, int aad_len, const uint8_t* nonce, uint8_t* encrypted_footer) {
-  if (key_length_ != key_len) {
+int AesEncryptor::AesEncryptorImpl::SignedFooterEncrypt(span<const uint8_t> footer,
+                                                        span<const uint8_t> key,
+                                                        span<const uint8_t> aad,
+                                                        span<const uint8_t> nonce,
+                                                        span<uint8_t> encrypted_footer) {
+  if (static_cast<size_t>(key_length_) != key.size()) {
+    std::stringstream ss;
+    ss << "Wrong key length " << key.size() << ". Should be " << key_length_;
+    throw ParquetException(ss.str());
+  }
+
+  if (encrypted_footer.size() != footer.size() + ciphertext_size_delta_) {
     std::stringstream ss;
-    ss << "Wrong key length " << key_len << ". Should be " << key_length_;
+    ss << "Encrypted footer buffer length " << encrypted_footer.size()
+       << " does not match expected length " << (footer.size() + ciphertext_size_delta_);
     throw ParquetException(ss.str());
   }
@@ -149,72 +173,85 @@
     throw ParquetException("Must use AES GCM (metadata) encryptor");
   }

-  return GcmEncrypt(footer, footer_len, key, key_len, nonce, aad, aad_len,
-                    encrypted_footer);
+  return GcmEncrypt(footer, key, nonce, aad, encrypted_footer);
 }

-int AesEncryptor::AesEncryptorImpl::Encrypt(const uint8_t* plaintext, int plaintext_len,
-                                            const uint8_t* key, int key_len,
-                                            const uint8_t* aad, int aad_len,
-                                            uint8_t* ciphertext) {
-  if (key_length_ != key_len) {
+int AesEncryptor::AesEncryptorImpl::Encrypt(span<const uint8_t> plaintext,
+                                            span<const uint8_t> key,
+                                            span<const uint8_t> aad,
+                                            span<uint8_t> ciphertext) {
+  if (static_cast<size_t>(key_length_) != key.size()) {
     std::stringstream ss;
-    ss << "Wrong key length " << key_len << ". Should be " << key_length_;
+    ss << "Wrong key length " << key.size() << ". Should be " << key_length_;
    throw ParquetException(ss.str());
  }
-  uint8_t nonce[kNonceLength];
-  memset(nonce, 0, kNonceLength);
+  if (ciphertext.size() != plaintext.size() + ciphertext_size_delta_) {
+    std::stringstream ss;
+    ss << "Ciphertext buffer length " << ciphertext.size()
+       << " does not match expected length "
+       << (plaintext.size() + ciphertext_size_delta_);
+    throw ParquetException(ss.str());
+  }
+
+  std::array<uint8_t, kNonceLength> nonce{};

   // Random nonce
-  RAND_bytes(nonce, sizeof(nonce));
+  RAND_bytes(nonce.data(), kNonceLength);

   if (kGcmMode == aes_mode_) {
-    return GcmEncrypt(plaintext, plaintext_len, key, key_len, nonce, aad, aad_len,
-                      ciphertext);
+    return GcmEncrypt(plaintext, key, nonce, aad, ciphertext);
   }
-  return CtrEncrypt(plaintext, plaintext_len, key, key_len, nonce, ciphertext);
+  return CtrEncrypt(plaintext, key, nonce, ciphertext);
 }

-int AesEncryptor::AesEncryptorImpl::GcmEncrypt(const uint8_t* plaintext,
-                                               int plaintext_len, const uint8_t* key,
-                                               int key_len, const uint8_t* nonce,
-                                               const uint8_t* aad, int aad_len,
-                                               uint8_t* ciphertext) {
+int AesEncryptor::AesEncryptorImpl::GcmEncrypt(span<const uint8_t> plaintext,
+                                               span<const uint8_t> key,
+                                               span<const uint8_t> nonce,
+                                               span<const uint8_t> aad,
+                                               span<uint8_t> ciphertext) {
   int len;
   int ciphertext_len;

-  uint8_t tag[kGcmTagLength];
-  memset(tag, 0, kGcmTagLength);
+  std::array<uint8_t, kGcmTagLength> tag{};
+
+  if (nonce.size() != static_cast<size_t>(kNonceLength)) {
+    std::stringstream ss;
+    ss << "Invalid nonce size " << nonce.size() << ", expected " << kNonceLength;
+    throw ParquetException(ss.str());
+  }

   // Setting key and IV (nonce)
-  if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key, nonce)) {
+  if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key.data(), nonce.data())) {
     throw ParquetException("Couldn't set key and nonce");
   }

   // Setting additional authenticated data
-  if ((nullptr != aad) && (1 != EVP_EncryptUpdate(ctx_, nullptr, &len, aad, aad_len))) {
+  if ((!aad.empty()) && (1 != EVP_EncryptUpdate(ctx_, nullptr, &len, aad.data(),
+                                                static_cast<int>(aad.size())))) {
     throw ParquetException("Couldn't set AAD");
   }

   // Encryption
-  if (1 != EVP_EncryptUpdate(ctx_, ciphertext + length_buffer_length_ + kNonceLength,
-                             &len, plaintext, plaintext_len)) {
+  if (1 !=
+      EVP_EncryptUpdate(ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength,
+                        &len, plaintext.data(), static_cast<int>(plaintext.size()))) {
     throw ParquetException("Failed encryption update");
   }

   ciphertext_len = len;

   // Finalization
-  if (1 != EVP_EncryptFinal_ex(
-               ctx_, ciphertext + length_buffer_length_ + kNonceLength + len, &len)) {
+  if (1 !=
+      EVP_EncryptFinal_ex(
+          ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength + len, &len)) {
     throw ParquetException("Failed encryption finalization");
   }

   ciphertext_len += len;

   // Getting the tag
-  if (1 != EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_GET_TAG, kGcmTagLength, tag)) {
+  if (1 != EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_GET_TAG, kGcmTagLength, tag.data())) {
     throw ParquetException("Couldn't get AES-GCM tag");
   }

@@ -226,45 +263,53 @@ int AesEncryptor::AesEncryptorImpl::GcmEncrypt(const uint8_t* plaintext,
     ciphertext[1] = static_cast<uint8_t>(0xff & (buffer_size >> 8));
     ciphertext[0] = static_cast<uint8_t>(0xff & (buffer_size));
   }
-  std::copy(nonce, nonce + kNonceLength, ciphertext + length_buffer_length_);
-  std::copy(tag, tag + kGcmTagLength,
-            ciphertext + length_buffer_length_ + kNonceLength + ciphertext_len);
+  std::copy(nonce.begin(), nonce.begin() + kNonceLength,
+            ciphertext.begin() + length_buffer_length_);
+  std::copy(tag.begin(), tag.end(),
+            ciphertext.begin() + length_buffer_length_ + kNonceLength + ciphertext_len);

   return length_buffer_length_ + buffer_size;
 }
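[Reviewer sketch — not part of the patch] The buffer layout GcmEncrypt writes, for reference while reading the copies above (constant values per the Parquet encryption spec):

    // [ 4-byte LE length (optional) ][ 12-byte nonce ][ ciphertext ][ 16-byte GCM tag ]
    //
    // hence ciphertext_size_delta_ = length_buffer_length_ + kNonceLength + kGcmTagLength
    // and   CiphertextLength(n)    = n + ciphertext_size_delta_
    // which is exactly the relation the new span length checks enforce.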
-int AesEncryptor::AesEncryptorImpl::CtrEncrypt(const uint8_t* plaintext,
-                                               int plaintext_len, const uint8_t* key,
-                                               int key_len, const uint8_t* nonce,
-                                               uint8_t* ciphertext) {
+int AesEncryptor::AesEncryptorImpl::CtrEncrypt(span<const uint8_t> plaintext,
+                                               span<const uint8_t> key,
+                                               span<const uint8_t> nonce,
+                                               span<uint8_t> ciphertext) {
   int len;
   int ciphertext_len;

+  if (nonce.size() != static_cast<size_t>(kNonceLength)) {
+    std::stringstream ss;
+    ss << "Invalid nonce size " << nonce.size() << ", expected " << kNonceLength;
+    throw ParquetException(ss.str());
+  }
+
   // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
   // counter field.
   // The first 31 bits of the initial counter field are set to 0, the last bit
   // is set to 1.
-  uint8_t iv[kCtrIvLength];
-  memset(iv, 0, kCtrIvLength);
-  std::copy(nonce, nonce + kNonceLength, iv);
+  std::array<uint8_t, kCtrIvLength> iv{};
+  std::copy(nonce.begin(), nonce.begin() + kNonceLength, iv.begin());
   iv[kCtrIvLength - 1] = 1;

   // Setting key and IV
-  if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key, iv)) {
+  if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key.data(), iv.data())) {
     throw ParquetException("Couldn't set key and IV");
   }

   // Encryption
-  if (1 != EVP_EncryptUpdate(ctx_, ciphertext + length_buffer_length_ + kNonceLength,
-                             &len, plaintext, plaintext_len)) {
+  if (1 !=
+      EVP_EncryptUpdate(ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength,
+                        &len, plaintext.data(), static_cast<int>(plaintext.size()))) {
     throw ParquetException("Failed encryption update");
   }

   ciphertext_len = len;

   // Finalization
-  if (1 != EVP_EncryptFinal_ex(
-               ctx_, ciphertext + length_buffer_length_ + kNonceLength + len, &len)) {
+  if (1 !=
+      EVP_EncryptFinal_ex(
+          ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength + len, &len)) {
     throw ParquetException("Failed encryption finalization");
   }

@@ -278,29 +323,29 @@ int AesEncryptor::AesEncryptorImpl::CtrEncrypt(const uint8_t* plaintext,
     ciphertext[1] = static_cast<uint8_t>(0xff & (buffer_size >> 8));
     ciphertext[0] = static_cast<uint8_t>(0xff & (buffer_size));
   }
-  std::copy(nonce, nonce + kNonceLength, ciphertext + length_buffer_length_);
+  std::copy(nonce.begin(), nonce.begin() + kNonceLength,
+            ciphertext.begin() + length_buffer_length_);

   return length_buffer_length_ + buffer_size;
 }

 AesEncryptor::~AesEncryptor() {}

-int AesEncryptor::SignedFooterEncrypt(const uint8_t* footer, int footer_len,
-                                      const uint8_t* key, int key_len, const uint8_t* aad,
-                                      int aad_len, const uint8_t* nonce,
-                                      uint8_t* encrypted_footer) {
-  return impl_->SignedFooterEncrypt(footer, footer_len, key, key_len, aad, aad_len, nonce,
-                                    encrypted_footer);
+int AesEncryptor::SignedFooterEncrypt(span<const uint8_t> footer, span<const uint8_t> key,
+                                      span<const uint8_t> aad, span<const uint8_t> nonce,
+                                      span<uint8_t> encrypted_footer) {
+  return impl_->SignedFooterEncrypt(footer, key, aad, nonce, encrypted_footer);
 }

 void AesEncryptor::WipeOut() { impl_->WipeOut(); }

-int AesEncryptor::CiphertextSizeDelta() { return impl_->ciphertext_size_delta(); }
+int32_t AesEncryptor::CiphertextLength(int64_t plaintext_len) const {
+  return impl_->CiphertextLength(plaintext_len);
+}

-int AesEncryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
-                          int key_len, const uint8_t* aad, int aad_len,
-                          uint8_t* ciphertext) {
-  return impl_->Encrypt(plaintext, plaintext_len, key, key_len, aad, aad_len, ciphertext);
+int AesEncryptor::Encrypt(span<const uint8_t> plaintext, span<const uint8_t> key,
+                          span<const uint8_t> aad, span<uint8_t> ciphertext) {
+  return impl_->Encrypt(plaintext, key, aad, ciphertext);
 }

 AesEncryptor::AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata,
@@ -315,8 +360,8 @@ class AesDecryptor::AesDecryptorImpl {

   ~AesDecryptorImpl() { WipeOut(); }

-  int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
-              int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext);
+  int Decrypt(span<const uint8_t> ciphertext, span<const uint8_t> key,
+              span<const uint8_t> aad, span<uint8_t> plaintext);

   void WipeOut() {
     if (nullptr != ctx_) {
@@ -325,7 +370,29 @@ class AesDecryptor::AesDecryptorImpl {
     }
   }

-  int ciphertext_size_delta() { return ciphertext_size_delta_; }
+  [[nodiscard]] int PlaintextLength(int ciphertext_len) const {
+    if (ciphertext_len < ciphertext_size_delta_) {
+      std::stringstream ss;
+      ss << "Ciphertext length " << ciphertext_len << " is invalid, expected at least "
+         << ciphertext_size_delta_;
+      throw ParquetException(ss.str());
+    }
+    return ciphertext_len - ciphertext_size_delta_;
+  }
+
+  [[nodiscard]] int CiphertextLength(int plaintext_len) const {
+    if (plaintext_len < 0) {
+      std::stringstream ss;
+      ss << "Negative plaintext length " << plaintext_len;
+      throw ParquetException(ss.str());
+    } else if (plaintext_len > std::numeric_limits<int>::max() - ciphertext_size_delta_) {
+      std::stringstream ss;
+      ss << "Plaintext length " << plaintext_len << " plus ciphertext size delta "
+         << ciphertext_size_delta_ << " overflows int32";
+      throw ParquetException(ss.str());
+    }
+    return plaintext_len + ciphertext_size_delta_;
+  }

  private:
   EVP_CIPHER_CTX* ctx_;
@@ -333,17 +400,21 @@ class AesDecryptor::AesDecryptorImpl {
   int key_length_;
   int ciphertext_size_delta_;
   int length_buffer_length_;

-  int GcmDecrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
-                 int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext);
-  int CtrDecrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
-                 int key_len, uint8_t* plaintext);
+  /// Get the actual ciphertext length, inclusive of the length buffer length,
+  /// and validate that the provided buffer size is large enough.
+  [[nodiscard]] int GetCiphertextLength(span<const uint8_t> ciphertext) const;
+
+  int GcmDecrypt(span<const uint8_t> ciphertext, span<const uint8_t> key,
+                 span<const uint8_t> aad, span<uint8_t> plaintext);
+
+  int CtrDecrypt(span<const uint8_t> ciphertext, span<const uint8_t> key,
+                 span<uint8_t> plaintext);
 };

-int AesDecryptor::Decrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
-                          int key_len, const uint8_t* aad, int aad_len,
-                          uint8_t* ciphertext) {
-  return impl_->Decrypt(plaintext, plaintext_len, key, key_len, aad, aad_len, ciphertext);
+int AesDecryptor::Decrypt(span<const uint8_t> ciphertext, span<const uint8_t> key,
+                          span<const uint8_t> aad, span<uint8_t> plaintext) {
+  return impl_->Decrypt(ciphertext, key, aad, plaintext);
 }

 void AesDecryptor::WipeOut() { impl_->WipeOut(); }

@@ -398,23 +469,20 @@ AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id, int
   }
 }

-AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata,
-                                 std::vector<AesEncryptor*>* all_encryptors) {
-  return Make(alg_id, key_len, metadata, true /*write_length*/, all_encryptors);
+std::unique_ptr<AesEncryptor> AesEncryptor::Make(ParquetCipher::type alg_id, int key_len,
+                                                 bool metadata) {
+  return Make(alg_id, key_len, metadata, true /*write_length*/);
 }

-AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata,
-                                 bool write_length,
-                                 std::vector<AesEncryptor*>* all_encryptors) {
+std::unique_ptr<AesEncryptor> AesEncryptor::Make(ParquetCipher::type alg_id, int key_len,
+                                                 bool metadata, bool write_length) {
   if (ParquetCipher::AES_GCM_V1 != alg_id && ParquetCipher::AES_GCM_CTR_V1 != alg_id) {
     std::stringstream ss;
     ss << "Crypto algorithm " << alg_id << " is not supported";
     throw ParquetException(ss.str());
   }

-  AesEncryptor* encryptor = new AesEncryptor(alg_id, key_len, metadata, write_length);
-  if (all_encryptors != nullptr) all_encryptors->push_back(encryptor);
-  return encryptor;
+  return std::make_unique<AesEncryptor>(alg_id, key_len, metadata, write_length);
 }

 AesDecryptor::AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata,
@@ -438,56 +506,106 @@ std::shared_ptr<AesDecryptor> AesDecryptor::Make(
   return decryptor;
 }

-int AesDecryptor::CiphertextSizeDelta() { return impl_->ciphertext_size_delta(); }
-
-int AesDecryptor::AesDecryptorImpl::GcmDecrypt(const uint8_t* ciphertext,
-                                               int ciphertext_len, const uint8_t* key,
-                                               int key_len, const uint8_t* aad,
-                                               int aad_len, uint8_t* plaintext) {
-  int len;
-  int plaintext_len;
+int AesDecryptor::PlaintextLength(int ciphertext_len) const {
+  return impl_->PlaintextLength(ciphertext_len);
+}

-  uint8_t tag[kGcmTagLength];
-  memset(tag, 0, kGcmTagLength);
-  uint8_t nonce[kNonceLength];
-  memset(nonce, 0, kNonceLength);
+int AesDecryptor::CiphertextLength(int plaintext_len) const {
+  return impl_->CiphertextLength(plaintext_len);
+}
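[Reviewer sketch — not part of the patch] Ownership gloss on the Make changes above: the raw-pointer factory with its caller-maintained all_encryptors registry becomes plain value semantics.

    // Before: AesEncryptor* enc = AesEncryptor::Make(alg, 16, false, &all_encryptors);
    //         (the vector existed so someone could delete the encryptors later)
    std::unique_ptr<AesEncryptor> enc =
        AesEncryptor::Make(ParquetCipher::AES_GCM_V1, /*key_len=*/16, /*metadata=*/false);
    // enc->WipeOut() and destruction now happen at end of scope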
+ << " At least " << kBufferSizeLength << " bytes are required."; + throw ParquetException(ss.str()); + } - if (ciphertext_len > 0 && - ciphertext_len != (written_ciphertext_len + length_buffer_length_)) { - throw ParquetException("Wrong ciphertext length"); + // Extract ciphertext length + uint32_t written_ciphertext_len = (static_cast(ciphertext[3]) << 24) | + (static_cast(ciphertext[2]) << 16) | + (static_cast(ciphertext[1]) << 8) | + (static_cast(ciphertext[0])); + + if (written_ciphertext_len > + static_cast(std::numeric_limits::max() - length_buffer_length_)) { + std::stringstream ss; + ss << "Written ciphertext length " << written_ciphertext_len + << " plus length buffer length " << length_buffer_length_ << " overflows int"; + throw ParquetException(ss.str()); + } else if (ciphertext.size() < + static_cast(written_ciphertext_len) + length_buffer_length_) { + std::stringstream ss; + ss << "Serialized ciphertext length " + << (written_ciphertext_len + length_buffer_length_) + << " is greater than the provided ciphertext buffer length " + << ciphertext.size(); + throw ParquetException(ss.str()); } - ciphertext_len = written_ciphertext_len + length_buffer_length_; + + return static_cast(written_ciphertext_len) + length_buffer_length_; } else { - if (ciphertext_len == 0) { - throw ParquetException("Zero ciphertext length"); + if (ciphertext.size() > static_cast(std::numeric_limits::max())) { + std::stringstream ss; + ss << "Ciphertext buffer length " << ciphertext.size() << " overflows int"; + throw ParquetException(ss.str()); } + return static_cast(ciphertext.size()); + } +} + +int AesDecryptor::AesDecryptorImpl::GcmDecrypt(span ciphertext, + span key, + span aad, + span plaintext) { + int len; + int plaintext_len; + + std::array tag{}; + std::array nonce{}; + + int ciphertext_len = GetCiphertextLength(ciphertext); + + if (plaintext.size() < static_cast(ciphertext_len) - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext buffer length " << plaintext.size() << " is insufficient " + << "for ciphertext length " << ciphertext_len; + throw ParquetException(ss.str()); + } + + if (ciphertext_len < length_buffer_length_ + kNonceLength + kGcmTagLength) { + std::stringstream ss; + ss << "Invalid ciphertext length " << ciphertext_len << ". 
Expected at least " + << length_buffer_length_ + kNonceLength + kGcmTagLength << "\n"; + throw ParquetException(ss.str()); } // Extracting IV and tag - std::copy(ciphertext + length_buffer_length_, - ciphertext + length_buffer_length_ + kNonceLength, nonce); - std::copy(ciphertext + ciphertext_len - kGcmTagLength, ciphertext + ciphertext_len, - tag); + std::copy(ciphertext.begin() + length_buffer_length_, + ciphertext.begin() + length_buffer_length_ + kNonceLength, nonce.begin()); + std::copy(ciphertext.begin() + ciphertext_len - kGcmTagLength, + ciphertext.begin() + ciphertext_len, tag.begin()); // Setting key and IV - if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key, nonce)) { + if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), nonce.data())) { throw ParquetException("Couldn't set key and IV"); } // Setting additional authenticated data - if ((nullptr != aad) && (1 != EVP_DecryptUpdate(ctx_, nullptr, &len, aad, aad_len))) { + if ((!aad.empty()) && (1 != EVP_DecryptUpdate(ctx_, nullptr, &len, aad.data(), + static_cast(aad.size())))) { throw ParquetException("Couldn't set AAD"); } // Decryption if (!EVP_DecryptUpdate( - ctx_, plaintext, &len, ciphertext + length_buffer_length_ + kNonceLength, + ctx_, plaintext.data(), &len, + ciphertext.data() + length_buffer_length_ + kNonceLength, ciphertext_len - length_buffer_length_ - kNonceLength - kGcmTagLength)) { throw ParquetException("Failed decryption update"); } @@ -495,12 +613,12 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(const uint8_t* ciphertext, plaintext_len = len; // Checking the tag (authentication) - if (!EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_SET_TAG, kGcmTagLength, tag)) { + if (!EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_SET_TAG, kGcmTagLength, tag.data())) { throw ParquetException("Failed authentication"); } // Finalization - if (1 != EVP_DecryptFinal_ex(ctx_, plaintext + len, &len)) { + if (1 != EVP_DecryptFinal_ex(ctx_, plaintext.data() + len, &len)) { throw ParquetException("Failed decryption finalization"); } @@ -508,35 +626,33 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(const uint8_t* ciphertext, return plaintext_len; } -int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, - int ciphertext_len, const uint8_t* key, - int key_len, uint8_t* plaintext) { +int AesDecryptor::AesDecryptorImpl::CtrDecrypt(span ciphertext, + span key, + span plaintext) { int len; int plaintext_len; - uint8_t iv[kCtrIvLength]; - memset(iv, 0, kCtrIvLength); + std::array iv{}; - if (length_buffer_length_ > 0) { - // Extract ciphertext length - int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) | - ((ciphertext[2] & 0xff) << 16) | - ((ciphertext[1] & 0xff) << 8) | ((ciphertext[0] & 0xff)); + int ciphertext_len = GetCiphertextLength(ciphertext); - if (ciphertext_len > 0 && - ciphertext_len != (written_ciphertext_len + length_buffer_length_)) { - throw ParquetException("Wrong ciphertext length"); - } - ciphertext_len = written_ciphertext_len; - } else { - if (ciphertext_len == 0) { - throw ParquetException("Zero ciphertext length"); - } + if (plaintext.size() < static_cast(ciphertext_len) - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext buffer length " << plaintext.size() << " is insufficient " + << "for ciphertext length " << ciphertext_len; + throw ParquetException(ss.str()); + } + + if (ciphertext_len < length_buffer_length_ + kNonceLength) { + std::stringstream ss; + ss << "Invalid ciphertext length " << ciphertext_len << ". 
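GcmDecrypt parses the Parquet modular-encryption GCM layout: an optional 4-byte little-endian length prefix, then the 12-byte nonce, the ciphertext body, and the 16-byte authentication tag. A minimal sketch of the offset arithmetic (constant values as defined in this file; the helper name is illustrative, not part of the patch):

// Layout: [length prefix (0 or 4 bytes)][nonce (12)][body][tag (16)]
constexpr int kNonceLen = 12;  // kNonceLength in encryption_internal.cc
constexpr int kTagLen = 16;    // kGcmTagLength in encryption_internal.cc

// Number of bytes EVP_DecryptUpdate actually decrypts, given the total
// ciphertext length and the length-prefix width (0 or 4).
constexpr int GcmBodyLength(int ciphertext_len, int length_buffer_length) {
  return ciphertext_len - length_buffer_length - kNonceLen - kTagLen;
}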
-int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, - int ciphertext_len, const uint8_t* key, - int key_len, uint8_t* plaintext) { +int AesDecryptor::AesDecryptorImpl::CtrDecrypt(span<const uint8_t> ciphertext, + span<const uint8_t> key, + span<uint8_t> plaintext) { int len; int plaintext_len; - uint8_t iv[kCtrIvLength]; - memset(iv, 0, kCtrIvLength); + std::array<uint8_t, kCtrIvLength> iv{}; - if (length_buffer_length_ > 0) { - // Extract ciphertext length - int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) | - ((ciphertext[2] & 0xff) << 16) | - ((ciphertext[1] & 0xff) << 8) | ((ciphertext[0] & 0xff)); + int ciphertext_len = GetCiphertextLength(ciphertext); - if (ciphertext_len > 0 && - ciphertext_len != (written_ciphertext_len + length_buffer_length_)) { - throw ParquetException("Wrong ciphertext length"); - } - ciphertext_len = written_ciphertext_len; - } else { - if (ciphertext_len == 0) { - throw ParquetException("Zero ciphertext length"); - } + if (plaintext.size() < static_cast<size_t>(ciphertext_len) - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext buffer length " << plaintext.size() << " is insufficient " + << "for ciphertext length " << ciphertext_len; + throw ParquetException(ss.str()); + } + + if (ciphertext_len < length_buffer_length_ + kNonceLength) { + std::stringstream ss; + ss << "Invalid ciphertext length " << ciphertext_len << ". Expected at least " + << length_buffer_length_ + kNonceLength << "\n"; + throw ParquetException(ss.str()); } // Extracting nonce - std::copy(ciphertext + length_buffer_length_, - ciphertext + length_buffer_length_ + kNonceLength, iv); + std::copy(ciphertext.begin() + length_buffer_length_, + ciphertext.begin() + length_buffer_length_ + kNonceLength, iv.begin()); // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial // counter field. // The first 31 bits of the initial counter field are set to 0, the last bit @@ -544,21 +660,21 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, iv[kCtrIvLength - 1] = 1; // Setting key and IV - if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key, iv)) { + if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), iv.data())) { throw ParquetException("Couldn't set key and IV"); } // Decryption - if (!EVP_DecryptUpdate(ctx_, plaintext, &len, - ciphertext + length_buffer_length_ + kNonceLength, - ciphertext_len - kNonceLength)) { + if (!EVP_DecryptUpdate(ctx_, plaintext.data(), &len, + ciphertext.data() + length_buffer_length_ + kNonceLength, + ciphertext_len - length_buffer_length_ - kNonceLength)) { throw ParquetException("Failed decryption update"); } plaintext_len = len; // Finalization - if (1 != EVP_DecryptFinal_ex(ctx_, plaintext + len, &len)) { + if (1 != EVP_DecryptFinal_ex(ctx_, plaintext.data() + len, &len)) { throw ParquetException("Failed decryption finalization"); } @@ -566,21 +682,21 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, return plaintext_len; } -int AesDecryptor::AesDecryptorImpl::Decrypt(const uint8_t* ciphertext, int ciphertext_len, - const uint8_t* key, int key_len, - const uint8_t* aad, int aad_len, - uint8_t* plaintext) { - if (key_length_ != key_len) { +int AesDecryptor::AesDecryptorImpl::Decrypt(span<const uint8_t> ciphertext, + span<const uint8_t> key, + span<const uint8_t> aad, + span<uint8_t> plaintext) { + if (static_cast<size_t>(key_length_) != key.size()) { std::stringstream ss; - ss << "Wrong key length " << key_len << ". Should be " << key_length_; + ss << "Wrong key length " << key.size() << ". Should be " << key_length_; throw ParquetException(ss.str()); } if (kGcmMode == aes_mode_) { - return GcmDecrypt(ciphertext, ciphertext_len, key, key_len, aad, aad_len, plaintext); + return GcmDecrypt(ciphertext, key, aad, plaintext); } - return CtrDecrypt(ciphertext, ciphertext_len, key, key_len, plaintext); + return CtrDecrypt(ciphertext, key, plaintext); } static std::string ShortToBytesLe(int16_t input) { diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 1bdf47c56f291..c874b137ad1ad 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -21,6 +21,7 @@ #include <memory> #include <vector> +#include "arrow/util/span.h" #include "parquet/properties.h" #include "parquet/types.h" @@ -44,34 +45,37 @@ constexpr int8_t kBloomFilterHeader = 8; constexpr int8_t kBloomFilterBitset = 9; /// Performs AES encryption operations with GCM or CTR ciphers. -class AesEncryptor { +class PARQUET_EXPORT AesEncryptor { public: /// Can serve one key length only. Possible values: 16, 24, 32 bytes. /// If write_length is true, prepend ciphertext length to the ciphertext explicit AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata, bool write_length = true); - static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata, - std::vector<AesEncryptor*>* all_encryptors); + static std::unique_ptr<AesEncryptor> Make(ParquetCipher::type alg_id, int key_len, + bool metadata); - static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata, - bool write_length, - std::vector<AesEncryptor*>* all_encryptors); + static std::unique_ptr<AesEncryptor> Make(ParquetCipher::type alg_id, int key_len, + bool metadata, bool write_length); ~AesEncryptor(); - /// Size difference between plaintext and ciphertext, for this cipher. - int CiphertextSizeDelta(); + /// The size of the ciphertext, for this cipher and the specified plaintext length. + [[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const; /// Encrypts plaintext with the key and aad. Key length is passed only for validation. /// If different from value in constructor, exception will be thrown. - int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext); + int Encrypt(::arrow::util::span<const uint8_t> plaintext, + ::arrow::util::span<const uint8_t> key, + ::arrow::util::span<const uint8_t> aad, + ::arrow::util::span<uint8_t> ciphertext); /// Encrypts plaintext footer, in order to compute footer signature (tag). - int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - const uint8_t* nonce, uint8_t* encrypted_footer); + int SignedFooterEncrypt(::arrow::util::span<const uint8_t> footer, + ::arrow::util::span<const uint8_t> key, + ::arrow::util::span<const uint8_t> aad, + ::arrow::util::span<const uint8_t> nonce, + ::arrow::util::span<uint8_t> encrypted_footer); void WipeOut(); @@ -82,7 +86,7 @@ class AesEncryptor { }; /// Performs AES decryption operations with GCM or CTR ciphers. -class AesDecryptor { +class PARQUET_EXPORT AesDecryptor { public: /// Can serve one key length only. Possible values: 16, 24, 32 bytes. /// If contains_length is true, expect ciphertext length prepended to the ciphertext @@ -104,13 +108,20 @@ class AesDecryptor { ~AesDecryptor(); void WipeOut(); - /// Size difference between plaintext and ciphertext, for this cipher. - int CiphertextSizeDelta(); + /// The size of the plaintext, for this cipher and the specified ciphertext length. + [[nodiscard]] int PlaintextLength(int ciphertext_len) const; + + /// The size of the ciphertext, for this cipher and the specified plaintext length. + [[nodiscard]] int CiphertextLength(int plaintext_len) const; /// Decrypts ciphertext with the key and aad. Key length is passed only for /// validation. If different from value in constructor, exception will be thrown. - int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext); + /// The caller is responsible for ensuring that the plaintext buffer is at least as + /// large as PlaintextLength(ciphertext_len). + int Decrypt(::arrow::util::span<const uint8_t> ciphertext, + ::arrow::util::span<const uint8_t> key, + ::arrow::util::span<const uint8_t> aad, + ::arrow::util::span<uint8_t> plaintext); private: // PIMPL Idiom diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index ead868643baae..2cce83915d7e5 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -29,24 +29,26 @@ class AesEncryptor::AesEncryptorImpl {}; AesEncryptor::~AesEncryptor() {} -int AesEncryptor::SignedFooterEncrypt(const uint8_t* footer, int footer_len, - const uint8_t* key, int key_len, const uint8_t* aad, - int aad_len, const uint8_t* nonce, - uint8_t* encrypted_footer) { +int AesEncryptor::SignedFooterEncrypt(::arrow::util::span<const uint8_t> footer, + ::arrow::util::span<const uint8_t> key, + ::arrow::util::span<const uint8_t> aad, + ::arrow::util::span<const uint8_t> nonce, + ::arrow::util::span<uint8_t> encrypted_footer) { ThrowOpenSSLRequiredException(); return -1; } void AesEncryptor::WipeOut() { ThrowOpenSSLRequiredException(); } -int AesEncryptor::CiphertextSizeDelta() { +int32_t AesEncryptor::CiphertextLength(int64_t plaintext_len) const { ThrowOpenSSLRequiredException(); return -1; } -int AesEncryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { +int AesEncryptor::Encrypt(::arrow::util::span<const uint8_t> plaintext, + ::arrow::util::span<const uint8_t> key, + ::arrow::util::span<const uint8_t> aad, + ::arrow::util::span<uint8_t> ciphertext) { ThrowOpenSSLRequiredException(); return -1; } @@ -58,9 +60,10 @@ AesEncryptor::AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadat class AesDecryptor::AesDecryptorImpl {}; -int AesDecryptor::Decrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { +int AesDecryptor::Decrypt(::arrow::util::span<const uint8_t> ciphertext, + ::arrow::util::span<const uint8_t> key, + ::arrow::util::span<const uint8_t> aad, + ::arrow::util::span<uint8_t> plaintext) { ThrowOpenSSLRequiredException(); return -1; } @@ -69,14 +72,15 @@ void AesDecryptor::WipeOut() { ThrowOpenSSLRequiredException(); } AesDecryptor::~AesDecryptor() {} -AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata, - std::vector<AesEncryptor*>* all_encryptors) { +std::unique_ptr<AesEncryptor> AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, + bool metadata) { + ThrowOpenSSLRequiredException(); return NULLPTR; } -AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata, - bool write_length, - std::vector<AesEncryptor*>* all_encryptors) { +std::unique_ptr<AesEncryptor> AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, + bool metadata, bool write_length) { + ThrowOpenSSLRequiredException(); return NULLPTR; } @@ -88,10 +92,16 @@ AesDecryptor::AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadat std::shared_ptr<AesDecryptor> AesDecryptor::Make( ParquetCipher::type alg_id, int key_len, bool metadata, std::vector<std::weak_ptr<AesDecryptor>>* all_decryptors) { + ThrowOpenSSLRequiredException(); return NULLPTR; } -int AesDecryptor::CiphertextSizeDelta() { +int AesDecryptor::PlaintextLength(int ciphertext_len) const { + ThrowOpenSSLRequiredException(); + return -1; +} + +int AesDecryptor::CiphertextLength(int plaintext_len) const { ThrowOpenSSLRequiredException(); return -1; }
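The new unit tests below feed std::string values straight into the span-based API through str2span. That helper is assumed here to be the existing parquet one; conceptually it is just a reinterpreting view over the string's bytes (a sketch, not part of this patch):

#include <cstdint>
#include <string>

#include "arrow/util/span.h"

inline ::arrow::util::span<const uint8_t> str2span(const std::string& s) {
  return {reinterpret_cast<const uint8_t*>(s.data()), s.size()};
}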
diff --git a/cpp/src/parquet/encryption/encryption_internal_test.cc b/cpp/src/parquet/encryption/encryption_internal_test.cc new file mode 100644 index 0000000000000..22e14663ea81f --- /dev/null +++ b/cpp/src/parquet/encryption/encryption_internal_test.cc @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> + +#include "parquet/encryption/encryption_internal.h" + +namespace parquet::encryption::test { + +class TestAesEncryption : public ::testing::Test { + protected: + void SetUp() override { + key_length_ = 16; + key_ = "1234567890123450"; + aad_ = "abcdefgh"; + plain_text_ = + "Apache Parquet is an open source, column-oriented data file format designed for " + "efficient data storage and retrieval"; + } + + void EncryptionRoundTrip(ParquetCipher::type cipher_type, bool write_length) { + bool metadata = false; + + AesEncryptor encryptor(cipher_type, key_length_, metadata, write_length); + + int32_t expected_ciphertext_len = + encryptor.CiphertextLength(static_cast<int64_t>(plain_text_.size())); + std::vector<uint8_t> ciphertext(expected_ciphertext_len, '\0'); + + int ciphertext_length = encryptor.Encrypt(str2span(plain_text_), str2span(key_), + str2span(aad_), ciphertext); + + ASSERT_EQ(ciphertext_length, expected_ciphertext_len); + + AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length); + + int expected_plaintext_length = decryptor.PlaintextLength(ciphertext_length); + std::vector<uint8_t> decrypted_text(expected_plaintext_length, '\0'); + + int plaintext_length = + decryptor.Decrypt(ciphertext, str2span(key_), str2span(aad_), decrypted_text); + + std::string decrypted_text_str(decrypted_text.begin(), decrypted_text.end()); + + ASSERT_EQ(plaintext_length, static_cast<int>(plain_text_.size())); + ASSERT_EQ(plaintext_length, expected_plaintext_length); + ASSERT_EQ(decrypted_text_str, plain_text_); + } + + void DecryptInvalidCiphertext(ParquetCipher::type cipher_type) { + bool metadata = false; + bool write_length = true; + + AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length); + + // Create ciphertext of all zeros, so the ciphertext length will be read as zero + const int ciphertext_length = 100; + std::vector<uint8_t> ciphertext(ciphertext_length, '\0'); + + int expected_plaintext_length = decryptor.PlaintextLength(ciphertext_length); + std::vector<uint8_t> decrypted_text(expected_plaintext_length, '\0'); + + EXPECT_THROW( + decryptor.Decrypt(ciphertext, str2span(key_), str2span(aad_), decrypted_text), + ParquetException); + } + + void DecryptCiphertextBufferTooSmall(ParquetCipher::type cipher_type) { + bool metadata = false; + bool write_length = true; + + AesEncryptor encryptor(cipher_type, key_length_, metadata, write_length); + + int32_t expected_ciphertext_len = + encryptor.CiphertextLength(static_cast<int64_t>(plain_text_.size())); + std::vector<uint8_t> ciphertext(expected_ciphertext_len, '\0'); + + int ciphertext_length = encryptor.Encrypt(str2span(plain_text_), str2span(key_), + str2span(aad_), ciphertext); + + AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length); + + int expected_plaintext_length = decryptor.PlaintextLength(ciphertext_length); + std::vector<uint8_t> decrypted_text(expected_plaintext_length, '\0'); + + ::arrow::util::span<const uint8_t> truncated_ciphertext(ciphertext.data(), + ciphertext_length - 1); + EXPECT_THROW(decryptor.Decrypt(truncated_ciphertext, str2span(key_), str2span(aad_), + decrypted_text), + ParquetException); + } + + private: + int key_length_ = 0; + std::string key_; + std::string aad_; + std::string plain_text_; +}; + +TEST_F(TestAesEncryption, AesGcmRoundTrip) { + EncryptionRoundTrip(ParquetCipher::AES_GCM_V1, /*write_length=*/true); + EncryptionRoundTrip(ParquetCipher::AES_GCM_V1, /*write_length=*/false); +} + +TEST_F(TestAesEncryption, AesGcmCtrRoundTrip) { + EncryptionRoundTrip(ParquetCipher::AES_GCM_CTR_V1, /*write_length=*/true); + EncryptionRoundTrip(ParquetCipher::AES_GCM_CTR_V1, /*write_length=*/false); +} + +TEST_F(TestAesEncryption, AesGcmDecryptInvalidCiphertext) { + DecryptInvalidCiphertext(ParquetCipher::AES_GCM_V1); +} + +TEST_F(TestAesEncryption, AesGcmCtrDecryptInvalidCiphertext) { + DecryptInvalidCiphertext(ParquetCipher::AES_GCM_CTR_V1); +} + +TEST_F(TestAesEncryption, AesGcmDecryptCiphertextBufferTooSmall) { + DecryptCiphertextBufferTooSmall(ParquetCipher::AES_GCM_V1); +} + +TEST_F(TestAesEncryption, AesGcmCtrDecryptCiphertextBufferTooSmall) { + DecryptCiphertextBufferTooSmall(ParquetCipher::AES_GCM_CTR_V1); +} + +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index c4416df90b121..fae5ce1f7a809 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -27,19 +27,23 @@ namespace parquet { Decryptor::Decryptor(std::shared_ptr<encryption::AesDecryptor> aes_decryptor, const std::string& key, const std::string& file_aad, const std::string& aad, ::arrow::MemoryPool* pool) - : aes_decryptor_(aes_decryptor), + : aes_decryptor_(std::move(aes_decryptor)), key_(key), file_aad_(file_aad), aad_(aad), pool_(pool) {} -int Decryptor::CiphertextSizeDelta() { return aes_decryptor_->CiphertextSizeDelta(); } +int Decryptor::PlaintextLength(int ciphertext_len) const { + return aes_decryptor_->PlaintextLength(ciphertext_len); +} + +int Decryptor::CiphertextLength(int plaintext_len) const { + return aes_decryptor_->CiphertextLength(plaintext_len); +} -int Decryptor::Decrypt(const uint8_t* ciphertext, int ciphertext_len, - uint8_t* plaintext) { - return aes_decryptor_->Decrypt(ciphertext, ciphertext_len, str2bytes(key_), - static_cast<int>(key_.size()), str2bytes(aad_), - static_cast<int>(aad_.size()), plaintext); +int Decryptor::Decrypt(::arrow::util::span<const uint8_t> ciphertext, + ::arrow::util::span<uint8_t> plaintext) { + return aes_decryptor_->Decrypt(ciphertext, str2span(key_), str2span(aad_), plaintext); } // InternalFileDecryptor @@ -152,9 +156,9 @@ std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptor( } footer_metadata_decryptor_ = std::make_shared<Decryptor>( - aes_metadata_decryptor, footer_key, file_aad_, aad, pool_); - footer_data_decryptor_ = - std::make_shared<Decryptor>(aes_data_decryptor, footer_key, file_aad_, aad, pool_); + std::move(aes_metadata_decryptor), footer_key, file_aad_, aad, pool_); + footer_data_decryptor_ = std::make_shared<Decryptor>(std::move(aes_data_decryptor), + footer_key, file_aad_, aad, pool_); if (metadata) return footer_metadata_decryptor_; return footer_data_decryptor_;
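Taken together, PlaintextLength and CiphertextLength replace the old CiphertextSizeDelta() arithmetic with validated, caller-visible buffer sizing. A sketch of the intended round trip, mirroring the new encryption_internal_test.cc above (error handling omitted; str2span as sketched earlier):

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "parquet/encryption/encryption_internal.h"

void RoundTrip(const std::string& plain, const std::string& key,
               const std::string& aad) {
  using parquet::encryption::AesDecryptor;
  using parquet::encryption::AesEncryptor;

  // Size the ciphertext buffer up front, then encrypt.
  std::unique_ptr<AesEncryptor> enc = AesEncryptor::Make(
      parquet::ParquetCipher::AES_GCM_V1, static_cast<int>(key.size()),
      /*metadata=*/false);
  std::vector<uint8_t> cipher(
      enc->CiphertextLength(static_cast<int64_t>(plain.size())));
  int clen = enc->Encrypt(str2span(plain), str2span(key), str2span(aad), cipher);

  // Size the plaintext buffer from the actual ciphertext length, then decrypt.
  AesDecryptor dec(parquet::ParquetCipher::AES_GCM_V1,
                   static_cast<int>(key.size()), /*metadata=*/false,
                   /*contains_length=*/true);
  std::vector<uint8_t> out(dec.PlaintextLength(clen));
  dec.Decrypt(cipher, str2span(key), str2span(aad), out);
}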
diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h index f12cdefbe67a7..8af3587acf884 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.h +++ b/cpp/src/parquet/encryption/internal_file_decryptor.h @@ -45,8 +45,10 @@ class PARQUET_EXPORT Decryptor { void UpdateAad(const std::string& aad) { aad_ = aad; } ::arrow::MemoryPool* pool() { return pool_; } - int CiphertextSizeDelta(); - int Decrypt(const uint8_t* ciphertext, int ciphertext_len, uint8_t* plaintext); + [[nodiscard]] int PlaintextLength(int ciphertext_len) const; + [[nodiscard]] int CiphertextLength(int plaintext_len) const; + int Decrypt(::arrow::util::span<const uint8_t> ciphertext, + ::arrow::util::span<uint8_t> plaintext); private: std::shared_ptr<encryption::AesDecryptor> aes_decryptor_; diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 15bf52b84dd1b..285c2100be813 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -31,12 +31,13 @@ Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& aad_(aad), pool_(pool) {} -int Encryptor::CiphertextSizeDelta() { return aes_encryptor_->CiphertextSizeDelta(); } +int32_t Encryptor::CiphertextLength(int64_t plaintext_len) const { + return aes_encryptor_->CiphertextLength(plaintext_len); +} -int Encryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext) { - return aes_encryptor_->Encrypt(plaintext, plaintext_len, str2bytes(key_), - static_cast<int>(key_.size()), str2bytes(aad_), - static_cast<int>(aad_.size()), ciphertext); +int Encryptor::Encrypt(::arrow::util::span<const uint8_t> plaintext, + ::arrow::util::span<uint8_t> ciphertext) { + return aes_encryptor_->Encrypt(plaintext, str2span(key_), str2span(aad_), ciphertext); } // InternalFileEncryptor @@ -52,8 +53,15 @@ InternalFileEncryptor::InternalFileEncryptor(FileEncryptionProperties* propertie void InternalFileEncryptor::WipeOutEncryptionKeys() { properties_->WipeOutEncryptionKeys(); - for (auto const& i : all_encryptors_) { - i->WipeOut(); + for (auto const& i : meta_encryptor_) { + if (i != nullptr) { + i->WipeOut(); + } + } + for (auto const& i : data_encryptor_) { + if (i != nullptr) { + i->WipeOut(); + } } } @@ -135,7 +143,7 @@ InternalFileEncryptor::InternalFileEncryptor::GetColumnEncryptor( return encryptor; } -int InternalFileEncryptor::MapKeyLenToEncryptorArrayIndex(int key_len) { +int InternalFileEncryptor::MapKeyLenToEncryptorArrayIndex(int key_len) const { if (key_len == 16) return 0; else if (key_len == 24) @@ -150,8 +158,7 @@ encryption::AesEncryptor* InternalFileEncryptor::GetMetaAesEncryptor( int key_len = static_cast<int>(key_size); int index = MapKeyLenToEncryptorArrayIndex(key_len); if (meta_encryptor_[index] == nullptr) { - meta_encryptor_[index].reset( - encryption::AesEncryptor::Make(algorithm, key_len, true, &all_encryptors_)); + meta_encryptor_[index] = encryption::AesEncryptor::Make(algorithm, key_len, true); } return meta_encryptor_[index].get(); } @@ -161,8 +168,7 @@ encryption::AesEncryptor* InternalFileEncryptor::GetDataAesEncryptor( int key_len = static_cast<int>(key_size); int index = MapKeyLenToEncryptorArrayIndex(key_len); if (data_encryptor_[index] == nullptr) { - data_encryptor_[index].reset( - encryption::AesEncryptor::Make(algorithm, key_len, false, &all_encryptors_)); + data_encryptor_[index] = encryption::AesEncryptor::Make(algorithm, key_len, false); } return data_encryptor_[index].get(); } diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index 3cbe53500c2c5..91b6e9fe5aa2f 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -43,8 +43,10 @@ class PARQUET_EXPORT Encryptor { void UpdateAad(const std::string& aad) { aad_ = aad; } ::arrow::MemoryPool* pool() { return pool_; } - int CiphertextSizeDelta(); - int Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext); + [[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const; + + int Encrypt(::arrow::util::span<const uint8_t> plaintext, + ::arrow::util::span<uint8_t> ciphertext); bool EncryptColumnMetaData( bool encrypted_footer, @@ -86,8 +88,6 @@ class InternalFileEncryptor { std::shared_ptr<Encryptor> footer_signing_encryptor_; std::shared_ptr<Encryptor> footer_encryptor_; - std::vector<encryption::AesEncryptor*> all_encryptors_; - // Key must be 16, 24 or 32 bytes in length. Thus there could be up to three // types of meta_encryptors and data_encryptors. std::unique_ptr<encryption::AesEncryptor> meta_encryptor_[3]; @@ -103,7 +103,7 @@ class InternalFileEncryptor { encryption::AesEncryptor* GetDataAesEncryptor(ParquetCipher::type algorithm, size_t key_len); - int MapKeyLenToEncryptorArrayIndex(int key_len); + int MapKeyLenToEncryptorArrayIndex(int key_len) const; }; } // namespace parquet diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index bdd65d8de3919..5d7925aa0318f 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -32,15 +32,14 @@ std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& m static_cast<int>(master_key.size()), false, false /*write_length*/); - int encrypted_key_len = - static_cast<int>(key_bytes.size()) + key_encryptor.CiphertextSizeDelta(); + int32_t encrypted_key_len = + key_encryptor.CiphertextLength(static_cast<int64_t>(key_bytes.size())); std::string encrypted_key(encrypted_key_len, '\0'); - encrypted_key_len = key_encryptor.Encrypt( - reinterpret_cast<const uint8_t*>(key_bytes.data()), - static_cast<int>(key_bytes.size()), - reinterpret_cast<const uint8_t*>(master_key.data()), - static_cast<int>(master_key.size()), reinterpret_cast<const uint8_t*>(aad.data()), - static_cast<int>(aad.size()), reinterpret_cast<uint8_t*>(&encrypted_key[0])); + ::arrow::util::span<uint8_t> encrypted_key_span( + reinterpret_cast<uint8_t*>(&encrypted_key[0]), encrypted_key_len); + + encrypted_key_len = key_encryptor.Encrypt(str2span(key_bytes), str2span(master_key), + str2span(aad), encrypted_key_span); return ::arrow::util::base64_encode( ::std::string_view(encrypted_key.data(), encrypted_key_len)); @@ -55,15 +54,13 @@ std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, false /*contains_length*/); int decrypted_key_len = - static_cast<int>(encrypted_key.size()) - key_decryptor.CiphertextSizeDelta(); + key_decryptor.PlaintextLength(static_cast<int>(encrypted_key.size())); std::string decrypted_key(decrypted_key_len, '\0'); + ::arrow::util::span<uint8_t> decrypted_key_span( + reinterpret_cast<uint8_t*>(&decrypted_key[0]), decrypted_key_len); - decrypted_key_len = key_decryptor.Decrypt( - reinterpret_cast<const uint8_t*>(encrypted_key.data()), - static_cast<int>(encrypted_key.size()), - reinterpret_cast<const uint8_t*>(master_key.data()), - static_cast<int>(master_key.size()), reinterpret_cast<const uint8_t*>(aad.data()), - static_cast<int>(aad.size()), reinterpret_cast<uint8_t*>(&decrypted_key[0])); + decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), str2span(master_key), + str2span(aad), decrypted_key_span); return decrypted_key; }
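For context, the local key-wrapping path above AES-GCM-encrypts a key without a length prefix and base64-encodes the result, and unwrapping reverses both steps. A hedged usage sketch of the pair of functions touched here (the parquet::encryption::internal namespace is assumed from key_toolkit_internal.h):

#include <cassert>
#include <string>

#include "parquet/encryption/key_toolkit_internal.h"

void WrapAndUnwrap(const std::string& key, const std::string& master_key,
                   const std::string& aad) {
  // EncryptKeyLocally returns base64 text; DecryptKeyLocally expects it.
  std::string wrapped =
      parquet::encryption::internal::EncryptKeyLocally(key, master_key, aad);
  std::string unwrapped =
      parquet::encryption::internal::DecryptKeyLocally(wrapped, master_key, aad);
  assert(unwrapped == key);  // both functions throw ParquetException on error
}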
diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index 94fb6362269e2..f450f9274c261 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -22,6 +22,7 @@ #include "arrow/io/file.h" #include "arrow/testing/gtest_compat.h" +#include "arrow/util/config.h" #include "parquet/column_reader.h" #include "parquet/column_writer.h" diff --git a/cpp/src/parquet/level_conversion_inc.h b/cpp/src/parquet/level_conversion_inc.h index d1ccedabfde50..3accb154e6f5a 100644 --- a/cpp/src/parquet/level_conversion_inc.h +++ b/cpp/src/parquet/level_conversion_inc.h @@ -296,7 +296,7 @@ template <bool has_repeated_parent> int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size, int64_t upper_bound_remaining, LevelInfo level_info, ::arrow::internal::FirstTimeBitmapWriter* writer) { - DCHECK_LE(batch_size, kExtractBitsSize); + ARROW_DCHECK_LE(batch_size, kExtractBitsSize); // Greater than level_info.def_level - 1 implies >= the def_level auto defined_bitmap = static_cast<uint64_t>( diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index b24883cdc160b..139793219df90 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -21,6 +21,8 @@ #include #include #include +#include <random> +#include <sstream> #include #include #include @@ -29,6 +31,7 @@ #include "arrow/io/memory.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" +#include "arrow/util/pcg_random.h" #include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/internal_file_decryptor.h" #include "parquet/exception.h" @@ -599,6 +602,49 @@ std::vector<SortingColumn> RowGroupMetaData::sorting_columns() const { return impl_->sorting_columns(); } +// Replace string data with random-generated uppercase characters +static void Scrub(std::string* s) { + static ::arrow::random::pcg64 rng; + std::uniform_int_distribution<> caps(65, 90); + for (auto& c : *s) c = caps(rng); +} + +// Replace potentially sensitive metadata with random data +static void Scrub(format::FileMetaData* md) { + for (auto& s : md->schema) { + Scrub(&s.name); + } + for (auto& r : md->row_groups) { + for (auto& c : r.columns) { + Scrub(&c.file_path); + if (c.__isset.meta_data) { + auto& m = c.meta_data; + for (auto& p : m.path_in_schema) Scrub(&p); + for (auto& kv : m.key_value_metadata) { + Scrub(&kv.key); + Scrub(&kv.value); + } + Scrub(&m.statistics.max_value); + Scrub(&m.statistics.min_value); + Scrub(&m.statistics.min); + Scrub(&m.statistics.max); + } + + if (c.crypto_metadata.__isset.ENCRYPTION_WITH_COLUMN_KEY) { + auto& m = c.crypto_metadata.ENCRYPTION_WITH_COLUMN_KEY; + for (auto& p : m.path_in_schema) Scrub(&p); + Scrub(&m.key_metadata); + } + Scrub(&c.encrypted_column_metadata); + } + } + for (auto& kv : md->key_value_metadata) { + Scrub(&kv.key); + Scrub(&kv.value); + } + Scrub(&md->footer_signing_key_metadata); +} + // file metadata class FileMetaData::FileMetaDataImpl { public: @@ -640,29 +686,28 @@ class FileMetaData::FileMetaDataImpl { uint32_t serialized_len = metadata_len_; ThriftSerializer serializer; serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data); + ::arrow::util::span<const uint8_t> serialized_data_span(serialized_data, + serialized_len); // encrypt with nonce - auto nonce = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(signature)); - auto tag = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(signature)) + - encryption::kNonceLength; + ::arrow::util::span<const uint8_t> nonce(reinterpret_cast<const uint8_t*>(signature), + encryption::kNonceLength); + auto tag = reinterpret_cast<const uint8_t*>(signature) + encryption::kNonceLength; std::string key = file_decryptor_->GetFooterKey(); std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); - auto aes_encryptor = encryption::AesEncryptor::Make( - file_decryptor_->algorithm(), static_cast<int>(key.size()), true, - false /*write_length*/, nullptr); + auto aes_encryptor = encryption::AesEncryptor::Make(file_decryptor_->algorithm(), + static_cast<int>(key.size()), + true, false /*write_length*/); - std::shared_ptr<Buffer> encrypted_buffer = std::static_pointer_cast<Buffer>( - AllocateBuffer(file_decryptor_->pool(), - aes_encryptor->CiphertextSizeDelta() + serialized_len)); + std::shared_ptr<Buffer> encrypted_buffer = AllocateBuffer( + file_decryptor_->pool(), aes_encryptor->CiphertextLength(serialized_len)); uint32_t encrypted_len = aes_encryptor->SignedFooterEncrypt( - serialized_data, serialized_len, str2bytes(key), static_cast<int>(key.size()), - str2bytes(aad), static_cast<int>(aad.size()), nonce, - encrypted_buffer->mutable_data()); + serialized_data_span, str2span(key), str2span(aad), nonce, + encrypted_buffer->mutable_span_as<uint8_t>()); // Delete AES encryptor object. It was created only to verify the footer signature. aes_encryptor->WipeOut(); - delete aes_encryptor; return 0 == memcmp(encrypted_buffer->data() + encrypted_len - encryption::kGcmTagLength, tag, encryption::kGcmTagLength); @@ -701,12 +746,12 @@ class FileMetaData::FileMetaDataImpl { uint8_t* serialized_data; uint32_t serialized_len; serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data); + ::arrow::util::span<const uint8_t> serialized_data_span(serialized_data, + serialized_len); // encrypt the footer key - std::vector<uint8_t> encrypted_data(encryptor->CiphertextSizeDelta() + - serialized_len); - unsigned encrypted_len = - encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data()); + std::vector<uint8_t> encrypted_data(encryptor->CiphertextLength(serialized_len)); + int encrypted_len = encryptor->Encrypt(serialized_data_span, encrypted_data); // write unencrypted footer PARQUET_THROW_NOT_OK(dst->Write(serialized_data, serialized_len)); @@ -822,6 +867,21 @@ class FileMetaData::FileMetaDataImpl { return out; } + std::string SerializeUnencrypted(bool scrub, bool debug) const { + auto md = *metadata_; + if (scrub) Scrub(&md); + if (debug) { + std::ostringstream ss; + md.printTo(ss); + return ss.str(); + } else { + ThriftSerializer serializer; + std::string out; + serializer.SerializeToString(&md, &out); + return out; + } + } + void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor) { file_decryptor_ = std::move(file_decryptor); } @@ -993,6 +1053,10 @@ std::shared_ptr<FileMetaData> FileMetaData::Subset( return impl_->Subset(row_groups); } +std::string FileMetaData::SerializeUnencrypted(bool scrub, bool debug) const { + return impl_->SerializeUnencrypted(scrub, debug); +} + void FileMetaData::WriteTo(::arrow::io::OutputStream* dst, const std::shared_ptr<Encryptor>& encryptor) const { return impl_->WriteTo(dst, encryptor);
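The new SerializeUnencrypted entry point is what the parquet-dump-footer tool added later in this patch builds on: scrub replaces string data with random uppercase characters of the same length, and debug selects Thrift's text rendering instead of the binary footer bytes. A usage sketch (assuming an already-parsed FileMetaData):

#include <memory>
#include <string>

#include "parquet/metadata.h"

std::string ScrubbedFooterText(const std::shared_ptr<parquet::FileMetaData>& md) {
  // Human-readable, scrubbed form; (scrub=false, debug=false) would instead
  // return the re-serialized binary Thrift footer.
  return md->SerializeUnencrypted(/*scrub=*/true, /*debug=*/true);
}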
@@ -1360,9 +1424,9 @@ class ApplicationVersionParser { // For parsing. std::string spaces_; std::string digits_; - size_t version_parsing_position_; - size_t version_start_; - size_t version_end_; + size_t version_parsing_position_{0}; + size_t version_start_{0}; + size_t version_end_{0}; std::string version_string_; }; } // namespace @@ -1559,11 +1623,11 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { serializer.SerializeToBuffer(&column_chunk_->meta_data, &serialized_len, &serialized_data); + ::arrow::util::span<const uint8_t> serialized_data_span(serialized_data, + serialized_len); - std::vector<uint8_t> encrypted_data(encryptor->CiphertextSizeDelta() + - serialized_len); - unsigned encrypted_len = - encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data()); + std::vector<uint8_t> encrypted_data(encryptor->CiphertextLength(serialized_len)); + int encrypted_len = encryptor->Encrypt(serialized_data_span, encrypted_data); const char* temp = const_cast<const char*>(reinterpret_cast<char*>(encrypted_data.data())); diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index 9fc30df58e0d3..e02d2e7c852f0 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -396,6 +396,13 @@ class PARQUET_EXPORT FileMetaData { /// FileMetaData. std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) const; + /// \brief Serialize metadata unencrypted as string + /// + /// \param[in] scrub whether to remove sensitive information from the metadata. + /// \param[in] debug whether to serialize the metadata as Thrift (if false) or + /// debug text (if true). + std::string SerializeUnencrypted(bool scrub, bool debug) const; + private: friend FileMetaDataBuilder; friend class SerializedFile; diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc index 4ddeef9e83975..47fa72d829658 100644 --- a/cpp/src/parquet/schema.cc +++ b/cpp/src/parquet/schema.cc @@ -600,7 +600,7 @@ class SchemaVisitor : public Node::ConstVisitor { void Visit(const Node* node) override { format::SchemaElement element; node->ToParquet(&element); - elements_->push_back(element); + elements_->push_back(std::move(element)); if (node->is_group()) { const GroupNode* group_node = static_cast<const GroupNode*>(node); @@ -671,7 +671,7 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) { static void PrintConvertedType(const PrimitiveNode* node, std::ostream& stream) { auto lt = node->converted_type(); - auto la = node->logical_type(); + const auto& la = node->logical_type(); if (la && la->is_valid() && !la->is_none()) { stream << " (" << la->ToString() << ")"; } else if (lt == ConvertedType::DECIMAL) { @@ -718,7 +718,7 @@ struct SchemaPrinter : public Node::ConstVisitor { stream_ << " group " << "field_id=" << node->field_id() << " " << node->name(); auto lt = node->converted_type(); - auto la = node->logical_type(); + const auto& la = node->logical_type(); if (la && la->is_valid() && !la->is_none()) { stream_ << " (" << la->ToString() << ")"; } else if (lt != ConvertedType::NONE) { diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h index 896ec1e47968d..1addc73bd367d 100644 --- a/cpp/src/parquet/schema.h +++ b/cpp/src/parquet/schema.h @@ -175,7 +175,7 @@ class PARQUET_EXPORT Node { Node::type type_; std::string name_; Repetition::type repetition_; - ConvertedType::type converted_type_; + ConvertedType::type converted_type_{ConvertedType::NONE}; std::shared_ptr<const LogicalType> logical_type_; int field_id_; // Nodes should not be shared, they have a single parent. @@ -280,7 +280,8 @@ class PARQUET_EXPORT GroupNode : public Node { const NodeVector& fields, std::shared_ptr<const LogicalType> logical_type, int field_id = -1) { - return NodePtr(new GroupNode(name, repetition, fields, logical_type, field_id)); + return NodePtr( + new GroupNode(name, repetition, fields, std::move(logical_type), field_id)); } bool Equals(const Node* other) const override; @@ -376,7 +377,7 @@ class PARQUET_EXPORT ColumnDescriptor { ColumnOrder column_order() const { return primitive_node_->column_order(); } SortOrder::type sort_order() const { - auto la = logical_type(); + const auto& la = logical_type(); auto pt = physical_type(); return la ? GetSortOrder(la, pt) : GetSortOrder(converted_type(), pt); } @@ -416,8 +417,8 @@ class PARQUET_EXPORT ColumnDescriptor { // TODO(wesm): this object can be recomputed from a Schema class PARQUET_EXPORT SchemaDescriptor { public: - SchemaDescriptor() {} - ~SchemaDescriptor() {} + SchemaDescriptor() = default; + ~SchemaDescriptor() = default; // Analyze the schema void Init(std::unique_ptr<schema::Node> schema); @@ -464,6 +465,7 @@ class PARQUET_EXPORT SchemaDescriptor { // Root Node schema::NodePtr schema_; // Root Node + // Would never be NULLPTR. const schema::GroupNode* group_node_; void BuildTree(const schema::NodePtr& node, int16_t max_def_level, diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 9e02331b44ba0..b21b0e07afba2 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -411,17 +411,21 @@ class ThriftDeserializer { // thrift message is encrypted uint32_t clen; clen = *len; + if (clen > static_cast<uint32_t>(std::numeric_limits<int32_t>::max())) { + std::stringstream ss; + ss << "Cannot decrypt buffer with length " << clen << ", which overflows int32\n"; + throw ParquetException(ss.str()); + } // decrypt - auto decrypted_buffer = std::static_pointer_cast<ResizableBuffer>( - AllocateBuffer(decryptor->pool(), - static_cast<int64_t>(clen - decryptor->CiphertextSizeDelta()))); - const uint8_t* cipher_buf = buf; + auto decrypted_buffer = AllocateBuffer( + decryptor->pool(), decryptor->PlaintextLength(static_cast<int32_t>(clen))); + ::arrow::util::span<const uint8_t> cipher_buf(buf, clen); uint32_t decrypted_buffer_len = - decryptor->Decrypt(cipher_buf, 0, decrypted_buffer->mutable_data()); + decryptor->Decrypt(cipher_buf, decrypted_buffer->mutable_span_as<uint8_t>()); if (decrypted_buffer_len <= 0) { throw ParquetException("Couldn't decrypt buffer\n"); } - *len = decrypted_buffer_len + decryptor->CiphertextSizeDelta(); + *len = decryptor->CiphertextLength(static_cast<int32_t>(decrypted_buffer_len)); DeserializeUnencryptedMessage(decrypted_buffer->data(), &decrypted_buffer_len, deserialized_msg); } @@ -521,13 +525,13 @@ class ThriftSerializer { } } - int64_t SerializeEncryptedObj(ArrowOutputStream* out, uint8_t* out_buffer, + int64_t SerializeEncryptedObj(ArrowOutputStream* out, const uint8_t* out_buffer, uint32_t out_length, Encryptor* encryptor) { - auto cipher_buffer = std::static_pointer_cast<ResizableBuffer>(AllocateBuffer( - encryptor->pool(), - static_cast<int64_t>(encryptor->CiphertextSizeDelta() + out_length))); + auto cipher_buffer = + AllocateBuffer(encryptor->pool(), encryptor->CiphertextLength(out_length)); + ::arrow::util::span<const uint8_t> out_span(out_buffer, out_length); int cipher_buffer_len = - encryptor->Encrypt(out_buffer, out_length, cipher_buffer->mutable_data()); + encryptor->Encrypt(out_span, cipher_buffer->mutable_span_as<uint8_t>()); PARQUET_THROW_NOT_OK(out->Write(cipher_buffer->data(), cipher_buffer_len)); return static_cast<int64_t>(cipher_buffer_len); diff --git a/cpp/thirdparty/versions.txt 
b/cpp/thirdparty/versions.txt index f080cf58bca85..ab988badec145 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -72,8 +72,8 @@ ARROW_GFLAGS_BUILD_VERSION=v2.2.2 ARROW_GFLAGS_BUILD_SHA256_CHECKSUM=34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf ARROW_GLOG_BUILD_VERSION=v0.5.0 ARROW_GLOG_BUILD_SHA256_CHECKSUM=eede71f28371bf39aa69b45de23b329d37214016e2055269b3b5e7cfd40b59f5 -ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.12.0 -ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=8cda870803925c62de8716a765e03eb9d34249977e5cdb7d0d20367e997a55e2 +ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.22.0 +ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a042c1aae0c2feee905acaa2a2dc9bf ARROW_GRPC_BUILD_VERSION=v1.46.3 ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 ARROW_GTEST_BUILD_VERSION=1.11.0 @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=2.0.0 -ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df +ARROW_ORC_BUILD_VERSION=2.0.1 +ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require diff --git a/cpp/tools/parquet/CMakeLists.txt b/cpp/tools/parquet/CMakeLists.txt index 81ab49421d0f6..87c3254607589 100644 --- a/cpp/tools/parquet/CMakeLists.txt +++ b/cpp/tools/parquet/CMakeLists.txt @@ -16,7 +16,7 @@ # under the License. if(PARQUET_BUILD_EXECUTABLES) - set(PARQUET_TOOLS parquet-dump-schema parquet-reader parquet-scan) + set(PARQUET_TOOLS parquet-dump-footer parquet-dump-schema parquet-reader parquet-scan) foreach(TOOL ${PARQUET_TOOLS}) string(REGEX REPLACE "-" "_" TOOL_SOURCE ${TOOL}) diff --git a/cpp/tools/parquet/parquet_dump_footer.cc b/cpp/tools/parquet/parquet_dump_footer.cc new file mode 100644 index 0000000000000..4dd7476bc8ea3 --- /dev/null +++ b/cpp/tools/parquet/parquet_dump_footer.cc @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
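The new tool below opportunistically reads up to 1 MiB from the end of the file, retries with the exact footer size when the footer is larger, and re-serializes the footer through SerializeUnencrypted. An illustrative invocation (the path is hypothetical; the flags are the ones parsed in main below):

parquet-dump-footer --in file:///tmp/example.parquet --debug --out footer.txt

Scrubbing is on by default; pass --no-scrub to keep the original strings, and omit --out to write to stdout.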
+ +#include <cstdint> +#include <cstring> +#include <fstream> +#include <iostream> +#include <optional> + +#include "arrow/filesystem/filesystem.h" +#include "arrow/util/endian.h" +#include "arrow/util/ubsan.h" +#include "parquet/metadata.h" + +namespace parquet { +namespace { +uint32_t ReadLE32(const void* p) { + uint32_t x = ::arrow::util::SafeLoadAs<uint32_t>(static_cast<const uint8_t*>(p)); + return ::arrow::bit_util::FromLittleEndian(x); +} + +void AppendLE32(uint32_t v, std::string* out) { + v = ::arrow::bit_util::ToLittleEndian(v); + out->append(reinterpret_cast<const char*>(&v), sizeof(v)); +} + +int DoIt(std::string in, bool scrub, bool debug, std::string out) { + std::string path; + auto fs = ::arrow::fs::FileSystemFromUriOrPath(in, &path).ValueOrDie(); + auto file = fs->OpenInputFile(path).ValueOrDie(); + int64_t file_len = file->GetSize().ValueOrDie(); + if (file_len < 8) { + std::cerr << "File too short: " << in << "\n"; + return 3; + } + // First do an opportunistic read of up to 1 MiB to try and get the entire footer. + int64_t tail_len = std::min(file_len, int64_t{1} << 20); + std::string tail; + tail.resize(tail_len); + char* data = tail.data(); + file->ReadAt(file_len - tail_len, tail_len, data).ValueOrDie(); + if (auto magic = ReadLE32(data + tail_len - 4); magic != ReadLE32("PAR1")) { + std::cerr << "Not a Parquet file: " << in << "\n"; + return 4; + } + uint32_t metadata_len = ReadLE32(data + tail_len - 8); + if (tail_len >= metadata_len + 8) { + // The footer is entirely in the initial read. Trim to size. + tail = tail.substr(tail_len - (metadata_len + 8)); + } else { + // The footer is larger than the initial read, so read again with the exact size. + if (metadata_len > file_len) { + std::cerr << "File too short: " << in << "\n"; + return 5; + } + tail_len = metadata_len + 8; + tail.resize(tail_len); + data = tail.data(); + file->ReadAt(file_len - tail_len, tail_len, data).ValueOrDie(); + } + auto md = FileMetaData::Make(tail.data(), &metadata_len); + std::string ser = md->SerializeUnencrypted(scrub, debug); + if (!debug) { + AppendLE32(static_cast<uint32_t>(ser.size()), &ser); + ser.append("PAR1", 4); + } + std::optional<std::ofstream> fout; + if (!out.empty()) fout.emplace(out, std::ios::out); + std::ostream& os = fout ? *fout : std::cout; + if (!os.write(ser.data(), ser.size())) { + std::cerr << "Failed to write to output file: " << out << "\n"; + return 6; + } + + return 0; +} +} // namespace +} // namespace parquet + +static int PrintHelp() { + std::cerr << R"(Usage: parquet-dump-footer + -h|--help Print help and exit + --no-scrub Do not scrub potentially confidential metadata + --debug Output text representation of footer for inspection + --in Input file (required): must be a URI or an absolute local path + --out Output file (optional, default stdout) + + Dump the footer of a Parquet file to stdout or to a file, optionally with + potentially confidential metadata scrubbed. +)"; + return 1; +} + +int main(int argc, char** argv) { + bool scrub = true; + bool debug = false; + std::string in; + std::string out; + for (int i = 1; i < argc; i++) { + char* arg = argv[i]; + if (!std::strcmp(arg, "-h") || !std::strcmp(arg, "--help")) { + return PrintHelp(); + } else if (!std::strcmp(arg, "--no-scrub")) { + scrub = false; + } else if (!std::strcmp(arg, "--debug")) { + debug = true; + } else if (!std::strcmp(arg, "--in")) { + if (i + 1 >= argc) return PrintHelp(); + in = argv[++i]; + } else if (!std::strcmp(arg, "--out")) { + if (i + 1 >= argc) return PrintHelp(); + out = argv[++i]; + } else { + // Unknown option. 
+ return PrintHelp(); + } + } + if (in.empty()) return PrintHelp(); + + return parquet::DoIt(in, scrub, debug, out); +} diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index f087a6d24c8f9..6f825b55cfd94 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "17.0.0-SNAPSHOT", + "version-string": "18.0.0-SNAPSHOT", "dependencies": [ "abseil", { diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index 3c06d3cd31d90..43c93238d6a7b 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -29,7 +29,7 @@ Apache Arrow library Copyright 2016-2024 The Apache Software Foundation The Apache Software Foundation - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT diff --git a/csharp/README.md b/csharp/README.md index b36eb899db2d5..663aaf8ab243c 100644 --- a/csharp/README.md +++ b/csharp/README.md @@ -129,7 +129,8 @@ for currently available features. - Types - Tensor - Arrays - - Large Arrays + - Large Arrays. There are large array types provided to help with interoperability with other libraries, + but these do not support buffers larger than 2 GiB and an exception will be raised if trying to import an array that is too large. - Large Binary - Large List - Large String diff --git a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj index 2dd1d9d8f98e2..ac1f8c9bae77a 100644 --- a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj +++ b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index ee6d42c8d17fc..1870888184906 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index be85bd3090e92..5334f877873e4 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -5,9 +5,9 @@ - - - + + + diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index 67c4b21a2e531..bd06c3a1b8b14 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -53,18 +53,24 @@ public static IArrowArray BuildArray(ArrayData data) return new StringArray(data); case ArrowTypeId.StringView: return new StringViewArray(data); + case ArrowTypeId.LargeString: + return new LargeStringArray(data); case ArrowTypeId.FixedSizedBinary: return new FixedSizeBinaryArray(data); case ArrowTypeId.Binary: return new BinaryArray(data); case ArrowTypeId.BinaryView: return new BinaryViewArray(data); + case ArrowTypeId.LargeBinary: + return new LargeBinaryArray(data); case ArrowTypeId.Timestamp: return new TimestampArray(data); case ArrowTypeId.List: return new ListArray(data); case ArrowTypeId.ListView: return new ListViewArray(data); + case ArrowTypeId.LargeList: + return new LargeListArray(data); case ArrowTypeId.Map: return new MapArray(data); case ArrowTypeId.Struct: diff --git a/csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs new file 
mode 100644 index 0000000000000..9eddbedab54ed --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace Apache.Arrow; + +public class LargeBinaryArray : Array, IReadOnlyList, ICollection +{ + public LargeBinaryArray(ArrayData data) + : base(data) + { + data.EnsureDataType(ArrowTypeId.LargeBinary); + data.EnsureBufferCount(3); + } + + public LargeBinaryArray(ArrowTypeId typeId, ArrayData data) + : base(data) + { + data.EnsureDataType(typeId); + data.EnsureBufferCount(3); + } + + public LargeBinaryArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + + public ArrowBuffer ValueBuffer => Data.Buffers[2]; + + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(Offset, Length + 1); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + if (!IsValid(index)) + { + return 0; + } + + ReadOnlySpan offsets = ValueOffsets; + return checked((int)(offsets[index + 1] - offsets[index])); + } + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// + /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte + /// collection values when seen in the context of this method's return type of . + /// Use the method or the overload instead + /// to reliably determine null values. + /// + /// Index at which to get bytes. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index) => GetBytes(index, out _); + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// Index at which to get bytes. + /// Set to if the value at the given index is null. + /// Returns a object. + /// If the index is negative or beyond the length of the array. 
+ /// + public ReadOnlySpan GetBytes(int index, out bool isNull) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + isNull = IsNull(index); + + if (isNull) + { + // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span + // is actually returned as an empty span. + return ReadOnlySpan.Empty; + } + + var offset = checked((int)ValueOffsets[index]); + return ValueBuffer.Span.Slice(offset, GetValueLength(index)); + } + + int IReadOnlyCollection.Count => Length; + + byte[] IReadOnlyList.this[int index] => GetBytes(index).ToArray(); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetBytes(index).ToArray(); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(byte[] item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(byte[] item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(byte[] item) + { + for (int index = 0; index < Length; index++) + { + if (GetBytes(index).SequenceEqual(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(byte[][] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetBytes(srcIndex).ToArray(); + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/LargeListArray.cs b/csharp/src/Apache.Arrow/Arrays/LargeListArray.cs new file mode 100644 index 0000000000000..6e37aa4c63536 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/LargeListArray.cs @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class LargeListArray : Array + { + public IArrowArray Values { get; } + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + + public ReadOnlySpan<long> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<long>().Slice(Offset, Length + 1); + + public LargeListArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, IArrowArray values, + ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer }, new[] { values.Data }), + values) + { + } + + public LargeListArray(ArrayData data) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0])) + { + } + + private LargeListArray(ArrayData data, IArrowArray values) : base(data) + { + data.EnsureBufferCount(2); + data.EnsureDataType(ArrowTypeId.LargeList); + Values = values; + } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return 0; + } + + ReadOnlySpan<long> offsets = ValueOffsets; + return checked((int)(offsets[index + 1] - offsets[index])); + } + + public IArrowArray GetSlicedValues(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return null; + } + + if (!(Values is Array array)) + { + return default; + } + + return array.Slice(checked((int)ValueOffsets[index]), GetValueLength(index)); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + Values?.Dispose(); + } + base.Dispose(disposing); + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs b/csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs new file mode 100644 index 0000000000000..2a65b828acfa1 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
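Usage sketch (editorial aside, not part of the patch): the same builder pattern applied to the LargeListArray above, mirroring this diff's LargeListArrayTests. Note how a null list repeats the previous offset, so the offsets buffer always holds Length + 1 entries.

using Apache.Arrow;
using Apache.Arrow.Types;

// Build a large list array holding [[1, 2], null] over int32 values.
var valuesBuilder = new Int32Array.Builder();
var offsetBuffer = new ArrowBuffer.Builder<long>();
var validityBuffer = new ArrowBuffer.BitmapBuilder();

offsetBuffer.Append(0L);

valuesBuilder.Append(1);
valuesBuilder.Append(2);
offsetBuffer.Append(2L);               // list 0 covers child values [0, 2)
validityBuffer.Append(true);

offsetBuffer.Append(2L);               // list 1 is null, so the offset repeats
validityBuffer.Append(false);

var array = new LargeListArray(
    new LargeListType(Int32Type.Default), length: 2,
    offsetBuffer.Build(), valuesBuilder.Build(), validityBuffer.Build(),
    nullCount: 1);

var first = (Int32Array)array.GetSlicedValues(0);  // the [1, 2] slice of the child array
var second = array.GetSlicedValues(1);             // null for the null entry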
+ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using Apache.Arrow.Types; + +namespace Apache.Arrow; + +public class LargeStringArray: LargeBinaryArray, IReadOnlyList<string>, ICollection<string> +{ + public static readonly Encoding DefaultEncoding = StringArray.DefaultEncoding; + + public LargeStringArray(ArrayData data) + : base(ArrowTypeId.LargeString, data) { } + + public LargeStringArray(int length, + ArrowBuffer valueOffsetsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(LargeStringType.Default, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + /// <summary> + /// Get the string value at the given index + /// </summary> + /// <param name="index">Input index</param> + /// <param name="encoding">Optional: the string encoding, default is UTF8</param> + /// <returns>The string object at the given index</returns> + public string GetString(int index, Encoding encoding = default) + { + encoding ??= DefaultEncoding; + + ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull); + + if (isNull) + { + return null; + } + + if (bytes.Length == 0) + { + return string.Empty; + } + + unsafe + { + fixed (byte* data = &MemoryMarshal.GetReference(bytes)) + { + return encoding.GetString(data, bytes.Length); + } + } + } + + + int IReadOnlyCollection<string>.Count => Length; + + string IReadOnlyList<string>.this[int index] => GetString(index); + + IEnumerator<string> IEnumerable<string>.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetString(index); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<string>)this).GetEnumerator(); + + int ICollection<string>.Count => Length; + bool ICollection<string>.IsReadOnly => true; + void ICollection<string>.Add(string item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection<string>.Remove(string item) => throw new NotSupportedException("Collection is read-only."); + void ICollection<string>.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection<string>.Contains(string item) + { + for (int index = 0; index < Length; index++) + { + if (GetString(index) == item) + return true; + } + + return false; + } + + void ICollection<string>.CopyTo(string[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetString(srcIndex); + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index abe02dcbb591f..68b67f3d7c620 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -162,6 +162,10 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) case ArrowTypeId.BinaryView: buffers = ImportByteArrayViewBuffers(cArray); break; + case ArrowTypeId.LargeString: + case ArrowTypeId.LargeBinary: + buffers = ImportLargeByteArrayBuffers(cArray); + break; case ArrowTypeId.List: children = ProcessListChildren(cArray, ((ListType)type).ValueDataType); buffers = ImportListBuffers(cArray); @@ -170,6 +174,10 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) children = ProcessListChildren(cArray, ((ListViewType)type).ValueDataType); buffers = ImportListViewBuffers(cArray); break; + case ArrowTypeId.LargeList: + children = ProcessListChildren(cArray, ((LargeListType)type).ValueDataType); + buffers =
ImportLargeListBuffers(cArray); + break; case ArrowTypeId.FixedSizeList: children = ProcessListChildren(cArray, ((FixedSizeListType)type).ValueDataType); buffers = ImportFixedSizeListBuffers(cArray); @@ -313,6 +321,42 @@ private ArrowBuffer[] ImportByteArrayViewBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportLargeByteArrayBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 3) + { + throw new InvalidOperationException("Large byte arrays are expected to have exactly three buffers"); + } + + const int maxLength = int.MaxValue / 8 - 1; + if (cArray->length > maxLength) + { + throw new OverflowException( + $"Cannot import large byte array. Array length {cArray->length} " + + $"is greater than the maximum supported large byte array length ({maxLength})"); + } + + int length = (int)cArray->length; + int offsetsLength = (length + 1) * 8; + long* offsets = (long*)cArray->buffers[1]; + Debug.Assert(offsets != null); + long valuesLength = offsets[length]; + + if (valuesLength > int.MaxValue) + { + throw new OverflowException( + $"Cannot import large byte array. Data length {valuesLength} " + + $"is greater than the maximum supported large byte array data length ({int.MaxValue})"); + } + + ArrowBuffer[] buffers = new ArrowBuffer[3]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); + buffers[2] = ImportCArrayBuffer(cArray, 2, (int)valuesLength); + + return buffers; + } + private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 2) @@ -348,6 +392,31 @@ private ArrowBuffer[] ImportListViewBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportLargeListBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 2) + { + throw new InvalidOperationException("Large list arrays are expected to have exactly two buffers"); + } + + const int maxLength = int.MaxValue / 8 - 1; + if (cArray->length > maxLength) + { + throw new OverflowException( + $"Cannot import large list array. 
Array length {cArray->length} " + + $"is greater than the maximum supported large list array length ({maxLength})"); + } + + int length = (int)cArray->length; + int offsetsLength = (length + 1) * 8; + + ArrowBuffer[] buffers = new ArrowBuffer[2]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); + + return buffers; + } + private ArrowBuffer[] ImportFixedSizeListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 1) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index 3bb7134af3ba9..92d48a2d70880 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -168,8 +168,10 @@ private static string GetFormat(IArrowType datatype) // Binary case BinaryType _: return "z"; case BinaryViewType _: return "vz"; + case LargeBinaryType _: return "Z"; case StringType _: return "u"; case StringViewType _: return "vu"; + case LargeStringType _: return "U"; case FixedSizeBinaryType binaryType: return $"w:{binaryType.ByteWidth}"; // Date @@ -199,6 +201,7 @@ private static string GetFormat(IArrowType datatype) // Nested case ListType _: return "+l"; case ListViewType _: return "+vl"; + case LargeListType _: return "+L"; case FixedSizeListType fixedListType: return $"+w:{fixedListType.ListSize}"; case StructType _: return "+s"; @@ -208,7 +211,7 @@ private static string GetFormat(IArrowType datatype) case DictionaryType dictionaryType: return GetFormat(dictionaryType.IndexType); default: throw new NotImplementedException($"Exporting {datatype.Name} not implemented"); - }; + } } private static long GetFlags(IArrowType datatype, bool nullable = true) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index f1acc007bcef7..94177184dea00 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -165,7 +165,7 @@ public ArrowType GetAsType() } // Special handling for nested types - if (format == "+l" || format == "+vl") + if (format == "+l" || format == "+vl" || format == "+L") { if (_cSchema->n_children != 1) { @@ -180,7 +180,13 @@ public ArrowType GetAsType() Field childField = childSchema.GetAsField(); - return format[1] == 'v' ? 
new ListViewType(childField) : new ListType(childField); + return format[1] switch + { + 'l' => new ListType(childField), + 'v' => new ListViewType(childField), + 'L' => new LargeListType(childField), + _ => throw new InvalidDataException($"Invalid format for list: '{format}'"), + }; } else if (format == "+s") { @@ -304,10 +310,10 @@ public ArrowType GetAsType() // Binary data "z" => BinaryType.Default, "vz" => BinaryViewType.Default, - //"Z" => new LargeBinaryType() // Not yet implemented + "Z" => LargeBinaryType.Default, "u" => StringType.Default, "vu" => StringViewType.Default, - //"U" => new LargeStringType(), // Not yet implemented + "U" => LargeStringType.Default, // Date and time "tdD" => Date32Type.Default, "tdm" => Date64Type.Default, diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index a37c501072f4b..7e766677f8b28 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -291,6 +291,8 @@ private ArrayData LoadField( break; case ArrowTypeId.String: case ArrowTypeId.Binary: + case ArrowTypeId.LargeString: + case ArrowTypeId.LargeBinary: case ArrowTypeId.ListView: buffers = 3; break; diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs index 5583a58487bf5..12a2a17cf04e2 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs @@ -132,7 +132,13 @@ protected ReadResult ReadMessage() Flatbuf.Message message = Flatbuf.Message.GetRootAsMessage(CreateByteBuffer(messageBuff)); - int bodyLength = checked((int)message.BodyLength); + if (message.BodyLength > int.MaxValue) + { + throw new OverflowException( + $"Arrow IPC message body length ({message.BodyLength}) is larger than " + + $"the maximum supported message size ({int.MaxValue})"); + } + int bodyLength = (int)message.BodyLength; IMemoryOwner<byte> bodyBuffOwner = _allocator.Allocate(bodyLength); Memory<byte> bodyBuff = bodyBuffOwner.Memory.Slice(0, bodyLength); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index c66569afeba85..eaa8471fa7bd3 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -57,11 +57,14 @@ private class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor<LargeListArray>, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor<LargeStringArray>, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor<LargeBinaryArray>, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -199,6 +202,28 @@ public void Visit(ListViewArray array) VisitArray(values); } + public void Visit(LargeListArray array) + { + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateBuffer(GetZeroBasedLongValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); + + int valuesOffset = 0; + int valuesLength = 0; + if (array.Length > 0) + { + valuesOffset = checked((int)array.ValueOffsets[0]); + valuesLength = checked((int)array.ValueOffsets[array.Length] - valuesOffset); + } + + var values = array.Values; + if (valuesOffset > 0 || valuesLength < values.Length) + { + values = ArrowArrayFactory.Slice(values, valuesOffset, valuesLength); + } + + VisitArray(values); + } + + public void
Visit(FixedSizeListArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); @@ -214,6 +239,8 @@ public void Visit(FixedSizeListArray array) public void Visit(StringViewArray array) => Visit(array as BinaryViewArray); + public void Visit(LargeStringArray array) => Visit(array as LargeBinaryArray); + public void Visit(BinaryArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); @@ -242,6 +269,22 @@ public void Visit(BinaryViewArray array) VariadicCounts.Add(array.DataBufferCount); } + public void Visit(LargeBinaryArray array) + { + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateBuffer(GetZeroBasedLongValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); + + int valuesOffset = 0; + int valuesLength = 0; + if (array.Length > 0) + { + valuesOffset = checked((int)array.ValueOffsets[0]); + valuesLength = checked((int)array.ValueOffsets[array.Length]) - valuesOffset; + } + + _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, valuesOffset, valuesLength)); + } + public void Visit(FixedSizeBinaryArray array) { var itemSize = ((FixedSizeBinaryType)array.Data.DataType).ByteWidth; @@ -327,6 +370,39 @@ private ArrowBuffer GetZeroBasedValueOffsets(ArrowBuffer valueOffsetsBuffer, int } } + private ArrowBuffer GetZeroBasedLongValueOffsets(ArrowBuffer valueOffsetsBuffer, int arrayOffset, int arrayLength) + { + var requiredBytes = CalculatePaddedBufferLength(checked(sizeof(long) * (arrayLength + 1))); + + if (arrayOffset != 0) + { + // Array has been sliced, so we need to shift and adjust the offsets + var originalOffsets = valueOffsetsBuffer.Span.CastTo().Slice(arrayOffset, arrayLength + 1); + var firstOffset = arrayLength > 0 ? 
originalOffsets[0] : 0L; + + var newValueOffsetsBuffer = _allocator.Allocate(requiredBytes); + var newValueOffsets = newValueOffsetsBuffer.Memory.Span.CastTo(); + + for (int i = 0; i < arrayLength + 1; ++i) + { + newValueOffsets[i] = originalOffsets[i] - firstOffset; + } + + return new ArrowBuffer(newValueOffsetsBuffer); + } + else if (valueOffsetsBuffer.Length > requiredBytes) + { + // Array may have been sliced but the offset is zero, + // so we can truncate the existing offsets + return new ArrowBuffer(valueOffsetsBuffer.Memory.Slice(0, requiredBytes)); + } + else + { + // Use the full buffer + return valueOffsetsBuffer; + } + } + private (ArrowBuffer Buffer, int minOffset, int maxEnd) GetZeroBasedListViewOffsets(ListViewArray array) { if (array.Length == 0) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 473e18968f8cb..adc229a051227 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -57,6 +57,7 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -65,9 +66,11 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -120,6 +123,14 @@ public void Visit(BinaryViewType type) Flatbuf.Type.BinaryView, offset); } + public void Visit(LargeBinaryType type) + { + Flatbuf.LargeBinary.StartLargeBinary(Builder); + Offset offset = Flatbuf.LargeBinary.EndLargeBinary(Builder); + Result = FieldType.Build( + Flatbuf.Type.LargeBinary, offset); + } + public void Visit(ListType type) { Flatbuf.List.StartList(Builder); @@ -136,6 +147,14 @@ public void Visit(ListViewType type) Flatbuf.ListView.EndListView(Builder)); } + public void Visit(LargeListType type) + { + Flatbuf.LargeList.StartLargeList(Builder); + Result = FieldType.Build( + Flatbuf.Type.LargeList, + Flatbuf.LargeList.EndLargeList(Builder)); + } + public void Visit(FixedSizeListType type) { Result = FieldType.Build( @@ -166,6 +185,14 @@ public void Visit(StringViewType type) Flatbuf.Type.Utf8View, offset); } + public void Visit(LargeStringType type) + { + Flatbuf.LargeUtf8.StartLargeUtf8(Builder); + Offset offset = Flatbuf.LargeUtf8.EndLargeUtf8(Builder); + Result = FieldType.Build( + Flatbuf.Type.LargeUtf8, offset); + } + public void Visit(TimestampType type) { StringOffset timezoneStringOffset = default; @@ -363,7 +390,7 @@ private static Flatbuf.IntervalUnit ToFlatBuffer(Types.IntervalUnit unit) Types.IntervalUnit.DayTime => Flatbuf.IntervalUnit.DAY_TIME, Types.IntervalUnit.MonthDayNanosecond => Flatbuf.IntervalUnit.MONTH_DAY_NANO, _ => throw new ArgumentException($"unsupported interval unit <{unit}>", nameof(unit)) - }; ; + }; } } } diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 0e6f330aef091..8e15632c517e1 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -186,6 +186,8 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c return Types.StringType.Default; case Flatbuf.Type.Utf8View: return Types.StringViewType.Default; + case Flatbuf.Type.LargeUtf8: + return Types.LargeStringType.Default; case 
Flatbuf.Type.FixedSizeBinary: Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata = field.Type().Value; return new Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth); @@ -193,6 +195,8 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c return Types.BinaryType.Default; case Flatbuf.Type.BinaryView: return Types.BinaryViewType.Default; + case Flatbuf.Type.LargeBinary: + return Types.LargeBinaryType.Default; case Flatbuf.Type.List: if (childFields == null || childFields.Length != 1) { @@ -205,6 +209,12 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c throw new InvalidDataException($"List view type must have exactly one child."); } return new Types.ListViewType(childFields[0]); + case Flatbuf.Type.LargeList: + if (childFields == null || childFields.Length != 1) + { + throw new InvalidDataException($"Large list type must have exactly one child."); + } + return new Types.LargeListType(childFields[0]); case Flatbuf.Type.FixedSizeList: if (childFields == null || childFields.Length != 1) { diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs b/csharp/src/Apache.Arrow/Types/IArrowType.cs index cf520391fe1e6..7a3159a1bbccd 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs @@ -53,6 +53,9 @@ public enum ArrowTypeId BinaryView, StringView, ListView, + LargeList, + LargeBinary, + LargeString, } public interface IArrowType diff --git a/csharp/src/Apache.Arrow/Types/LargeBinaryType.cs b/csharp/src/Apache.Arrow/Types/LargeBinaryType.cs new file mode 100644 index 0000000000000..e22c333824480 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/LargeBinaryType.cs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types; + +public class LargeBinaryType: ArrowType +{ + public static readonly LargeBinaryType Default = new LargeBinaryType(); + + public override ArrowTypeId TypeId => ArrowTypeId.LargeBinary; + + public override string Name => "large_binary"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); +} diff --git a/csharp/src/Apache.Arrow/Types/LargeListType.cs b/csharp/src/Apache.Arrow/Types/LargeListType.cs new file mode 100644 index 0000000000000..2fe8166972931 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/LargeListType.cs @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types +{ + public sealed class LargeListType : NestedType + { + public override ArrowTypeId TypeId => ArrowTypeId.LargeList; + + public override string Name => "large_list"; + + public Field ValueField => Fields[0]; + + public IArrowType ValueDataType => Fields[0].DataType; + + public LargeListType(Field valueField) + : base(valueField) { } + + public LargeListType(IArrowType valueDataType) + : this(new Field("item", valueDataType, true)) { } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/LargeStringType.cs b/csharp/src/Apache.Arrow/Types/LargeStringType.cs new file mode 100644 index 0000000000000..8698ca4747a0e --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/LargeStringType.cs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
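Schema sketch (editorial aside, not part of the patch): the new type classes above slot into the existing Field and Schema APIs unchanged, and the C Data Interface changes earlier in this diff map them to the format strings "Z" (large binary), "U" (large utf8), and "+L" (large list). The Schema.Builder fluent calls and field names below are assumed from the existing Apache.Arrow API rather than taken from this patch.

using Apache.Arrow;
using Apache.Arrow.Types;

// Declare a schema that uses all three new large types (hypothetical field names).
var schema = new Schema.Builder()
    .Field(new Field("payload", LargeBinaryType.Default, true))
    .Field(new Field("description", LargeStringType.Default, true))
    .Field(new Field("points", new LargeListType(Int64Type.Default), false))
    .Build();
// The first field now reports ArrowTypeId.LargeBinary and would be exported
// over the C Data Interface with format "Z".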
+ +namespace Apache.Arrow.Types; + +public sealed class LargeStringType : ArrowType +{ + public static readonly LargeStringType Default = new LargeStringType(); + + public override ArrowTypeId TypeId => ArrowTypeId.LargeString; + + public override string Name => "large_utf8"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); +} diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 65ca360c97814..047cdb94b963e 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,8 +8,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 21ffe37cfa1af..dc95f9edf9f7f 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 3b00525a1ae18..e68a97670cc7e 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj index 7f226fd08818f..21f06e3008774 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj +++ b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj @@ -9,7 +9,7 @@ - + diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 7232f74b8bec6..c9e44b8d2f491 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -177,8 +177,10 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "decimal" => ToDecimalArrowType(type), "binary" => BinaryType.Default, "binaryview" => BinaryViewType.Default, + "largebinary" => LargeBinaryType.Default, "utf8" => StringType.Default, "utf8view" => StringViewType.Default, + "largeutf8" => LargeStringType.Default, "fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth), "date" => ToDateArrowType(type), "time" => ToTimeArrowType(type), @@ -188,6 +190,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "timestamp" => ToTimestampArrowType(type), "list" => ToListArrowType(type, children), "listview" => ToListViewArrowType(type, children), + "largelist" => ToLargeListArrowType(type, children), "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), @@ -303,6 +306,11 @@ private static IArrowType ToListViewArrowType(JsonArrowType type, Field[] childr return new ListViewType(children[0]); } + private static IArrowType ToLargeListArrowType(JsonArrowType type, Field[] children) + { + return new LargeListType(children[0]); + } + private static IArrowType ToFixedSizeListArrowType(JsonArrowType type, Field[] children) { return 
new FixedSizeListType(children[0], type.ListSize); @@ -461,11 +469,14 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor<LargeStringType>, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor<LargeBinaryType>, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor<LargeListType>, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -696,6 +707,24 @@ public void Visit(StringViewType type) Array = new StringViewArray(arrayData); } + public void Visit(LargeStringType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetLargeOffsetBuffer(); + + var json = JsonFieldData.Data.GetRawText(); + string[] values = JsonSerializer.Deserialize<string[]>(json, s_options); + + ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>(); + foreach (string value in values) + { + valueBuilder.Append(Encoding.UTF8.GetBytes(value)); + } + ArrowBuffer valueBuffer = valueBuilder.Build(default); + + Array = new LargeStringArray(JsonFieldData.Count, offsetBuffer, valueBuffer, validityBuffer, nullCount); + } + public void Visit(BinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -747,6 +776,25 @@ public void Visit(BinaryViewType type) Array = new BinaryViewArray(arrayData); } + public void Visit(LargeBinaryType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetLargeOffsetBuffer(); + + var json = JsonFieldData.Data.GetRawText(); + string[] values = JsonSerializer.Deserialize<string[]>(json, s_options); + + ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>(); + foreach (string value in values) + { + valueBuilder.Append(ConvertHexStringToByteArray(value)); + } + ArrowBuffer valueBuffer = valueBuilder.Build(default); + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, new[] { validityBuffer, offsetBuffer, valueBuffer }); + Array = new LargeBinaryArray(arrayData); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -796,6 +844,21 @@ public void Visit(ListViewType type) Array = new ListViewArray(arrayData); } + public void Visit(LargeListType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetLargeOffsetBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.ValueDataType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer }, new[] { Array.Data }); + Array = new LargeListArray(arrayData); + } + public void Visit(FixedSizeListType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -975,6 +1038,13 @@ private ArrowBuffer GetOffsetBuffer() return valueOffsets.Build(default); } + private ArrowBuffer GetLargeOffsetBuffer() + { + ArrowBuffer.Builder<long> valueOffsets = new ArrowBuffer.Builder<long>(JsonFieldData.Offset.Count); + valueOffsets.AppendRange(JsonFieldData.LongOffset); + return valueOffsets.Build(default); + } + private ArrowBuffer GetSizeBuffer() { ArrowBuffer.Builder<int> valueSizes = new ArrowBuffer.Builder<int>(JsonFieldData.Size.Count); @@ -1039,6 +1109,12 @@ public IEnumerable<int> IntOffset get { return Offset.Select(GetInt); } } + [JsonIgnore] + public IEnumerable<long> LongOffset + { + get { return Offset.Select(GetLong); } + } + [JsonIgnore] public IEnumerable<int> IntSize { @@ -1056,6 +1132,18 @@ static int GetInt(JsonNode node) return
int.Parse(node.GetValue<string>()); } } + + static long GetLong(JsonNode node) + { + try + { + return node.GetValue<long>(); + } + catch + { + return long.Parse(node.GetValue<string>()); + } + } } public class JsonView diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 71d7970f9ad7d..f05338313063c 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -17,8 +17,8 @@ - - + + all runtime; build; native; contentfiles; analyzers diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 5c33d1fd43986..85f7b75f931ef 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -95,12 +95,15 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor<LargeListArray>, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor<LargeStringArray>, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor<LargeBinaryArray>, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -144,14 +147,17 @@ public ArrayComparer(IArrowArray expectedArray, bool strictCompare) public void Visit(MonthDayNanosecondIntervalArray array) => CompareArrays(array); public void Visit(ListArray array) => CompareArrays(array); public void Visit(ListViewArray array) => CompareArrays(array); + public void Visit(LargeListArray array) => CompareArrays(array); public void Visit(FixedSizeListArray array) => CompareArrays(array); public void Visit(FixedSizeBinaryArray array) => CompareArrays(array); public void Visit(Decimal128Array array) => CompareArrays(array); public void Visit(Decimal256Array array) => CompareArrays(array); public void Visit(StringArray array) => CompareBinaryArrays<StringArray>(array); public void Visit(StringViewArray array) => CompareVariadicArrays<StringViewArray>(array); + public void Visit(LargeStringArray array) => CompareLargeBinaryArrays<LargeStringArray>(array); public void Visit(BinaryArray array) => CompareBinaryArrays<BinaryArray>(array); public void Visit(BinaryViewArray array) => CompareVariadicArrays<BinaryViewArray>(array); + public void Visit(LargeBinaryArray array) => CompareLargeBinaryArrays<LargeBinaryArray>(array); public void Visit(StructArray array) { @@ -276,6 +282,40 @@ private void CompareBinaryArrays<T>(BinaryArray actualArray) } } + private void CompareLargeBinaryArrays<T>(LargeBinaryArray actualArray) + where T : IArrowArray + { + Assert.IsAssignableFrom<T>(_expectedArray); + Assert.IsAssignableFrom<T>(actualArray); + + var expectedArray = (LargeBinaryArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + + CompareValidityBuffer( + expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, + expectedArray.Offset, actualArray.NullBitmapBuffer, actualArray.Offset); + + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + Assert.True(expectedArray.ValueBuffer.Span.Slice(0, expectedArray.Length).SequenceEqual(actualArray.ValueBuffer.Span.Slice(0, actualArray.Length))); + } + else + { + for (int i = 0; i < expectedArray.Length; i++) + { + Assert.True( + expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)), + $"LargeBinaryArray values do not match
at index {i}."); + } + } + } + private void CompareVariadicArrays(BinaryViewArray actualArray) where T : IArrowArray { @@ -469,6 +509,44 @@ private void CompareArrays(ListViewArray actualArray) } } + private void CompareArrays(LargeListArray actualArray) + { + Assert.IsAssignableFrom(_expectedArray); + LargeListArray expectedArray = (LargeListArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + + CompareValidityBuffer( + expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, + expectedArray.Offset, actualArray.NullBitmapBuffer, actualArray.Offset); + + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); + } + else + { + for (int i = 0; i < actualArray.Length; ++i) + { + if (expectedArray.IsNull(i)) + { + Assert.True(actualArray.IsNull(i)); + } + else + { + var expectedList = expectedArray.GetSlicedValues(i); + var actualList = actualArray.GetSlicedValues(i); + actualList.Accept(new ArrayComparer(expectedList, _strictCompare)); + } + } + } + } + private void CompareArrays(FixedSizeListArray actualArray) { Assert.IsAssignableFrom(_expectedArray); diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 274434e4bab09..fee18d165cdbd 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -31,24 +31,19 @@ namespace Apache.Arrow.Tests { public class CDataSchemaPythonTest : IClassFixture { - class PythonNet : IDisposable + public class PythonNet : IDisposable { + public bool Initialized { get; } + public PythonNet() { - bool inCIJob = Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; - bool inVerificationJob = Environment.GetEnvironmentVariable("TEST_CSHARP") == "1"; bool pythonSet = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL") != null; - // We only skip if this is not in CI - if (inCIJob && !inVerificationJob && !pythonSet) - { - throw new Exception("PYTHONNET_PYDLL not set; skipping C Data Interface tests."); - } - else + if (!pythonSet) { - Skip.If(!pythonSet, "PYTHONNET_PYDLL not set; skipping C Data Interface tests."); + Initialized = false; + return; } - PythonEngine.Initialize(); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && @@ -57,6 +52,8 @@ public PythonNet() dynamic sys = Py.Import("sys"); sys.path.append(Path.Combine(Path.GetDirectoryName(Environment.GetEnvironmentVariable("PYTHONNET_PYDLL")), "DLLs")); } + + Initialized = true; } public void Dispose() @@ -65,6 +62,21 @@ public void Dispose() } } + public CDataSchemaPythonTest(PythonNet pythonNet) + { + if (!pythonNet.Initialized) + { + bool inCIJob = Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; + bool inVerificationJob = Environment.GetEnvironmentVariable("TEST_CSHARP") == "1"; + + // Skip these tests if this is not in CI or is a verification job and PythonNet couldn't be initialized + Skip.If(inVerificationJob || !inCIJob, "PYTHONNET_PYDLL not set; skipping C Data Interface tests."); + + // Otherwise throw + throw new Exception("PYTHONNET_PYDLL not set; cannot run C Data Interface tests."); + } + } + private static Schema 
GetTestSchema() { using (Py.GIL()) diff --git a/csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs b/csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs new file mode 100644 index 0000000000000..4ee1f1d0e0ffa --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class LargeBinaryArrayTests +{ + [Fact] + public void GetBytesReturnsCorrectValue() + { + var byteArrays = new byte[][] + { + new byte[] {0, 1, 2, 255}, + new byte[] {3, 4, 5}, + new byte[] {}, + null, + new byte[] {254, 253, 252}, + }; + var array = BuildArray(byteArrays); + + Assert.Equal(array.Length, byteArrays.Length); + for (var i = 0; i < byteArrays.Length; ++i) + { + var byteSpan = array.GetBytes(i, out var isNull); + var byteArray = isNull ? null : byteSpan.ToArray(); + Assert.Equal(byteArrays[i], byteArray); + } + } + + [Fact] + public void GetBytesChecksForOffsetOverflow() + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + offsetBuffer.Append((long)int.MaxValue + 1); + validityBuffer.Append(true); + + var array = new LargeBinaryArray( + LargeBinaryType.Default, length: 1, + offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + + Assert.Throws(() => array.GetBytes(0)); + } + + private static LargeBinaryArray BuildArray(IReadOnlyCollection byteArrays) + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + foreach (var bytes in byteArrays) + { + if (bytes == null) + { + validityBuffer.Append(false); + offsetBuffer.Append(offset); + } + else + { + valueBuffer.Append(bytes); + offset += bytes.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + } + } + + return new LargeBinaryArray( + LargeBinaryType.Default, byteArrays.Count, + offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs b/csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs new file mode 100644 index 0000000000000..1d35a8ffd62c5 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Linq; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class LargeListArrayTests +{ + [Fact] + public void GetSlicedValuesReturnsCorrectValues() + { + var values = new int?[][] + { + new int?[] {0, 1, 2}, + System.Array.Empty(), + null, + new int?[] {3, 4, null, 6}, + }; + + var array = BuildArray(values); + + Assert.Equal(values.Length, array.Length); + for (int i = 0; i < values.Length; ++i) + { + Assert.Equal(values[i] == null, array.IsNull(i)); + var arrayItem = (Int32Array) array.GetSlicedValues(i); + if (values[i] == null) + { + Assert.Null(arrayItem); + } + else + { + Assert.Equal(values[i], arrayItem.ToArray()); + } + } + } + + [Fact] + public void GetSlicedValuesChecksForOffsetOverflow() + { + var valuesArray = new Int32Array.Builder().Build(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + offsetBuffer.Append((long)int.MaxValue + 1); + validityBuffer.Append(true); + + var array = new LargeListArray( + new LargeListType(new Int32Type()), length: 1, + offsetBuffer.Build(), valuesArray, validityBuffer.Build(), + validityBuffer.UnsetBitCount); + + Assert.Throws(() => array.GetSlicedValues(0)); + } + + private static LargeListArray BuildArray(int?[][] values) + { + var valuesBuilder = new Int32Array.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + foreach (var listValue in values) + { + if (listValue == null) + { + validityBuffer.Append(false); + offsetBuffer.Append(offset); + } + else + { + foreach (var value in listValue) + { + valuesBuilder.Append(value); + } + offset += listValue.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + } + } + + return new LargeListArray( + new LargeListType(new Int32Type()), values.Length, + offsetBuffer.Build(), valuesBuilder.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs b/csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs new file mode 100644 index 0000000000000..aba97ba338c75 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class LargeStringArrayTests +{ + [Fact] + public void GetStringReturnsCorrectValue() + { + var strings = new string[] + { + "abc", + "defg", + "", + null, + "123", + }; + var array = BuildArray(strings); + + Assert.Equal(array.Length, strings.Length); + for (var i = 0; i < strings.Length; ++i) + { + Assert.Equal(strings[i], array.GetString(i)); + } + } + + [Fact] + public void GetStringChecksForOffsetOverflow() + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + offsetBuffer.Append((long)int.MaxValue + 1); + validityBuffer.Append(true); + + var array = new LargeStringArray( + length: 1, offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + + Assert.Throws(() => array.GetString(0)); + } + + private static LargeStringArray BuildArray(IReadOnlyCollection strings) + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + foreach (var value in strings) + { + if (value == null) + { + validityBuffer.Append(false); + offsetBuffer.Append(offset); + } + else + { + var bytes = LargeStringArray.DefaultEncoding.GetBytes(value); + valueBuffer.Append(bytes); + offset += value.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + } + } + + return new LargeStringArray( + strings.Count, offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs index 83c88265d172b..35fbe7cba68f1 100644 --- a/csharp/test/Apache.Arrow.Tests/TableTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs @@ -63,9 +63,9 @@ public void TestTableFromRecordBatches() Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, recordBatches); Assert.Equal(20, table1.RowCount); #if NET5_0_OR_GREATER - Assert.Equal(35, table1.ColumnCount); + Assert.Equal(38, table1.ColumnCount); #else - Assert.Equal(34, table1.ColumnCount); + Assert.Equal(37, table1.ColumnCount); #endif Assert.Equal("ChunkedArray: Length=20, DataType=list", table1.Column(0).Data.ToString()); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 3ea42ee0fbcb7..36969766aeae0 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -49,6 +49,7 @@ void AddField(Field field) { AddField(CreateField(new ListType(Int64Type.Default), i)); AddField(CreateField(new ListViewType(Int64Type.Default), i)); + AddField(CreateField(new LargeListType(Int64Type.Default), i)); AddField(CreateField(BooleanType.Default, i)); AddField(CreateField(UInt8Type.Default, i)); AddField(CreateField(Int8Type.Default, i)); @@ -84,6 +85,8 @@ void AddField(Field field) 
AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Sparse), i)); AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Dense), -i)); AddField(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i)); + AddField(CreateField(new LargeBinaryType(), i)); + AddField(CreateField(new LargeStringType(), i)); } Schema schema = builder.Build(); @@ -144,8 +147,10 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -154,6 +159,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -335,6 +341,45 @@ public void Visit(StringViewType type) Array = builder.Build(); } + public void Visit(LargeStringType type) + { + var str = "hello"; + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + offsetBuffer.Append(offset); + validityBuffer.Append(false); + break; + case 1: + valueBuffer.Append(LargeStringArray.DefaultEncoding.GetBytes(str)); + offset += str.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + break; + case 2: + valueBuffer.Append(LargeStringArray.DefaultEncoding.GetBytes(str + str)); + offset += str.Length * 2; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + break; + } + } + + var validity = validityBuffer.UnsetBitCount > 0 ? validityBuffer.Build() : ArrowBuffer.Empty; + Array = new LargeStringArray( + Length, offsetBuffer.Build(), valueBuffer.Build(), validity, + validityBuffer.UnsetBitCount); + } + public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); @@ -379,6 +424,37 @@ public void Visit(ListViewType type) Array = builder.Build(); } + public void Visit(LargeListType type) + { + var valueBuilder = new Int64Array.Builder().Reserve(Length * 3 / 2); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + + for (var i = 0; i < Length; i++) + { + if (i % 10 == 2) + { + offsetBuffer.Append(valueBuilder.Length); + validityBuffer.Append(false); + } + else + { + var listLength = i % 4; + valueBuilder.AppendRange(Enumerable.Range(i, listLength).Select(x => (long)x)); + offsetBuffer.Append(valueBuilder.Length); + validityBuffer.Append(true); + } + } + + var validity = validityBuffer.UnsetBitCount > 0 ? 
validityBuffer.Build() : ArrowBuffer.Empty; + Array = new LargeListArray( + new LargeListType(new Int64Type()), Length, + offsetBuffer.Build(), valueBuilder.Build(), validity, + validityBuffer.UnsetBitCount); + } + public void Visit(FixedSizeListType type) { var builder = new FixedSizeListArray.Builder(type.ValueField, type.ListSize).Reserve(Length); @@ -554,6 +630,48 @@ public void Visit(BinaryViewType type) Array = builder.Build(); } + public void Visit(LargeBinaryType type) + { + ReadOnlySpan shortData = new[] { (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9 }; + ReadOnlySpan longData = new[] + { + (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9, + (byte)10, (byte)11, (byte)12, (byte)13, (byte)14, (byte)15, (byte)16, (byte)17, (byte)18, (byte)19 + }; + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0L); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + offsetBuffer.Append(valueBuffer.Length); + validityBuffer.Append(false); + break; + case 1: + valueBuffer.Append(shortData); + offsetBuffer.Append(valueBuffer.Length); + validityBuffer.Append(true); + break; + case 2: + valueBuffer.Append(longData); + offsetBuffer.Append(valueBuffer.Length); + validityBuffer.Append(true); + break; + } + } + + var validity = validityBuffer.UnsetBitCount > 0 ? validityBuffer.Build() : ArrowBuffer.Empty; + Array = new LargeBinaryArray( + LargeBinaryType.Default, Length, + offsetBuffer.Build(), valueBuffer.Build(), validity, + validityBuffer.UnsetBitCount); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer.Builder valueBuilder = new ArrowBuffer.Builder(); diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index a91989fb95257..9ebb9226e3743 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -123,6 +123,8 @@ def default_configuration(**kwargs): with_csv=True, with_dataset=True, with_json=True, + with_jemalloc=True, + with_mimalloc=True, with_parquet=True, with_python=False, with_brotli=True, diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 0b5d242bbaccf..4e6b42e485c0c 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -746,12 +746,19 @@ def get_version(root, **kwargs): subprojects, e.g. apache-arrow-js-XXX tags. 
""" from setuptools_scm.git import parse as parse_git_version + from setuptools_scm import Configuration # query the calculated version based on the git tags kwargs['describe_command'] = ( 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' ) - version = parse_git_version(root, **kwargs) + + # Create a Configuration object with necessary parameters + config = Configuration( + git_describe_command=kwargs['describe_command'] + ) + + version = parse_git_version(root, config=config, **kwargs) tag = str(version.tag) # We may get a development tag for the next version, such as "5.0.0.dev0", diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index cb831060022a4..5be4887ea4f63 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -340,18 +340,9 @@ def run(self, service_name, command=None, *, env=None, volumes=None, service = self.config.get(service_name) args = [] - if user is not None: - args.extend(['-u', user]) - - if env is not None: - for k, v in env.items(): - args.extend(['-e', '{}={}'.format(k, v)]) - if volumes is not None: - for volume in volumes: - args.extend(['--volume', volume]) - - if self.config.using_docker or service['need_gpu'] or resource_limit: + use_docker = self.config.using_docker or service['need_gpu'] or resource_limit + if use_docker: # use gpus, requires docker>=19.03 if service['need_gpu']: args.extend(['--gpus', 'all']) @@ -392,6 +383,18 @@ def run(self, service_name, command=None, *, env=None, volumes=None, args.append(f'--memory={memory}') args.append(f'--memory-swap={memory}') + if user is not None: + args.extend(['-u', user]) + + if env is not None: + for k, v in env.items(): + args.extend(['-e', '{}={}'.format(k, v)]) + + if volumes is not None: + for volume in volumes: + args.extend(['--volume', volume]) + + if use_docker: # get the actual docker image name instead of the compose service # name which we refer as image in general args.append(service['image']) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index b51f3d876f820..47310c905a9ff 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1872,8 +1872,7 @@ def _temp_path(): generate_primitive_case([17, 20], name='primitive'), generate_primitive_case([0, 0, 0], name='primitive_zerolength'), - generate_primitive_large_offsets_case([17, 20]) - .skip_tester('C#'), + generate_primitive_large_offsets_case([17, 20]), generate_null_case([10, 0]), @@ -1906,7 +1905,6 @@ def _temp_path(): generate_recursive_nested_case(), generate_nested_large_offsets_case() - .skip_tester('C#') .skip_tester('JS'), generate_unions_case(), diff --git a/dev/archery/setup.py b/dev/archery/setup.py index cd3e2e9ca0834..f87316dcc7ab9 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -34,7 +34,7 @@ extras = { 'benchmark': ['pandas'], 'crossbow': ['github3.py', jinja_req, 'pygit2>=1.14.0', 'requests', - 'ruamel.yaml', 'setuptools_scm<8.0.0'], + 'ruamel.yaml', 'setuptools_scm>=8.0.0'], 'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml', 'setuptools_scm'], 'docker': ['ruamel.yaml', 'python-dotenv'], diff --git a/dev/conbench_envs/README.md b/dev/conbench_envs/README.md index 509dc5c0c9537..7fab503974805 100644 --- a/dev/conbench_envs/README.md +++ b/dev/conbench_envs/README.md @@ -99,16 +99,16 @@ Here are steps how `@ursabot` benchmark builds use `benchmarks.env` and `hooks.s ### 2. 
Install Arrow dependencies for Java sudo su - apt-get install openjdk-8-jdk + apt-get install openjdk-11-jdk apt-get install maven Verify that you have at least these versions of `java`, `javac` and `maven`: # java -version - openjdk version "1.8.0_292" + openjdk version "11.0.22" 2024-01-16 .. # javac -version - javac 1.8.0_292 + javac 11.0.22 ... # mvn -version Apache Maven 3.6.3 diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 344d943fd87e1..6694d2373b8f1 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -44,9 +44,6 @@ import requests import getpass -from six.moves import input -import six - try: import jira.client import jira.exceptions @@ -99,7 +96,7 @@ def get_json(url, headers=None): def run_cmd(cmd): - if isinstance(cmd, six.string_types): + if isinstance(cmd, str): cmd = cmd.split(' ') try: @@ -113,7 +110,7 @@ def run_cmd(cmd): print('--------------') raise e - if isinstance(output, six.binary_type): + if isinstance(output, bytes): output = output.decode('utf-8') return output diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index fbd0b2996077c..fec99ef058c5b 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -323,6 +323,17 @@ def test_version_pre_tag "+#{new_line}", ] end + tag = "main" + target_lines = lines.grep(/#{Regexp.escape(tag)}/) + target_lines.each do |line| + new_line = line.gsub("main") do + "apache-arrow-#{@release_version}" + end + hunks << [ + "-#{line}", + "+#{new_line}", + ] + end expected_changes << {hunks: hunks, path: path} end diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index eab95c798f284..3dec19326f92b 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -22,7 +22,7 @@ class SourceTest < Test::Unit::TestCase def setup @current_commit = git_current_commit detect_versions - @tag_name = "apache-arrow-#{@release_version}-rc0" + @tag_name_no_rc = "apache-arrow-#{@release_version}" @archive_name = "apache-arrow-#{@release_version}.tar.gz" @script = File.expand_path("dev/release/02-source.sh") @tarball_script = File.expand_path("dev/release/utils-create-release-tarball.sh") @@ -50,7 +50,7 @@ def source(*targets) def test_symbolic_links source - Dir.chdir(@tag_name) do + Dir.chdir(@tag_name_no_rc) do assert_equal([], Find.find(".").find_all {|path| File.symlink?(path)}) end @@ -58,7 +58,7 @@ def test_symbolic_links def test_csharp_git_commit_information source - Dir.chdir("#{@tag_name}/csharp") do + Dir.chdir("#{@tag_name_no_rc}/csharp") do FileUtils.mv("dummy.git", "../.git") sh("dotnet", "pack", "-c", "Release") FileUtils.mv("../.git", "dummy.git") @@ -83,7 +83,7 @@ def test_csharp_git_commit_information def test_python_version source - Dir.chdir("#{@tag_name}/python") do + Dir.chdir("#{@tag_name_no_rc}/python") do sh("python3", "setup.py", "sdist") if on_release_branch? pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index c2386a1f52f21..b06b1c46b8504 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -16,6 +16,7 @@ # under the License. 
require "cgi/util" +require "digest/sha1" require "digest/sha2" require "io/console" require "json" @@ -1894,7 +1895,7 @@ def define_docs_tasks :docs, "#{rc_dir}/docs/#{full_version}", "#{release_dir}/docs/#{full_version}", - "test-ubuntu-22.04-docs/**/*") + "test-debian-12-docs/**/*") end def define_nuget_tasks diff --git a/dev/release/post-01-tag.sh b/dev/release/post-01-tag.sh index df0f6756c0e7c..2fa48f1230219 100755 --- a/dev/release/post-01-tag.sh +++ b/dev/release/post-01-tag.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -e +set -eu set -o pipefail if [ "$#" -ne 2 ]; then @@ -25,8 +25,11 @@ if [ "$#" -ne 2 ]; then exit fi +version=$1 +rc=$2 + # Create the release tag and trigger the Publish Release workflow. -release_candidate_tag=apache-arrow-${version}-rc${num} release_tag=apache-arrow-${version} -git tag -a ${release_tag} ${release_candidate_tag}^{} -m "[Release] Apache Arrow Release ${version}" +release_candidate_tag=${release_tag}-rc${rc} +git tag -a ${release_tag} ${release_candidate_tag} -m "[Release] Apache Arrow Release ${version}" git push apache ${release_tag} diff --git a/dev/release/post-12-bump-versions-test.rb b/dev/release/post-12-bump-versions-test.rb index 2bd14587461cc..f31e1a3122814 100644 --- a/dev/release/post-12-bump-versions-test.rb +++ b/dev/release/post-12-bump-versions-test.rb @@ -358,8 +358,15 @@ def test_version_post_tag def test_deb_package_names omit_on_release_branch unless bump_type.nil? current_commit = git_current_commit - stdout = bump_versions("DEB_PACKAGE_NAMES") - changes = parse_patch(git("log", "-p", "#{current_commit}..")) + stdout = bump_versions("VERSION_POST_TAG", "DEB_PACKAGE_NAMES") + log = git("log", "-p", "#{current_commit}..") + # Remove a commit for VERSION_POST_TAG + if log.scan(/^commit/).size == 1 + log = "" + else + log.gsub!(/\A(commit.*?)^commit .*\z/um, "\\1") + end + changes = parse_patch(log) sampled_changes = changes.collect do |change| first_hunk = change[:hunks][0] first_removed_line = first_hunk.find { |line| line.start_with?("-") } diff --git a/dev/release/post-12-bump-versions.sh b/dev/release/post-12-bump-versions.sh index 422821a66bde5..bf40f4ce5c4ea 100755 --- a/dev/release/post-12-bump-versions.sh +++ b/dev/release/post-12-bump-versions.sh @@ -40,6 +40,7 @@ fi version=$1 next_version=$2 next_version_snapshot="${next_version}-SNAPSHOT" +current_version_before_bump="$(current_version)" case "${version}" in *.0.0) @@ -64,7 +65,7 @@ if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then fi if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ] && \ - [ "${next_version}" != "$(current_version)" ]; then + [ "${next_version}" != "${current_version_before_bump}" ]; then update_deb_package_names "${version}" "${next_version}" fi diff --git a/dev/release/setup-rhel-rebuilds.sh b/dev/release/setup-rhel-rebuilds.sh index dc190d2d2426e..e8861a19f35b7 100755 --- a/dev/release/setup-rhel-rebuilds.sh +++ b/dev/release/setup-rhel-rebuilds.sh @@ -35,7 +35,7 @@ dnf -y install \ cmake \ git \ gobject-introspection-devel \ - java-1.8.0-openjdk-devel \ + java-11-openjdk-devel \ libcurl-devel \ llvm-devel \ llvm-toolset \ @@ -55,3 +55,5 @@ npm install -g yarn python3 -m ensurepip --upgrade alternatives --set python /usr/bin/python3 +alternatives --set java java-11-openjdk.$(uname -i) +alternatives --set javac java-11-openjdk.$(uname -i) diff --git a/dev/release/utils-create-release-tarball.sh b/dev/release/utils-create-release-tarball.sh index 1a0ba83639b9a..0ca57ebe78c01 100755 --- 
a/dev/release/utils-create-release-tarball.sh +++ b/dev/release/utils-create-release-tarball.sh @@ -30,26 +30,27 @@ version=$1 rc=$2 tag=apache-arrow-${version}-rc${rc} +root_folder=apache-arrow-${version} tarball=apache-arrow-${version}.tar.gz : ${release_hash:=$(git rev-list --max-count=1 ${tag})} -rm -rf ${tag} +rm -rf ${root_folder} # be conservative and use the release hash, even though git produces the same # archive (identical hashes) using the scm tag (cd "${SOURCE_TOP_DIR}" && \ - git archive ${release_hash} --prefix ${tag}/) | \ + git archive ${release_hash} --prefix ${root_folder}/) | \ tar xf - # Resolve symbolic and hard links -rm -rf ${tag}.tmp -mv ${tag} ${tag}.tmp -cp -R -L ${tag}.tmp ${tag} -rm -rf ${tag}.tmp +rm -rf ${root_folder}.tmp +mv ${root_folder} ${root_folder}.tmp +cp -R -L ${root_folder}.tmp ${root_folder} +rm -rf ${root_folder}.tmp # Create a dummy .git/ directory to download the source files from GitHub with Source Link in C#. -dummy_git=${tag}/csharp/dummy.git +dummy_git=${root_folder}/csharp/dummy.git mkdir ${dummy_git} pushd ${dummy_git} echo ${release_hash} > HEAD @@ -58,5 +59,5 @@ mkdir objects refs popd # Create new tarball from modified source directory -tar czf ${tarball} ${tag} -rm -rf ${tag} +tar czf ${tarball} ${root_folder} +rm -rf ${root_folder} diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index c255e728a335b..6ba8b22a06e89 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -83,8 +83,12 @@ update_versions() { popd pushd "${ARROW_DIR}/java" - mvn versions:set -DnewVersion=${version} -DprocessAllModules - find . -type f -name pom.xml.versionsBackup -delete + mvn versions:set -DnewVersion=${version} -DprocessAllModules -DgenerateBackupPoms=false + if [ "${type}" = "release" ]; then + # versions-maven-plugin:set-scm-tag does not update the whole reactor. Invoking separately + mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-java-root + mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-bom + fi git add "pom.xml" git add "**/pom.xml" popd diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index fcaaa423a4c75..6a36109dc2fc1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -21,7 +21,7 @@ # Requirements # - Ruby >= 2.3 # - Maven >= 3.8.7 -# - JDK >=8 +# - JDK >= 11 # - gcc >= 4.8 # - Node.js >= 18 # - Go >= 1.21 @@ -1153,7 +1153,7 @@ test_linux_wheels() { local pyver=${python/m} for platform in ${platform_tags}; do show_header "Testing Python ${pyver} wheel for platform ${platform}" - CONDA_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda + CONDA_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda tzdata if ! 
VENV_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_virtualenv; then continue fi diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index aad82af21fc96..31ed9c160b48e 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -201,6 +201,11 @@ echo "::group::Test Apache Arrow C++" ${install_command} ${enablerepo_epel} arrow-devel-${package_version} if [ -n "${devtoolset}" ]; then ${install_command} ${scl_package} + sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo fi ${install_command} \ ${cmake_package} \ diff --git a/dev/requirements_merge_arrow_pr.txt b/dev/requirements_merge_arrow_pr.txt index 7ac17dc1b1933..99909e434a580 100644 --- a/dev/requirements_merge_arrow_pr.txt +++ b/dev/requirements_merge_arrow_pr.txt @@ -1,3 +1,2 @@ jira requests -six diff --git a/dev/tasks/docker-tests/github.cuda.yml b/dev/tasks/docker-tests/github.cuda.yml index b1b21c3e66319..30879042924c4 100644 --- a/dev/tasks/docker-tests/github.cuda.yml +++ b/dev/tasks/docker-tests/github.cuda.yml @@ -23,8 +23,9 @@ jobs: test: name: | Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} - runs-on: ['self-hosted', 'cuda'] + runs-on: ['self-hosted', 'cuda'] {{ macros.github_set_env(env) }} + timeout-minutes: {{ timeout|default(60) }} steps: {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} # python 3.8 is installed on the runner, no need to install diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index 13e00abc70a84..697960360cfdc 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -25,6 +25,7 @@ jobs: Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} runs-on: ubuntu-latest {{ macros.github_set_env(env) }} + timeout-minutes: {{ timeout|default(60) }} steps: {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} {{ macros.github_free_space()|indent }} diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index d8f5ca872dbec..6bcae64adb92f 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-17.0.0-SNAPSHOT/apache-arrow-17.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-18.0.0-SNAPSHOT/apache-arrow-18.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index e17d524bd9d71..955dfa0ea9fa4 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-17.0.0-SNAPSHOT/apache-arrow-17.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-18.0.0-SNAPSHOT/apache-arrow-18.0.0-SNAPSHOT.tar.gz" sha256 
"9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 04aa586dc3c96..279c3cc14a4c8 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (17.0.0-1) unstable; urgency=low + + * New upstream release. + + -- RaĂºl Cumplido Thu, 11 Jul 2024 08:57:21 -0000 + apache-arrow-apt-source (16.1.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index f0eb785dd6bc7..fd8165d748d78 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Thu Jul 11 2024 RaĂºl Cumplido - 17.0.0-1 +- New upstream release. + * Thu May 09 2024 RaĂºl Cumplido - 16.1.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile index 3126c6d3cded0..8a6accbfc8b16 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile @@ -70,7 +70,6 @@ RUN \ llvm-dev \ lsb-release \ meson \ - mold \ ninja-build \ nlohmann-json3-dev \ pkg-config \ diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 35cc598fe6f87..f59bc9f66233e 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (17.0.0-1) unstable; urgency=low + + * New upstream release. + + -- RaĂºl Cumplido Thu, 11 Jul 2024 08:57:21 -0000 + apache-arrow (16.1.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index c33e3ac791be1..24e2839021aa8 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -42,7 +42,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.8 Homepage: https://arrow.apache.org/ -Package: libarrow1700 +Package: libarrow1800 Section: libs Architecture: any Multi-Arch: same @@ -62,12 +62,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides tools. -Package: libarrow-cuda1700 +Package: libarrow-cuda1800 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -75,12 +75,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for CUDA support. 
-Package: libarrow-acero1700 +Package: libarrow-acero1800 Section: libs Architecture: any Multi-Arch: same @@ -88,12 +88,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Acero module. -Package: libarrow-dataset1700 +Package: libarrow-dataset1800 Section: libs Architecture: any Multi-Arch: same @@ -101,13 +101,13 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-acero1700 (= ${binary:Version}), - libparquet1700 (= ${binary:Version}) + libarrow-acero1800 (= ${binary:Version}), + libparquet1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Dataset module. -Package: libarrow-flight1700 +Package: libarrow-flight1800 Section: libs Architecture: any Multi-Arch: same @@ -115,12 +115,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight RPC system. -Package: libarrow-flight-sql1700 +Package: libarrow-flight-sql1800 Section: libs Architecture: any Multi-Arch: same @@ -128,7 +128,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight1700 (= ${binary:Version}) + libarrow-flight1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight SQL system. @@ -139,7 +139,8 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow1700 (= ${binary:Version}), + libarrow1800 (= ${binary:Version}), +@USE_SYSTEM_GRPC@ libabsl-dev, libbrotli-dev, libbz2-dev, libcurl4-openssl-dev, @@ -166,7 +167,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-cuda1700 (= ${binary:Version}) + libarrow-cuda1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for CUDA support. @@ -177,7 +178,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow-acero1700 (= ${binary:Version}), + libarrow-acero1800 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -190,7 +191,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-acero-dev (= ${binary:Version}), - libarrow-dataset1700 (= ${binary:Version}), + libarrow-dataset1800 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -203,7 +204,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-flight1700 (= ${binary:Version}), + libarrow-flight1800 (= ${binary:Version}), libc-ares-dev, @USE_SYSTEM_GRPC@ libgrpc++-dev Description: Apache Arrow is a data processing library for analysis @@ -217,12 +218,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), - libarrow-flight-sql1700 (= ${binary:Version}) + libarrow-flight-sql1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight SQL system. 
-Package: libgandiva1700 +Package: libgandiva1800 Section: libs Architecture: any Multi-Arch: same @@ -230,7 +231,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -243,13 +244,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libgandiva1700 (= ${binary:Version}) + libgandiva1800 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . This package provides C++ header files. -Package: libparquet1700 +Package: libparquet1800 Section: libs Architecture: any Multi-Arch: same @@ -269,7 +270,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libparquet1700 (= ${binary:Version}) + libparquet1800 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides tools. @@ -281,13 +282,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet1700 (= ${binary:Version}), + libparquet1800 (= ${binary:Version}), libthrift-dev Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib1700 +Package: libarrow-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -295,7 +296,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files. @@ -319,7 +320,7 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-acero-dev (= ${binary:Version}), - libarrow-glib1700 (= ${binary:Version}), + libarrow-glib1800 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis @@ -337,7 +338,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libarrow-cuda-glib1700 +Package: libarrow-cuda-glib1800 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -345,8 +346,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libarrow-cuda1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libarrow-cuda1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for CUDA support. @@ -371,13 +372,13 @@ Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-cuda-glib1700 (= ${binary:Version}), + libarrow-cuda-glib1800 (= ${binary:Version}), gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based header files for CUDA support. 
-Package: libarrow-dataset-glib1700 +Package: libarrow-dataset-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -385,8 +386,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libarrow-dataset1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libarrow-dataset1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for dataset module. @@ -411,7 +412,7 @@ Depends: ${misc:Depends}, libarrow-dataset-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-dataset-glib1700 (= ${binary:Version}), + libarrow-dataset-glib1800 (= ${binary:Version}), gir1.2-arrow-dataset-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -428,7 +429,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for dataset module. -Package: libarrow-flight-glib1700 +Package: libarrow-flight-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -436,8 +437,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libarrow-flight1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libarrow-flight1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight. @@ -463,7 +464,7 @@ Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-flight-glib1700 (= ${binary:Version}), + libarrow-flight-glib1800 (= ${binary:Version}), gir1.2-arrow-flight-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -480,7 +481,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight. -Package: libarrow-flight-sql-glib1700 +Package: libarrow-flight-sql-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -488,8 +489,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight-glib1700 (= ${binary:Version}), - libarrow-flight-sql1700 (= ${binary:Version}) + libarrow-flight-glib1800 (= ${binary:Version}), + libarrow-flight-sql1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight SQL. @@ -515,7 +516,7 @@ Depends: ${misc:Depends}, libarrow-flight-sql-dev (= ${binary:Version}), libarrow-flight-glib-dev (= ${binary:Version}), - libarrow-flight-sql-glib1700 (= ${binary:Version}), + libarrow-flight-sql-glib1800 (= ${binary:Version}), gir1.2-arrow-flight-sql-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -532,7 +533,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight SQL. 
-Package: libgandiva-glib1700 +Package: libgandiva-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -540,8 +541,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libgandiva1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libgandiva1800 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -568,7 +569,7 @@ Depends: ${misc:Depends}, libgandiva-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libgandiva-glib1700 (= ${binary:Version}), + libgandiva-glib1800 (= ${binary:Version}), gir1.2-gandiva-1.0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. @@ -587,7 +588,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions . This package provides documentations. -Package: libparquet-glib1700 +Package: libparquet-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -595,8 +596,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libparquet1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libparquet1800 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. @@ -621,7 +622,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib1700 (= ${binary:Version}), + libparquet-glib1800 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1800.install diff 
--git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet1800.install diff --git 
a/dev/tasks/linux-packages/apache-arrow/debian/rules index 83bcad98a7a6e..6c3074ab234e1 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -51,6 +51,7 @@ override_dh_auto_configure: -DARROW_WITH_ZSTD=ON \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCUDAToolkit_ROOT=/usr \ + -DFETCHCONTENT_FULLY_DISCONNECTED=OFF \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index d5e6c3a332eb3..f588bb3f1ab5a 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -63,6 +63,7 @@ %define gcc_package gcc %endif +%define use_bundled_nlohmann_json (%{_rhel} == 8) %define use_flight (%{_rhel} >= 8 || %{_amzn} >= 2023) %define use_gandiva (%{_rhel} >= 8 || %{_amzn} >= 2023) %define use_gcs (%{_rhel} >= 8) @@ -117,7 +118,7 @@ BuildRequires: glog-devel BuildRequires: grpc-devel BuildRequires: grpc-plugins %endif -%if %{use_gcs} +%if %{use_gcs} && !%{use_bundled_nlohmann_json} BuildRequires: json-devel %endif BuildRequires: libzstd-devel @@ -194,6 +195,9 @@ cd cpp -DCMAKE_BUILD_TYPE=$cpp_build_type \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ +%if %{use_bundled_nlohmann_json} + -Dnlohmann_json_SOURCE=BUNDLED \ +%endif -G"Unix Makefiles" %arrow_cmake_build cd - @@ -280,7 +284,7 @@ Requires: %{name}%{so_version}-libs = %{version}-%{release} Requires: brotli-devel Requires: bzip2-devel Requires: curl-devel -%if %{use_gcs} +%if %{use_gcs} && !%{use_bundled_nlohmann_json} Requires: json-devel %endif Requires: libzstd-devel @@ -883,6 +887,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Thu Jul 11 2024 Raúl Cumplido - 17.0.0-1 +- New upstream release. + * Thu May 09 2024 Raúl Cumplido - 16.1.0-1 - New upstream release.
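For context on the arrow.spec.in change above: on RHEL 8 the spec stops requiring the system json-devel package and instead builds Arrow's bundled nlohmann/json by passing -Dnlohmann_json_SOURCE=BUNDLED to CMake (the spec only needs nlohmann/json when GCS support is enabled). A minimal sketch of the equivalent standalone configure step, outside the RPM build; the checkout path and build directory are assumed for illustration:

    # Configure Arrow C++ with GCS enabled, vendoring nlohmann/json
    # instead of requiring the distro's json-devel package.
    cmake -S arrow/cpp -B build \
      -DCMAKE_BUILD_TYPE=Release \
      -DARROW_GCS=ON \
      -Dnlohmann_json_SOURCE=BUNDLED
    cmake --build build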
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile index 8c029d87c2493..e834e17a4bb30 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile @@ -24,7 +24,7 @@ ENV \ ARG DEBUG # GH-42128 -# Switch repos to point to to vault.centos.org because Centos 7 is EOL +# Switch repos to point to vault.centos.org because CentOS 7 is EOL RUN sed -i \ -e 's/^mirrorlist/#mirrorlist/' \ -e 's/^#baseurl/baseurl/' \ @@ -37,6 +37,11 @@ RUN \ yum install -y ${quiet} \ centos-release-scl-rh \ epel-release && \ + sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo && \ yum install -y ${quiet} \ ${SCL}-gcc-c++ \ ${SCL}-make \ diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml index 9e24835b8b627..891682c4358d8 100644 --- a/dev/tasks/linux-packages/github.linux.yml +++ b/dev/tasks/linux-packages/github.linux.yml @@ -64,7 +64,7 @@ jobs: run: | set -e pushd arrow/dev/tasks/linux-packages - rake version:update + rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)" rake docker:pull || : rake --trace {{ task_namespace }}:build BUILD_DIR=build popd diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml index 0ff3c56b695eb..5c82bf74b30b7 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -67,6 +67,7 @@ jobs: ALMALINUX: "8" run: | archery docker run \ + -e ARROW_GANDIVA=OFF \ -e TEST_DEFAULT=0 \ -e TEST_PYARROW_VERSION={{ arrow.no_rc_version }} \ -e TEST_PYTHON_VERSIONS={{ python_version }} \ diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 61df283960ccf..32534e80528af 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -492,59 +492,59 @@ tasks: - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-acero-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-acero1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-acero1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-acero1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-acero1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-glib1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - 
libarrow-flight-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow1800_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva1700_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva1800_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet1700_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet1800_{no_rc_version}-1_[a-z0-9]+.deb - parquet-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda1800_{no_rc_version}-1_[a-z0-9]+.deb {% 
endif %} {% endfor %} {% endfor %} @@ -745,9 +745,6 @@ tasks: - arrow-jdbc-{no_rc_snapshot_version}-tests.jar - arrow-jdbc-{no_rc_snapshot_version}.jar - arrow-jdbc-{no_rc_snapshot_version}.pom - - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.json - - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-maven-plugins-{no_rc_snapshot_version}.pom - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml - arrow-memory-core-{no_rc_snapshot_version}-javadoc.jar @@ -810,10 +807,7 @@ tasks: - arrow-vector-{no_rc_snapshot_version}.pom - flight-core-{no_rc_snapshot_version}-cyclonedx.json - flight-core-{no_rc_snapshot_version}-cyclonedx.xml - - flight-core-{no_rc_snapshot_version}-jar-with-dependencies.jar - flight-core-{no_rc_snapshot_version}-javadoc.jar - - flight-core-{no_rc_snapshot_version}-shaded-ext.jar - - flight-core-{no_rc_snapshot_version}-shaded.jar - flight-core-{no_rc_snapshot_version}-sources.jar - flight-core-{no_rc_snapshot_version}-tests.jar - flight-core-{no_rc_snapshot_version}.jar @@ -846,12 +840,6 @@ tasks: - flight-sql-jdbc-driver-{no_rc_snapshot_version}-tests.jar - flight-sql-jdbc-driver-{no_rc_snapshot_version}.jar - flight-sql-jdbc-driver-{no_rc_snapshot_version}.pom - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.json - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.xml - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-javadoc.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-sources.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.pom ############################## NuGet packages ############################### @@ -1194,6 +1182,15 @@ tasks: image: conda-python {% endfor %} + test-conda-python-emscripten: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 22.04 + PYTHON: 3.12 + image: conda-python-emscripten + test-conda-python-3.11-hypothesis: ci: github template: docker-tests/github.linux.yml @@ -1271,6 +1268,7 @@ tasks: env: ARROW_R_DEV: "TRUE" image: ubuntu-r-valgrind + timeout: 300 # 5 hours test-r-linux-rchk: ci: github @@ -1542,9 +1540,7 @@ tasks: image: conda-python-hdfs {% endfor %} -{% for python_version, spark_version, test_pyarrow_only, numpy_version, jdk_version in [("3.8", "v3.5.0", "false", "latest", "8"), - ("3.10", "v3.5.0", "false", "1.23", "8"), - ("3.11", "master", "false", "latest", "17")] %} +{% for python_version, spark_version, test_pyarrow_only, numpy_version, jdk_version in [("3.11", "master", "false", "latest", "17")] %} test-conda-python-{{ python_version }}-spark-{{ spark_version }}: ci: github template: docker-tests/github.linux.yml @@ -1555,10 +1551,11 @@ tasks: TEST_PYARROW_ONLY: "{{ test_pyarrow_only }}" NUMPY: "{{ numpy_version }}" JDK: "{{ jdk_version }}" + fetch_depth: 0 # use the branch-3.0 of spark, so prevent reusing any layers flags: --no-leaf-cache image: conda-python-spark - fetch_depth: 0 + timeout: 90 {% endfor %} {% for kind in ["static", "static-system-dependency"] %} diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index 7a28ba705dd50..f2c0673314826 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -38,6 +38,9 @@ jobs: run: | archery docker run \ -e VERIFY_VERSION="{{ release|default("") }}" \ + {% if distro 
== 'almalinux' and target|upper == 'PYTHON' %} + -e ARROW_GANDIVA=OFF \ + {% endif %} -e VERIFY_RC="{{ rc|default("") }}" \ -e TEST_DEFAULT=0 \ -e TEST_{{ target|upper }}=1 \ diff --git a/docker-compose.yml b/docker-compose.yml index a2a2b41c8747f..cf22324f7cfb4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -121,6 +121,7 @@ x-hierarchy: - conda-python-docs - conda-python-cython2 - conda-python-dask + - conda-python-emscripten - conda-python-hdfs - conda-python-java-integration - conda-python-jpype @@ -875,6 +876,38 @@ services: /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/python_test.sh /arrow"] + conda-python-emscripten: + # Usage: + # docker-compose build conda-python-emscripten + # docker-compose run --rm conda-python-emscripten + # Parameters: + # ARCH: amd64, arm64v8, ... + # UBUNTU: 22.04 + image: ${REPO}:${ARCH}-conda-python-emscripten + build: + context: . + dockerfile: ci/docker/conda-python-emscripten.dockerfile + cache_from: + - ${REPO}:${ARCH}-conda-python-${PYTHON} + args: + repo: ${REPO} + arch: ${ARCH} + clang_tools: ${CLANG_TOOLS} + llvm: ${LLVM} + pyodide_version: "0.26.0" + chrome_version: "122" + selenium_version: "4.15.2" + required_python_min: "(3,12)" + python: ${PYTHON} + shm_size: *shm-size + volumes: *ubuntu-volumes + environment: + <<: [*common, *ccache, *sccache, *cpp] + command: [" + /arrow/ci/scripts/cpp_build.sh /arrow /build && + /arrow/ci/scripts/python_build_emscripten.sh /arrow /build && + /arrow/ci/scripts/python_test_emscripten.sh /build /pyodide"] + ubuntu-cuda-python: # Usage: # docker-compose build cuda-cpp @@ -1169,7 +1202,7 @@ services: build: args: base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG} - java: 1.8.0 + java: 11 context: . dockerfile: ci/docker/java-jni-manylinux-201x.dockerfile cache_from: @@ -1714,7 +1747,7 @@ services: # docker-compose run java # Parameters: # MAVEN: 3.9.5 - # JDK: 8, 11, 17, 21 + # JDK: 11, 17, 21 image: ${ARCH}/maven:${MAVEN}-eclipse-temurin-${JDK} shm_size: *shm-size volumes: &java-volumes diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index e879fc69138d0..6a684b56d57b5 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,15 +1,20 @@ [ { - "name": "17.0 (dev)", + "name": "18.0 (dev)", "version": "dev/", "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "16.1 (stable)", + "name": "17.0 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "16.1", + "version": "16.1/", + "url": "https://arrow.apache.org/docs/16.1/" + }, { "name": "16.0", "version": "16.0/", diff --git a/docs/source/cpp/dataset.rst b/docs/source/cpp/dataset.rst index a64b73b61c05d..79a731cd358cd 100644 --- a/docs/source/cpp/dataset.rst +++ b/docs/source/cpp/dataset.rst @@ -27,11 +27,6 @@ Tabular Datasets .. seealso:: :doc:`Dataset API reference ` -.. warning:: - - The ``arrow::dataset`` namespace is experimental, and a stable API - is not yet guaranteed. - The Arrow Datasets library provides functionality to efficiently work with tabular, potentially larger than memory, and multi-file datasets. 
This includes: diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst index 33907b5580f61..032b7d1ac90f1 100644 --- a/docs/source/cpp/memory.rst +++ b/docs/source/cpp/memory.rst @@ -139,9 +139,9 @@ Default Memory Pool The default memory pool depends on how Arrow C++ was compiled: -- if enabled at compile time, a `jemalloc `_ heap; -- otherwise, if enabled at compile time, a - `mimalloc `_ heap; +- if enabled at compile time, a `mimalloc `_ + heap; +- otherwise, if enabled at compile time, a `jemalloc `_ heap; - otherwise, the C library ``malloc`` heap. Overriding the Default Memory Pool diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index 82053e901186c..63a7b4369b809 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -32,7 +32,7 @@ Arrow Java uses the `Maven `_ build system. Building requires: -* JDK 8+ +* JDK 11+ * Maven 3+ .. note:: @@ -335,7 +335,6 @@ Arrow repository, and update the following settings: right click the directory, and select Mark Directory as > Generated Sources Root. There is no need to mark other generated sources directories, as only the ``vector`` module generates sources. -* For JDK 8, disable the ``error-prone`` profile to build the project successfully. * For JDK 11, due to an `IntelliJ bug `__, you must go into Settings > Build, Execution, Deployment > Compiler > Java Compiler and disable @@ -538,3 +537,40 @@ Installing Manually .. _builds@arrow.apache.org: https://lists.apache.org/list.html?builds@arrow.apache.org .. _GitHub Nightly: https://github.com/ursacomputing/crossbow/releases/tag/nightly-packaging-2022-07-30-0-github-java-jars + +Installing Staging Packages +=========================== + +.. warning:: + These packages are not official releases. Use them at your own risk. + +Arrow staging builds are created when a Release Candidate (RC) is being prepared. This allows users to test the RC in their applications before voting on the release. + + +Installing from Apache Staging +-------------------------------- +1. Look up the next version number for the Arrow libraries used. + +2. Add Apache Staging Repository to the Maven/Gradle project. + + .. code-block:: xml + + + <properties> + <arrow.version>9.0.0</arrow.version> + </properties> + ... + <repositories> + <repository> + <id>arrow-apache-staging</id> + <url>https://repository.apache.org/content/repositories/staging</url> + </repository> + </repositories> + ... + <dependencies> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-vector</artifactId> + <version>${arrow.version}</version> + </dependency> + </dependencies> + ... diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index fae48e4d8e0f0..0d9af1f543cac 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -400,12 +400,13 @@ Be sure to go through on the following checklist: :class-title: sd-fs-5 :class-container: sd-shadow-md - A committer must run the following script: + A committer must run the following script. This has to be done once the + Pull Request from the Update Website script has been merged: .. code-block:: Bash # dev/release/post-05-update-gh-release-notes.sh 17.0.0 - dev/release/post-05-update-gh-release-notes.sh apache-arrow-X.Y.Z + dev/release/post-05-update-gh-release-notes.sh .. dropdown:: Update Homebrew packages :animate: fade-in-slide-down diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index 7ae0c2b4bdbd8..c5f822f41643f 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -1656,8 +1656,8 @@ the Arrow spec. .. _Message.fbs: https://github.com/apache/arrow/blob/main/format/Message.fbs ..
_File.fbs: https://github.com/apache/arrow/blob/main/format/File.fbs .. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering -.. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors +.. _Intel performance guide: https://web.archive.org/web/20151101074635/https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors .. _Endianness: https://en.wikipedia.org/wiki/Endianness -.. _SIMD: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-introduction-to-the-simd-data-layout-templates +.. _SIMD: https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/simd-data-layout-templates.html .. _Parquet: https://parquet.apache.org/docs/ .. _UmbraDB: https://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index 436747989acf3..0ab5b832ad012 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -455,6 +455,7 @@ or ``DATA``. * ``VARIADIC_DATA_BUFFERS``: a JSON array of data buffers represented as hex encoded strings. * ``VIEWS``: a JSON array of encoded views, which are JSON objects with: + * ``SIZE``: an integer indicating the size of the view, * ``INLINED``: an encoded value (this field will be present if ``SIZE`` is smaller than 12, otherwise the next three fields will be present), diff --git a/docs/source/index.rst b/docs/source/index.rst index 0afe52758af25..6f38ab668d883 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -104,7 +104,7 @@ Implementations C/GLib C++ C# - Go + Go Java JavaScript Julia diff --git a/docs/source/java/flight_sql_jdbc_driver.rst b/docs/source/java/flight_sql_jdbc_driver.rst index f95c2ac755d97..0224cc3235652 100644 --- a/docs/source/java/flight_sql_jdbc_driver.rst +++ b/docs/source/java/flight_sql_jdbc_driver.rst @@ -28,7 +28,7 @@ This driver can be used with any database that implements Flight SQL. Installation and Requirements ============================= -The driver is compatible with JDK 8+. On JDK 9+, the following JVM +The driver is compatible with JDK 11+. Note that the following JVM parameter is required: .. code-block:: shell diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index dc6a55c87fcd6..3e01f72a56878 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -29,10 +29,10 @@ Java modules are regularly built and tested on macOS and Linux distributions. Java Compatibility ================== -Java modules are compatible with JDK 8 and above. Currently, JDK versions -8, 11, 17, and 21 are tested in CI. The latest JDK is also tested in CI. +Java modules are compatible with JDK 11 and above. Currently, JDK versions +11, 17, 21, and latest are tested in CI. -When using Java 9 or later, some JDK internals must be exposed by +Note that some JDK internals must be exposed by adding ``--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED`` to the ``java`` command: .. code-block:: shell @@ -67,6 +67,7 @@ org.apache.arrow.flight.core does not read unnamed module ...`` Finally, if you are using arrow-dataset, you'll also need to report that JDK internals need to be exposed. Modifying the command above for arrow-memory: + .. 
code-block:: shell # Directly on the command line diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 62bf4b7723558..7edb4e161541d 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -58,6 +58,7 @@ These should be used to create Arrow data types and schemas. binary_view string_view decimal128 + decimal256 list_ large_list list_view @@ -101,6 +102,7 @@ functions above. Time64Type FixedSizeBinaryType Decimal128Type + Decimal256Type Field Schema RunEndEncodedType diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 7b49d48ab20fa..029d30cc1b693 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -110,12 +110,12 @@ the buffer is garbage-collected, all of the memory is freed: pa.total_allocated_bytes() Besides the default built-in memory pool, there may be additional memory pools -to choose (such as `mimalloc `_) -from depending on how Arrow was built. One can get the backend -name for a memory pool:: +to choose from (such as `jemalloc `_) +depending on how Arrow was built. One can get the backend name for a memory +pool:: >>> pa.default_memory_pool().backend_name - 'jemalloc' + 'mimalloc' .. seealso:: :ref:`API documentation for memory pools `. diff --git a/docs/source/status.rst b/docs/source/status.rst index 266381175608a..c232aa280befb 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -62,11 +62,11 @@ Data Types +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ -| Large Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | +| Large Binary | ✓ | ✓ | ✓ | ✓ | \(4) | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ -| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | +| Large Utf8 | ✓ | ✓ | ✓ | ✓ | \(4) | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | Binary View | ✓ | | ✓ | | ✓ | | | | | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ @@ -85,7 +85,7 @@ Data Types +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | List | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ -| Large List | ✓ | ✓ | ✓ | | | ✓ | ✓ | | ✓ | +| Large List | ✓ | ✓ | ✓ | | \(4) | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | List View | ✓ | | ✓ | | ✓ | | | | | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ @@ -125,6 +125,8 @@ Notes: * \(1) Casting to/from Float16 in Java is not supported. * \(2) Float16 support in C# is only available when targeting .NET 6+. * \(3) Nested dictionaries not supported +* \(4) C# large array types are provided to help with interoperability with other libraries, + but these do not support buffers larger than 2 GiB and an exception will be raised if trying to import an array that is too large. .. 
seealso:: The :ref:`format_columnar` and the diff --git a/format/Flight.proto b/format/Flight.proto index 4963e8c09ae47..2187a51ed48f4 100644 --- a/format/Flight.proto +++ b/format/Flight.proto @@ -208,24 +208,6 @@ message Action { bytes body = 2; } -/* - * The request of the CancelFlightInfo action. - * - * The request should be stored in Action.body. - */ -message CancelFlightInfoRequest { - FlightInfo info = 1; -} - -/* - * The request of the RenewFlightEndpoint action. - * - * The request should be stored in Action.body. - */ -message RenewFlightEndpointRequest { - FlightEndpoint endpoint = 1; -} - /* * An opaque result returned after executing an action. */ @@ -233,36 +215,6 @@ message Result { bytes body = 1; } -/* - * The result of a cancel operation. - * - * This is used by CancelFlightInfoResult.status. - */ -enum CancelStatus { - // The cancellation status is unknown. Servers should avoid using - // this value (send a NOT_FOUND error if the requested query is - // not known). Clients can retry the request. - CANCEL_STATUS_UNSPECIFIED = 0; - // The cancellation request is complete. Subsequent requests with - // the same payload may return CANCELLED or a NOT_FOUND error. - CANCEL_STATUS_CANCELLED = 1; - // The cancellation request is in progress. The client may retry - // the cancellation request. - CANCEL_STATUS_CANCELLING = 2; - // The query is not cancellable. The client should not retry the - // cancellation request. - CANCEL_STATUS_NOT_CANCELLABLE = 3; -} - -/* - * The result of the CancelFlightInfo action. - * - * The result should be stored in Result.body. - */ -message CancelFlightInfoResult { - CancelStatus status = 1; -} - /* * Wrap the result of a getSchema call */ @@ -423,6 +375,64 @@ message PollInfo { google.protobuf.Timestamp expiration_time = 4; } +/* + * The request of the CancelFlightInfo action. + * + * The request should be stored in Action.body. + */ +message CancelFlightInfoRequest { + FlightInfo info = 1; +} + +/* + * The result of a cancel operation. + * + * This is used by CancelFlightInfoResult.status. + */ +enum CancelStatus { + // The cancellation status is unknown. Servers should avoid using + // this value (send a NOT_FOUND error if the requested query is + // not known). Clients can retry the request. + CANCEL_STATUS_UNSPECIFIED = 0; + // The cancellation request is complete. Subsequent requests with + // the same payload may return CANCELLED or a NOT_FOUND error. + CANCEL_STATUS_CANCELLED = 1; + // The cancellation request is in progress. The client may retry + // the cancellation request. + CANCEL_STATUS_CANCELLING = 2; + // The query is not cancellable. The client should not retry the + // cancellation request. + CANCEL_STATUS_NOT_CANCELLABLE = 3; +} + +/* + * The result of the CancelFlightInfo action. + * + * The result should be stored in Result.body. + */ +message CancelFlightInfoResult { + CancelStatus status = 1; +} + +/* + * An opaque identifier that the service can use to retrieve a particular + * portion of a stream. + * + * Tickets are meant to be single use. It is an error/application-defined + * behavior to reuse a ticket. + */ +message Ticket { + bytes ticket = 1; +} + +/* + * A location where a Flight service will accept retrieval of a particular + * stream given a ticket. + */ +message Location { + string uri = 1; +} + /* * A particular stream or split associated with a flight. */ @@ -475,22 +485,12 @@ message FlightEndpoint { } /* - * A location where a Flight service will accept retrieval of a particular - * stream given a ticket. 
- */ -message Location { - string uri = 1; -} - -/* - * An opaque identifier that the service can use to retrieve a particular - * portion of a stream. + * The request of the RenewFlightEndpoint action. * - * Tickets are meant to be single use. It is an error/application-defined - * behavior to reuse a ticket. + * The request should be stored in Action.body. */ -message Ticket { - bytes ticket = 1; +message RenewFlightEndpointRequest { + FlightEndpoint endpoint = 1; } /* diff --git a/go/README.md b/go/README.md index 220b0a230a615..51ac06c87f171 100644 --- a/go/README.md +++ b/go/README.md @@ -20,7 +20,7 @@ Apache Arrow for Go =================== -[![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow/go/v17.svg)](https://pkg.go.dev/github.com/apache/arrow/go/v17) +[![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow/go/v18.svg)](https://pkg.go.dev/github.com/apache/arrow/go/v18) [Apache Arrow][arrow] is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format diff --git a/go/arrow/_examples/helloworld/main.go b/go/arrow/_examples/helloworld/main.go index f4348d6e66771..7f932801917a4 100644 --- a/go/arrow/_examples/helloworld/main.go +++ b/go/arrow/_examples/helloworld/main.go @@ -19,10 +19,10 @@ package main import ( "os" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/_tools/tmpl/main.go b/go/arrow/_tools/tmpl/main.go index 1f83a1b905ae9..33cb1686981f4 100644 --- a/go/arrow/_tools/tmpl/main.go +++ b/go/arrow/_tools/tmpl/main.go @@ -28,7 +28,7 @@ import ( "strings" "text/template" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/internal/json" ) const Ext = ".tmpl" diff --git a/go/arrow/array.go b/go/arrow/array.go index 7f04eab71e5f4..768b30f8e0690 100644 --- a/go/arrow/array.go +++ b/go/arrow/array.go @@ -19,8 +19,8 @@ package arrow import ( "fmt" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // ArrayData is the underlying memory and metadata of an Arrow array, corresponding diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index be6ba864d1aa7..ae33ca5417db0 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -19,9 +19,9 @@ package array import ( "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const ( diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go index 9beadc3cdc654..4d83766b4fa3e 100644 --- a/go/arrow/array/array_test.go +++ b/go/arrow/array/array_test.go @@ -19,11 +19,11 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" - 
"github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index d36c0aac7d3f2..99764270bf39d 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -23,9 +23,9 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type BinaryLike interface { diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go index c8d793ef0670e..919fff7b5e5e8 100644 --- a/go/arrow/array/binary_test.go +++ b/go/arrow/array/binary_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go index e5b4dd63cf9a5..6fcc4eaf46479 100644 --- a/go/arrow/array/binarybuilder.go +++ b/go/arrow/array/binarybuilder.go @@ -25,10 +25,10 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A BinaryBuilder is used to build a Binary array using the Append methods. diff --git a/go/arrow/array/binarybuilder_test.go b/go/arrow/array/binarybuilder_test.go index e37c5f624f9cc..65d5c7385df4c 100644 --- a/go/arrow/array/binarybuilder_test.go +++ b/go/arrow/array/binarybuilder_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 50a4101db18b3..eab26d273dd96 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -21,10 +21,10 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of boolean values. 
diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go index c3bc235de3f21..f980497d54521 100644 --- a/go/arrow/array/boolean_test.go +++ b/go/arrow/array/boolean_test.go @@ -22,8 +22,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go index d58ff80151c06..44d33018f94ea 100644 --- a/go/arrow/array/booleanbuilder.go +++ b/go/arrow/array/booleanbuilder.go @@ -23,11 +23,11 @@ import ( "strconv" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type BooleanBuilder struct { diff --git a/go/arrow/array/booleanbuilder_test.go b/go/arrow/array/booleanbuilder_test.go index e01f6660c4c10..42e49f95a2f3e 100644 --- a/go/arrow/array/booleanbuilder_test.go +++ b/go/arrow/array/booleanbuilder_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/bufferbuilder.go b/go/arrow/array/bufferbuilder.go index b9638b311584d..037d220f0b141 100644 --- a/go/arrow/array/bufferbuilder.go +++ b/go/arrow/array/bufferbuilder.go @@ -20,10 +20,10 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) type bufBuilder interface { diff --git a/go/arrow/array/bufferbuilder_byte.go b/go/arrow/array/bufferbuilder_byte.go index dd39d6b835cc3..2ac7ec703b579 100644 --- a/go/arrow/array/bufferbuilder_byte.go +++ b/go/arrow/array/bufferbuilder_byte.go @@ -16,7 +16,7 @@ package array -import "github.com/apache/arrow/go/v17/arrow/memory" +import "github.com/apache/arrow/go/v18/arrow/memory" type byteBufferBuilder struct { bufferBuilder diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go b/go/arrow/array/bufferbuilder_numeric.gen.go index 19388e27b0ee3..5215ecf65a312 100644 --- a/go/arrow/array/bufferbuilder_numeric.gen.go +++ b/go/arrow/array/bufferbuilder_numeric.gen.go @@ -19,9 +19,9 @@ package array import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" ) type int64BufferBuilder struct { diff --git 
a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl index 40c96b5987cf3..2b7fcaefcdeb2 100644 --- a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl +++ b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl @@ -17,9 +17,9 @@ package array import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" ) {{range .In}} diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go index 372ba6976269d..3c947c87eeaac 100644 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ b/go/arrow/array/bufferbuilder_numeric_test.go @@ -20,8 +20,8 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index 88c0ac479868b..6c8ea877a2fb0 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -20,10 +20,10 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) const ( diff --git a/go/arrow/array/builder_test.go b/go/arrow/array/builder_test.go index d508d4626d4e9..7eb2b3f7cf9e3 100644 --- a/go/arrow/array/builder_test.go +++ b/go/arrow/array/builder_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go index 7c6f3cab7a7a7..a54c1e23c1e1c 100644 --- a/go/arrow/array/compare.go +++ b/go/arrow/array/compare.go @@ -20,9 +20,9 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/bitutils" ) // RecordEqual reports whether the two provided records are equal. 
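Since compare.go changes only its imports, RecordEqual behaves as before. A small sketch of comparing two records under the v18 path, assuming the package's existing RecordBuilder and RecordEqual APIs (schema and values are illustrative):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator
	schema := arrow.NewSchema([]arrow.Field{{Name: "x", Type: arrow.PrimitiveTypes.Int64}}, nil)

	// Helper to build identical records for the comparison.
	mk := func() arrow.Record {
		rb := array.NewRecordBuilder(mem, schema)
		defer rb.Release()
		rb.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3}, nil)
		return rb.NewRecord()
	}

	left, right := mk(), mk()
	defer left.Release()
	defer right.Release()

	fmt.Println(array.RecordEqual(left, right)) // true
}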
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go index 223535bf64000..f757ab9f25f07 100644 --- a/go/arrow/array/compare_test.go +++ b/go/arrow/array/compare_test.go @@ -22,11 +22,11 @@ import ( "sort" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go index 184f7143ee5df..3d2b4b4b83167 100644 --- a/go/arrow/array/concat.go +++ b/go/arrow/array/concat.go @@ -23,13 +23,13 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" ) // Concatenate creates a new arrow.Array which is the concatenation of the diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go index bfde89ec6600d..7e6a3c08efd5c 100644 --- a/go/arrow/array/concat_test.go +++ b/go/arrow/array/concat_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/array/data.go b/go/arrow/array/data.go index 40fad0dfd0033..19513ebaacf50 100644 --- a/go/arrow/array/data.go +++ b/go/arrow/array/data.go @@ -22,9 +22,9 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) // Data represents the memory and metadata of an Arrow array. 
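For readers tracking the memory imports touched here, a sketch of assembling ArrayData from raw buffers under v18. The validity-bitmap-then-values buffer order follows the columnar spec for primitive arrays; the literal values are invented:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	vals := []int64{10, 20, 30}
	buf := memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(vals))

	// Buffers for a primitive array: validity bitmap (nil: all valid), then values.
	data := array.NewData(arrow.PrimitiveTypes.Int64, len(vals),
		[]*memory.Buffer{nil, buf}, nil, 0, 0)
	defer data.Release()

	arr := array.MakeFromData(data)
	defer arr.Release()
	fmt.Println(arr) // [10 20 30]
}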
diff --git a/go/arrow/array/data_test.go b/go/arrow/array/data_test.go index 68f2ada97b037..2cfc64fbe2d7e 100644 --- a/go/arrow/array/data_test.go +++ b/go/arrow/array/data_test.go @@ -20,8 +20,8 @@ import ( "slices" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go index 4578fd4061dc4..fd9e53f7f4c06 100644 --- a/go/arrow/array/decimal128.go +++ b/go/arrow/array/decimal128.go @@ -24,12 +24,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of 128-bit decimal values. diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go index 3e2d021924723..707a4f1a6c8d5 100644 --- a/go/arrow/array/decimal128_test.go +++ b/go/arrow/array/decimal128_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index bc082638b7bc4..6431306f969c3 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -24,12 +24,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
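Decimal256 also gains entries in the Python API docs above; on the Go side only the import path changes. An illustrative builder sketch (precision and scale chosen arbitrarily):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/decimal256"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	dt := &arrow.Decimal256Type{Precision: 40, Scale: 2}
	b := array.NewDecimal256Builder(memory.DefaultAllocator, dt)
	defer b.Release()

	b.Append(decimal256.FromI64(123456)) // 1234.56 at scale 2
	arr := b.NewDecimal256Array()
	defer arr.Release()
	fmt.Println(arr)
}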
diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go index dd671b53bf101..8adb810165430 100644 --- a/go/arrow/array/decimal256_test.go +++ b/go/arrow/array/decimal256_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal_test.go b/go/arrow/array/decimal_test.go index 414bb3426ca34..b321bd7fbbe7b 100644 --- a/go/arrow/array/decimal_test.go +++ b/go/arrow/array/decimal_test.go @@ -21,12 +21,12 @@ import ( "math/big" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 6e202deeb37cd..ca7fed5257085 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -25,16 +25,16 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/utils" ) // Dictionary represents the type for dictionary-encoded data with a data diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go index 128182d005eba..ea9587d8dcdf9 100644 --- a/go/arrow/array/dictionary_test.go +++ b/go/arrow/array/dictionary_test.go @@ -24,13 +24,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/array/diff.go b/go/arrow/array/diff.go index 7110804d1b2cb..e5c1ce1521d95 100644 --- a/go/arrow/array/diff.go +++ b/go/arrow/array/diff.go @@ -20,7 +20,7 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Edit represents one entry in the edit script to compare two arrays. diff --git a/go/arrow/array/diff_test.go b/go/arrow/array/diff_test.go index 67fa1d04ab506..65d212be11838 100644 --- a/go/arrow/array/diff_test.go +++ b/go/arrow/array/diff_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/types" ) type diffTestCase struct { diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index 7df66061b5eb7..748c4c1fec641 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -23,12 +23,12 @@ import ( "reflect" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/utils" ) // RunEndEncoded represents an array containing two children: diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go index 1c54e56aaea6f..03352ec44177c 100644 --- a/go/arrow/array/encoded_test.go +++ b/go/arrow/array/encoded_test.go @@ -20,10 +20,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/arrow/array/extension.go b/go/arrow/array/extension.go index 5df86c8964a7b..8c4ef840cb72c 100644 --- a/go/arrow/array/extension.go +++ b/go/arrow/array/extension.go @@ -20,9 +20,9 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // ExtensionArray is the interface that 
needs to be implemented to handle diff --git a/go/arrow/array/extension_test.go b/go/arrow/array/extension_test.go index 4520117f857f8..71ea9f105af7c 100644 --- a/go/arrow/array/extension_test.go +++ b/go/arrow/array/extension_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/fixed_size_list.go b/go/arrow/array/fixed_size_list.go index 830361b518acd..a0eefd460c2bf 100644 --- a/go/arrow/array/fixed_size_list.go +++ b/go/arrow/array/fixed_size_list.go @@ -22,11 +22,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // FixedSizeList represents an immutable sequence of N array values. diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go index ff8c5fe552e4c..e0edb9868cffd 100644 --- a/go/arrow/array/fixed_size_list_test.go +++ b/go/arrow/array/fixed_size_list_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go index 535b2d51003a7..f4d16c6386d60 100644 --- a/go/arrow/array/fixedsize_binary.go +++ b/go/arrow/array/fixedsize_binary.go @@ -22,8 +22,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of fixed-length binary strings. 
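A quick reference for the fixed-size binary hunk above: every appended value must match the type's ByteWidth. Sketch against the v18 path (the bytes are made up):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	dt := &arrow.FixedSizeBinaryType{ByteWidth: 4}
	b := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, dt)
	defer b.Release()

	b.Append([]byte{0xDE, 0xAD, 0xBE, 0xEF}) // must be exactly ByteWidth bytes
	b.AppendNull()
	arr := b.NewFixedSizeBinaryArray()
	defer arr.Release()
	fmt.Println(arr)
}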
diff --git a/go/arrow/array/fixedsize_binary_test.go b/go/arrow/array/fixedsize_binary_test.go index 927818a640a3a..4a32cb9692a06 100644 --- a/go/arrow/array/fixedsize_binary_test.go +++ b/go/arrow/array/fixedsize_binary_test.go @@ -21,9 +21,9 @@ import ( "github.com/stretchr/testify/assert" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestFixedSizeBinary(t *testing.T) { diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go index 08e8ae841b9e4..96d58632ab8c8 100644 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ b/go/arrow/array/fixedsize_binarybuilder.go @@ -23,10 +23,10 @@ import ( "reflect" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go index 3bd8611f37230..0c58c65ecb02e 100644 --- a/go/arrow/array/fixedsize_binarybuilder_test.go +++ b/go/arrow/array/fixedsize_binarybuilder_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go index b71f23b22c5e6..757b658a9150d 100644 --- a/go/arrow/array/float16.go +++ b/go/arrow/array/float16.go @@ -20,9 +20,9 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of Float16 values. 
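Float16 values are stored as half-precision numbers converted from float32. A minimal sketch under v18, assuming the float16 package's New constructor:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/float16"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	b := array.NewFloat16Builder(memory.DefaultAllocator)
	defer b.Release()

	b.Append(float16.New(1.5)) // converts a float32 to half precision
	arr := b.NewFloat16Array()
	defer arr.Release()
	fmt.Println(arr)
}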
diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go index 9eeb22feca43e..7543f2b6f96dd 100644 --- a/go/arrow/array/float16_builder.go +++ b/go/arrow/array/float16_builder.go @@ -23,12 +23,12 @@ import ( "strconv" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Float16Builder struct { diff --git a/go/arrow/array/float16_builder_test.go b/go/arrow/array/float16_builder_test.go index 47d84302a9008..ab25e544ed833 100644 --- a/go/arrow/array/float16_builder_test.go +++ b/go/arrow/array/float16_builder_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go index 2cabfad18a84a..66c6eca21bca5 100644 --- a/go/arrow/array/interval.go +++ b/go/arrow/array/interval.go @@ -23,11 +23,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) func NewIntervalData(data arrow.ArrayData) arrow.Array { diff --git a/go/arrow/array/interval_test.go b/go/arrow/array/interval_test.go index c10112caaa5f1..6d36885a627d9 100644 --- a/go/arrow/array/interval_test.go +++ b/go/arrow/array/interval_test.go @@ -20,9 +20,9 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/json_reader.go b/go/arrow/array/json_reader.go index 4f5ebea6b3a25..2944151a5f63c 100644 --- a/go/arrow/array/json_reader.go +++ b/go/arrow/array/json_reader.go @@ -22,10 +22,10 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Option func(config) diff --git a/go/arrow/array/json_reader_test.go b/go/arrow/array/json_reader_test.go index 
30a6d5833ee69..5e258dfdc07b1 100644 --- a/go/arrow/array/json_reader_test.go +++ b/go/arrow/array/json_reader_test.go @@ -20,9 +20,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 16b7cf1bc05a9..1e2191f2cfc3a 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -22,11 +22,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type ListLike interface { diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go index 5b624dccfc91a..f6f42a31299e4 100644 --- a/go/arrow/array/list_test.go +++ b/go/arrow/array/list_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index eb11f963c5837..a692c2cd6d71a 100644 --- a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -20,9 +20,9 @@ import ( "bytes" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Map represents an immutable sequence of Key/Value structs. It is a diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go index cbea072e09045..e73508e6afe11 100644 --- a/go/arrow/array/map_test.go +++ b/go/arrow/array/map_test.go @@ -20,9 +20,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go index 365964ebb0a2f..6dccd3af59f2a 100644 --- a/go/arrow/array/null.go +++ b/go/arrow/array/null.go @@ -23,10 +23,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Null represents an immutable, degenerate array with no physical storage. 
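As the doc comment above notes, a Null array carries no physical storage, only a logical length. Sketch under the v18 path:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow/array"
)

func main() {
	arr := array.NewNull(3) // three logical nulls, no backing buffers
	defer arr.Release()
	fmt.Println(arr.Len(), arr.NullN()) // 3 3
}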
diff --git a/go/arrow/array/null_test.go b/go/arrow/array/null_test.go index ae645f9564220..61ccb472b1f7b 100644 --- a/go/arrow/array/null_test.go +++ b/go/arrow/array/null_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index b962cda40b8b3..413a356c2a8ab 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -24,8 +24,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of int64 values. diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl index d9daa55849619..1f4b56609f464 100644 --- a/go/arrow/array/numeric.gen.go.tmpl +++ b/go/arrow/array/numeric.gen.go.tmpl @@ -21,8 +21,8 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/json" ) {{range .In}} diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index 3013d45acbb2b..bb8acc3f41519 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -21,11 +21,11 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go index 227fcb2f68e83..c80f0c7c9578e 100644 --- a/go/arrow/array/numericbuilder.gen.go +++ b/go/arrow/array/numericbuilder.gen.go @@ -27,11 +27,11 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Int64Builder struct { diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl index c701c49c4210d..d8b92cf60cc39 100644 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ b/go/arrow/array/numericbuilder.gen.go.tmpl @@ -17,11 +17,11 @@ package array import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + 
"github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) {{range .In}} diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index b43aa7f807090..8adf86853b7c7 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -22,9 +22,9 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen_test.go.tmpl b/go/arrow/array/numericbuilder.gen_test.go.tmpl index 9f381aa055fac..f3cd08a63745d 100644 --- a/go/arrow/array/numericbuilder.gen_test.go.tmpl +++ b/go/arrow/array/numericbuilder.gen_test.go.tmpl @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index 93543f929ed7f..2735f1baa9a30 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -22,10 +22,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // RecordReader reads a stream of records. 
diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go index 6712a1c9085be..8e6dc3b06d25e 100644 --- a/go/arrow/array/record_test.go +++ b/go/arrow/array/record_test.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index f4735f2cb5e0e..88b4568ad5e84 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -23,9 +23,9 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type StringLike interface { diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go index e9d9e061b4c52..efbe51edd1a03 100644 --- a/go/arrow/array/string_test.go +++ b/go/arrow/array/string_test.go @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go index 1921ee86dd894..279ac1d87b25b 100644 --- a/go/arrow/array/struct.go +++ b/go/arrow/array/struct.go @@ -23,11 +23,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Struct represents an ordered sequence of relative types. 
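Struct arrays are built slot by slot, with each child field appended through its own builder. A sketch under the v18 path (field name and value are made up):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	dt := arrow.StructOf(arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int64})
	sb := array.NewStructBuilder(memory.DefaultAllocator, dt)
	defer sb.Release()

	sb.Append(true) // start a non-null struct slot
	sb.FieldBuilder(0).(*array.Int64Builder).Append(42)

	arr := sb.NewStructArray()
	defer arr.Release()
	fmt.Println(arr) // {[42]}
}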
diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go index 03190585fadf0..4338bbd0b136e 100644 --- a/go/arrow/array/struct_test.go +++ b/go/arrow/array/struct_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/table.go b/go/arrow/array/table.go index 54299ba6b1922..3b742ae78803d 100644 --- a/go/arrow/array/table.go +++ b/go/arrow/array/table.go @@ -23,8 +23,8 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // NewColumnSlice returns a new zero-copy slice of the column with the indicated diff --git a/go/arrow/array/table_test.go b/go/arrow/array/table_test.go index 2b9aaa7f352bf..e8357ac3dfb69 100644 --- a/go/arrow/array/table_test.go +++ b/go/arrow/array/table_test.go @@ -22,9 +22,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestChunked(t *testing.T) { diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go index dfd62ad68e568..679d9a5a8a4cc 100644 --- a/go/arrow/array/timestamp.go +++ b/go/arrow/array/timestamp.go @@ -24,11 +24,11 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Timestamp represents an immutable sequence of arrow.Timestamp values. 
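Timestamps are stored as integer offsets from the Unix epoch in the type's unit. A sketch under v18 (the epoch value and time zone are illustrative):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	dt := &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "UTC"}
	b := array.NewTimestampBuilder(memory.DefaultAllocator, dt)
	defer b.Release()

	b.Append(arrow.Timestamp(1700000000000)) // ms since the Unix epoch
	arr := b.NewTimestampArray()
	defer arr.Release()
	fmt.Println(arr)
}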
diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go index 38ab9279f6785..cb9f957d3f255 100644 --- a/go/arrow/array/timestamp_test.go +++ b/go/arrow/array/timestamp_test.go @@ -20,9 +20,9 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go index 10dc560348dae..5d2a8b8ecb2f0 100644 --- a/go/arrow/array/union.go +++ b/go/arrow/array/union.go @@ -25,12 +25,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/json" ) // Union is a convenience interface to encompass both Sparse and Dense diff --git a/go/arrow/array/union_test.go b/go/arrow/array/union_test.go index d848340a96070..43e7afd693b6c 100644 --- a/go/arrow/array/union_test.go +++ b/go/arrow/array/union_test.go @@ -21,9 +21,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/util.go b/go/arrow/array/util.go index e82eb24679bc4..2b41dadaf4bfc 100644 --- a/go/arrow/array/util.go +++ b/go/arrow/array/util.go @@ -22,11 +22,11 @@ import ( "io" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/internal/json" ) func min(a, b int) int { diff --git a/go/arrow/array/util_test.go b/go/arrow/array/util_test.go index 2f395b03ffbbb..114ea6e546649 100644 --- a/go/arrow/array/util_test.go +++ b/go/arrow/array/util_test.go @@ -25,13 +25,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" 
+ "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/arrio/arrio.go b/go/arrow/arrio/arrio.go index 74bebd7ebe691..53215c81f75eb 100644 --- a/go/arrow/arrio/arrio.go +++ b/go/arrow/arrio/arrio.go @@ -22,7 +22,7 @@ import ( "errors" "io" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Reader is the interface that wraps the Read method. diff --git a/go/arrow/arrio/arrio_test.go b/go/arrow/arrio/arrio_test.go index 09d47c50c8e23..26863ec252bf7 100644 --- a/go/arrow/arrio/arrio_test.go +++ b/go/arrow/arrio/arrio_test.go @@ -22,11 +22,11 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) type copyKind int diff --git a/go/arrow/avro/avro2parquet/main.go b/go/arrow/avro/avro2parquet/main.go index 6a74940ffe2c1..ae514c5ed1fda 100644 --- a/go/arrow/avro/avro2parquet/main.go +++ b/go/arrow/avro/avro2parquet/main.go @@ -26,10 +26,10 @@ import ( "runtime/pprof" "time" - "github.com/apache/arrow/go/v17/arrow/avro" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - pq "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow/avro" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + pq "github.com/apache/arrow/go/v18/parquet/pqarrow" ) var ( diff --git a/go/arrow/avro/reader.go b/go/arrow/avro/reader.go index e4a3aefd96da1..1463041499de2 100644 --- a/go/arrow/avro/reader.go +++ b/go/arrow/avro/reader.go @@ -23,10 +23,10 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/hamba/avro/v2/ocf" "github.com/tidwall/sjson" diff --git a/go/arrow/avro/reader_test.go b/go/arrow/avro/reader_test.go index 3a02c995970a2..2cb1a7caa801c 100644 --- a/go/arrow/avro/reader_test.go +++ b/go/arrow/avro/reader_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" hamba "github.com/hamba/avro/v2" ) diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go index 3c3ebd3147053..e07cd380d511f 100644 --- a/go/arrow/avro/reader_types.go +++ b/go/arrow/avro/reader_types.go @@ -23,12 +23,12 @@ import ( "fmt" "math/big" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" ) type dataLoader struct { diff --git a/go/arrow/avro/schema.go b/go/arrow/avro/schema.go index 6e2ac00124df9..007dad06c19cd 100644 --- a/go/arrow/avro/schema.go +++ b/go/arrow/avro/schema.go @@ -22,10 +22,10 @@ import ( "math" "strconv" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/internal/types" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/internal/types" + "github.com/apache/arrow/go/v18/internal/utils" avro "github.com/hamba/avro/v2" ) diff --git a/go/arrow/avro/schema_test.go b/go/arrow/avro/schema_test.go index ee4525707aadf..395abcb694d84 100644 --- a/go/arrow/avro/schema_test.go +++ b/go/arrow/avro/schema_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" hamba "github.com/hamba/avro/v2" ) diff --git a/go/arrow/bitutil/bitmaps.go b/go/arrow/bitutil/bitmaps.go index eb3210043537b..fb4fcd597b804 100644 --- a/go/arrow/bitutil/bitmaps.go +++ b/go/arrow/bitutil/bitmaps.go @@ -22,9 +22,9 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) // BitmapReader is a simple bitmap reader for a byte slice. 
diff --git a/go/arrow/bitutil/bitmaps_test.go b/go/arrow/bitutil/bitmaps_test.go index 8d6bfdd1dde28..726bfa050cc4b 100644 --- a/go/arrow/bitutil/bitmaps_test.go +++ b/go/arrow/bitutil/bitmaps_test.go @@ -22,8 +22,8 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/bitutil/bitutil.go b/go/arrow/bitutil/bitutil.go index a6497196fe15a..c4b633c73aa40 100644 --- a/go/arrow/bitutil/bitutil.go +++ b/go/arrow/bitutil/bitutil.go @@ -21,7 +21,7 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory" ) var ( diff --git a/go/arrow/bitutil/bitutil_test.go b/go/arrow/bitutil/bitutil_test.go index fc5be55113b99..c03bf5268a5ff 100644 --- a/go/arrow/bitutil/bitutil_test.go +++ b/go/arrow/bitutil/bitutil_test.go @@ -21,8 +21,8 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 157df47adedc8..0562eaed0fb7a 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -50,10 +50,10 @@ import ( "syscall" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index b971cb21de1b9..59775926d7ef8 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -44,11 +44,11 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/ipc" ) func encodeCMetadata(keys, values []string) []byte { diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index a200e2db45145..3563aeb5f0f1e 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -35,12 +35,12 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" + 
"github.com/apache/arrow/go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index caa1208a20ae5..968b28b4e4afb 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -69,10 +69,10 @@ import ( "runtime/cgo" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" ) const ( diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go index 40f5fb2023558..6dbcde831d889 100644 --- a/go/arrow/cdata/exports.go +++ b/go/arrow/cdata/exports.go @@ -20,8 +20,8 @@ import ( "runtime/cgo" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) // #include diff --git a/go/arrow/cdata/import_allocator.go b/go/arrow/cdata/import_allocator.go index ad107902eb8ba..4e5c2a7b38c72 100644 --- a/go/arrow/cdata/import_allocator.go +++ b/go/arrow/cdata/import_allocator.go @@ -20,7 +20,7 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // #include "arrow/c/helpers.h" diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index ba821896168e2..005dda73ff0ec 100644 --- a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -22,10 +22,10 @@ package cdata import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go index 83ce27ece0d9f..5315853fc59ca 100644 --- a/go/arrow/cdata/test/test_cimport.go +++ b/go/arrow/cdata/test/test_cimport.go @@ -23,10 +23,10 @@ import ( "fmt" "runtime" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/cdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/cdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) // #include diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go index e152413cd3eed..51ca027d53375 100644 --- a/go/arrow/compute/arithmetic.go +++ b/go/arrow/compute/arithmetic.go @@ -22,12 +22,12 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + 
"github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/scalar" ) type ( diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go index 307fa4389544b..6e693481a322c 100644 --- a/go/arrow/compute/arithmetic_test.go +++ b/go/arrow/compute/arithmetic_test.go @@ -26,16 +26,16 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/klauspost/cpuid/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/arrow/compute/cast.go b/go/arrow/compute/cast.go index dc2a6d022b2a1..6ef6fdddd16ff 100644 --- a/go/arrow/compute/cast.go +++ b/go/arrow/compute/cast.go @@ -23,11 +23,11 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go index 11b9587e9bdca..2e748a2fee9c2 100644 --- a/go/arrow/compute/cast_test.go +++ b/go/arrow/compute/cast_test.go @@ -26,16 +26,16 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + 
"github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/compute/datum.go b/go/arrow/compute/datum.go index 40d9aa5db8601..9619fe09610de 100644 --- a/go/arrow/compute/datum.go +++ b/go/arrow/compute/datum.go @@ -21,9 +21,9 @@ package compute import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/scalar" ) //go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment diff --git a/go/arrow/compute/example_test.go b/go/arrow/compute/example_test.go index 8d157aa40ac25..d427fb622d24a 100644 --- a/go/arrow/compute/example_test.go +++ b/go/arrow/compute/example_test.go @@ -23,11 +23,11 @@ import ( "fmt" "log" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/memory" ) // This example demonstrates how to register a custom scalar function. diff --git a/go/arrow/compute/exec.go b/go/arrow/compute/exec.go index e3e3fc0e5b887..1142297c1c396 100644 --- a/go/arrow/compute/exec.go +++ b/go/arrow/compute/exec.go @@ -22,9 +22,9 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func haveChunkedArray(values []Datum) bool { diff --git a/go/arrow/compute/exec/kernel.go b/go/arrow/compute/exec/kernel.go index 657f18cb87373..600e52c681686 100644 --- a/go/arrow/compute/exec/kernel.go +++ b/go/arrow/compute/exec/kernel.go @@ -24,10 +24,10 @@ import ( "hash/maphash" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/exec/kernel_test.go b/go/arrow/compute/exec/kernel_test.go index 13beb62d3d704..248bad323a307 100644 --- a/go/arrow/compute/exec/kernel_test.go +++ b/go/arrow/compute/exec/kernel_test.go @@ -22,12 +22,12 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/memory" + 
"github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exec/span.go b/go/arrow/compute/exec/span.go index d62306e663882..6156acfd008aa 100644 --- a/go/arrow/compute/exec/span.go +++ b/go/arrow/compute/exec/span.go @@ -22,11 +22,11 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" ) // BufferSpan is a lightweight Buffer holder for ArraySpans that does not diff --git a/go/arrow/compute/exec/span_test.go b/go/arrow/compute/exec/span_test.go index 79452965b8f81..f5beb45ee1494 100644 --- a/go/arrow/compute/exec/span_test.go +++ b/go/arrow/compute/exec/span_test.go @@ -24,14 +24,14 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go index 88390eef9cdef..832f93f13165d 100644 --- a/go/arrow/compute/exec/utils.go +++ b/go/arrow/compute/exec/utils.go @@ -24,10 +24,10 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/constraints" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go index 82b2545b5ce61..b8b7212b538c5 100644 --- a/go/arrow/compute/exec/utils_test.go +++ b/go/arrow/compute/exec/utils_test.go @@ -21,10 +21,10 @@ package exec_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exec_internals_test.go b/go/arrow/compute/exec_internals_test.go index 34f14e69d06d4..f0c585f557ebc 100644 --- a/go/arrow/compute/exec_internals_test.go +++ b/go/arrow/compute/exec_internals_test.go @@ -24,13 +24,13 @@ 
import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/compute/exec_test.go b/go/arrow/compute/exec_test.go index 40b5e55ecb8d2..27f6676f3187c 100644 --- a/go/arrow/compute/exec_test.go +++ b/go/arrow/compute/exec_test.go @@ -22,12 +22,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go index 3e605db305013..1d197e4220ab2 100644 --- a/go/arrow/compute/executor.go +++ b/go/arrow/compute/executor.go @@ -25,14 +25,14 @@ import ( "runtime" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" ) // ExecCtx holds simple contextual information for execution diff --git a/go/arrow/compute/expression.go b/go/arrow/compute/expression.go index 8dde6927a7c02..f6aadeda5634b 100644 --- a/go/arrow/compute/expression.go +++ b/go/arrow/compute/expression.go @@ -28,14 +28,14 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + 
"github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" ) var hashSeed = maphash.MakeSeed() diff --git a/go/arrow/compute/expression_test.go b/go/arrow/compute/expression_test.go index 4f3188ea82d9f..1898bb3dc92b2 100644 --- a/go/arrow/compute/expression_test.go +++ b/go/arrow/compute/expression_test.go @@ -22,11 +22,11 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exprs/builders.go b/go/arrow/compute/exprs/builders.go index 525aa2ade3fe5..a3af8dd6f287d 100644 --- a/go/arrow/compute/exprs/builders.go +++ b/go/arrow/compute/exprs/builders.go @@ -25,8 +25,8 @@ import ( "strings" "unicode" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" "github.com/substrait-io/substrait-go/expr" "github.com/substrait-io/substrait-go/extensions" "github.com/substrait-io/substrait-go/types" diff --git a/go/arrow/compute/exprs/builders_test.go b/go/arrow/compute/exprs/builders_test.go index 167a4378f9dfa..21ad3bd642030 100644 --- a/go/arrow/compute/exprs/builders_test.go +++ b/go/arrow/compute/exprs/builders_test.go @@ -21,8 +21,8 @@ package exprs_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exprs" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exprs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/substrait-io/substrait-go/expr" diff --git a/go/arrow/compute/exprs/exec.go b/go/arrow/compute/exprs/exec.go index 28c360a2e7dec..850acbb3cd492 100644 --- a/go/arrow/compute/exprs/exec.go +++ b/go/arrow/compute/exprs/exec.go @@ -23,15 +23,15 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/substrait-io/substrait-go/expr" "github.com/substrait-io/substrait-go/extensions" "github.com/substrait-io/substrait-go/types" diff --git 
a/go/arrow/compute/exprs/exec_internal_test.go b/go/arrow/compute/exprs/exec_internal_test.go index e38ebb9fb8bd7..450db139e9357 100644 --- a/go/arrow/compute/exprs/exec_internal_test.go +++ b/go/arrow/compute/exprs/exec_internal_test.go @@ -23,10 +23,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/compute/exprs/exec_test.go b/go/arrow/compute/exprs/exec_test.go index 038a57a8408f8..b74f80057a0d7 100644 --- a/go/arrow/compute/exprs/exec_test.go +++ b/go/arrow/compute/exprs/exec_test.go @@ -23,12 +23,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exprs" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exprs" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/substrait-io/substrait-go/expr" diff --git a/go/arrow/compute/exprs/extension_types.go b/go/arrow/compute/exprs/extension_types.go index 5dd5d229b09a7..8177675592fc9 100644 --- a/go/arrow/compute/exprs/extension_types.go +++ b/go/arrow/compute/exprs/extension_types.go @@ -24,8 +24,8 @@ import ( "reflect" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) type simpleExtensionTypeFactory[P comparable] struct { diff --git a/go/arrow/compute/exprs/field_refs.go b/go/arrow/compute/exprs/field_refs.go index 888b7e605f8b8..0e039d9e26601 100644 --- a/go/arrow/compute/exprs/field_refs.go +++ b/go/arrow/compute/exprs/field_refs.go @@ -21,11 +21,11 @@ package exprs import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/substrait-io/substrait-go/expr" ) diff --git a/go/arrow/compute/exprs/types.go b/go/arrow/compute/exprs/types.go index c231a62c28426..594a55c9041a8 100644 --- a/go/arrow/compute/exprs/types.go +++ b/go/arrow/compute/exprs/types.go @@ -24,8 +24,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" "github.com/substrait-io/substrait-go/expr" "github.com/substrait-io/substrait-go/extensions" "github.com/substrait-io/substrait-go/types" diff 
--git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go index ea8579f9b60b0..d69c7d91044c7 100644 --- a/go/arrow/compute/fieldref.go +++ b/go/arrow/compute/fieldref.go @@ -25,8 +25,8 @@ import ( "strings" "unicode" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) var ( diff --git a/go/arrow/compute/fieldref_hash.go b/go/arrow/compute/fieldref_hash.go index 5da85254e314b..21ef88f1ecb4f 100644 --- a/go/arrow/compute/fieldref_hash.go +++ b/go/arrow/compute/fieldref_hash.go @@ -23,7 +23,7 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func (f FieldPath) hash(h *maphash.Hash) { diff --git a/go/arrow/compute/fieldref_test.go b/go/arrow/compute/fieldref_test.go index 45ca64acbcac2..ce2051f942271 100644 --- a/go/arrow/compute/fieldref_test.go +++ b/go/arrow/compute/fieldref_test.go @@ -19,10 +19,10 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/functions.go b/go/arrow/compute/functions.go index 44f54e09dee94..ebade11a8e60b 100644 --- a/go/arrow/compute/functions.go +++ b/go/arrow/compute/functions.go @@ -23,8 +23,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" ) type Function interface { diff --git a/go/arrow/compute/functions_test.go b/go/arrow/compute/functions_test.go index 0e40c0a2f8082..31a4cf124e845 100644 --- a/go/arrow/compute/functions_test.go +++ b/go/arrow/compute/functions_test.go @@ -21,8 +21,8 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic.go b/go/arrow/compute/internal/kernels/base_arithmetic.go index 38aa073c76745..169fbba2f02af 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic.go @@ -24,11 +24,11 @@ import ( "math/bits" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go index 902c5f341032b..b818e45dc1bb4 100644 --- 
a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go @@ -21,9 +21,9 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" "golang.org/x/exp/constraints" "golang.org/x/sys/cpu" ) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go index 223e6c29b755a..89384aa09fc73 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go index 0c81f426c537b..fff54292e3b4b 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go index 0291989d9a45b..36619106c93cf 100644 --- a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go +++ b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go @@ -19,8 +19,8 @@ package kernels import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/boolean_cast.go b/go/arrow/compute/internal/kernels/boolean_cast.go index 266b569df7b7d..66a49f2be0294 100644 --- a/go/arrow/compute/internal/kernels/boolean_cast.go +++ b/go/arrow/compute/internal/kernels/boolean_cast.go @@ -22,9 +22,9 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" ) func isNonZero[T arrow.FixedWidthType](ctx *exec.KernelCtx, in []T, out []byte) error { diff --git a/go/arrow/compute/internal/kernels/cast.go b/go/arrow/compute/internal/kernels/cast.go index b7e3b59d7accc..8603d3ad1891f 100644 --- a/go/arrow/compute/internal/kernels/cast.go +++ b/go/arrow/compute/internal/kernels/cast.go @@ -19,9 +19,9 @@ package kernels import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" ) type CastOptions struct { diff --git a/go/arrow/compute/internal/kernels/cast_numeric.go b/go/arrow/compute/internal/kernels/cast_numeric.go index 88c295271fe5f..41ad94d83e68b 100644 --- 
a/go/arrow/compute/internal/kernels/cast_numeric.go +++ b/go/arrow/compute/internal/kernels/cast_numeric.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) var castNumericUnsafe func(itype, otype arrow.Type, in, out []byte, len int) = castNumericGo diff --git a/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go b/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go index 186fac16bae82..925b4328002d0 100644 --- a/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go +++ b/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go b/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go index 630bc06199f49..0b491244dcc44 100644 --- a/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go +++ b/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" "golang.org/x/sys/cpu" ) diff --git a/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go b/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go index cea34a62a9489..4c19e06dc704e 100644 --- a/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go +++ b/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/cast_temporal.go b/go/arrow/compute/internal/kernels/cast_temporal.go index 0ad73737439c7..183d47654ec64 100644 --- a/go/arrow/compute/internal/kernels/cast_temporal.go +++ b/go/arrow/compute/internal/kernels/cast_temporal.go @@ -24,10 +24,10 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const millisecondsInDay = 86400000 diff --git a/go/arrow/compute/internal/kernels/helpers.go b/go/arrow/compute/internal/kernels/helpers.go index a99670cbbb231..230a8e9112c29 100644 --- a/go/arrow/compute/internal/kernels/helpers.go +++ b/go/arrow/compute/internal/kernels/helpers.go @@ -22,13 +22,13 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/bitutils" "golang.org/x/exp/constraints" ) diff --git 
a/go/arrow/compute/internal/kernels/numeric_cast.go b/go/arrow/compute/internal/kernels/numeric_cast.go index 286c45e41d7b2..ca3a9937594aa 100644 --- a/go/arrow/compute/internal/kernels/numeric_cast.go +++ b/go/arrow/compute/internal/kernels/numeric_cast.go @@ -23,13 +23,13 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/internal/bitutils" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/rounding.go b/go/arrow/compute/internal/kernels/rounding.go index ab05082513679..1afe76065f4de 100644 --- a/go/arrow/compute/internal/kernels/rounding.go +++ b/go/arrow/compute/internal/kernels/rounding.go @@ -22,11 +22,11 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/scalar" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/scalar_arithmetic.go b/go/arrow/compute/internal/kernels/scalar_arithmetic.go index 038cca507b379..f2b52a6bf7101 100644 --- a/go/arrow/compute/internal/kernels/scalar_arithmetic.go +++ b/go/arrow/compute/internal/kernels/scalar_arithmetic.go @@ -22,13 +22,13 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" ) // scalar kernel that ignores (assumed all-null inputs) and returns null diff --git a/go/arrow/compute/internal/kernels/scalar_boolean.go b/go/arrow/compute/internal/kernels/scalar_boolean.go index 7dbf76d4e847f..f23a7f568b192 100644 --- a/go/arrow/compute/internal/kernels/scalar_boolean.go +++ b/go/arrow/compute/internal/kernels/scalar_boolean.go @@ -19,9 +19,9 @@ package kernels import ( - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow/bitutil" + 
"github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/scalar" ) type computeWordFN func(leftTrue, leftFalse, rightTrue, rightFalse uint64) (outValid, outData uint64) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go index 76de68a0ae44f..bf8594e8290f7 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" "golang.org/x/sys/cpu" ) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go index b98b538570554..220e65cafe291 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go index e897f767fd081..e7b4bce2362ba 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go @@ -18,7 +18,7 @@ package kernels -import "github.com/apache/arrow/go/v17/arrow" +import "github.com/apache/arrow/go/v18/arrow" func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { return genGoCompareKernel(getCmpOp[T](op)) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go index 739ee929136ea..7eea2dd1abb73 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/scalar_comparisons.go b/go/arrow/compute/internal/kernels/scalar_comparisons.go index f7278d6b16e3a..dc43b74984a0d 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparisons.go +++ b/go/arrow/compute/internal/kernels/scalar_comparisons.go @@ -23,14 +23,14 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/bitutils" ) type binaryKernel func(left, right, out []byte, offset int) diff --git a/go/arrow/compute/internal/kernels/string_casts.go b/go/arrow/compute/internal/kernels/string_casts.go index 
ec64871d31263..6a50d6627140b 100644 --- a/go/arrow/compute/internal/kernels/string_casts.go +++ b/go/arrow/compute/internal/kernels/string_casts.go @@ -23,12 +23,12 @@ import ( "strconv" "unicode/utf8" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/bitutils" ) func validateUtf8Fsb(input *exec.ArraySpan) error { diff --git a/go/arrow/compute/internal/kernels/types.go b/go/arrow/compute/internal/kernels/types.go index a3df0b2db18ff..fb20ed02381fe 100644 --- a/go/arrow/compute/internal/kernels/types.go +++ b/go/arrow/compute/internal/kernels/types.go @@ -21,10 +21,10 @@ package kernels import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" ) var ( diff --git a/go/arrow/compute/internal/kernels/vector_hash.go b/go/arrow/compute/internal/kernels/vector_hash.go index 6da52e075443f..57f925dc251b1 100644 --- a/go/arrow/compute/internal/kernels/vector_hash.go +++ b/go/arrow/compute/internal/kernels/vector_hash.go @@ -21,13 +21,13 @@ package kernels import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/hashing" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/hashing" ) type HashState interface { diff --git a/go/arrow/compute/internal/kernels/vector_run_end_encode.go b/go/arrow/compute/internal/kernels/vector_run_end_encode.go index f9f517585699a..08f8cf44b9206 100644 --- a/go/arrow/compute/internal/kernels/vector_run_end_encode.go +++ b/go/arrow/compute/internal/kernels/vector_run_end_encode.go @@ -24,14 +24,14 @@ import ( "sort" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + 
"github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) type RunEndEncodeState struct { diff --git a/go/arrow/compute/internal/kernels/vector_selection.go b/go/arrow/compute/internal/kernels/vector_selection.go index b4fd90848a229..e96782f4cd5ea 100644 --- a/go/arrow/compute/internal/kernels/vector_selection.go +++ b/go/arrow/compute/internal/kernels/vector_selection.go @@ -22,13 +22,13 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" ) type NullSelectionBehavior int8 diff --git a/go/arrow/compute/registry.go b/go/arrow/compute/registry.go index 8c42cc9724283..cb64c7e09de0a 100644 --- a/go/arrow/compute/registry.go +++ b/go/arrow/compute/registry.go @@ -21,7 +21,7 @@ package compute import ( "sync" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" "golang.org/x/exp/maps" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/registry_test.go b/go/arrow/compute/registry_test.go index 783f75e115333..15e561ada42d3 100644 --- a/go/arrow/compute/registry_test.go +++ b/go/arrow/compute/registry_test.go @@ -23,9 +23,9 @@ import ( "errors" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" "github.com/stretchr/testify/assert" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/scalar_bool.go b/go/arrow/compute/scalar_bool.go index ed6007af9234b..39f4242163d49 100644 --- a/go/arrow/compute/scalar_bool.go +++ b/go/arrow/compute/scalar_bool.go @@ -21,9 +21,9 @@ package compute import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/scalar_bool_test.go b/go/arrow/compute/scalar_bool_test.go index 2ae7f2cba532b..4b2c5d54f8ae2 100644 --- a/go/arrow/compute/scalar_bool_test.go +++ b/go/arrow/compute/scalar_bool_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + 
"github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/compute/scalar_compare.go b/go/arrow/compute/scalar_compare.go index 7092431a01549..8f51440bc6362 100644 --- a/go/arrow/compute/scalar_compare.go +++ b/go/arrow/compute/scalar_compare.go @@ -21,9 +21,9 @@ package compute import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) type compareFunction struct { diff --git a/go/arrow/compute/scalar_compare_test.go b/go/arrow/compute/scalar_compare_test.go index 9f1115312829f..1f1b65bd0f25f 100644 --- a/go/arrow/compute/scalar_compare_test.go +++ b/go/arrow/compute/scalar_compare_test.go @@ -24,15 +24,15 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/compute/selection.go b/go/arrow/compute/selection.go index e2966189c9f6a..4aeaa8884649d 100644 --- a/go/arrow/compute/selection.go +++ b/go/arrow/compute/selection.go @@ -22,10 +22,10 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" "golang.org/x/sync/errgroup" ) diff --git a/go/arrow/compute/utils.go b/go/arrow/compute/utils.go index b20688539f146..909d0b68868f6 100644 --- a/go/arrow/compute/utils.go +++ b/go/arrow/compute/utils.go @@ -21,15 +21,15 @@ package compute import ( "fmt" "io" - "math" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + 
"github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" "golang.org/x/xerrors" ) @@ -43,9 +43,9 @@ func (b *bufferWriteSeeker) Reserve(nbytes int) { if b.buf == nil { b.buf = memory.NewResizableBuffer(b.mem) } - newCap := int(math.Max(float64(b.buf.Cap()), 256)) + newCap := utils.Max(b.buf.Cap(), 256) for newCap < b.pos+nbytes { - newCap = bitutil.NextPowerOf2(newCap) + newCap = bitutil.NextPowerOf2(b.pos + nbytes) } b.buf.Reserve(newCap) } diff --git a/go/arrow/compute/vector_hash.go b/go/arrow/compute/vector_hash.go index 7fcbce19ada4c..8692a6fff074c 100644 --- a/go/arrow/compute/vector_hash.go +++ b/go/arrow/compute/vector_hash.go @@ -21,8 +21,8 @@ package compute import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go index 87a4eb6806a9b..58ff1263ca880 100644 --- a/go/arrow/compute/vector_hash_test.go +++ b/go/arrow/compute/vector_hash_test.go @@ -23,12 +23,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/compute/vector_run_end_test.go b/go/arrow/compute/vector_run_end_test.go index a6cc1306a625b..8c8e776feb23c 100644 --- a/go/arrow/compute/vector_run_end_test.go +++ b/go/arrow/compute/vector_run_end_test.go @@ -25,13 +25,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/compute/vector_run_ends.go b/go/arrow/compute/vector_run_ends.go index 09b872f486a01..5dfdde4e00948 100644 --- a/go/arrow/compute/vector_run_ends.go +++ b/go/arrow/compute/vector_run_ends.go @@ -21,8 +21,8 @@ package compute import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/vector_selection_test.go 
b/go/arrow/compute/vector_selection_test.go index 2331a2d0342b5..6fcb5c242f151 100644 --- a/go/arrow/compute/vector_selection_test.go +++ b/go/arrow/compute/vector_selection_test.go @@ -24,15 +24,15 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 06fed69a77fe5..ed254ae35b353 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -23,8 +23,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" ) var ( diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go index 46591a9a5adee..12d607b26c48d 100644 --- a/go/arrow/csv/reader.go +++ b/go/arrow/csv/reader.go @@ -29,13 +29,13 @@ import ( "time" "unicode/utf8" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) // Reader wraps encoding/csv.Reader and creates array.Records from a schema. 
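Aside: the one behavioral change buried in the import-path bumps above is in `go/arrow/compute/utils.go`. `bufferWriteSeeker.Reserve` now seeds its capacity via the generic `utils.Max` instead of round-tripping through `float64` with `math.Max` (dropping the `math` import), and it grows by jumping straight to `bitutil.NextPowerOf2(b.pos + nbytes)` instead of repeatedly applying `NextPowerOf2` to `newCap` itself, so the loop body can run at most once. A minimal standalone sketch of the new growth policy, assuming `NextPowerOf2` returns the smallest power of two >= its input (`nextPowerOf2` below is a stand-in, not the library function):

```go
package main

import (
	"fmt"
	"math/bits"
)

// nextPowerOf2 is a stand-in for bitutil.NextPowerOf2 under the assumed
// semantics: the smallest power of two greater than or equal to x.
func nextPowerOf2(x int) int {
	if x <= 1 {
		return 1
	}
	return 1 << bits.Len(uint(x-1))
}

// reserveCap mirrors the revised Reserve logic: start from at least 256
// bytes, then jump to a power of two that covers the requested position.
func reserveCap(curCap, pos, nbytes int) int {
	newCap := curCap
	if newCap < 256 {
		newCap = 256 // utils.Max(b.buf.Cap(), 256) in the patch
	}
	if newCap < pos+nbytes {
		// One jump covers the request; no repeated doubling needed.
		newCap = nextPowerOf2(pos + nbytes)
	}
	return newCap
}

func main() {
	fmt.Println(reserveCap(0, 0, 100))      // 256
	fmt.Println(reserveCap(256, 200, 1000)) // 2048
}
```

The patch keeps the original `for` loop around the new expression, but with `NextPowerOf2` applied to `b.pos + nbytes` directly the condition is satisfied after one iteration, so the `if` above captures the same behavior.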
diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index 65453db015a7e..b0775b9b11a96 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -25,13 +25,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/csv" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/csv" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 237437c0441e1..f99d047e3c8cf 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -25,8 +25,8 @@ import ( "math/big" "strconv" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string) string) []string { diff --git a/go/arrow/csv/writer.go b/go/arrow/csv/writer.go index 243d83f7d847f..d0efbde170d65 100644 --- a/go/arrow/csv/writer.go +++ b/go/arrow/csv/writer.go @@ -22,7 +22,7 @@ import ( "strconv" "sync" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Writer wraps encoding/csv.Writer and writes arrow.Record based on a schema. diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go index ef9769fc32876..be9ab961c3ef7 100644 --- a/go/arrow/csv/writer_test.go +++ b/go/arrow/csv/writer_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/csv" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/csv" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/google/uuid" ) diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go index 8fff5e652572b..96b7bf65505ec 100644 --- a/go/arrow/datatype.go +++ b/go/arrow/datatype.go @@ -21,7 +21,7 @@ import ( "hash/maphash" "strings" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Type is a logical type. 
They can be expressed as diff --git a/go/arrow/datatype_binary_test.go b/go/arrow/datatype_binary_test.go index 41dee140c5a44..a65d92a0f61ac 100644 --- a/go/arrow/datatype_binary_test.go +++ b/go/arrow/datatype_binary_test.go @@ -19,7 +19,7 @@ package arrow_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func TestBinaryType(t *testing.T) { diff --git a/go/arrow/datatype_extension_test.go b/go/arrow/datatype_extension_test.go index 92fcacd620586..c3e595f523e57 100644 --- a/go/arrow/datatype_extension_test.go +++ b/go/arrow/datatype_extension_test.go @@ -20,8 +20,8 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index a1c2834b8d574..8bcae78d7d8af 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -22,7 +22,7 @@ import ( "sync" "time" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/internal/json" "golang.org/x/xerrors" ) diff --git a/go/arrow/datatype_fixedwidth_test.go b/go/arrow/datatype_fixedwidth_test.go index f4f3b071ff5d4..fbd1334626774 100644 --- a/go/arrow/datatype_fixedwidth_test.go +++ b/go/arrow/datatype_fixedwidth_test.go @@ -21,7 +21,7 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go index b38983b7f2e5d..579b2c3306003 100644 --- a/go/arrow/datatype_nested.go +++ b/go/arrow/datatype_nested.go @@ -22,7 +22,7 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) type ( diff --git a/go/arrow/datatype_null_test.go b/go/arrow/datatype_null_test.go index b72395843ef8b..83b3f0c44c549 100644 --- a/go/arrow/datatype_null_test.go +++ b/go/arrow/datatype_null_test.go @@ -19,7 +19,7 @@ package arrow_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func TestNullType(t *testing.T) { diff --git a/go/arrow/datatype_viewheader.go b/go/arrow/datatype_viewheader.go index 8dd2fa4881e2f..e153251caaf03 100644 --- a/go/arrow/datatype_viewheader.go +++ b/go/arrow/datatype_viewheader.go @@ -20,9 +20,9 @@ import ( "bytes" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) const ( diff --git a/go/arrow/datatype_viewheader_inline.go b/go/arrow/datatype_viewheader_inline.go index 24f518d0e236a..2883ee380308e 100644 --- a/go/arrow/datatype_viewheader_inline.go +++ b/go/arrow/datatype_viewheader_inline.go @@ -21,7 +21,7 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func (sh *ViewHeader) InlineString() (data string) { diff --git a/go/arrow/datatype_viewheader_inline_go1.19.go b/go/arrow/datatype_viewheader_inline_go1.19.go index 2f7299ec111b4..d72c0d6f17c2b 100644 --- 
a/go/arrow/datatype_viewheader_inline_go1.19.go +++ b/go/arrow/datatype_viewheader_inline_go1.19.go @@ -22,7 +22,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func (sh *ViewHeader) InlineString() (data string) { diff --git a/go/arrow/datatype_viewheader_inline_tinygo.go b/go/arrow/datatype_viewheader_inline_tinygo.go index b309c07710e3e..a342167972fe4 100644 --- a/go/arrow/datatype_viewheader_inline_tinygo.go +++ b/go/arrow/datatype_viewheader_inline_tinygo.go @@ -22,7 +22,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func (sh *ViewHeader) InlineString() (data string) { diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go index e4b19797d5462..00ab253003559 100644 --- a/go/arrow/decimal128/decimal128.go +++ b/go/arrow/decimal128/decimal128.go @@ -23,7 +23,7 @@ import ( "math/big" "math/bits" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const ( diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go index 11e293ee3ebb0..18443512a36da 100644 --- a/go/arrow/decimal128/decimal128_test.go +++ b/go/arrow/decimal128/decimal128_test.go @@ -22,7 +22,7 @@ import ( "math/big" "testing" - "github.com/apache/arrow/go/v17/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal128" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/decimal256/decimal256.go b/go/arrow/decimal256/decimal256.go index d5abfc2298a40..8244d2cd8334c 100644 --- a/go/arrow/decimal256/decimal256.go +++ b/go/arrow/decimal256/decimal256.go @@ -23,8 +23,8 @@ import ( "math/big" "math/bits" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const ( diff --git a/go/arrow/decimal256/decimal256_test.go b/go/arrow/decimal256/decimal256_test.go index 07c147bacf2f1..3d2ee8c543597 100644 --- a/go/arrow/decimal256/decimal256_test.go +++ b/go/arrow/decimal256/decimal256_test.go @@ -23,7 +23,7 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/decimal256" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/doc.go b/go/arrow/doc.go index 93e838d20e4d0..30e6b7eb8d6ea 100644 --- a/go/arrow/doc.go +++ b/go/arrow/doc.go @@ -34,7 +34,7 @@ To build with tinygo include the noasm build tag. 
*/ package arrow -const PkgVersion = "17.0.0-SNAPSHOT" +const PkgVersion = "18.0.0-SNAPSHOT" //go:generate go run _tools/tmpl/main.go -i -data=numeric.tmpldata type_traits_numeric.gen.go.tmpl type_traits_numeric.gen_test.go.tmpl array/numeric.gen.go.tmpl array/numericbuilder.gen.go.tmpl array/bufferbuilder_numeric.gen.go.tmpl //go:generate go run _tools/tmpl/main.go -i -data=datatype_numeric.gen.go.tmpldata datatype_numeric.gen.go.tmpl tensor/numeric.gen.go.tmpl tensor/numeric.gen_test.go.tmpl diff --git a/go/arrow/encoded/ree_utils.go b/go/arrow/encoded/ree_utils.go index 0493d71f31f5c..822edd0303703 100644 --- a/go/arrow/encoded/ree_utils.go +++ b/go/arrow/encoded/ree_utils.go @@ -20,7 +20,7 @@ import ( "math" "sort" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // FindPhysicalIndex performs a binary search on the run-ends to return diff --git a/go/arrow/encoded/ree_utils_test.go b/go/arrow/encoded/ree_utils_test.go index 99b8252d1d7c2..43a4f83b3b999 100644 --- a/go/arrow/encoded/ree_utils_test.go +++ b/go/arrow/encoded/ree_utils_test.go @@ -21,10 +21,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/endian/endian.go b/go/arrow/endian/endian.go index 234c58cde1d14..8ac496b996938 100644 --- a/go/arrow/endian/endian.go +++ b/go/arrow/endian/endian.go @@ -17,8 +17,8 @@ package endian import ( - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" ) type Endianness flatbuf.Endianness diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go index fbb092ef5baeb..3dac447ae7c83 100644 --- a/go/arrow/example_test.go +++ b/go/arrow/example_test.go @@ -20,10 +20,10 @@ import ( "fmt" "log" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) // This example demonstrates how to build an array of int64 values using a builder and Append. 
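Alongside the path rewrite, `go/arrow/doc.go` above bumps the exported `PkgVersion` constant to `18.0.0-SNAPSHOT`. Downstream code that wants to log which arrow-go release it was built against can read that constant directly; a minimal sketch using only what the patch itself shows:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
)

func main() {
	// Prints "18.0.0-SNAPSHOT" for builds from this branch, and the
	// final version string once v18.0.0 is tagged.
	fmt.Println("arrow-go version:", arrow.PkgVersion)
}
```

Note that the import itself must use the `/v18` module path; under Go semantic import versioning the major version is part of the path, which is why every Go file in this patch is touched.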
diff --git a/go/arrow/flight/basic_auth_flight_test.go b/go/arrow/flight/basic_auth_flight_test.go index c602521714727..dac10e2657085 100755 --- a/go/arrow/flight/basic_auth_flight_test.go +++ b/go/arrow/flight/basic_auth_flight_test.go @@ -22,7 +22,7 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/client.go b/go/arrow/flight/client.go index b049072e19199..13c068e159f2b 100644 --- a/go/arrow/flight/client.go +++ b/go/arrow/flight/client.go @@ -26,7 +26,7 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" diff --git a/go/arrow/flight/cookie_middleware_test.go b/go/arrow/flight/cookie_middleware_test.go index 53e60fb70b7ff..2e2d02bc21fb5 100644 --- a/go/arrow/flight/cookie_middleware_test.go +++ b/go/arrow/flight/cookie_middleware_test.go @@ -28,7 +28,7 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/grpc" diff --git a/go/arrow/flight/example_flight_server_test.go b/go/arrow/flight/example_flight_server_test.go index e5e684d951541..9dd7bc8efef48 100755 --- a/go/arrow/flight/example_flight_server_test.go +++ b/go/arrow/flight/example_flight_server_test.go @@ -23,7 +23,7 @@ import ( "io" "log" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/flight_middleware_test.go b/go/arrow/flight/flight_middleware_test.go index 980987d5d4d0f..181b41ea41f2a 100755 --- a/go/arrow/flight/flight_middleware_test.go +++ b/go/arrow/flight/flight_middleware_test.go @@ -23,8 +23,8 @@ import ( sync "sync" "testing" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/grpc" diff --git a/go/arrow/flight/flight_test.go b/go/arrow/flight/flight_test.go index 47c8183b3883d..fe896f39a2b21 100755 --- a/go/arrow/flight/flight_test.go +++ b/go/arrow/flight/flight_test.go @@ -23,11 +23,11 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index c6794820dc172..4a600e5253e9b 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -22,12 +22,12 @@ import ( "fmt" 
"io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index 33da79167c4ae..7604b554cbc6c 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -22,12 +22,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" "google.golang.org/grpc" diff --git a/go/arrow/flight/flightsql/column_metadata.go b/go/arrow/flight/flightsql/column_metadata.go index ca9c633ab1e0f..60e48b5e0f5d4 100644 --- a/go/arrow/flight/flightsql/column_metadata.go +++ b/go/arrow/flight/flightsql/column_metadata.go @@ -19,7 +19,7 @@ package flightsql import ( "strconv" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) const ( diff --git a/go/arrow/flight/flightsql/driver/README.md b/go/arrow/flight/flightsql/driver/README.md index 5187c7e4248ec..802d050042c66 100644 --- a/go/arrow/flight/flightsql/driver/README.md +++ b/go/arrow/flight/flightsql/driver/README.md @@ -36,7 +36,7 @@ connection pooling, transactions combined with ease of use (see (#usage)). 
## Prerequisites * Go 1.17+ -* Installation via `go get -u github.com/apache/arrow/go/v17/arrow/flight/flightsql` +* Installation via `go get -u github.com/apache/arrow/go/v18/arrow/flight/flightsql` * Backend speaking FlightSQL --------------------------------------- @@ -55,7 +55,7 @@ import ( "database/sql" "time" - _ "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + _ "github.com/apache/arrow/go/v18/arrow/flight/flightsql" ) // Open the connection to an SQLite backend @@ -141,7 +141,7 @@ import ( "log" "time" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" ) func main() { diff --git a/go/arrow/flight/flightsql/driver/config_test.go b/go/arrow/flight/flightsql/driver/config_test.go index 6984da68494be..d74f9d84d0f1e 100644 --- a/go/arrow/flight/flightsql/driver/config_test.go +++ b/go/arrow/flight/flightsql/driver/config_test.go @@ -22,7 +22,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/driver" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/driver" ) func TestConfigTLSRegistry(t *testing.T) { diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index cd0487e7ad275..0f2b02deaca7c 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -26,11 +26,11 @@ import ( "sync" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc" "google.golang.org/grpc/credentials" diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go index d4361eb2dd722..e5060ccbe33d0 100644 --- a/go/arrow/flight/flightsql/driver/driver_test.go +++ b/go/arrow/flight/flightsql/driver/driver_test.go @@ -34,13 +34,13 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/driver" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/example" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/driver" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v18/arrow/memory" ) const defaultTableName = "drivertest" diff --git a/go/arrow/flight/flightsql/driver/utils.go b/go/arrow/flight/flightsql/driver/utils.go index d07ed446b2bcb..a206d7753529d 100644 --- a/go/arrow/flight/flightsql/driver/utils.go +++ b/go/arrow/flight/flightsql/driver/utils.go @@ -21,8 +21,8 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + 
"github.com/apache/arrow/go/v18/arrow/array" ) // *** GRPC helpers *** diff --git a/go/arrow/flight/flightsql/driver/utils_test.go b/go/arrow/flight/flightsql/driver/utils_test.go index 2c87075423e0a..0f6033b9282ea 100644 --- a/go/arrow/flight/flightsql/driver/utils_test.go +++ b/go/arrow/flight/flightsql/driver/utils_test.go @@ -22,12 +22,12 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go b/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go index 6bce68d7949dc..529feeb04c88c 100644 --- a/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go +++ b/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go @@ -27,9 +27,9 @@ import ( "os" "strconv" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/example" ) func main() { diff --git a/go/arrow/flight/flightsql/example/sql_batch_reader.go b/go/arrow/flight/flightsql/example/sql_batch_reader.go index cb806ef1b27ab..bfd3e354b17e9 100644 --- a/go/arrow/flight/flightsql/example/sql_batch_reader.go +++ b/go/arrow/flight/flightsql/example/sql_batch_reader.go @@ -26,11 +26,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/wrapperspb" diff --git a/go/arrow/flight/flightsql/example/sqlite_info.go b/go/arrow/flight/flightsql/example/sqlite_info.go index ea9d7b98aade4..6135911c7b908 100644 --- a/go/arrow/flight/flightsql/example/sqlite_info.go +++ b/go/arrow/flight/flightsql/example/sqlite_info.go @@ -20,8 +20,8 @@ package example import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" ) func SqlInfoResultMap() flightsql.SqlInfoResultMap { diff --git a/go/arrow/flight/flightsql/example/sqlite_server.go b/go/arrow/flight/flightsql/example/sqlite_server.go index b5a64d57dceb6..6a2b80e0dbc36 100644 --- a/go/arrow/flight/flightsql/example/sqlite_server.go +++ b/go/arrow/flight/flightsql/example/sqlite_server.go @@ -45,13 +45,13 @@ 
import ( "strings" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" diff --git a/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go b/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go index 4786110e232a8..373be784b9145 100644 --- a/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go +++ b/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go @@ -25,12 +25,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" sqlite3 "modernc.org/sqlite/lib" ) diff --git a/go/arrow/flight/flightsql/example/type_info.go b/go/arrow/flight/flightsql/example/type_info.go index cfe47b4090fe7..5e5e52cf2a4cf 100644 --- a/go/arrow/flight/flightsql/example/type_info.go +++ b/go/arrow/flight/flightsql/example/type_info.go @@ -22,10 +22,10 @@ package example import ( "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/memory" ) func GetTypeInfoResult(mem memory.Allocator) arrow.Record { diff --git a/go/arrow/flight/flightsql/schema_ref/reference_schemas.go b/go/arrow/flight/flightsql/schema_ref/reference_schemas.go index 5b2684bae1694..d9ba7767feb83 100644 --- a/go/arrow/flight/flightsql/schema_ref/reference_schemas.go +++ b/go/arrow/flight/flightsql/schema_ref/reference_schemas.go @@ -18,7 +18,7 @@ // by FlightSQL servers and clients. 
package schema_ref -import "github.com/apache/arrow/go/v17/arrow" +import "github.com/apache/arrow/go/v18/arrow" var ( Catalogs = arrow.NewSchema( diff --git a/go/arrow/flight/flightsql/server.go b/go/arrow/flight/flightsql/server.go index 25bdc5f4d5bb3..b085619745c90 100644 --- a/go/arrow/flight/flightsql/server.go +++ b/go/arrow/flight/flightsql/server.go @@ -20,14 +20,14 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" diff --git a/go/arrow/flight/flightsql/server_test.go b/go/arrow/flight/flightsql/server_test.go index e594f8e773fd2..494dda1703fc4 100644 --- a/go/arrow/flight/flightsql/server_test.go +++ b/go/arrow/flight/flightsql/server_test.go @@ -22,13 +22,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/flight/session" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/flight/session" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "google.golang.org/grpc" diff --git a/go/arrow/flight/flightsql/sql_info.go b/go/arrow/flight/flightsql/sql_info.go index 662d809955522..2cd7f91cfd70a 100644 --- a/go/arrow/flight/flightsql/sql_info.go +++ b/go/arrow/flight/flightsql/sql_info.go @@ -17,8 +17,8 @@ package flightsql import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) const ( diff --git a/go/arrow/flight/flightsql/sqlite_server_test.go b/go/arrow/flight/flightsql/sqlite_server_test.go index eb30b0aa50695..fee2475b2b2ec 100644 --- a/go/arrow/flight/flightsql/sqlite_server_test.go +++ b/go/arrow/flight/flightsql/sqlite_server_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/example" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - 
"github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "google.golang.org/grpc/codes" diff --git a/go/arrow/flight/flightsql/types.go b/go/arrow/flight/flightsql/types.go index c709a865da7bb..88840cd7d6caf 100644 --- a/go/arrow/flight/flightsql/types.go +++ b/go/arrow/flight/flightsql/types.go @@ -17,7 +17,7 @@ package flightsql import ( - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) diff --git a/go/arrow/flight/record_batch_reader.go b/go/arrow/flight/record_batch_reader.go index 8332fedd2dc0a..9067e9e2982e5 100644 --- a/go/arrow/flight/record_batch_reader.go +++ b/go/arrow/flight/record_batch_reader.go @@ -21,13 +21,13 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // DataStreamReader is an interface for receiving flight data messages on a stream diff --git a/go/arrow/flight/record_batch_writer.go b/go/arrow/flight/record_batch_writer.go index ca0354e00bc99..27211277ab061 100644 --- a/go/arrow/flight/record_batch_writer.go +++ b/go/arrow/flight/record_batch_writer.go @@ -19,9 +19,9 @@ package flight import ( "bytes" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) // DataStreamWriter is an interface that represents an Arrow Flight stream diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index fc74ba7aa4d6e..b67e52f4357ce 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -23,7 +23,7 @@ import ( "os" "os/signal" - "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" "google.golang.org/grpc" ) diff --git a/go/arrow/flight/server_example_test.go b/go/arrow/flight/server_example_test.go index 8386147c311aa..60e5ec8f4e381 100644 --- a/go/arrow/flight/server_example_test.go +++ b/go/arrow/flight/server_example_test.go @@ -21,7 +21,7 @@ import ( "fmt" "net" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" 
"google.golang.org/grpc/health" diff --git a/go/arrow/flight/session/example_session_test.go b/go/arrow/flight/session/example_session_test.go index 7d92f27f2c840..5a9e7e83d21e0 100644 --- a/go/arrow/flight/session/example_session_test.go +++ b/go/arrow/flight/session/example_session_test.go @@ -19,9 +19,9 @@ package session_test import ( "log" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/session" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/session" "github.com/google/uuid" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/session/session.go b/go/arrow/flight/session/session.go index f09a2ed620d8f..508f79befc258 100644 --- a/go/arrow/flight/session/session.go +++ b/go/arrow/flight/session/session.go @@ -36,7 +36,7 @@ import ( "net/http" "sync" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/metadata" "google.golang.org/protobuf/proto" diff --git a/go/arrow/flight/session/stateful_session.go b/go/arrow/flight/session/stateful_session.go index 1a339c1bc4eb7..0fb1c8f729756 100644 --- a/go/arrow/flight/session/stateful_session.go +++ b/go/arrow/flight/session/stateful_session.go @@ -22,7 +22,7 @@ import ( "net/http" "sync" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "github.com/google/uuid" ) diff --git a/go/arrow/flight/session/stateless_session.go b/go/arrow/flight/session/stateless_session.go index 1e248a7a5e92d..d792a91f84ece 100644 --- a/go/arrow/flight/session/stateless_session.go +++ b/go/arrow/flight/session/stateless_session.go @@ -22,7 +22,7 @@ import ( "fmt" "net/http" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/protobuf/proto" ) diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go index 9a049c762a364..5111f2dbc4da6 100644 --- a/go/arrow/internal/arrdata/arrdata.go +++ b/go/arrow/internal/arrdata/arrdata.go @@ -21,14 +21,14 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" ) var ( diff --git a/go/arrow/internal/arrdata/ioutil.go b/go/arrow/internal/arrdata/ioutil.go index 63fac86fde948..715451ad9634d 100644 --- a/go/arrow/internal/arrdata/ioutil.go +++ b/go/arrow/internal/arrdata/ioutil.go @@ -23,11 +23,11 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/ipc" - 
"github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) // CheckArrowFile checks whether a given ARROW file contains the expected list of records. diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 3f41f1b40a38e..42bbee8d5a2ec 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -26,16 +26,16 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Schema struct { diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index 08a879e7ea3e3..7459ef8ae8f1d 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -22,9 +22,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/internal/arrjson/option.go b/go/arrow/internal/arrjson/option.go index ec53f1e8f43b9..261bc75b64e6f 100644 --- a/go/arrow/internal/arrjson/option.go +++ b/go/arrow/internal/arrjson/option.go @@ -17,8 +17,8 @@ package arrjson import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" ) type config struct { diff --git a/go/arrow/internal/arrjson/reader.go b/go/arrow/internal/arrjson/reader.go index 8be44c001e180..97fe2904cbe5f 100644 --- a/go/arrow/internal/arrjson/reader.go +++ b/go/arrow/internal/arrjson/reader.go @@ -20,11 +20,11 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/internal/json" ) type Reader struct { diff --git 
a/go/arrow/internal/arrjson/writer.go b/go/arrow/internal/arrjson/writer.go index 1d1565885ba6b..25004863abe0d 100644 --- a/go/arrow/internal/arrjson/writer.go +++ b/go/arrow/internal/arrjson/writer.go @@ -20,11 +20,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/internal/json" ) const ( diff --git a/go/arrow/internal/cdata_integration/entrypoints.go b/go/arrow/internal/cdata_integration/entrypoints.go index 59e1db52b50e0..06f7cc8a41019 100644 --- a/go/arrow/internal/cdata_integration/entrypoints.go +++ b/go/arrow/internal/cdata_integration/entrypoints.go @@ -25,11 +25,11 @@ import ( "runtime" "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/cdata" - "github.com/apache/arrow/go/v17/arrow/internal/arrjson" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/cdata" + "github.com/apache/arrow/go/v18/arrow/internal/arrjson" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // #include diff --git a/go/arrow/internal/dictutils/dict.go b/go/arrow/internal/dictutils/dict.go index 76382a3dbcac2..da18c2d0e3fae 100644 --- a/go/arrow/internal/dictutils/dict.go +++ b/go/arrow/internal/dictutils/dict.go @@ -21,9 +21,9 @@ import ( "fmt" "hash/maphash" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) type Kind int8 diff --git a/go/arrow/internal/dictutils/dict_test.go b/go/arrow/internal/dictutils/dict_test.go index 393ad5d9e2de7..7a68ae3073ddb 100644 --- a/go/arrow/internal/dictutils/dict_test.go +++ b/go/arrow/internal/dictutils/dict_test.go @@ -20,10 +20,10 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestDictMemo(t *testing.T) { diff --git a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go index 8d7a3617f2ada..105491d38e936 100755 --- a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go +++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go @@ -22,7 +22,7 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v17/arrow/internal/flight_integration" + "github.com/apache/arrow/go/v18/arrow/internal/flight_integration" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) diff --git 
a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go index 2ae02789b79e7..5de4076b708b2 100644 --- a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go +++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go @@ -23,7 +23,7 @@ import ( "os" "syscall" - "github.com/apache/arrow/go/v17/arrow/internal/flight_integration" + "github.com/apache/arrow/go/v18/arrow/internal/flight_integration" ) var ( diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go index ccfc7a0ed45a3..1528bb05d9daa 100644 --- a/go/arrow/internal/flight_integration/scenario.go +++ b/go/arrow/internal/flight_integration/scenario.go @@ -31,16 +31,16 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - "github.com/apache/arrow/go/v17/arrow/flight/session" - "github.com/apache/arrow/go/v17/arrow/internal/arrjson" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/flight/session" + "github.com/apache/arrow/go/v18/arrow/internal/arrjson" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "golang.org/x/xerrors" "google.golang.org/grpc" "google.golang.org/grpc/codes" diff --git a/go/arrow/internal/testing/gen/random_array_gen.go b/go/arrow/internal/testing/gen/random_array_gen.go index 05a8749d15dce..b4623bc4c3596 100644 --- a/go/arrow/internal/testing/gen/random_array_gen.go +++ b/go/arrow/internal/testing/gen/random_array_gen.go @@ -19,11 +19,11 @@ package gen import ( "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) diff --git a/go/arrow/internal/testing/tools/bits_test.go b/go/arrow/internal/testing/tools/bits_test.go index e9a638a6b9b2a..6897485e4c702 100644 --- a/go/arrow/internal/testing/tools/bits_test.go +++ b/go/arrow/internal/testing/tools/bits_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/internal/testing/tools/data_types.go b/go/arrow/internal/testing/tools/data_types.go index bbfa1af0f6703..545e3f8bc3a2c 100644 --- a/go/arrow/internal/testing/tools/data_types.go +++ 
b/go/arrow/internal/testing/tools/data_types.go @@ -21,8 +21,8 @@ package tools import ( "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go index 2f1f2346f1d3b..bafd0cf0f6926 100644 --- a/go/arrow/internal/utils.go +++ b/go/arrow/internal/utils.go @@ -17,8 +17,8 @@ package internal import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" ) const CurMetadataVersion = flatbuf.MetadataVersionV5 diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go index 4faaabb05ddc1..3e8d47a86c249 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main.go +++ b/go/arrow/ipc/cmd/arrow-cat/main.go @@ -63,8 +63,8 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-cat/main_test.go b/go/arrow/ipc/cmd/arrow-cat/main_test.go index 53b89fadaf89f..904480ed374d4 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main_test.go +++ b/go/arrow/ipc/cmd/arrow-cat/main_test.go @@ -23,10 +23,10 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestCatStream(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-file-to-stream/main.go b/go/arrow/ipc/cmd/arrow-file-to-stream/main.go index 514f8d42ce3a4..e8cdcd66ea3cb 100644 --- a/go/arrow/ipc/cmd/arrow-file-to-stream/main.go +++ b/go/arrow/ipc/cmd/arrow-file-to-stream/main.go @@ -24,9 +24,9 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go b/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go index f671ff1d0987c..e31430f5fa392 100644 --- a/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go +++ b/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestFileToStream(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go index 31669f6a0f00e..b3e1dcac14119 100644 --- a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go +++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go @@ -22,12 +22,12 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - 
"github.com/apache/arrow/go/v17/arrow/internal/arrjson" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/arrjson" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/internal/types" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go index 1aaf1430b4ff8..44e6aeb472f32 100644 --- a/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go +++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go @@ -20,8 +20,8 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestIntegration(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 2f54744c4068d..f461131786d02 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -61,8 +61,8 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-ls/main_test.go b/go/arrow/ipc/cmd/arrow-ls/main_test.go index 2c123c064a638..6ec8ef862bc14 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main_test.go +++ b/go/arrow/ipc/cmd/arrow-ls/main_test.go @@ -23,10 +23,10 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestLsStream(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-stream-to-file/main.go b/go/arrow/ipc/cmd/arrow-stream-to-file/main.go index 3ea635ec03a2a..5c9b768995ec9 100644 --- a/go/arrow/ipc/cmd/arrow-stream-to-file/main.go +++ b/go/arrow/ipc/cmd/arrow-stream-to-file/main.go @@ -24,9 +24,9 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go b/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go index 2ae0c7c64624f..13c566f3593cd 100644 --- a/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go +++ b/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestStreamToFile(t *testing.T) { diff --git a/go/arrow/ipc/compression.go b/go/arrow/ipc/compression.go index 12bb5d402d5e6..501d9b7c38159 100644 --- a/go/arrow/ipc/compression.go +++ b/go/arrow/ipc/compression.go @@ -19,9 +19,9 @@ package ipc import 
( "io" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/klauspost/compress/zstd" "github.com/pierrec/lz4/v4" ) diff --git a/go/arrow/ipc/endian_swap.go b/go/arrow/ipc/endian_swap.go index af4dead45a6df..f10adf5c13e7d 100644 --- a/go/arrow/ipc/endian_swap.go +++ b/go/arrow/ipc/endian_swap.go @@ -21,9 +21,9 @@ import ( "fmt" "math/bits" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) // swap the endianness of the array's buffers as needed in-place to save diff --git a/go/arrow/ipc/endian_swap_test.go b/go/arrow/ipc/endian_swap_test.go index 102ae4364345f..498b7decad146 100644 --- a/go/arrow/ipc/endian_swap_test.go +++ b/go/arrow/ipc/endian_swap_test.go @@ -20,11 +20,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index 031a028a558be..947bbd474328f 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -23,14 +23,14 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) // FileReader is an Arrow file reader. 
diff --git a/go/arrow/ipc/file_test.go b/go/arrow/ipc/file_test.go index a24f61e8c29c9..dea63579cfea6 100644 --- a/go/arrow/ipc/file_test.go +++ b/go/arrow/ipc/file_test.go @@ -21,9 +21,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestFile(t *testing.T) { diff --git a/go/arrow/ipc/file_writer.go b/go/arrow/ipc/file_writer.go index 8cea458192b85..8582c81baf2fe 100644 --- a/go/arrow/ipc/file_writer.go +++ b/go/arrow/ipc/file_writer.go @@ -21,11 +21,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) // PayloadWriter is an interface for injecting a different payloadwriter diff --git a/go/arrow/ipc/ipc.go b/go/arrow/ipc/ipc.go index b31a358a8a8e0..76d12f061efa5 100644 --- a/go/arrow/ipc/ipc.go +++ b/go/arrow/ipc/ipc.go @@ -19,10 +19,10 @@ package ipc import ( "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) const ( diff --git a/go/arrow/ipc/ipc_test.go b/go/arrow/ipc/ipc_test.go index 6348eff2e8393..7df9bc8c28bb0 100644 --- a/go/arrow/ipc/ipc_test.go +++ b/go/arrow/ipc/ipc_test.go @@ -29,10 +29,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestArrow12072(t *testing.T) { @@ -620,3 +621,70 @@ func TestIpcEmptyMap(t *testing.T) { assert.Zero(t, r.Record().NumRows()) assert.True(t, arrow.TypeEqual(dt, r.Record().Column(0).DataType())) } + +// GH-41993 +func TestArrowBinaryIPCWriterTruncatedVOffsets(t *testing.T) { + var buf bytes.Buffer + buf.WriteString("apple") + buf.WriteString("pear") + buf.WriteString("banana") + values := buf.Bytes() + + offsets := []int32{5, 9, 15} // <-- only "pear" and "banana" + voffsets := arrow.Int32Traits.CastToBytes(offsets) + + validity := []byte{0} + bitutil.SetBit(validity, 0) + bitutil.SetBit(validity, 1) + + data := array.NewData( + arrow.BinaryTypes.String, + 2, // <-- only "pear" and "banana" + []*memory.Buffer{ + memory.NewBufferBytes(validity), + memory.NewBufferBytes(voffsets), + memory.NewBufferBytes(values), + }, + nil, + 0, + 0, + ) + + str := array.NewStringData(data) + 
require.Equal(t, 2, str.Len()) + require.Equal(t, "pear", str.Value(0)) + require.Equal(t, "banana", str.Value(1)) + + schema := arrow.NewSchema([]arrow.Field{ + { + Name: "string", + Type: arrow.BinaryTypes.String, + Nullable: true, + }, + }, nil) + record := array.NewRecord(schema, []arrow.Array{str}, 2) + + var output bytes.Buffer + writer := ipc.NewWriter(&output, ipc.WithSchema(schema)) + + require.NoError(t, writer.Write(record)) + require.NoError(t, writer.Close()) + + reader, err := ipc.NewReader(bytes.NewReader(output.Bytes()), ipc.WithSchema(schema)) + require.NoError(t, err) + defer reader.Release() + + require.True(t, reader.Next()) + require.NoError(t, reader.Err()) + + rec := reader.Record() + require.EqualValues(t, 1, rec.NumCols()) + require.EqualValues(t, 2, rec.NumRows()) + + col, ok := rec.Column(0).(*array.String) + require.True(t, ok) + require.Equal(t, "pear", col.Value(0)) + require.Equal(t, "banana", col.Value(1)) + + require.False(t, reader.Next()) +} diff --git a/go/arrow/ipc/message.go b/go/arrow/ipc/message.go index 23142d04c8229..897f031791b2b 100644 --- a/go/arrow/ipc/message.go +++ b/go/arrow/ipc/message.go @@ -22,9 +22,9 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) // MetadataVersion represents the Arrow metadata version. diff --git a/go/arrow/ipc/message_test.go b/go/arrow/ipc/message_test.go index f81b0cb2845f7..e5760c6f70719 100644 --- a/go/arrow/ipc/message_test.go +++ b/go/arrow/ipc/message_test.go @@ -22,9 +22,9 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestMessageReaderBodyInAllocator(t *testing.T) { diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go index 604adab322959..ba90c993885d6 100644 --- a/go/arrow/ipc/metadata.go +++ b/go/arrow/ipc/metadata.go @@ -23,11 +23,11 @@ import ( "io" "sort" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" flatbuffers "github.com/google/flatbuffers/go" ) diff --git a/go/arrow/ipc/metadata_test.go b/go/arrow/ipc/metadata_test.go index f24ac204129e2..33bc63c2a0068 100644 --- a/go/arrow/ipc/metadata_test.go +++ b/go/arrow/ipc/metadata_test.go @@ -21,12 +21,12 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" flatbuffers "github.com/google/flatbuffers/go" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/ipc/reader.go b/go/arrow/ipc/reader.go index aeb47cfbd25c5..147b22213debf 100644 --- a/go/arrow/ipc/reader.go +++ b/go/arrow/ipc/reader.go @@ -23,14 +23,14 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // Reader reads records from an io.Reader. diff --git a/go/arrow/ipc/reader_test.go b/go/arrow/ipc/reader_test.go index 556d372881ec0..44aee19ecadd6 100644 --- a/go/arrow/ipc/reader_test.go +++ b/go/arrow/ipc/reader_test.go @@ -22,9 +22,9 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/ipc/stream_test.go b/go/arrow/ipc/stream_test.go index 14937a3caca97..b044acff5350d 100644 --- a/go/arrow/ipc/stream_test.go +++ b/go/arrow/ipc/stream_test.go @@ -22,9 +22,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestStream(t *testing.T) { diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index f2afd2db4274c..ca4f77d35e17f 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -26,15 +26,15 @@ import ( "sync" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + 
"github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) type swriter struct { @@ -853,19 +853,35 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer return nil } + dataTypeWidth := arr.DataType().Layout().Buffers[1].ByteWidth + // if we have a non-zero offset, then the value offsets do not start at // zero. we must a) create a new offsets array with shifted offsets and // b) slice the values array accordingly - // + hasNonZeroOffset := data.Offset() != 0 + // or if there are more value offsets than values (the array has been sliced) // we need to trim off the trailing offsets - needsTruncateAndShift := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() + hasMoreOffsetsThanValues := offsetBytesNeeded < voffsets.Len() + + // or if the offsets do not start from the zero index, we need to shift them + // and slice the values array + var firstOffset int64 + if dataTypeWidth == 8 { + firstOffset = arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[0] + } else { + firstOffset = int64(arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[0]) + } + offsetsDoNotStartFromZero := firstOffset != 0 + + // determine whether the offsets array should be shifted + needsTruncateAndShift := hasNonZeroOffset || hasMoreOffsetsThanValues || offsetsDoNotStartFromZero if needsTruncateAndShift { shiftedOffsets := memory.NewResizableBuffer(w.mem) shiftedOffsets.Resize(offsetBytesNeeded) - switch arr.DataType().Layout().Buffers[1].ByteWidth { + switch dataTypeWidth { case 8: dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes()) offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()+1] diff --git a/go/arrow/ipc/writer_test.go b/go/arrow/ipc/writer_test.go index 4e519ed293422..e5683243e4546 100644 --- a/go/arrow/ipc/writer_test.go +++ b/go/arrow/ipc/writer_test.go @@ -24,11 +24,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/math/float64.go b/go/arrow/math/float64.go index 4f8fca9e0e959..b5429e50aec09 100644 --- a/go/arrow/math/float64.go +++ b/go/arrow/math/float64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) type Float64Funcs struct { diff --git a/go/arrow/math/float64_avx2_amd64.go b/go/arrow/math/float64_avx2_amd64.go index 73f0126e30f67..8f11b1f2481a3 100644 --- a/go/arrow/math/float64_avx2_amd64.go +++ b/go/arrow/math/float64_avx2_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_neon_arm64.go b/go/arrow/math/float64_neon_arm64.go index 77f97f5e68bd8..c41801714ea20 100755 --- a/go/arrow/math/float64_neon_arm64.go +++ b/go/arrow/math/float64_neon_arm64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - 
"github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_sse4_amd64.go b/go/arrow/math/float64_sse4_amd64.go index ea1a1a009011f..bdd17559edfaf 100644 --- a/go/arrow/math/float64_sse4_amd64.go +++ b/go/arrow/math/float64_sse4_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_test.go b/go/arrow/math/float64_test.go index 637db6e42b556..de1a1ef1ec3be 100644 --- a/go/arrow/math/float64_test.go +++ b/go/arrow/math/float64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/int64.go b/go/arrow/math/int64.go index 457e9d37465d9..a7d2b76b69704 100644 --- a/go/arrow/math/int64.go +++ b/go/arrow/math/int64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) type Int64Funcs struct { diff --git a/go/arrow/math/int64_avx2_amd64.go b/go/arrow/math/int64_avx2_amd64.go index 791436adb0a15..353338d43282a 100644 --- a/go/arrow/math/int64_avx2_amd64.go +++ b/go/arrow/math/int64_avx2_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_neon_arm64.go b/go/arrow/math/int64_neon_arm64.go index 6439e00e9f999..29c5a8eed6c89 100755 --- a/go/arrow/math/int64_neon_arm64.go +++ b/go/arrow/math/int64_neon_arm64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_sse4_amd64.go b/go/arrow/math/int64_sse4_amd64.go index dca70c838baf2..cf443c5f1a7ec 100644 --- a/go/arrow/math/int64_sse4_amd64.go +++ b/go/arrow/math/int64_sse4_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_test.go b/go/arrow/math/int64_test.go index 4e6f808db0516..0719d6955b367 100644 --- a/go/arrow/math/int64_test.go +++ b/go/arrow/math/int64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/type.go.tmpl b/go/arrow/math/type.go.tmpl index a4e25ae574548..28becffdb3842 100644 --- a/go/arrow/math/type.go.tmpl +++ b/go/arrow/math/type.go.tmpl @@ -17,7 +17,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) {{$def := .D}} diff --git a/go/arrow/math/type_simd_amd64.go.tmpl b/go/arrow/math/type_simd_amd64.go.tmpl index 86b31e31152bc..cb11dc0ff808c 100644 --- a/go/arrow/math/type_simd_amd64.go.tmpl +++ 
b/go/arrow/math/type_simd_amd64.go.tmpl @@ -21,7 +21,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) {{$name := printf "%s_%s" .In.Type .D.arch}} diff --git a/go/arrow/math/type_simd_arm64.go.tmpl b/go/arrow/math/type_simd_arm64.go.tmpl index 86b31e31152bc..cb11dc0ff808c 100755 --- a/go/arrow/math/type_simd_arm64.go.tmpl +++ b/go/arrow/math/type_simd_arm64.go.tmpl @@ -21,7 +21,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) {{$name := printf "%s_%s" .In.Type .D.arch}} diff --git a/go/arrow/math/type_test.go.tmpl b/go/arrow/math/type_test.go.tmpl index 4b11c043155a6..cc3d39a4a1ad6 100644 --- a/go/arrow/math/type_test.go.tmpl +++ b/go/arrow/math/type_test.go.tmpl @@ -19,9 +19,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/uint64.go b/go/arrow/math/uint64.go index 3b752caecdaf9..2a24886ee5510 100644 --- a/go/arrow/math/uint64.go +++ b/go/arrow/math/uint64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) type Uint64Funcs struct { diff --git a/go/arrow/math/uint64_avx2_amd64.go b/go/arrow/math/uint64_avx2_amd64.go index a4092e2140b1e..ba6ea29b92201 100644 --- a/go/arrow/math/uint64_avx2_amd64.go +++ b/go/arrow/math/uint64_avx2_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_neon_arm64.go b/go/arrow/math/uint64_neon_arm64.go index 574bbe4064f2a..b83ca85e55701 100755 --- a/go/arrow/math/uint64_neon_arm64.go +++ b/go/arrow/math/uint64_neon_arm64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_sse4_amd64.go b/go/arrow/math/uint64_sse4_amd64.go index fff3f3ffeeb31..a91ff4e5100d1 100644 --- a/go/arrow/math/uint64_sse4_amd64.go +++ b/go/arrow/math/uint64_sse4_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_test.go b/go/arrow/math/uint64_test.go index 3403bf5039159..20418557b2101 100644 --- a/go/arrow/math/uint64_test.go +++ b/go/arrow/math/uint64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/buffer.go b/go/arrow/memory/buffer.go index 55a17afcf7401..586ff387f83de 100644 --- a/go/arrow/memory/buffer.go +++ b/go/arrow/memory/buffer.go @@ -19,7 +19,7 @@ package memory import ( "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // 
Buffer is a wrapper type for a buffer of bytes. diff --git a/go/arrow/memory/buffer_test.go b/go/arrow/memory/buffer_test.go index d61b4e0a3bb4f..ddc5871c85379 100644 --- a/go/arrow/memory/buffer_test.go +++ b/go/arrow/memory/buffer_test.go @@ -19,7 +19,7 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index 5eb66ade9d861..c3cb0df47523d 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -22,7 +22,7 @@ package memory import ( "runtime" - cga "github.com/apache/arrow/go/v17/arrow/memory/internal/cgoalloc" + cga "github.com/apache/arrow/go/v18/arrow/memory/internal/cgoalloc" ) // CgoArrowAllocator is an allocator which exposes the C++ memory pool class diff --git a/go/arrow/memory/default_mallocator.go b/go/arrow/memory/default_mallocator.go index b30e7e75d8080..c1a4ed095fadf 100644 --- a/go/arrow/memory/default_mallocator.go +++ b/go/arrow/memory/default_mallocator.go @@ -19,7 +19,7 @@ package memory import ( - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" ) // DefaultAllocator is a default implementation of Allocator and can be used anywhere diff --git a/go/arrow/memory/default_mallocator_test.go b/go/arrow/memory/default_mallocator_test.go index 8737a5224e0f4..7667de1111a31 100644 --- a/go/arrow/memory/default_mallocator_test.go +++ b/go/arrow/memory/default_mallocator_test.go @@ -21,8 +21,8 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/mallocator/mallocator_test.go b/go/arrow/memory/mallocator/mallocator_test.go index 4070cc32b2735..5e1482b73697f 100644 --- a/go/arrow/memory/mallocator/mallocator_test.go +++ b/go/arrow/memory/mallocator/mallocator_test.go @@ -23,7 +23,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/memory_test.go b/go/arrow/memory/memory_test.go index 35bd28e53c795..4a823494ff99b 100644 --- a/go/arrow/memory/memory_test.go +++ b/go/arrow/memory/memory_test.go @@ -19,7 +19,7 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/record.go b/go/arrow/record.go index a2234d03a9f5b..b812fcd481a60 100644 --- a/go/arrow/record.go +++ b/go/arrow/record.go @@ -16,7 +16,7 @@ package arrow -import "github.com/apache/arrow/go/v17/internal/json" +import "github.com/apache/arrow/go/v18/internal/json" // Record is a collection of equal-length arrays matching a particular Schema. // Also known as a RecordBatch in the spec and in some implementations. 
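Reviewer note: beyond the version bump, the one behavioral change in this patch is in getZeroBasedValueOffsets (go/arrow/ipc/writer.go, above). Previously the writer re-based value offsets only when the array carried a non-zero data offset or had more offsets than values; an array whose *first offset* was itself non-zero — like the hand-built string array in TestArrowBinaryIPCWriterTruncatedVOffsets, whose offsets begin at 5 — was written out unshifted, corrupting the IPC payload (GH-41993). The arithmetic of the fix, as a hedged standalone sketch (the helper name is invented here; the real code also handles 64-bit offsets and slices the values buffer):

```go
package main

import "fmt"

// rebaseOffsets mirrors, in simplified form, what the IPC writer now does
// when the first value offset is non-zero: subtract it from every offset so
// the serialized offsets start at zero, and report how many leading value
// bytes must be skipped when slicing the values buffer.
func rebaseOffsets(offsets []int32) (shifted []int32, valueSkip int32) {
	first := offsets[0]
	shifted = make([]int32, len(offsets))
	for i, o := range offsets {
		shifted[i] = o - first
	}
	return shifted, first
}

func main() {
	// The test's array: values "applepearbanana", offsets [5, 9, 15]
	// ("pear" = bytes 5..9, "banana" = bytes 9..15).
	shifted, skip := rebaseOffsets([]int32{5, 9, 15})
	fmt.Println(shifted, skip) // [0 4 10] 5: offsets start at zero over values[5:] = "pearbanana"
}
```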
diff --git a/go/arrow/scalar/append.go b/go/arrow/scalar/append.go index 9520514743443..3a5823775457d 100644 --- a/go/arrow/scalar/append.go +++ b/go/arrow/scalar/append.go @@ -21,11 +21,11 @@ package scalar import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/scalar/append_test.go b/go/arrow/scalar/append_test.go index bbfa5d289cf78..a8eca4ee455b4 100644 --- a/go/arrow/scalar/append_test.go +++ b/go/arrow/scalar/append_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/scalar/binary.go b/go/arrow/scalar/binary.go index 6e1fadb7a1256..26c153dc7a46d 100644 --- a/go/arrow/scalar/binary.go +++ b/go/arrow/scalar/binary.go @@ -21,8 +21,8 @@ import ( "fmt" "unicode/utf8" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" ) type BinaryScalar interface { diff --git a/go/arrow/scalar/compare.go b/go/arrow/scalar/compare.go index f54ff7c82c84a..d32a685b819c6 100644 --- a/go/arrow/scalar/compare.go +++ b/go/arrow/scalar/compare.go @@ -16,7 +16,7 @@ package scalar -import "github.com/apache/arrow/go/v17/arrow" +import "github.com/apache/arrow/go/v18/arrow" // Equals returns true if two scalars are equal, which means they have the same // datatype, validity and value. 
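Reviewer note: the scalar hunks are again pure import rewrites; the Equals contract quoted above (same datatype, validity, and value) is unchanged. A minimal, hedged usage sketch under the new module path (the NewInt64Scalar/NewInt32Scalar constructor names follow the generated numeric scalar API touched in this patch):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow/scalar"
)

func main() {
	a := scalar.NewInt64Scalar(42)
	b := scalar.NewInt64Scalar(42)
	c := scalar.NewInt32Scalar(42)

	fmt.Println(scalar.Equals(a, b)) // true: same datatype and value
	fmt.Println(scalar.Equals(a, c)) // false: Int64 vs Int32, despite equal values
}
```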
diff --git a/go/arrow/scalar/nested.go b/go/arrow/scalar/nested.go index 484dad795e791..5f3447e686873 100644 --- a/go/arrow/scalar/nested.go +++ b/go/arrow/scalar/nested.go @@ -21,10 +21,10 @@ import ( "errors" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/scalar/numeric.gen.go b/go/arrow/scalar/numeric.gen.go index 7287d4ff00932..25bdc242ae8c9 100644 --- a/go/arrow/scalar/numeric.gen.go +++ b/go/arrow/scalar/numeric.gen.go @@ -24,9 +24,9 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" ) type Int8 struct { diff --git a/go/arrow/scalar/numeric.gen_test.go b/go/arrow/scalar/numeric.gen_test.go index 071af7cfc1524..9349022b65591 100644 --- a/go/arrow/scalar/numeric.gen_test.go +++ b/go/arrow/scalar/numeric.gen_test.go @@ -21,8 +21,8 @@ package scalar_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/scalar/numeric.gen_test.go.tmpl b/go/arrow/scalar/numeric.gen_test.go.tmpl index e21b4f20eeb7c..f5de3f9dcbfa3 100644 --- a/go/arrow/scalar/numeric.gen_test.go.tmpl +++ b/go/arrow/scalar/numeric.gen_test.go.tmpl @@ -19,8 +19,8 @@ package scalar_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/scalar/parse.go b/go/arrow/scalar/parse.go index ba189523fcc01..866e627113d88 100644 --- a/go/arrow/scalar/parse.go +++ b/go/arrow/scalar/parse.go @@ -25,12 +25,12 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" ) type TypeToScalar interface { diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index 7f210c0a580cf..f81465278a1e0 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -26,16 +26,16 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/endian" 
- "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/scalar/scalar_test.go b/go/arrow/scalar/scalar_test.go index fcb88c0fb7e4f..0775eecedd027 100644 --- a/go/arrow/scalar/scalar_test.go +++ b/go/arrow/scalar/scalar_test.go @@ -25,12 +25,12 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/scalar/temporal.go b/go/arrow/scalar/temporal.go index 718a63b6b225b..ee43f1b629c1d 100644 --- a/go/arrow/scalar/temporal.go +++ b/go/arrow/scalar/temporal.go @@ -22,7 +22,7 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func temporalToString(s TemporalScalar) string { diff --git a/go/arrow/schema.go b/go/arrow/schema.go index bdfee7f4325e9..fd6c3cf1f4025 100644 --- a/go/arrow/schema.go +++ b/go/arrow/schema.go @@ -21,7 +21,7 @@ import ( "sort" "strings" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) type Metadata struct { diff --git a/go/arrow/schema_test.go b/go/arrow/schema_test.go index 3d26a769e98b5..ccdd8a02c9c2b 100644 --- a/go/arrow/schema_test.go +++ b/go/arrow/schema_test.go @@ -21,7 +21,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/table.go b/go/arrow/table.go index ccf28547c2177..5ad2aa08e9341 100644 --- a/go/arrow/table.go +++ b/go/arrow/table.go @@ -20,7 +20,7 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Table represents a logical sequence of chunked arrays of equal length. It is diff --git a/go/arrow/tensor/numeric.gen.go b/go/arrow/tensor/numeric.gen.go index 51995d9ba147f..81ae6af41b09e 100644 --- a/go/arrow/tensor/numeric.gen.go +++ b/go/arrow/tensor/numeric.gen.go @@ -19,7 +19,7 @@ package tensor import ( - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Int8 is an n-dim array of int8s. 
diff --git a/go/arrow/tensor/numeric.gen.go.tmpl b/go/arrow/tensor/numeric.gen.go.tmpl index 17c348fa2b73d..9f30297e1ac32 100644 --- a/go/arrow/tensor/numeric.gen.go.tmpl +++ b/go/arrow/tensor/numeric.gen.go.tmpl @@ -17,8 +17,8 @@ package tensor import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) {{range .In}} diff --git a/go/arrow/tensor/numeric.gen_test.go b/go/arrow/tensor/numeric.gen_test.go index 92f4f7015edd2..8039aea39667a 100644 --- a/go/arrow/tensor/numeric.gen_test.go +++ b/go/arrow/tensor/numeric.gen_test.go @@ -23,10 +23,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) func TestTensorInt8(t *testing.T) { diff --git a/go/arrow/tensor/numeric.gen_test.go.tmpl b/go/arrow/tensor/numeric.gen_test.go.tmpl index c080df30e0f36..593be259ce630 100644 --- a/go/arrow/tensor/numeric.gen_test.go.tmpl +++ b/go/arrow/tensor/numeric.gen_test.go.tmpl @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) {{range .In}} diff --git a/go/arrow/tensor/tensor.go b/go/arrow/tensor/tensor.go index b19cc2b916743..067015f3a97d0 100644 --- a/go/arrow/tensor/tensor.go +++ b/go/arrow/tensor/tensor.go @@ -21,8 +21,8 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Interface represents an n-dimensional array of numerical data. 
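Reviewer note: the type_traits hunks just below are import-path rewrites of the zero-copy casting helpers that the GH-41993 test above relies on (arrow.Int32Traits.CastToBytes, arrow.Int64Traits.CastFromBytes). A small sketch of the round trip — only the variable names are ours:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
)

func main() {
	// Reinterpret an []int32 as its raw bytes (native byte order) and back,
	// without copying — the same trick the IPC writer uses on offset buffers.
	offsets := []int32{5, 9, 15}
	raw := arrow.Int32Traits.CastToBytes(offsets)
	fmt.Println(len(raw)) // 12: three 4-byte offsets

	back := arrow.Int32Traits.CastFromBytes(raw)
	fmt.Println(back[0], back[len(back)-1]) // 5 15
}
```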
diff --git a/go/arrow/tensor/tensor_test.go b/go/arrow/tensor/tensor_test.go index 5ed420b96bba7..73547e32c3bcb 100644 --- a/go/arrow/tensor/tensor_test.go +++ b/go/arrow/tensor/tensor_test.go @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) func TestTensor(t *testing.T) { diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index e8e7cf174ed2d..aae6ad106487f 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -20,9 +20,9 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/type_traits_boolean.go b/go/arrow/type_traits_boolean.go index 9bdc0b824e6f6..74d643ba6206e 100644 --- a/go/arrow/type_traits_boolean.go +++ b/go/arrow/type_traits_boolean.go @@ -17,7 +17,7 @@ package arrow import ( - "github.com/apache/arrow/go/v17/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/bitutil" ) type booleanTraits struct{} diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index 604c5be3f3670..c93e781d934cb 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/endian" ) // Decimal128 traits diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index de4d931fed748..9ef47c31bdd04 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/endian" ) // Decimal256 traits diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index 46ab2b8fc9ed8..0552932cf9b02 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/float16" ) // Float16 traits diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 48ae7eb376453..94b5274d45968 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) var ( diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 
bf20dc9995a70..84a32a9115805 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -22,7 +22,7 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var ( diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index 9e5c68a2fc8fd..e74194b5260fc 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -20,7 +20,7 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var ( diff --git a/go/arrow/type_traits_numeric.gen_test.go b/go/arrow/type_traits_numeric.gen_test.go index 90324197a3352..3d021575a6654 100644 --- a/go/arrow/type_traits_numeric.gen_test.go +++ b/go/arrow/type_traits_numeric.gen_test.go @@ -22,7 +22,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func TestInt64Traits(t *testing.T) { diff --git a/go/arrow/type_traits_numeric.gen_test.go.tmpl b/go/arrow/type_traits_numeric.gen_test.go.tmpl index 440f240ab30d2..9f7118eb8ec4a 100644 --- a/go/arrow/type_traits_numeric.gen_test.go.tmpl +++ b/go/arrow/type_traits_numeric.gen_test.go.tmpl @@ -20,7 +20,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) {{- range .In}} diff --git a/go/arrow/type_traits_test.go b/go/arrow/type_traits_test.go index ee28b81de09a6..ec653c0b5bbae 100644 --- a/go/arrow/type_traits_test.go +++ b/go/arrow/type_traits_test.go @@ -22,10 +22,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" ) func TestBooleanTraits(t *testing.T) { diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index 09ef09d8ea188..e506b6f473d80 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -19,7 +19,7 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var TimestampTraits timestampTraits diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go index 36c83f011251e..ef35bc0d7ec55 100644 --- a/go/arrow/type_traits_view.go +++ b/go/arrow/type_traits_view.go @@ -19,7 +19,7 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var ViewHeaderTraits viewHeaderTraits diff --git a/go/arrow/util/byte_size.go b/go/arrow/util/byte_size.go index 37b47886f2652..bb163fcef5fc0 100644 --- a/go/arrow/util/byte_size.go +++ b/go/arrow/util/byte_size.go @@ -17,9 +17,9 @@ package util import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func isArrayDataNil(arrayData arrow.ArrayData) bool { diff --git a/go/arrow/util/byte_size_test.go b/go/arrow/util/byte_size_test.go index 6fbbe9dce094b..ff6d8ea7edf0c 100644 --- 
a/go/arrow/util/byte_size_test.go +++ b/go/arrow/util/byte_size_test.go @@ -20,10 +20,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/util" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/util" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/util/protobuf_reflect.go b/go/arrow/util/protobuf_reflect.go index b4c8d68db8b0d..03153563b8cb5 100644 --- a/go/arrow/util/protobuf_reflect.go +++ b/go/arrow/util/protobuf_reflect.go @@ -20,9 +20,9 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/huandu/xstrings" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/reflect/protoreflect" diff --git a/go/arrow/util/protobuf_reflect_test.go b/go/arrow/util/protobuf_reflect_test.go index ab3cbdf9a6b13..220552df8d89e 100644 --- a/go/arrow/util/protobuf_reflect_test.go +++ b/go/arrow/util/protobuf_reflect_test.go @@ -20,10 +20,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/util/util_message" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/util/util_message" "github.com/huandu/xstrings" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/go.mod b/go/go.mod index 7819ab146d80d..43c2c41b69eca 100644 --- a/go/go.mod +++ b/go/go.mod @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-module github.com/apache/arrow/go/v17 +module github.com/apache/arrow/go/v18 go 1.21 @@ -36,8 +36,8 @@ require ( github.com/zeebo/xxh3 v1.0.2 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 - golang.org/x/sys v0.21.0 - golang.org/x/tools v0.22.0 + golang.org/x/sys v0.22.0 + golang.org/x/tools v0.23.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 @@ -47,9 +47,9 @@ require ( require ( github.com/google/uuid v1.6.0 - github.com/hamba/avro/v2 v2.22.1 + github.com/hamba/avro/v2 v2.23.0 github.com/huandu/xstrings v1.4.0 - github.com/substrait-io/substrait-go v0.4.2 + github.com/substrait-io/substrait-go v0.5.0 github.com/tidwall/sjson v1.2.5 ) @@ -75,8 +75,8 @@ require ( github.com/tidwall/gjson v1.14.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect - golang.org/x/mod v0.18.0 // indirect - golang.org/x/net v0.26.0 // indirect + golang.org/x/mod v0.19.0 // indirect + golang.org/x/net v0.27.0 // indirect golang.org/x/text v0.16.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go/go.sum b/go/go.sum index 581930a3909cd..a96f0a3797c74 100644 --- a/go/go.sum +++ b/go/go.sum @@ -43,8 +43,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hamba/avro/v2 v2.22.1 h1:q1rAbfJsrbMaZPDLQvwUQMfQzp6H+hGXvckmU/lXemk= -github.com/hamba/avro/v2 v2.22.1/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg= +github.com/hamba/avro/v2 v2.23.0 h1:DYWz6UqNCi21JflaZlcwNfW+rK+D/CwnrWWJtfmO4vw= +github.com/hamba/avro/v2 v2.23.0/go.mod h1:7vDfy/2+kYCE8WUHoj2et59GTv0ap7ptktMXu0QHePI= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= @@ -99,8 +99,8 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/substrait-io/substrait-go v0.4.2 h1:buDnjsb3qAqTaNbOR7VKmNgXf4lYQxWEcnSGUWBtmN8= -github.com/substrait-io/substrait-go v0.4.2/go.mod h1:qhpnLmrcvAnlZsUyPXZRqldiHapPTXC3t7xFgDi3aQg= +github.com/substrait-io/substrait-go v0.5.0 h1:8sYsoqcrzoNpThPyot1CQpwF6OokxvplLUQJTGlKws4= +github.com/substrait-io/substrait-go v0.5.0/go.mod h1:Co7ko6iIjdqCGcN3LfkKWPVlxONkNZem9omWAGIaOrQ= github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -113,25 +113,25 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 
h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= -golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= +golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= -golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= -golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= +golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= -golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/tools v0.23.0 h1:SGsXPZ+2l4JsgaCKkx+FQ9YZ5XEtA1GZYuoDjenLjvg= +golang.org/x/tools v0.23.0/go.mod h1:pnu6ufv6vQkll6szChhK3C3L/ruaIv5eBeztNG8wtsI= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= diff --git a/go/internal/bitutils/bit_block_counter.go b/go/internal/bitutils/bit_block_counter.go index 677c497c14d23..89e50b2dc6b1d 100644 --- a/go/internal/bitutils/bit_block_counter.go +++ b/go/internal/bitutils/bit_block_counter.go @@ -21,8 +21,8 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) func loadWord(byt []byte) uint64 { diff --git a/go/internal/bitutils/bit_block_counter_test.go b/go/internal/bitutils/bit_block_counter_test.go index 7ddd9ca343624..064d4b46b452f 100644 --- a/go/internal/bitutils/bit_block_counter_test.go +++ b/go/internal/bitutils/bit_block_counter_test.go @@ 
-19,9 +19,9 @@ package bitutils_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" "github.com/stretchr/testify/assert" "golang.org/x/exp/rand" ) diff --git a/go/internal/bitutils/bit_run_reader.go b/go/internal/bitutils/bit_run_reader.go index aaba0bed8eb19..cce6792a6d0c8 100644 --- a/go/internal/bitutils/bit_run_reader.go +++ b/go/internal/bitutils/bit_run_reader.go @@ -22,9 +22,9 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) // BitRun represents a run of bits with the same value of length Len diff --git a/go/internal/bitutils/bit_run_reader_test.go b/go/internal/bitutils/bit_run_reader_test.go index 528fef09c313b..e3e53c92621cd 100644 --- a/go/internal/bitutils/bit_run_reader_test.go +++ b/go/internal/bitutils/bit_run_reader_test.go @@ -21,9 +21,9 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/internal/bitutils" "github.com/stretchr/testify/assert" ) diff --git a/go/internal/bitutils/bit_set_run_reader.go b/go/internal/bitutils/bit_set_run_reader.go index f84d7d975dba4..2c6a39f5352e6 100644 --- a/go/internal/bitutils/bit_set_run_reader.go +++ b/go/internal/bitutils/bit_set_run_reader.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "math/bits" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) // IsMultipleOf64 returns whether v is a multiple of 64. diff --git a/go/internal/bitutils/bit_set_run_reader_test.go b/go/internal/bitutils/bit_set_run_reader_test.go index 322906804cae8..c42f8b0d6dce7 100644 --- a/go/internal/bitutils/bit_set_run_reader_test.go +++ b/go/internal/bitutils/bit_set_run_reader_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" "github.com/stretchr/testify/suite" ) diff --git a/go/internal/bitutils/bitmap_generate.go b/go/internal/bitutils/bitmap_generate.go index 1871b9570092e..a6d43b4622f93 100644 --- a/go/internal/bitutils/bitmap_generate.go +++ b/go/internal/bitutils/bitmap_generate.go @@ -16,7 +16,7 @@ package bitutils -import "github.com/apache/arrow/go/v17/arrow/bitutil" +import "github.com/apache/arrow/go/v18/arrow/bitutil" // GenerateBits writes sequential bits to a bitmap. 
Bits preceding the // initial start offset are preserved, bits following the bitmap may diff --git a/go/internal/bitutils/bitmap_generate_test.go b/go/internal/bitutils/bitmap_generate_test.go index 1367fe773c00d..d75f5a72147b1 100644 --- a/go/internal/bitutils/bitmap_generate_test.go +++ b/go/internal/bitutils/bitmap_generate_test.go @@ -19,7 +19,7 @@ package bitutils_test import ( "testing" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/bitutils" "golang.org/x/exp/rand" ) diff --git a/go/internal/hashing/xxh3_memo_table.gen.go b/go/internal/hashing/xxh3_memo_table.gen.go index 47626d317f10e..c8f56ed21565e 100644 --- a/go/internal/hashing/xxh3_memo_table.gen.go +++ b/go/internal/hashing/xxh3_memo_table.gen.go @@ -21,9 +21,9 @@ package hashing import ( "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) type payloadInt8 struct { diff --git a/go/internal/hashing/xxh3_memo_table.gen.go.tmpl b/go/internal/hashing/xxh3_memo_table.gen.go.tmpl index 34fd25246f6a4..b852a9d79a39b 100644 --- a/go/internal/hashing/xxh3_memo_table.gen.go.tmpl +++ b/go/internal/hashing/xxh3_memo_table.gen.go.tmpl @@ -17,8 +17,8 @@ package hashing import ( - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) {{range .In}} diff --git a/go/internal/types/extension_types.go b/go/internal/types/extension_types.go index e0c9c4c185c9d..3c63b36874600 100644 --- a/go/internal/types/extension_types.go +++ b/go/internal/types/extension_types.go @@ -24,9 +24,9 @@ import ( "reflect" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/internal/json" "github.com/google/uuid" "golang.org/x/xerrors" ) diff --git a/go/internal/types/extension_types_test.go b/go/internal/types/extension_types_test.go index 632375c2685c0..50abaae3a9e06 100644 --- a/go/internal/types/extension_types_test.go +++ b/go/internal/types/extension_types_test.go @@ -20,11 +20,11 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/types" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/internal/utils/ref_count.go b/go/internal/utils/ref_count.go new file mode 100644 index 0000000000000..9b85f75b14363 --- /dev/null +++ b/go/internal/utils/ref_count.go @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import "sync/atomic" + +// NewRefCount creates a new atomic counter set to the specified initial value. +func NewRefCount(initial int64) *atomic.Int64 { + var val atomic.Int64 + val.Store(initial) + return &val +} diff --git a/go/internal/utils/transpose_ints_def.go b/go/internal/utils/transpose_ints_def.go index 83f0a256a774f..2095b3d08c54b 100644 --- a/go/internal/utils/transpose_ints_def.go +++ b/go/internal/utils/transpose_ints_def.go @@ -19,7 +19,7 @@ package utils import ( "errors" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=avx2 transpose_ints_simd.go.tmpl=transpose_ints_avx2_amd64.go diff --git a/go/internal/utils/transpose_ints_test.go b/go/internal/utils/transpose_ints_test.go index c26782086802c..427a1ad041c55 100644 --- a/go/internal/utils/transpose_ints_test.go +++ b/go/internal/utils/transpose_ints_test.go @@ -22,7 +22,7 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/internal/utils" ) var ( diff --git a/go/parquet/cmd/parquet_reader/dumper.go b/go/parquet/cmd/parquet_reader/dumper.go index a7504fdd4e849..bab9939bfd243 100644 --- a/go/parquet/cmd/parquet_reader/dumper.go +++ b/go/parquet/cmd/parquet_reader/dumper.go @@ -22,9 +22,9 @@ import ( "reflect" "time" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" ) const defaultBatchSize = 128 diff --git a/go/parquet/cmd/parquet_reader/main.go b/go/parquet/cmd/parquet_reader/main.go index d0577868e8670..6e04f4254f9fa 100644 --- a/go/parquet/cmd/parquet_reader/main.go +++ b/go/parquet/cmd/parquet_reader/main.go @@ -25,11 +25,11 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/docopt/docopt-go" ) diff --git a/go/parquet/cmd/parquet_schema/main.go b/go/parquet/cmd/parquet_schema/main.go index d0ff87feb5a56..0cd0700e4e41e 100644 --- a/go/parquet/cmd/parquet_schema/main.go +++ b/go/parquet/cmd/parquet_schema/main.go @@ -20,8 +20,8 @@ import ( "fmt" "os" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" 
"github.com/docopt/docopt-go" ) diff --git a/go/parquet/compress/brotli.go b/go/parquet/compress/brotli.go index e7574d2b1a779..9d3a22c9e58e8 100644 --- a/go/parquet/compress/brotli.go +++ b/go/parquet/compress/brotli.go @@ -21,7 +21,7 @@ import ( "io" "github.com/andybalholm/brotli" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/debug" ) type brotliCodec struct{} diff --git a/go/parquet/compress/compress.go b/go/parquet/compress/compress.go index 83b174e449c9f..b6a1349133e84 100644 --- a/go/parquet/compress/compress.go +++ b/go/parquet/compress/compress.go @@ -23,7 +23,7 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // Compression is an alias to the thrift compression codec enum type for easy use diff --git a/go/parquet/compress/compress_test.go b/go/parquet/compress/compress_test.go index 2aaec95bf2eee..843062c0d024a 100644 --- a/go/parquet/compress/compress_test.go +++ b/go/parquet/compress/compress_test.go @@ -22,7 +22,7 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v17/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/compress" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/compress/zstd.go b/go/parquet/compress/zstd.go index e81d364fceeb4..ea2126be18cc9 100644 --- a/go/parquet/compress/zstd.go +++ b/go/parquet/compress/zstd.go @@ -20,7 +20,7 @@ import ( "io" "sync" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/debug" "github.com/klauspost/compress/zstd" ) diff --git a/go/parquet/doc.go b/go/parquet/doc.go index 6ab08f83f063f..a4fdd6e5dda21 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -30,9 +30,9 @@ // // You can download the library and cli utilities via: // -// go get -u github.com/apache/arrow/go/v17/parquet -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_reader@latest -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_schema@latest +// go get -u github.com/apache/arrow/go/v18/parquet +// go install github.com/apache/arrow/go/v18/parquet/cmd/parquet_reader@latest +// go install github.com/apache/arrow/go/v18/parquet/cmd/parquet_schema@latest // // # Modules // @@ -60,8 +60,18 @@ // # Encodings // // The encoding types supported in this package are: -// Plain, Plain/RLE Dictionary, Delta Binary Packed (only integer types), Delta Byte Array -// (only ByteArray), Delta Length Byte Array (only ByteArray) +// +// - Plain +// +// - Plain/RLE Dictionary +// +// - Delta Binary Packed (only integer types) +// +// - Delta Byte Array (only ByteArray) +// +// - Delta Length Byte Array (only ByteArray) +// +// - Byte Stream Split (Float, Double, Int32, Int64, FixedLenByteArray) // // Tip: Some platforms don't necessarily support all kinds of encodings. If you're not // sure what to use, just use Plain and Dictionary encoding. 
diff --git a/go/parquet/encryption_properties.go b/go/parquet/encryption_properties.go index f29bf80da9b75..e9cb07d18bfe4 100644 --- a/go/parquet/encryption_properties.go +++ b/go/parquet/encryption_properties.go @@ -20,7 +20,7 @@ import ( "crypto/rand" "unicode/utf8" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // Constants that will be used as the default values with encryption/decryption diff --git a/go/parquet/encryption_properties_test.go b/go/parquet/encryption_properties_test.go index 0519b7a9db96b..8f50e5880b555 100644 --- a/go/parquet/encryption_properties_test.go +++ b/go/parquet/encryption_properties_test.go @@ -19,8 +19,8 @@ package parquet_test import ( "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/encryption_read_config_test.go b/go/parquet/encryption_read_config_test.go index e78dffc641905..1e2de16416d31 100644 --- a/go/parquet/encryption_read_config_test.go +++ b/go/parquet/encryption_read_config_test.go @@ -23,10 +23,10 @@ import ( "path" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/encryption_write_config_test.go b/go/parquet/encryption_write_config_test.go index 6b5c171e6ea0d..01a5c2be93a46 100644 --- a/go/parquet/encryption_write_config_test.go +++ b/go/parquet/encryption_write_config_test.go @@ -23,10 +23,10 @@ import ( "path/filepath" "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index e441cd3e9c2d2..38ebcf2893c46 100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -21,13 +21,13 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) @@ -354,13 +354,12 @@ 
func (c *columnChunkReader) initDataDecoder(page Page, lvlByteLen int64) error { case format.Encoding_PLAIN, format.Encoding_DELTA_BYTE_ARRAY, format.Encoding_DELTA_LENGTH_BYTE_ARRAY, - format.Encoding_DELTA_BINARY_PACKED: + format.Encoding_DELTA_BINARY_PACKED, + format.Encoding_BYTE_STREAM_SPLIT: c.curDecoder = c.decoderTraits.Decoder(parquet.Encoding(encoding), c.descr, false, c.mem) c.decoders[encoding] = c.curDecoder case format.Encoding_RLE_DICTIONARY: return errors.New("parquet: dictionary page must be before data page") - case format.Encoding_BYTE_STREAM_SPLIT: - return fmt.Errorf("parquet: unsupported data encoding %s", encoding) default: return fmt.Errorf("parquet: unknown encoding type %s", encoding) } diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go index 7d8c600138e06..b744c561d3b96 100755 --- a/go/parquet/file/column_reader_test.go +++ b/go/parquet/file/column_reader_test.go @@ -24,12 +24,12 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/file/column_reader_types.gen.go b/go/parquet/file/column_reader_types.gen.go index 1e7837e5679fb..e0d0afac38ac2 100644 --- a/go/parquet/file/column_reader_types.gen.go +++ b/go/parquet/file/column_reader_types.gen.go @@ -21,9 +21,9 @@ package file import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) // Int32ColumnChunkReader is the Typed Column chunk reader instance for reading diff --git a/go/parquet/file/column_reader_types.gen.go.tmpl b/go/parquet/file/column_reader_types.gen.go.tmpl index a1cccbad16e37..b6056836d76f4 100644 --- a/go/parquet/file/column_reader_types.gen.go.tmpl +++ b/go/parquet/file/column_reader_types.gen.go.tmpl @@ -17,8 +17,8 @@ package file import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) {{range .In}} diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 520622f0da6c6..bbf30e03087d5 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -21,14 +21,14 @@ import ( "encoding/binary" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + 
"github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=../internal/encoding/physical_types.tmpldata column_writer_types.gen.go.tmpl diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index c8d61952064fe..009c8c8bc51fd 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -24,22 +24,22 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - arrutils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/pqarrow" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + arrutils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" @@ -459,6 +459,15 @@ func (p *PrimitiveWriterTestSuite) TestRequiredPlain() { p.testRequiredWithEncoding(parquet.Encodings.Plain) } +func (p *PrimitiveWriterTestSuite) TestRequiredByteStreamSplit() { + switch p.Typ { + case reflect.TypeOf(float32(0)), reflect.TypeOf(float64(0)), reflect.TypeOf(int32(0)), reflect.TypeOf(int64(0)), reflect.TypeOf(parquet.FixedLenByteArray{}): + p.testRequiredWithEncoding(parquet.Encodings.ByteStreamSplit) + default: + p.Panics(func() { p.testRequiredWithEncoding(parquet.Encodings.ByteStreamSplit) }) + } +} + func (p *PrimitiveWriterTestSuite) TestRequiredDictionary() { p.testRequiredWithEncoding(parquet.Encodings.PlainDict) } diff --git a/go/parquet/file/column_writer_types.gen.go b/go/parquet/file/column_writer_types.gen.go index d6c0e8142ab1b..612b4095098a1 100644 --- a/go/parquet/file/column_writer_types.gen.go +++ b/go/parquet/file/column_writer_types.gen.go @@ -19,14 +19,14 @@ package file import ( - 
"github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) // Int32ColumnChunkWriter is the typed interface for writing columns to a parquet diff --git a/go/parquet/file/column_writer_types.gen.go.tmpl b/go/parquet/file/column_writer_types.gen.go.tmpl index 6fbd3d4ffde15..cb48dd64ceedc 100644 --- a/go/parquet/file/column_writer_types.gen.go.tmpl +++ b/go/parquet/file/column_writer_types.gen.go.tmpl @@ -19,11 +19,11 @@ package file import ( "fmt" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) {{range .In}} diff --git a/go/parquet/file/file_reader.go b/go/parquet/file/file_reader.go index 145c09bb5fcd4..f838482fbb0e9 100644 --- a/go/parquet/file/file_reader.go +++ b/go/parquet/file/file_reader.go @@ -25,10 +25,10 @@ import ( "runtime" "sync" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/file_reader_mmap.go b/go/parquet/file/file_reader_mmap.go index c3e487b536f73..77afb6b639bff 100644 --- a/go/parquet/file/file_reader_mmap.go +++ b/go/parquet/file/file_reader_mmap.go @@ -22,7 +22,7 @@ package file import ( "io" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/exp/mmap" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/file_reader_mmap_windows.go b/go/parquet/file/file_reader_mmap_windows.go index 776eb98d5d282..87aaafd9e7d81 100644 --- a/go/parquet/file/file_reader_mmap_windows.go +++ b/go/parquet/file/file_reader_mmap_windows.go @@ -22,7 +22,7 @@ package file import ( "errors" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) func mmapOpen(filename string) (parquet.ReaderAtSeeker, error) { diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go index 8056a837ea19e..547ec475c2720 100644 --- a/go/parquet/file/file_reader_test.go +++ b/go/parquet/file/file_reader_test.go @@ -18,23 +18,28 @@ 
package file_test import ( "bytes" + "context" "crypto/rand" "encoding/binary" + "fmt" "io" "os" "path" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" libthrift "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -446,3 +451,196 @@ func TestRleBooleanEncodingFileRead(t *testing.T) { assert.Equal(t, expected, values[:len(expected)]) } + +func TestByteStreamSplitEncodingFileRead(t *testing.T) { + dir := os.Getenv("PARQUET_TEST_DATA") + if dir == "" { + t.Skip("no path supplied with PARQUET_TEST_DATA") + } + require.DirExists(t, dir) + + props := parquet.NewReaderProperties(memory.DefaultAllocator) + fileReader, err := file.OpenParquetFile(path.Join(dir, "byte_stream_split_extended.gzip.parquet"), + false, file.WithReadProps(props)) + require.NoError(t, err) + defer fileReader.Close() + + nRows := 200 + nCols := 14 + require.Equal(t, 1, fileReader.NumRowGroups()) + rgr := fileReader.RowGroup(0) + require.EqualValues(t, nRows, rgr.NumRows()) + require.EqualValues(t, nCols, rgr.NumColumns()) + + // Helper to unpack values from column of a specific type + getValues := func(rdr file.ColumnChunkReader, typ parquet.Type) any { + var ( + vals any + total int64 + read int + err error + ) + + switch typ { + case parquet.Types.FixedLenByteArray: + r, ok := rdr.(*file.FixedLenByteArrayColumnChunkReader) + require.True(t, ok) + + values := make([]parquet.FixedLenByteArray, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Float: + r, ok := rdr.(*file.Float32ColumnChunkReader) + require.True(t, ok) + + values := make([]float32, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Double: + r, ok := rdr.(*file.Float64ColumnChunkReader) + require.True(t, ok) + + values := make([]float64, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Int32: + r, ok := rdr.(*file.Int32ColumnChunkReader) + require.True(t, ok) + + values := make([]int32, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Int64: + r, ok := rdr.(*file.Int64ColumnChunkReader) + require.True(t, ok) + + values := make([]int64, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + 
vals = values + default: + t.Fatalf("unrecognized parquet type: %s", typ) + } + + require.NoError(t, err) + require.EqualValues(t, nRows, total) + require.EqualValues(t, nRows, read) + + return vals + } + + // Test conformance against Parquet reference + // Expected structure: https://github.com/apache/parquet-testing/blob/1bf4bd39df2135d132451c281754268f03dc1c0e/data/README.md?plain=1#L358 + for i, tc := range []struct { + PhysicalType parquet.Type + LogicalType schema.LogicalType + }{ + { + PhysicalType: parquet.Types.FixedLenByteArray, + LogicalType: schema.Float16LogicalType{}, + }, + { + PhysicalType: parquet.Types.Float, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.Double, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.Int32, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.Int64, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.FixedLenByteArray, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.FixedLenByteArray, + LogicalType: schema.NewDecimalLogicalType(7, 3), + }, + } { + t.Run(fmt.Sprintf("(Physical:%s/Logical:%s)", tc.PhysicalType, tc.LogicalType), func(t *testing.T) { + // Iterate through pairs of adjacent columns + colIdx := 2 * i + + // Read Plain-encoded column + rdrPlain, err := rgr.Column(colIdx) + require.NoError(t, err) + + // Read ByteStreamSplit-encoded column + rdrByteStreamSplit, err := rgr.Column(colIdx + 1) + require.NoError(t, err) + + // Logical types match + require.True(t, rdrPlain.Descriptor().LogicalType().Equals(tc.LogicalType)) + require.True(t, rdrByteStreamSplit.Descriptor().LogicalType().Equals(tc.LogicalType)) + + // Decoded values match + valuesPlain := getValues(rdrPlain, tc.PhysicalType) + valuesByteStreamSplit := getValues(rdrByteStreamSplit, tc.PhysicalType) + require.Equal(t, valuesPlain, valuesByteStreamSplit) + }) + } +} + +func TestDeltaBinaryPackedMultipleBatches(t *testing.T) { + size := 10 + batchSize := size / 2 // write 2 batches + + // Define the schema for the test data + fields := []arrow.Field{ + {Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + } + schema := arrow.NewSchema(fields, nil) + + // Create a record batch with the test data + b := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer b.Release() + + for i := 0; i < size; i++ { + b.Field(0).(*array.Int64Builder).Append(int64(i)) + } + rec := b.NewRecord() + defer rec.Release() + + // Write the data to Parquet using the file writer + props := parquet.NewWriterProperties( + parquet.WithDictionaryDefault(false), + parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked)) + writerProps := pqarrow.DefaultWriterProps() + + var buf bytes.Buffer + pw, err := pqarrow.NewFileWriter(schema, &buf, props, writerProps) + require.NoError(t, err) + require.NoError(t, pw.Write(rec)) + require.NoError(t, pw.Close()) + + // Read the data back from the Parquet file + reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + defer reader.Close() + + pr, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{BatchSize: int64(batchSize)}, memory.DefaultAllocator) + require.NoError(t, err) + + rr, err := pr.GetRecordReader(context.Background(), nil, nil) + require.NoError(t, err) + + totalRows := 0 + for rr.Next() { + rec := rr.Record() + for i := 0; i < int(rec.NumRows()); i++ { + col := rec.Column(0).(*array.Int64) + + val := col.Value(i) + require.Equal(t, val, 
int64(totalRows+i)) + } + totalRows += int(rec.NumRows()) + } + + require.Equalf(t, size, totalRows, "Expected %d rows, but got %d rows", size, totalRows) +} diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index ce5e13c24d05a..6fb64f3b8c315 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -21,11 +21,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) // Writer is the primary interface for writing a parquet file diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index a183022357d62..0faf3f7233bd3 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -22,13 +22,13 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -464,3 +464,79 @@ func TestCloseError(t *testing.T) { writer := file.NewParquetWriter(sink, sc) assert.Error(t, writer.Close()) } + +func TestBatchedByteStreamSplitFileRoundtrip(t *testing.T) { + input := []parquet.FixedLenByteArray{ + {1, 2}, + {3, 4}, + {5, 6}, + {7, 8}, + } + + size := len(input) + chunk := size / 2 + + props := parquet.NewWriterProperties( + parquet.WithEncoding(parquet.Encodings.ByteStreamSplit), + parquet.WithDictionaryDefault(false), + parquet.WithBatchSize(int64(chunk)), + parquet.WithDataPageSize(int64(size)*2), + ) + + field, err := schema.NewPrimitiveNodeLogical("f16", parquet.Repetitions.Required, schema.Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2, 1) + require.NoError(t, err) + + schema, err := schema.NewGroupNode("test", parquet.Repetitions.Required, schema.FieldList{field}, 0) + require.NoError(t, err) + + sink := encoding.NewBufferWriter(0, memory.DefaultAllocator) + writer := file.NewParquetWriter(sink, schema, file.WithWriterProps(props)) + + rgw := writer.AppendRowGroup() + cw, err := rgw.NextColumn() + require.NoError(t, err) + + f16ColumnWriter, ok := cw.(*file.FixedLenByteArrayColumnChunkWriter) + require.True(t, ok) + + nVals, err := f16ColumnWriter.WriteBatch(input[:chunk], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, nVals) + + nVals, err = f16ColumnWriter.WriteBatch(input[chunk:], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, nVals) + + 
require.NoError(t, cw.Close()) + require.NoError(t, rgw.Close()) + require.NoError(t, writer.Close()) + + rdr, err := file.NewParquetReader(bytes.NewReader(sink.Bytes())) + require.NoError(t, err) + + require.Equal(t, 1, rdr.NumRowGroups()) + require.EqualValues(t, size, rdr.NumRows()) + + rgr := rdr.RowGroup(0) + cr, err := rgr.Column(0) + require.NoError(t, err) + + f16ColumnReader, ok := cr.(*file.FixedLenByteArrayColumnChunkReader) + require.True(t, ok) + + output := make([]parquet.FixedLenByteArray, size) + + total, valuesRead, err := f16ColumnReader.ReadBatch(int64(chunk), output[:chunk], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, total) + require.EqualValues(t, chunk, valuesRead) + + total, valuesRead, err = f16ColumnReader.ReadBatch(int64(chunk), output[chunk:], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, total) + require.EqualValues(t, chunk, valuesRead) + + require.Equal(t, input, output) + + require.NoError(t, rdr.Close()) +} diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go index 9ab92bc74167e..29aa613de0db6 100755 --- a/go/parquet/file/level_conversion.go +++ b/go/parquet/file/level_conversion.go @@ -22,11 +22,11 @@ import ( "math/bits" "unsafe" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/bmi" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/bmi" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/level_conversion_test.go b/go/parquet/file/level_conversion_test.go index 34a107163a197..740c0e674469b 100644 --- a/go/parquet/file/level_conversion_test.go +++ b/go/parquet/file/level_conversion_test.go @@ -20,9 +20,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/parquet/internal/bmi" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/parquet/internal/bmi" + "github.com/apache/arrow/go/v18/parquet/internal/utils" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/file/page_reader.go b/go/parquet/file/page_reader.go index 3c166253cdabe..91dcc3c66aa5d 100644 --- a/go/parquet/file/page_reader.go +++ b/go/parquet/file/page_reader.go @@ -23,13 +23,13 @@ import ( "sync" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git 
a/go/parquet/file/page_writer.go b/go/parquet/file/page_writer.go index 1c23917fe202c..82cd37e1a7774 100644 --- a/go/parquet/file/page_writer.go +++ b/go/parquet/file/page_writer.go @@ -20,15 +20,15 @@ import ( "bytes" "sync" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" libthrift "github.com/apache/thrift/lib/go/thrift" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/record_reader.go b/go/parquet/file/record_reader.go index 1a1310195a88a..667ffca77a8d1 100755 --- a/go/parquet/file/record_reader.go +++ b/go/parquet/file/record_reader.go @@ -23,14 +23,14 @@ import ( "unsafe" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_reader.go b/go/parquet/file/row_group_reader.go index 1e8444e1fc1d0..25ca6d87d895f 100644 --- a/go/parquet/file/row_group_reader.go +++ b/go/parquet/file/row_group_reader.go @@ -20,10 +20,10 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_writer.go b/go/parquet/file/row_group_writer.go index dfb10d584cce6..d18ff270939d2 100644 --- a/go/parquet/file/row_group_writer.go +++ b/go/parquet/file/row_group_writer.go @@ -17,10 +17,10 @@ package file import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - 
"github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_writer_test.go b/go/parquet/file/row_group_writer_test.go index 2fdba06381f55..0074611235245 100644 --- a/go/parquet/file/row_group_writer_test.go +++ b/go/parquet/file/row_group_writer_test.go @@ -20,10 +20,10 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/bmi/bitmap_bmi2_386.go b/go/parquet/internal/bmi/bitmap_bmi2_386.go new file mode 100644 index 0000000000000..60f898f6bd557 --- /dev/null +++ b/go/parquet/internal/bmi/bitmap_bmi2_386.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !noasm +// +build !noasm + +package bmi + +func init() { + funclist.extractBits = extractBitsGo + funclist.gtbitmap = greaterThanBitmapGo +} diff --git a/go/parquet/internal/bmi/bmi_test.go b/go/parquet/internal/bmi/bmi_test.go index 2b7cc59000ac1..41a74ba3afcc3 100644 --- a/go/parquet/internal/bmi/bmi_test.go +++ b/go/parquet/internal/bmi/bmi_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/parquet/internal/bmi" + "github.com/apache/arrow/go/v18/parquet/internal/bmi" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index d21fb3dd56603..772fe96fde8f0 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -23,10 +23,10 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow/bitutil" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) // PlainBooleanDecoder is for the Plain Encoding type, there is no diff --git a/go/parquet/internal/encoding/boolean_encoder.go b/go/parquet/internal/encoding/boolean_encoder.go index f77ae05fe30ef..b95707cb2b1da 100644 --- a/go/parquet/internal/encoding/boolean_encoder.go +++ b/go/parquet/internal/encoding/boolean_encoder.go @@ -19,10 +19,10 @@ package encoding import ( "encoding/binary" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) const ( diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go index 12aaed110b0bc..6a87e5f3b9832 100644 --- a/go/parquet/internal/encoding/byte_array_decoder.go +++ b/go/parquet/internal/encoding/byte_array_decoder.go @@ -19,12 +19,12 @@ package encoding import ( "encoding/binary" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - pqutils "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + pqutils "github.com/apache/arrow/go/v18/parquet/internal/utils" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/byte_array_encoder.go b/go/parquet/internal/encoding/byte_array_encoder.go index fe6b9b147b0bd..518c2e7f7324e 100644 --- a/go/parquet/internal/encoding/byte_array_encoder.go +++ b/go/parquet/internal/encoding/byte_array_encoder.go @@ -21,11 +21,11 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/internal/bitutils" - 
"github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" ) // PlainByteArrayEncoder encodes byte arrays according to the spec for Plain encoding diff --git a/go/parquet/internal/encoding/byte_stream_split.go b/go/parquet/internal/encoding/byte_stream_split.go new file mode 100644 index 0000000000000..e5fe91ada6d77 --- /dev/null +++ b/go/parquet/internal/encoding/byte_stream_split.go @@ -0,0 +1,389 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "fmt" + "math" + + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "golang.org/x/xerrors" +) + +// encodeByteStreamSplit encodes the raw bytes provided by 'in' into the output buffer 'data' using BYTE_STREAM_SPLIT encoding. +// 'data' must have space for at least len(in) bytes. +func encodeByteStreamSplit(data []byte, in []byte, width int) { + debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in))) + numElements := len(in) / width + for stream := 0; stream < width; stream++ { + for element := 0; element < numElements; element++ { + encLoc := numElements*stream + element + decLoc := width*element + stream + data[encLoc] = in[decLoc] + } + } +} + +// encodeByteStreamSplitWidth2 implements encodeByteStreamSplit optimized for types stored using 2 bytes. +// 'data' must have space for at least len(in) bytes. +func encodeByteStreamSplitWidth2(data []byte, in []byte) { + debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in))) + const width = 2 + numElements := len(in) / width + for element := 0; element < numElements; element++ { + decLoc := width * element + data[element] = in[decLoc] + data[numElements+element] = in[decLoc+1] + } +} + +// encodeByteStreamSplitWidth4 implements encodeByteStreamSplit optimized for types stored using 4 bytes. +// 'data' must have space for at least len(in) bytes. 
+func encodeByteStreamSplitWidth4(data []byte, in []byte) {
+	debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in)))
+	const width = 4
+	numElements := len(in) / width
+	for element := 0; element < numElements; element++ {
+		decLoc := width * element
+		data[element] = in[decLoc]
+		data[numElements+element] = in[decLoc+1]
+		data[numElements*2+element] = in[decLoc+2]
+		data[numElements*3+element] = in[decLoc+3]
+	}
+}
+
+// encodeByteStreamSplitWidth8 implements encodeByteStreamSplit optimized for types stored using 8 bytes.
+// 'data' must have space for at least len(in) bytes.
+func encodeByteStreamSplitWidth8(data []byte, in []byte) {
+	debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in)))
+	const width = 8
+	numElements := len(in) / width
+	for element := 0; element < numElements; element++ {
+		decLoc := width * element
+		data[element] = in[decLoc]
+		data[numElements+element] = in[decLoc+1]
+		data[numElements*2+element] = in[decLoc+2]
+		data[numElements*3+element] = in[decLoc+3]
+		data[numElements*4+element] = in[decLoc+4]
+		data[numElements*5+element] = in[decLoc+5]
+		data[numElements*6+element] = in[decLoc+6]
+		data[numElements*7+element] = in[decLoc+7]
+	}
+}
+
+// decodeByteStreamSplitBatchWidth4 decodes the batch of nValues raw bytes representing a 4-byte datatype provided by 'data',
+// into the output buffer 'out' using BYTE_STREAM_SPLIT encoding.
+// 'out' must have space for at least len(data) bytes.
+func decodeByteStreamSplitBatchWidth4(data []byte, nValues, stride int, out []byte) {
+	debug.Assert(len(out) >= len(data), fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
+	const width = 4
+	for element := 0; element < nValues; element++ {
+		out[width*element] = data[element]
+		out[width*element+1] = data[stride+element]
+		out[width*element+2] = data[2*stride+element]
+		out[width*element+3] = data[3*stride+element]
+	}
+}
+
+// decodeByteStreamSplitBatchWidth8 decodes the batch of nValues raw bytes representing an 8-byte datatype provided by 'data',
+// into the output buffer 'out' using BYTE_STREAM_SPLIT encoding.
+// 'out' must have space for at least len(data) bytes.
+func decodeByteStreamSplitBatchWidth8(data []byte, nValues, stride int, out []byte) {
+	debug.Assert(len(out) >= len(data), fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
+	const width = 8
+	for element := 0; element < nValues; element++ {
+		out[width*element] = data[element]
+		out[width*element+1] = data[stride+element]
+		out[width*element+2] = data[2*stride+element]
+		out[width*element+3] = data[3*stride+element]
+		out[width*element+4] = data[4*stride+element]
+		out[width*element+5] = data[5*stride+element]
+		out[width*element+6] = data[6*stride+element]
+		out[width*element+7] = data[7*stride+element]
+	}
+}
+
+// decodeByteStreamSplitBatchFLBA decodes the batch of nValues FixedLenByteArrays provided by 'data',
+// into the output slice 'out' using BYTE_STREAM_SPLIT encoding.
+// 'out' must have space for at least nValues slices.
+func decodeByteStreamSplitBatchFLBA(data []byte, nValues, stride, width int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for stream := 0; stream < width; stream++ { + for element := 0; element < nValues; element++ { + encLoc := stride*stream + element + out[element][stream] = data[encLoc] + } + } +} + +// decodeByteStreamSplitBatchFLBAWidth2 decodes the batch of nValues FixedLenByteArrays of length 2 provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. +func decodeByteStreamSplitBatchFLBAWidth2(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for element := 0; element < nValues; element++ { + out[element][0] = data[element] + out[element][1] = data[stride+element] + } +} + +// decodeByteStreamSplitBatchFLBAWidth4 decodes the batch of nValues FixedLenByteArrays of length 4 provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. +func decodeByteStreamSplitBatchFLBAWidth4(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for element := 0; element < nValues; element++ { + out[element][0] = data[element] + out[element][1] = data[stride+element] + out[element][2] = data[stride*2+element] + out[element][3] = data[stride*3+element] + } +} + +// decodeByteStreamSplitBatchFLBAWidth8 decodes the batch of nValues FixedLenByteArrays of length 8 provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. 
+func decodeByteStreamSplitBatchFLBAWidth8(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for element := 0; element < nValues; element++ { + out[element][0] = data[element] + out[element][1] = data[stride+element] + out[element][2] = data[stride*2+element] + out[element][3] = data[stride*3+element] + out[element][4] = data[stride*4+element] + out[element][5] = data[stride*5+element] + out[element][6] = data[stride*6+element] + out[element][7] = data[stride*7+element] + } +} + +func releaseBufferToPool(pooled *PooledBufferWriter) { + buf := pooled.buf + memory.Set(buf.Buf(), 0) + buf.ResizeNoShrink(0) + bufferPool.Put(buf) +} + +func validateByteStreamSplitPageData(typeLen, nvals int, data []byte) (int, error) { + if nvals*typeLen < len(data) { + return 0, fmt.Errorf("data size (%d) is too large for the number of values in BYTE_STREAM_SPLIT (%d)", len(data), nvals) + } + + if len(data)%typeLen != 0 { + return 0, fmt.Errorf("ByteStreamSplit data size %d not aligned with byte_width: %d", len(data), typeLen) + } + + return len(data) / typeLen, nil +} + +// ByteStreamSplitFloat32Encoder writes the underlying bytes of the Float32 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitFloat32Encoder struct { + PlainFloat32Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitFloat32Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainFloat32Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth4(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitFloat32Encoder) Release() { + enc.PlainFloat32Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitFloat64Encoder writes the underlying bytes of the Float64 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitFloat64Encoder struct { + PlainFloat64Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitFloat64Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainFloat64Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth8(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitFloat64Encoder) Release() { + enc.PlainFloat64Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitInt32Encoder writes the underlying bytes of the Int32 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitInt32Encoder struct { + PlainInt32Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitInt32Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainInt32Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth4(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc
*ByteStreamSplitInt32Encoder) Release() { + enc.PlainInt32Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitInt64Encoder writes the underlying bytes of the Int64 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitInt64Encoder struct { + PlainInt64Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitInt64Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainInt64Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth8(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitInt64Encoder) Release() { + enc.PlainInt64Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitFloat32Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Float32 values +type ByteStreamSplitFloat32Decoder = ByteStreamSplitDecoder[float32] + +// ByteStreamSplitFloat64Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Float64 values +type ByteStreamSplitFloat64Decoder = ByteStreamSplitDecoder[float64] + +// ByteStreamSplitInt32Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Int32 values +type ByteStreamSplitInt32Decoder = ByteStreamSplitDecoder[int32] + +// ByteStreamSplitInt64Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Int64 values +type ByteStreamSplitInt64Decoder = ByteStreamSplitDecoder[int64] + +type ByteStreamSplitDecoder[T float32 | float64 | int32 | int64] struct { + decoder + stride int +} + +func (dec *ByteStreamSplitDecoder[T]) Type() parquet.Type { + switch v := any(dec).(type) { + case *ByteStreamSplitDecoder[float32]: + return parquet.Types.Float + case *ByteStreamSplitDecoder[float64]: + return parquet.Types.Double + case *ByteStreamSplitDecoder[int32]: + return parquet.Types.Int32 + case *ByteStreamSplitDecoder[int64]: + return parquet.Types.Int64 + default: + panic(fmt.Sprintf("ByteStreamSplitDecoder is not supported for type: %T", v)) + } +} + +func (dec *ByteStreamSplitDecoder[T]) SetData(nvals int, data []byte) error { + nvals, err := validateByteStreamSplitPageData(dec.Type().ByteSize(), nvals, data) + if err != nil { + return err + } + + dec.stride = nvals + return dec.decoder.SetData(nvals, data) +} + +func (dec *ByteStreamSplitDecoder[T]) Decode(out []T) (int, error) { + typeLen := dec.Type().ByteSize() + toRead := len(out) + numBytesNeeded := toRead * typeLen + if numBytesNeeded > len(dec.data) || numBytesNeeded > math.MaxInt32 { + return 0, xerrors.New("parquet: eof exception") + } + + outBytes := arrow.GetBytes(out) + switch typeLen { + case 4: + decodeByteStreamSplitBatchWidth4(dec.data, toRead, dec.stride, outBytes) + case 8: + decodeByteStreamSplitBatchWidth8(dec.data, toRead, dec.stride, outBytes) + default: + return 0, fmt.Errorf("encoding ByteStreamSplit is only defined for numeric type of width 4 or 8, found: %d", typeLen) + } + + dec.nvals -= toRead + dec.data = dec.data[toRead:] + + return toRead, nil +} + +func (dec *ByteStreamSplitDecoder[T]) DecodeSpaced(out []T, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toRead := len(out) - nullCount + valuesRead, err := dec.Decode(out[:toRead]) + if err != nil { + return valuesRead, err + } + if valuesRead != toRead { + return 
valuesRead, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go index 71bfc872f133c..12a670198afa6 100644 --- a/go/parquet/internal/encoding/decoder.go +++ b/go/parquet/internal/encoding/decoder.go @@ -20,16 +20,16 @@ import ( "bytes" "reflect" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index db42998818b39..ac91953a7f903 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -19,20 +19,20 @@ package encoding import ( "bytes" "errors" + "fmt" "math" "math/bits" - "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) // see the deltaBitPack encoder for a description of the encoding format that is // used for delta-bitpacking. -type deltaBitPackDecoder struct { +type deltaBitPackDecoder[T int32 | int64] struct { decoder mem memory.Allocator @@ -52,18 +52,20 @@ type deltaBitPackDecoder struct { totalValues uint64 lastVal int64 + + miniBlockValues []T } // returns the number of bytes read so far -func (d *deltaBitPackDecoder) bytesRead() int64 { +func (d *deltaBitPackDecoder[T]) bytesRead() int64 { return d.bitdecoder.CurOffset() } -func (d *deltaBitPackDecoder) Allocator() memory.Allocator { return d.mem } +func (d *deltaBitPackDecoder[T]) Allocator() memory.Allocator { return d.mem } // SetData sets the bytes and the expected number of values to decode // into the decoder, updating the decoder and allowing it to be reused.
-func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error { +func (d *deltaBitPackDecoder[T]) SetData(nvalues int, data []byte) error { // set our data into the underlying decoder for the type if err := d.decoder.SetData(nvalues, data); err != nil { return err @@ -103,7 +105,7 @@ func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error { } // initialize a block to decode -func (d *deltaBitPackDecoder) initBlock() error { +func (d *deltaBitPackDecoder[T]) initBlock() error { // first we grab the min delta value that we'll start from var ok bool if d.minDelta, ok = d.bitdecoder.GetZigZagVlqInt(); !ok { @@ -126,16 +128,9 @@ func (d *deltaBitPackDecoder) initBlock() error { return nil } -// DeltaBitPackInt32Decoder decodes Int32 values which are packed using the Delta BitPacking algorithm. -type DeltaBitPackInt32Decoder struct { - *deltaBitPackDecoder - - miniBlockValues []int32 -} - -func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { +func (d *deltaBitPackDecoder[T]) unpackNextMini() error { if d.miniBlockValues == nil { - d.miniBlockValues = make([]int32, 0, int(d.valsPerMini)) + d.miniBlockValues = make([]T, 0, int(d.valsPerMini)) } else { d.miniBlockValues = d.miniBlockValues[:0] } @@ -149,7 +144,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { } d.lastVal += int64(delta) + int64(d.minDelta) - d.miniBlockValues = append(d.miniBlockValues, int32(d.lastVal)) + d.miniBlockValues = append(d.miniBlockValues, T(d.lastVal)) } d.miniBlockIdx++ return nil @@ -157,15 +152,15 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. -func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { - max := shared_utils.Min(len(out), int(d.totalValues)) +func (d *deltaBitPackDecoder[T]) Decode(out []T) (int, error) { + max := shared_utils.Min(len(out), int(d.nvals)) if max == 0 { return 0, nil } out = out[:max] if !d.usedFirst { // starting value to calculate deltas against - out[0] = int32(d.lastVal) + out[0] = T(d.lastVal) out = out[1:] d.usedFirst = true } @@ -198,7 +193,7 @@ func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { } // DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap -func (d *DeltaBitPackInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { +func (d *deltaBitPackDecoder[T]) DecodeSpaced(out []T, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { toread := len(out) - nullCount values, err := d.Decode(out[:toread]) if err != nil { @@ -211,101 +206,23 @@ func (d *DeltaBitPackInt32Decoder) DecodeSpaced(out []int32, nullCount int, vali return spacedExpand(out, nullCount, validBits, validBitsOffset), nil } -// Type returns the physical parquet type that this decoder decodes, in this case Int32 -func (DeltaBitPackInt32Decoder) Type() parquet.Type { - return parquet.Types.Int32 -} - -// DeltaBitPackInt64Decoder decodes a delta bit packed int64 column of data. 
-type DeltaBitPackInt64Decoder struct { - *deltaBitPackDecoder - - miniBlockValues []int64 -} - -func (d *DeltaBitPackInt64Decoder) unpackNextMini() error { - if d.miniBlockValues == nil { - d.miniBlockValues = make([]int64, 0, int(d.valsPerMini)) - } else { - d.miniBlockValues = d.miniBlockValues[:0] - } - - d.deltaBitWidth = d.deltaBitWidths.Bytes()[int(d.miniBlockIdx)] - d.currentMiniBlockVals = d.valsPerMini - - for j := 0; j < int(d.valsPerMini); j++ { - delta, ok := d.bitdecoder.GetValue(int(d.deltaBitWidth)) - if !ok { - return errors.New("parquet: eof exception") - } - - d.lastVal += int64(delta) + d.minDelta - d.miniBlockValues = append(d.miniBlockValues, d.lastVal) - } - d.miniBlockIdx++ - return nil -} - -// Decode retrieves min(remaining values, len(out)) values from the data and returns the number -// of values actually decoded and any errors encountered. -func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) { - max := shared_utils.Min(len(out), d.nvals) - if max == 0 { - return 0, nil - } - - out = out[:max] - if !d.usedFirst { - out[0] = d.lastVal - out = out[1:] - d.usedFirst = true - } - - var err error - for len(out) > 0 { - if d.currentBlockVals == 0 { - err = d.initBlock() - if err != nil { - return 0, err - } - } - if d.currentMiniBlockVals == 0 { - err = d.unpackNextMini() - } - - if err != nil { - return 0, err - } - - start := int(d.valsPerMini - d.currentMiniBlockVals) - numCopied := copy(out, d.miniBlockValues[start:]) - - out = out[numCopied:] - d.currentBlockVals -= uint32(numCopied) - d.currentMiniBlockVals -= uint32(numCopied) +// Type returns the underlying physical type this decoder works with +func (dec *deltaBitPackDecoder[T]) Type() parquet.Type { + switch v := any(dec).(type) { + case *deltaBitPackDecoder[int32]: + return parquet.Types.Int32 + case *deltaBitPackDecoder[int64]: + return parquet.Types.Int64 + default: + panic(fmt.Sprintf("deltaBitPackDecoder is not supported for type: %T", v)) } - d.nvals -= max - return max, nil -} - -// Type returns the physical parquet type that this decoder decodes, in this case Int64 -func (DeltaBitPackInt64Decoder) Type() parquet.Type { - return parquet.Types.Int64 } -// DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap -func (d DeltaBitPackInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - toread := len(out) - nullCount - values, err := d.Decode(out[:toread]) - if err != nil { - return values, err - } - if values != toread { - return values, errors.New("parquet: number of values / definition levels read did not match") - } +// DeltaBitPackInt32Decoder decodes Int32 values which are packed using the Delta BitPacking algorithm. +type DeltaBitPackInt32Decoder = deltaBitPackDecoder[int32] - return spacedExpand(out, nullCount, validBits, validBitsOffset), nil -} +// DeltaBitPackInt64Decoder decodes Int64 values which are packed using the Delta BitPacking algorithm. +type DeltaBitPackInt64Decoder = deltaBitPackDecoder[int64] const ( // block size must be a multiple of 128 @@ -333,7 +250,7 @@ const ( // // Sets aside bytes at the start of the internal buffer where the header will be written, // and only writes the header when FlushValues is called before returning it. 
-type deltaBitPackEncoder struct { +type deltaBitPackEncoder[T int32 | int64] struct { encoder bitWriter *utils.BitWriter @@ -348,7 +265,7 @@ } // flushBlock flushes out a finished block for writing to the underlying encoder -func (enc *deltaBitPackEncoder) flushBlock() { +func (enc *deltaBitPackEncoder[T]) flushBlock() { if len(enc.deltas) == 0 { return } @@ -400,9 +317,8 @@ -// putInternal is the implementation for actually writing data which must be -// integral data as int, int8, int32, or int64. -func (enc *deltaBitPackEncoder) putInternal(data interface{}) { - v := reflect.ValueOf(data) - if v.Len() == 0 { +// Put writes the provided slice of values, which must be integral data +// as int32 or int64, to the encoder. +func (enc *deltaBitPackEncoder[T]) Put(in []T) { + if len(in) == 0 { return } @@ -412,16 +328,16 @@ enc.numMiniBlocks = defaultNumMiniBlocks enc.miniBlockSize = defaultNumValuesPerMini - enc.firstVal = v.Index(0).Int() + enc.firstVal = int64(in[0]) enc.currentVal = enc.firstVal idx = 1 enc.bitWriter = utils.NewBitWriter(enc.sink) } - enc.totalVals += uint64(v.Len()) - for ; idx < v.Len(); idx++ { - val := v.Index(idx).Int() + enc.totalVals += uint64(len(in)) + for ; idx < len(in); idx++ { + val := int64(in[idx]) enc.deltas = append(enc.deltas, val-enc.currentVal) enc.currentVal = val if len(enc.deltas) == int(enc.blockSize) { @@ -432,7 +348,7 @@ // FlushValues flushes any remaining data and returns the finished encoded buffer // or returns nil and any error encountered during flushing. -func (enc *deltaBitPackEncoder) FlushValues() (Buffer, error) { +func (enc *deltaBitPackEncoder[T]) FlushValues() (Buffer, error) { if enc.bitWriter != nil { // write any remaining values enc.flushBlock() @@ -465,7 +381,7 @@ } // EstimatedDataEncodedSize returns the current amount of data actually flushed out and written -func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 { +func (enc *deltaBitPackEncoder[T]) EstimatedDataEncodedSize() int64 { if enc.bitWriter == nil { return 0 } @@ -473,56 +389,33 @@ return int64(enc.bitWriter.Written()) } -// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data. -type DeltaBitPackInt32Encoder struct { - *deltaBitPackEncoder -} - -// Put writes the values from the provided slice of int32 to the encoder -func (enc DeltaBitPackInt32Encoder) Put(in []int32) { - enc.putInternal(in) -} - -// PutSpaced takes a slice of int32 along with a bitmap that describes the nulls and an offset into the bitmap +// PutSpaced takes a slice of values along with a bitmap that describes the nulls and an offset into the bitmap // in order to write spaced data to the encoder.
-func (enc DeltaBitPackInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) { +func (enc *deltaBitPackEncoder[T]) PutSpaced(in []T, validBits []byte, validBitsOffset int64) { buffer := memory.NewResizableBuffer(enc.mem) - buffer.Reserve(arrow.Int32Traits.BytesRequired(len(in))) + dt := arrow.GetDataType[T]().(arrow.FixedWidthDataType) + buffer.Reserve(dt.Bytes() * len(in)) defer buffer.Release() - data := arrow.Int32Traits.CastFromBytes(buffer.Buf()) + data := arrow.GetData[T](buffer.Buf()) nvalid := spacedCompress(in, data, validBits, validBitsOffset) enc.Put(data[:nvalid]) } -// Type returns the underlying physical type this encoder works with, in this case Int32 -func (DeltaBitPackInt32Encoder) Type() parquet.Type { - return parquet.Types.Int32 -} - -// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data. -type DeltaBitPackInt64Encoder struct { - *deltaBitPackEncoder -} - -// Put writes the values from the provided slice of int64 to the encoder -func (enc DeltaBitPackInt64Encoder) Put(in []int64) { - enc.putInternal(in) +// Type returns the underlying physical type this encoder works with +func (enc *deltaBitPackEncoder[T]) Type() parquet.Type { + switch v := any(enc).(type) { + case *deltaBitPackEncoder[int32]: + return parquet.Types.Int32 + case *deltaBitPackEncoder[int64]: + return parquet.Types.Int64 + default: + panic(fmt.Sprintf("deltaBitPackEncoder is not supported for type: %T", v)) + } } -// PutSpaced takes a slice of int64 along with a bitmap that describes the nulls and an offset into the bitmap -// in order to write spaced data to the encoder. -func (enc DeltaBitPackInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) { - buffer := memory.NewResizableBuffer(enc.mem) - buffer.Reserve(arrow.Int64Traits.BytesRequired(len(in))) - defer buffer.Release() +// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for Int32 data. +type DeltaBitPackInt32Encoder = deltaBitPackEncoder[int32] - data := arrow.Int64Traits.CastFromBytes(buffer.Buf()) - nvalid := spacedCompress(in, data, validBits, validBitsOffset) - enc.Put(data[:nvalid]) -} - -// Type returns the underlying physical type this encoder works with, in this case Int64 -func (DeltaBitPackInt64Encoder) Type() parquet.Type { - return parquet.Types.Int64 -} +// DeltaBitPackInt64Encoder is an encoder for the delta bitpacking encoding for Int64 data.
+type DeltaBitPackInt64Encoder = deltaBitPackEncoder[int64] diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index d8c9fb92c65ef..62c8d08999972 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) @@ -53,11 +53,14 @@ func (enc *DeltaByteArrayEncoder) EstimatedDataEncodedSize() int64 { func (enc *DeltaByteArrayEncoder) initEncoders() { enc.prefixEncoder = &DeltaBitPackInt32Encoder{ - deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}} + encoder: newEncoderBase(enc.encoding, nil, enc.mem), + } enc.suffixEncoder = &DeltaLengthByteArrayEncoder{ newEncoderBase(enc.encoding, nil, enc.mem), &DeltaBitPackInt32Encoder{ - deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}}} + encoder: newEncoderBase(enc.encoding, nil, enc.mem), + }, + } } // Type returns the underlying physical type this operates on, in this case ByteArrays only @@ -160,9 +163,9 @@ func (d *DeltaByteArrayDecoder) Allocator() memory.Allocator { return d.mem } // blocks of suffix data in order to initialize the decoder. func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error { prefixLenDec := DeltaBitPackInt32Decoder{ - deltaBitPackDecoder: &deltaBitPackDecoder{ - decoder: newDecoderBase(d.encoding, d.descr), - mem: d.mem}} + decoder: newDecoderBase(d.encoding, d.descr), + mem: d.mem, + } if err := prefixLenDec.SetData(nvalues, data); err != nil { return err diff --git a/go/parquet/internal/encoding/delta_byte_array_test.go b/go/parquet/internal/encoding/delta_byte_array_test.go index c2e4e6849396e..ec344cbecf845 100644 --- a/go/parquet/internal/encoding/delta_byte_array_test.go +++ b/go/parquet/internal/encoding/delta_byte_array_test.go @@ -20,8 +20,8 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index eb7a74ecdf117..87c48d574ed68 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) @@ -110,9 +110,9 @@ func (d *DeltaLengthByteArrayDecoder) Allocator() memory.Allocator { return d.me // followed by the rest of the byte array data immediately after. 
func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { dec := DeltaBitPackInt32Decoder{ - deltaBitPackDecoder: &deltaBitPackDecoder{ - decoder: newDecoderBase(d.encoding, d.descr), - mem: d.mem}} + decoder: newDecoderBase(d.encoding, d.descr), + mem: d.mem, + } if err := dec.SetData(nvalues, data); err != nil { return err diff --git a/go/parquet/internal/encoding/encoder.go b/go/parquet/internal/encoding/encoder.go index 74a6d8dac6566..2373449370f23 100644 --- a/go/parquet/internal/encoding/encoder.go +++ b/go/parquet/internal/encoding/encoder.go @@ -21,14 +21,14 @@ import ( "math/bits" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" ) //go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata plain_encoder_types.gen.go.tmpl typed_encoder.gen.go.tmpl diff --git a/go/parquet/internal/encoding/encoding_benchmarks_test.go b/go/parquet/internal/encoding/encoding_benchmarks_test.go index 0252aa4801430..2ca414eec6b90 100644 --- a/go/parquet/internal/encoding/encoding_benchmarks_test.go +++ b/go/parquet/internal/encoding/encoding_benchmarks_test.go @@ -21,14 +21,14 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" ) const ( @@ -464,3 +464,218 @@ func BenchmarkDecodeDictByteArray(b *testing.B) { dictDec.Decode(out) } } + +func BenchmarkByteStreamSplitEncodingInt32(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingInt32(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; 
sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]int32, sz) + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + decoder.(encoding.Int32Decoder).Decode(output) + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingInt32Batched(b *testing.B) { + const batchSize = 512 + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]int32, sz) + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + for batch := 0; batch*batchSize < sz; batch++ { + offset := batch * batchSize + decoder.(encoding.Int32Decoder).Decode(output[offset : offset+batchSize]) + } + } + }) + } +} + +func BenchmarkByteStreamSplitEncodingInt64(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]int64, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int64, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int64Encoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int64SizeBytes)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingInt64(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]int64, sz) + values := make([]int64, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int64, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int64Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int64, parquet.Encodings.ByteStreamSplit, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int64SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + decoder.(encoding.Int64Decoder).Decode(output) + } + }) + } +} + +func BenchmarkByteStreamSplitEncodingFixedLenByteArray(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]parquet.FixedLenByteArray, sz) + for idx := range values { + values[idx] = []byte{0x12, 0x34, 0x56, 0x78} + } + + arraySize := len(values[0]) + col := schema.NewColumn(schema.NewFixedLenByteArrayNode("fixedlenbytearray", 
parquet.Repetitions.Required, int32(arraySize), -1), 0, 0) + encoder := encoding.NewEncoder(parquet.Types.FixedLenByteArray, parquet.Encodings.ByteStreamSplit, + false, col, memory.DefaultAllocator).(encoding.FixedLenByteArrayEncoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arraySize)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingFixedLenByteArray(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]parquet.FixedLenByteArray, sz) + values := make([]parquet.FixedLenByteArray, sz) + for idx := range values { + values[idx] = []byte{0x12, 0x34, 0x56, 0x78} + } + + arraySize := len(values[0]) + col := schema.NewColumn(schema.NewFixedLenByteArrayNode("fixedlenbytearray", parquet.Repetitions.Required, int32(arraySize), -1), 0, 0) + encoder := encoding.NewEncoder(parquet.Types.FixedLenByteArray, parquet.Encodings.ByteStreamSplit, + false, col, memory.DefaultAllocator).(encoding.FixedLenByteArrayEncoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.FixedLenByteArray, parquet.Encodings.ByteStreamSplit, col, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arraySize)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + decoder.(encoding.FixedLenByteArrayDecoder).Decode(output) + } + }) + } +} + +func BenchmarkDeltaBinaryPackedEncodingInt32(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.DeltaBinaryPacked, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkDeltaBinaryPackedDecodingInt32(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]int32, sz) + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.DeltaBinaryPacked, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int32, parquet.Encodings.DeltaBinaryPacked, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + decoder.(encoding.Int32Decoder).Decode(output) + } + }) + } +} diff --git a/go/parquet/internal/encoding/encoding_test.go b/go/parquet/internal/encoding/encoding_test.go index f2d1e31236adf..4d681eaf02307 100644 --- a/go/parquet/internal/encoding/encoding_test.go +++ b/go/parquet/internal/encoding/encoding_test.go @@ -26,13 +26,13 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - 
"github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -406,6 +406,17 @@ func (b *BaseEncodingTestSuite) TestDeltaByteArrayRoundTrip() { } } +func (b *BaseEncodingTestSuite) TestByteStreamSplitRoundTrip() { + b.initData(10000, 1) + + switch b.typ { + case reflect.TypeOf(float32(0)), reflect.TypeOf(float64(0)), reflect.TypeOf(int32(0)), reflect.TypeOf(int64(0)), reflect.TypeOf(parquet.FixedLenByteArray{}): + b.checkRoundTrip(parquet.Encodings.ByteStreamSplit) + default: + b.Panics(func() { b.checkRoundTrip(parquet.Encodings.ByteStreamSplit) }) + } +} + func (b *BaseEncodingTestSuite) TestSpacedRoundTrip() { exec := func(vals, repeats int, validBitsOffset int64, nullProb float64) { b.Run(fmt.Sprintf("%d vals %d repeats %d offset %0.3f null", vals, repeats, validBitsOffset, 1-nullProb), func() { diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go index ceb9f5a2e4a22..7e319845a8089 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go @@ -17,10 +17,11 @@ package encoding import ( + "fmt" "math" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) @@ -64,3 +65,73 @@ func (pflba *PlainFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenB return spacedExpand(out, nullCount, validBits, validBitsOffset), nil } + +// ByteStreamSplitFixedLenByteArrayDecoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing FixedLenByteArray values +type ByteStreamSplitFixedLenByteArrayDecoder struct { + decoder + stride int +} + +func (dec *ByteStreamSplitFixedLenByteArrayDecoder) Type() parquet.Type { + return parquet.Types.FixedLenByteArray +} + +func (dec *ByteStreamSplitFixedLenByteArrayDecoder) SetData(nvals int, data []byte) error { + if nvals*dec.typeLen < len(data) { + return fmt.Errorf("data size (%d) is too small for the number of values in in BYTE_STREAM_SPLIT (%d)", len(data), nvals) + } + + if len(data)%dec.typeLen != 0 { + return fmt.Errorf("ByteStreamSplit data size %d not aligned with type %s and byte_width: %d", len(data), dec.Type(), dec.typeLen) + } + + nvals = len(data) / dec.typeLen + dec.stride = nvals + + return dec.decoder.SetData(nvals, data) +} + +func (dec *ByteStreamSplitFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { + toRead := len(out) + numBytesNeeded := toRead * dec.typeLen + if numBytesNeeded > len(dec.data) || numBytesNeeded > math.MaxInt32 { + return 0, xerrors.New("parquet: eof exception") + } + + for i := range out { + if cap(out[i]) < dec.typeLen { + out[i] = make(parquet.FixedLenByteArray, dec.typeLen) + } else { + out[i] = out[i][:dec.typeLen] + } + } + + switch dec.typeLen { + case 2: + decodeByteStreamSplitBatchFLBAWidth2(dec.data, toRead, dec.stride, out) + case 4: + 
decodeByteStreamSplitBatchFLBAWidth4(dec.data, toRead, dec.stride, out) + case 8: + decodeByteStreamSplitBatchFLBAWidth8(dec.data, toRead, dec.stride, out) + default: + decodeByteStreamSplitBatchFLBA(dec.data, toRead, dec.stride, dec.typeLen, out) + } + + dec.nvals -= toRead + dec.data = dec.data[toRead:] + return toRead, nil +} + +func (dec *ByteStreamSplitFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toRead := len(out) - nullCount + valuesRead, err := dec.Decode(out[:toRead]) + if err != nil { + return valuesRead, err + } + if valuesRead != toRead { + return valuesRead, xerrors.New("parquet: number of values / definition levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go index 1cdb3c84d9212..9e6377db868f1 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go @@ -19,9 +19,9 @@ package encoding import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet" ) // PlainFixedLenByteArrayEncoder writes the raw bytes of the byte array @@ -75,6 +75,45 @@ func (PlainFixedLenByteArrayEncoder) Type() parquet.Type { return parquet.Types.FixedLenByteArray } +// ByteStreamSplitFixedLenByteArrayEncoder writes the underlying bytes of the FixedLenByteArray +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitFixedLenByteArrayEncoder struct { + PlainFixedLenByteArrayEncoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitFixedLenByteArrayEncoder) FlushValues() (Buffer, error) { + in, err := enc.PlainFixedLenByteArrayEncoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.ResizeNoShrink(in.Len()) + + switch enc.typeLen { + case 2: + encodeByteStreamSplitWidth2(enc.flushBuffer.Bytes(), in.Bytes()) + case 4: + encodeByteStreamSplitWidth4(enc.flushBuffer.Bytes(), in.Bytes()) + case 8: + encodeByteStreamSplitWidth8(enc.flushBuffer.Bytes(), in.Bytes()) + default: + encodeByteStreamSplit(enc.flushBuffer.Bytes(), in.Bytes(), enc.typeLen) + } + + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitFixedLenByteArrayEncoder) Release() { + enc.PlainFixedLenByteArrayEncoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + // WriteDict overrides the embedded WriteDict function to call a specialized function // for copying out the Fixed length values from the dictionary more efficiently.
func (enc *DictFixedLenByteArrayEncoder) WriteDict(out []byte) { diff --git a/go/parquet/internal/encoding/levels.go b/go/parquet/internal/encoding/levels.go index d7ee657b34f6c..81c9011c78e1e 100644 --- a/go/parquet/internal/encoding/levels.go +++ b/go/parquet/internal/encoding/levels.go @@ -24,11 +24,11 @@ import ( "math/bits" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) // LevelEncoder is for handling the encoding of Definition and Repetition levels diff --git a/go/parquet/internal/encoding/levels_test.go b/go/parquet/internal/encoding/levels_test.go index cce2cbe1ee495..1990df90a0195 100644 --- a/go/parquet/internal/encoding/levels_test.go +++ b/go/parquet/internal/encoding/levels_test.go @@ -21,11 +21,11 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/memo_table.go b/go/parquet/internal/encoding/memo_table.go index 7cf073cf910f6..117ca85346d57 100644 --- a/go/parquet/internal/encoding/memo_table.go +++ b/go/parquet/internal/encoding/memo_table.go @@ -20,11 +20,11 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/parquet" ) //go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata memo_table_types.gen.go.tmpl diff --git a/go/parquet/internal/encoding/memo_table_test.go b/go/parquet/internal/encoding/memo_table_test.go index 9032872502871..904502cafc193 100644 --- a/go/parquet/internal/encoding/memo_table_test.go +++ b/go/parquet/internal/encoding/memo_table_test.go @@ -20,11 +20,11 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" "github.com/stretchr/testify/suite" ) 
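For reference alongside the encoder and decoder changes above: every BYTE_STREAM_SPLIT variant in this patch performs the same byte transposition, just specialized per width. The following is a minimal, self-contained Go sketch of the layout only; the byteStreamSplit helper and the sample bytes are illustrative and not part of the patch.

package main

import "fmt"

// byteStreamSplit transposes len(in)/width fixed-width values into `width`
// contiguous streams, so that stream s holds byte s of every value. It mirrors
// the loop structure of the general-case encodeByteStreamSplit above.
func byteStreamSplit(in []byte, width int) []byte {
	numElements := len(in) / width
	out := make([]byte, len(in))
	for stream := 0; stream < width; stream++ {
		for element := 0; element < numElements; element++ {
			out[numElements*stream+element] = in[width*element+stream]
		}
	}
	return out
}

func main() {
	// Raw bytes of two 4-byte values: A0 A1 A2 A3 and B0 B1 B2 B3.
	in := []byte{0xA0, 0xA1, 0xA2, 0xA3, 0xB0, 0xB1, 0xB2, 0xB3}
	// Prints "a0 b0 a1 b1 a2 b2 a3 b3": like-positioned bytes end up adjacent,
	// which is what lets the encoding compress well for floating point data.
	fmt.Printf("% x\n", byteStreamSplit(in, 4))
}

Decoding inverts the transposition with a stride equal to the per-stream length, which is why the ByteStreamSplit decoders in this patch derive a stride field from the page data size in SetData.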
diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go b/go/parquet/internal/encoding/memo_table_types.gen.go index 6d8d86d780fae..1169afc455ff5 100644 --- a/go/parquet/internal/encoding/memo_table_types.gen.go +++ b/go/parquet/internal/encoding/memo_table_types.gen.go @@ -19,8 +19,8 @@ package encoding import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" ) // standard map based implementation of memo tables which can be more efficient diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl index 3912c3eeaa87b..9708b0b97e527 100644 --- a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl @@ -17,7 +17,7 @@ package encoding import ( - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) // standard map based implementation of memo tables which can be more efficient diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go index 34e8bf540ce06..b651fe2d71864 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -24,11 +24,11 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl index bc5cebd698188..1f2bc047464ea 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -20,10 +20,10 @@ import ( "encoding/binary" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/internal/bitutils" ) var ( diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index 0c473a989ef71..e67c976adc042 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -22,15 +22,15 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + 
"github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) @@ -86,8 +86,11 @@ func (int32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema case format.Encoding_PLAIN: return &PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)} case format.Encoding_DELTA_BINARY_PACKED: - return DeltaBitPackInt32Encoder{&deltaBitPackEncoder{ - encoder: newEncoderBase(e, descr, mem)}} + return &DeltaBitPackInt32Encoder{ + encoder: newEncoderBase(e, descr, mem), + } + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitInt32Encoder{PlainInt32Encoder: PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -116,10 +119,11 @@ func (int32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useD mem = memory.DefaultAllocator } return &DeltaBitPackInt32Decoder{ - deltaBitPackDecoder: &deltaBitPackDecoder{ - decoder: newDecoderBase(format.Encoding(e), descr), - mem: mem, - }} + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + } + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitInt32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } @@ -323,8 +327,11 @@ func (int64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema case format.Encoding_PLAIN: return &PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)} case format.Encoding_DELTA_BINARY_PACKED: - return DeltaBitPackInt64Encoder{&deltaBitPackEncoder{ - encoder: newEncoderBase(e, descr, mem)}} + return &DeltaBitPackInt64Encoder{ + encoder: newEncoderBase(e, descr, mem), + } + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitInt64Encoder{PlainInt64Encoder: PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -353,10 +360,11 @@ func (int64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useD mem = memory.DefaultAllocator } return &DeltaBitPackInt64Decoder{ - deltaBitPackDecoder: &deltaBitPackDecoder{ - decoder: newDecoderBase(format.Encoding(e), descr), - mem: mem, - }} + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + } + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitInt64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } @@ -774,6 +782,8 @@ func (float32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *sche switch e { case format.Encoding_PLAIN: return &PlainFloat32Encoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitFloat32Encoder{PlainFloat32Encoder: PlainFloat32Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -797,6 +807,8 @@ func (float32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, us switch e { case parquet.Encodings.Plain: return &PlainFloat32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitFloat32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} 
default: panic("unimplemented encoding type") } @@ -999,6 +1011,8 @@ func (float64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *sche switch e { case format.Encoding_PLAIN: return &PlainFloat64Encoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitFloat64Encoder{PlainFloat64Encoder: PlainFloat64Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -1022,6 +1036,8 @@ func (float64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, us switch e { case parquet.Encodings.Plain: return &PlainFloat64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitFloat64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } @@ -1290,7 +1306,8 @@ func (byteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *sc return &DeltaLengthByteArrayEncoder{ encoder: newEncoderBase(e, descr, mem), lengthEncoder: &DeltaBitPackInt32Encoder{ - &deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}}, + encoder: newEncoderBase(e, descr, mem), + }, } case format.Encoding_DELTA_BYTE_ARRAY: return &DeltaByteArrayEncoder{ @@ -1492,6 +1509,8 @@ func (fixedLenByteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, d switch e { case format.Encoding_PLAIN: return &PlainFixedLenByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitFixedLenByteArrayEncoder{PlainFixedLenByteArrayEncoder: PlainFixedLenByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -1515,6 +1534,8 @@ func (fixedLenByteArrayDecoderTraits) Decoder(e parquet.Encoding, descr *schema. 
switch e { case parquet.Encodings.Plain: return &PlainFixedLenByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitFixedLenByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index 57d7e641fb5df..601d90712baa6 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -17,13 +17,13 @@ package encoding import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/internal/bitutils" ) // fully typed encoder interfaces to enable writing against encoder/decoders @@ -79,20 +79,26 @@ func ({{.lower}}EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *s {{- end}} {{- if or (eq .Name "Int32") (eq .Name "Int64")}} case format.Encoding_DELTA_BINARY_PACKED: - return DeltaBitPack{{.Name}}Encoder{&deltaBitPackEncoder{ - encoder: newEncoderBase(e, descr, mem)}} + return &DeltaBitPack{{.Name}}Encoder{ + encoder: newEncoderBase(e, descr, mem), + } {{- end}} {{- if eq .Name "ByteArray"}} case format.Encoding_DELTA_LENGTH_BYTE_ARRAY: return &DeltaLengthByteArrayEncoder{ encoder: newEncoderBase(e, descr, mem), lengthEncoder: &DeltaBitPackInt32Encoder{ - &deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}}, + encoder: newEncoderBase(e, descr, mem), + }, } case format.Encoding_DELTA_BYTE_ARRAY: return &DeltaByteArrayEncoder{ encoder: newEncoderBase(e, descr, mem), } +{{- end}} +{{- if or (eq .Name "FixedLenByteArray") (eq .Name "Float32") (eq .Name "Float64") (eq .Name "Int32") (eq .Name "Int64")}} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplit{{.Name}}Encoder{Plain{{.Name}}Encoder: Plain{{.Name}}Encoder{encoder: newEncoderBase(e,descr,mem)}} {{- end}} default: panic("unimplemented encoding type") @@ -131,10 +137,9 @@ func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, mem = memory.DefaultAllocator } return &DeltaBitPack{{.Name}}Decoder{ - deltaBitPackDecoder: &deltaBitPackDecoder{ - decoder: newDecoderBase(format.Encoding(e), descr), - mem: mem, - }} + decoder: newDecoderBase(format.Encoding(e), descr), + mem: mem, + } {{- end}} {{- if eq .Name "ByteArray"}} case parquet.Encodings.DeltaLengthByteArray: @@ -154,6 +159,10 @@ func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, decoder: newDecoderBase(format.Encoding(e), descr), mem: mem, }} +{{- end}} +{{- if or (eq .Name "FixedLenByteArray") (eq .Name "Float32") (eq .Name "Float64") (eq .Name "Int32") (eq .Name "Int64")}} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplit{{.Name}}Decoder{decoder: 
newDecoderBase(format.Encoding(e), descr)} {{- end}} default: panic("unimplemented encoding type") diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 2d7a5d6b1d166..fb81ba8729cca 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -20,11 +20,11 @@ import ( "io" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) @@ -187,7 +187,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) { newCap := utils.Max(b.buf.Cap(), 256) for newCap < b.pos+nbytes { - newCap = bitutil.NextPowerOf2(newCap) + newCap = bitutil.NextPowerOf2(b.pos + nbytes) } b.buf.Reserve(newCap) } @@ -380,9 +380,9 @@ func (b *BufferWriter) Reserve(nbytes int) { if b.buffer == nil { b.buffer = memory.NewResizableBuffer(b.mem) } - newCap := utils.Max(b.buffer.Cap()+b.offset, 256) - for newCap < b.pos+nbytes+b.offset { - newCap = bitutil.NextPowerOf2(newCap) + newCap := utils.Max(b.buffer.Cap(), 256) + for newCap < b.pos+nbytes { + newCap = bitutil.NextPowerOf2(b.pos + nbytes) } b.buffer.Reserve(newCap) } diff --git a/go/parquet/internal/encryption/aes.go b/go/parquet/internal/encryption/aes.go index c613e4a02e3a8..1e861ffd2a1d0 100644 --- a/go/parquet/internal/encryption/aes.go +++ b/go/parquet/internal/encryption/aes.go @@ -29,7 +29,7 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) // important constants for handling the aes encryption diff --git a/go/parquet/internal/encryption/decryptor.go b/go/parquet/internal/encryption/decryptor.go index 6a28f6ed7b234..6af9a4aacfe15 100644 --- a/go/parquet/internal/encryption/decryptor.go +++ b/go/parquet/internal/encryption/decryptor.go @@ -19,8 +19,8 @@ package encryption import ( "io" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" ) // FileDecryptor is an interface used by the filereader for decrypting an diff --git a/go/parquet/internal/encryption/encryptor.go b/go/parquet/internal/encryption/encryptor.go index fd2860ef323c4..57ff0a4173cdf 100644 --- a/go/parquet/internal/encryption/encryptor.go +++ b/go/parquet/internal/encryption/encryptor.go @@ -19,8 +19,8 @@ package encryption import ( "io" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" ) // FileEncryptor is the interface for constructing encryptors for the different diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go index cd1437638c485..e3b8ffccb341c 100644 --- a/go/parquet/internal/testutils/pagebuilder.go +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -22,13 +22,13 @@ import ( "io" "reflect" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - 
"github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/mock" ) diff --git a/go/parquet/internal/testutils/primitive_typed.go b/go/parquet/internal/testutils/primitive_typed.go index 55366dc861352..d97677c54d727 100644 --- a/go/parquet/internal/testutils/primitive_typed.go +++ b/go/parquet/internal/testutils/primitive_typed.go @@ -20,11 +20,11 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) type PrimitiveTypedTest struct { diff --git a/go/parquet/internal/testutils/random.go b/go/parquet/internal/testutils/random.go index 568d1a6b5f1df..1f4b1b7068beb 100644 --- a/go/parquet/internal/testutils/random.go +++ b/go/parquet/internal/testutils/random.go @@ -24,14 +24,14 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" diff --git a/go/parquet/internal/testutils/random_arrow.go b/go/parquet/internal/testutils/random_arrow.go index fe52932e51576..f9a199de77963 100644 --- a/go/parquet/internal/testutils/random_arrow.go +++ b/go/parquet/internal/testutils/random_arrow.go @@ -17,10 +17,10 @@ package testutils import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/rand" ) diff --git a/go/parquet/internal/testutils/utils.go b/go/parquet/internal/testutils/utils.go index 057a055884561..823f7fbd07d1d 100644 --- a/go/parquet/internal/testutils/utils.go +++ b/go/parquet/internal/testutils/utils.go @@ -19,7 +19,7 @@ package testutils import ( "reflect" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) var typeToParquetTypeMap = 
map[reflect.Type]parquet.Type{ diff --git a/go/parquet/internal/thrift/helpers.go b/go/parquet/internal/thrift/helpers.go index e2600763dbf3c..f8b0f2170c45d 100644 --- a/go/parquet/internal/thrift/helpers.go +++ b/go/parquet/internal/thrift/helpers.go @@ -23,7 +23,7 @@ import ( "context" "io" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" "github.com/apache/thrift/lib/go/thrift" ) diff --git a/go/parquet/internal/utils/bit_benchmark_test.go b/go/parquet/internal/utils/bit_benchmark_test.go index 2227c22d3b4e5..d171e81e952fa 100644 --- a/go/parquet/internal/utils/bit_benchmark_test.go +++ b/go/parquet/internal/utils/bit_benchmark_test.go @@ -20,9 +20,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" ) type linearBitRunReader struct { diff --git a/go/parquet/internal/utils/bit_packing_arm64.go b/go/parquet/internal/utils/bit_packing_arm64.go index a8d3a996d8c46..89a00b0c63b9d 100644 --- a/go/parquet/internal/utils/bit_packing_arm64.go +++ b/go/parquet/internal/utils/bit_packing_arm64.go @@ -23,7 +23,7 @@ import ( "github.com/klauspost/cpuid/v2" // import for side effect of initializing feature flags // based on ARM_ENABLE_EXT env var - _ "github.com/apache/arrow/go/v17/parquet/internal/bmi" + _ "github.com/apache/arrow/go/v18/parquet/internal/bmi" ) func init() { diff --git a/go/parquet/internal/utils/bit_reader.go b/go/parquet/internal/utils/bit_reader.go index d66968047adc8..2343b5500242c 100644 --- a/go/parquet/internal/utils/bit_reader.go +++ b/go/parquet/internal/utils/bit_reader.go @@ -24,10 +24,10 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // masks for grabbing the trailing bits based on the number of trailing bits desired diff --git a/go/parquet/internal/utils/bit_reader_test.go b/go/parquet/internal/utils/bit_reader_test.go index 24e4927ff8eca..5bb1c9a70190f 100644 --- a/go/parquet/internal/utils/bit_reader_test.go +++ b/go/parquet/internal/utils/bit_reader_test.go @@ -25,11 +25,11 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/internal/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "golang.org/x/exp/rand" diff --git a/go/parquet/internal/utils/bit_writer.go b/go/parquet/internal/utils/bit_writer.go index bb7d7a5e0c4fa..ab0cb3ce58445 100644 --- a/go/parquet/internal/utils/bit_writer.go +++ b/go/parquet/internal/utils/bit_writer.go @@ -21,7 +21,7 @@ import ( "io" 
"log" - "github.com/apache/arrow/go/v17/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/bitutil" ) // WriterAtBuffer is a convenience struct for providing a WriteAt function diff --git a/go/parquet/internal/utils/bitmap_writer.go b/go/parquet/internal/utils/bitmap_writer.go index 011330bde550f..163e928f4b689 100644 --- a/go/parquet/internal/utils/bitmap_writer.go +++ b/go/parquet/internal/utils/bitmap_writer.go @@ -20,7 +20,7 @@ import ( "encoding/binary" "math/bits" - "github.com/apache/arrow/go/v17/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/bitutil" ) // BitmapWriter is an interface for bitmap writers so that we can use multiple diff --git a/go/parquet/internal/utils/bitmap_writer_test.go b/go/parquet/internal/utils/bitmap_writer_test.go index 893b003a8957c..39838e87d3223 100644 --- a/go/parquet/internal/utils/bitmap_writer_test.go +++ b/go/parquet/internal/utils/bitmap_writer_test.go @@ -22,8 +22,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/parquet/internal/utils" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/internal/utils/rle.go b/go/parquet/internal/utils/rle.go index affda41ec0ecb..bf24a5822341d 100644 --- a/go/parquet/internal/utils/rle.go +++ b/go/parquet/internal/utils/rle.go @@ -24,10 +24,10 @@ import ( "encoding/binary" "math" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go b/go/parquet/internal/utils/typed_rle_dict.gen.go index be986b60786ba..80f76ef12d71a 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go @@ -19,9 +19,9 @@ package utils import ( - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl index 11b393e4a7ab8..992270d8d8e00 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl @@ -17,9 +17,9 @@ package utils import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" ) {{range .In}} diff --git a/go/parquet/metadata/app_version.go b/go/parquet/metadata/app_version.go index fa54aec347575..887ed79343a42 100644 --- a/go/parquet/metadata/app_version.go +++ b/go/parquet/metadata/app_version.go @@ -21,8 +21,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + 
"github.com/apache/arrow/go/v18/parquet/schema" ) var ( diff --git a/go/parquet/metadata/column_chunk.go b/go/parquet/metadata/column_chunk.go index 5bc5c049cd592..a05b3c9124154 100644 --- a/go/parquet/metadata/column_chunk.go +++ b/go/parquet/metadata/column_chunk.go @@ -22,13 +22,13 @@ import ( "io" "reflect" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/file.go b/go/parquet/metadata/file.go index fde319d0b32e5..2f7fe53303c3f 100644 --- a/go/parquet/metadata/file.go +++ b/go/parquet/metadata/file.go @@ -24,12 +24,12 @@ import ( "reflect" "unicode/utf8" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/metadata_test.go b/go/parquet/metadata/metadata_test.go index a631d8b925bd2..8ecb95cf41e9d 100644 --- a/go/parquet/metadata/metadata_test.go +++ b/go/parquet/metadata/metadata_test.go @@ -21,9 +21,9 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/metadata/row_group.go b/go/parquet/metadata/row_group.go index c9a55ffdb5b16..5373fb6f783fc 100644 --- a/go/parquet/metadata/row_group.go +++ b/go/parquet/metadata/row_group.go @@ -20,10 +20,10 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" ) // RowGroupMetaData is a proxy around the thrift RowGroup meta data object diff --git a/go/parquet/metadata/stat_compare_test.go 
b/go/parquet/metadata/stat_compare_test.go index f759ee9fe1734..dafbf3ed04f1a 100644 --- a/go/parquet/metadata/stat_compare_test.go +++ b/go/parquet/metadata/stat_compare_test.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/metadata/statistics.go b/go/parquet/metadata/statistics.go index 604fa50ee3b07..e7ffc2a096370 100644 --- a/go/parquet/metadata/statistics.go +++ b/go/parquet/metadata/statistics.go @@ -22,15 +22,15 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=statistics_types.tmpldata statistics_types.gen.go.tmpl diff --git a/go/parquet/metadata/statistics_test.go b/go/parquet/metadata/statistics_test.go index 9760c0ee7d2fb..913629959e928 100644 --- a/go/parquet/metadata/statistics_test.go +++ b/go/parquet/metadata/statistics_test.go @@ -21,12 +21,12 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/metadata/statistics_types.gen.go b/go/parquet/metadata/statistics_types.gen.go index a8670e221b706..0c383fc7f5414 100644 --- a/go/parquet/metadata/statistics_types.gen.go +++ b/go/parquet/metadata/statistics_types.gen.go @@ -22,15 +22,15 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" ) type minmaxPairInt32 [2]int32 diff --git a/go/parquet/metadata/statistics_types.gen.go.tmpl b/go/parquet/metadata/statistics_types.gen.go.tmpl index 4cf47d1915f83..4b3c2a7158ac8 100644 --- a/go/parquet/metadata/statistics_types.gen.go.tmpl +++ b/go/parquet/metadata/statistics_types.gen.go.tmpl @@ -19,13 +19,13 @@ package metadata import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/internal/bitutils" ) {{range .In}} diff --git a/go/parquet/pqarrow/column_readers.go b/go/parquet/pqarrow/column_readers.go index 661d163213394..1e5d5958e6732 100644 --- a/go/parquet/pqarrow/column_readers.go +++ b/go/parquet/pqarrow/column_readers.go @@ -26,16 +26,16 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/sync/errgroup" ) diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go index 9f5c870fa7875..5526c98d7872c 100644 --- a/go/parquet/pqarrow/encode_arrow.go +++ b/go/parquet/pqarrow/encode_arrow.go @@ -25,16 +25,16 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/debug" ) // get the count of the number of leaf arrays for the type diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 4f955eae4e133..9b3419988d6df 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -25,22 +25,22 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/pqarrow" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/parquet/pqarrow/encode_dict_compute.go b/go/parquet/pqarrow/encode_dict_compute.go index c698cdaf91fb1..647bb69db78d5 100644 --- a/go/parquet/pqarrow/encode_dict_compute.go +++ b/go/parquet/pqarrow/encode_dict_compute.go @@ -21,14 +21,14 @@ package pqarrow import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) func isDictEncoding(enc 
parquet.Encoding) bool { diff --git a/go/parquet/pqarrow/encode_dict_nocompute.go b/go/parquet/pqarrow/encode_dict_nocompute.go index 26efa87538b66..aa405a90e8a12 100644 --- a/go/parquet/pqarrow/encode_dict_nocompute.go +++ b/go/parquet/pqarrow/encode_dict_nocompute.go @@ -21,8 +21,8 @@ package pqarrow import ( "errors" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet/file" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet/file" ) func writeDictionaryArrow(*arrowWriteContext, file.ColumnChunkWriter, arrow.Array, []int16, []int16, bool) (err error) { diff --git a/go/parquet/pqarrow/encode_dictionary_test.go b/go/parquet/pqarrow/encode_dictionary_test.go index 200b81b3fec86..cacdc7e39cab3 100644 --- a/go/parquet/pqarrow/encode_dictionary_test.go +++ b/go/parquet/pqarrow/encode_dictionary_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/parquet/pqarrow/file_reader.go b/go/parquet/pqarrow/file_reader.go index 3f958dab6f1ba..a2e84d9ce2795 100755 --- a/go/parquet/pqarrow/file_reader.go +++ b/go/parquet/pqarrow/file_reader.go @@ -18,18 +18,19 @@ package pqarrow import ( "context" + "errors" "fmt" "io" "sync" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/sync/errgroup" "golang.org/x/xerrors" ) @@ -375,6 +376,10 @@ func (fr *FileReader) ReadRowGroups(ctx context.Context, indices, rowGroups []in data.data.Release() } + // if the context is in error, but we haven't set an error yet, then it means that the parent context + // was cancelled. In this case, we should exit early as some columns may not have been read yet. + err = errors.Join(err, ctx.Err()) + if err != nil { // if we encountered an error, consume any waiting data on the channel // so the goroutines don't leak and so memory can get cleaned up. 
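The `ReadRowGroups` hunk above folds `ctx.Err()` into the accumulated error with `errors.Join`, so cancellation of the parent context is reported even when no individual column read has failed. A minimal sketch of the same pattern under an assumed fan-out; `readAll` and its workers are illustrative only:

```go
package main

import (
	"context"
	"errors"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// readAll fans out one goroutine per column index and, after the workers
// finish, folds any pending context error into the result so callers see
// context.Canceled even if every worker returned nil.
func readAll(ctx context.Context, cols []int) error {
	g, gctx := errgroup.WithContext(ctx)
	for _, c := range cols {
		c := c
		g.Go(func() error {
			select {
			case <-gctx.Done():
				return nil // worker gives up quietly on cancellation
			default:
				_ = c // a real implementation would read column c here
				return nil
			}
		})
	}
	err := g.Wait()
	// Mirror of the change above: surface parent-context cancellation.
	return errors.Join(err, ctx.Err())
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // already canceled, as in TestArrowReaderCanceledContext below
	err := readAll(ctx, []int{0, 1, 2})
	fmt.Println(errors.Is(err, context.Canceled)) // true
}
```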
we already diff --git a/go/parquet/pqarrow/file_reader_test.go b/go/parquet/pqarrow/file_reader_test.go index d7f03ac0531b7..fe5a4547a775c 100644 --- a/go/parquet/pqarrow/file_reader_test.go +++ b/go/parquet/pqarrow/file_reader_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -167,6 +167,29 @@ func TestArrowReaderAdHocReadFloat16s(t *testing.T) { } } +func TestArrowReaderCanceledContext(t *testing.T) { + dataDir := getDataDir() + + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + filename := filepath.Join(dataDir, "int32_decimal.parquet") + require.FileExists(t, filename) + + rdr, err := file.OpenParquetFile(filename, false, file.WithReadProps(parquet.NewReaderProperties(mem))) + require.NoError(t, err) + defer rdr.Close() + arrowRdr, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem) + require.NoError(t, err) + + // create a canceled context + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err = arrowRdr.ReadTable(ctx) + require.ErrorIs(t, err, context.Canceled) +} + func TestRecordReaderParallel(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index b1d266ff29080..539c544829e3b 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -22,12 +22,12 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) @@ -246,7 +246,7 @@ func (fw *FileWriter) Write(rec arrow.Record) error { } } fw.colIdx = 0 - return nil + return fw.rgw.Close() } // WriteTable writes an arrow table to the underlying file using chunkSize to determine diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go index fc965279a928d..5b807389a3eb1 100644 --- a/go/parquet/pqarrow/file_writer_test.go +++ b/go/parquet/pqarrow/file_writer_test.go @@ -22,11 +22,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - 
"github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -55,7 +55,11 @@ func TestFileWriterRowGroupNumRows(t *testing.T) { numRows, err := writer.RowGroupNumRows() require.NoError(t, err) assert.Equal(t, 4, numRows) + + // Make sure that row group stats are up-to-date immediately after writing + bytesWritten := writer.RowGroupTotalBytesWritten() require.NoError(t, writer.Close()) + require.Equal(t, bytesWritten, writer.RowGroupTotalBytesWritten()) } func TestFileWriterNumRows(t *testing.T) { diff --git a/go/parquet/pqarrow/helpers.go b/go/parquet/pqarrow/helpers.go index a9a4242fdb44c..237de4366c03e 100644 --- a/go/parquet/pqarrow/helpers.go +++ b/go/parquet/pqarrow/helpers.go @@ -17,7 +17,7 @@ package pqarrow import ( - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func releaseArrays(arrays []arrow.Array) { @@ -38,6 +38,8 @@ func releaseArrayData(data []arrow.ArrayData) { func releaseColumns(columns []arrow.Column) { for _, col := range columns { - col.Release() + if col.Data() != nil { // data can be nil due to the way columns are constructed in ReadRowGroups + col.Release() + } } } diff --git a/go/parquet/pqarrow/path_builder.go b/go/parquet/pqarrow/path_builder.go index 13f2beca024f1..ff439d59a6e75 100644 --- a/go/parquet/pqarrow/path_builder.go +++ b/go/parquet/pqarrow/path_builder.go @@ -21,11 +21,12 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" "golang.org/x/xerrors" ) @@ -301,15 +302,15 @@ type pathBuilder struct { paths []pathInfo nullableInParent bool - refCount int64 + refCount *atomic.Int64 } func (p *pathBuilder) Retain() { - atomic.AddInt64(&p.refCount, 1) + p.refCount.Add(1) } func (p *pathBuilder) Release() { - if atomic.AddInt64(&p.refCount, -1) == 0 { + if p.refCount.Add(-1) == 0 { for idx := range p.paths { p.paths[idx].primitiveArr.Release() p.paths[idx].primitiveArr = nil @@ -498,15 +499,15 @@ type multipathLevelBuilder struct { data arrow.ArrayData builder pathBuilder - refCount int64 + refCount *atomic.Int64 } func (m *multipathLevelBuilder) Retain() { - atomic.AddInt64(&m.refCount, 1) + m.refCount.Add(1) } func (m *multipathLevelBuilder) Release() { - if atomic.AddInt64(&m.refCount, -1) == 0 { + if m.refCount.Add(-1) == 0 { m.data.Release() m.data = nil m.builder.Release() @@ -516,10 +517,10 @@ func (m *multipathLevelBuilder) Release() { func newMultipathLevelBuilder(arr arrow.Array, fieldNullable bool) (*multipathLevelBuilder, error) { ret := &multipathLevelBuilder{ - refCount: 1, + refCount: utils.NewRefCount(1), rootRange: elemRange{int64(0), int64(arr.Data().Len())}, data: arr.Data(), - builder: pathBuilder{nullableInParent: fieldNullable, paths: 
make([]pathInfo, 0), refCount: 1}, + builder: pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: utils.NewRefCount(1)}, } if err := ret.builder.Visit(arr); err != nil { return nil, err diff --git a/go/parquet/pqarrow/path_builder_test.go b/go/parquet/pqarrow/path_builder_test.go index 4b7a8f9094b76..9bbae426b8a46 100644 --- a/go/parquet/pqarrow/path_builder_test.go +++ b/go/parquet/pqarrow/path_builder_test.go @@ -20,10 +20,10 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/parquet/pqarrow/properties.go b/go/parquet/pqarrow/properties.go index d3cf4de6ac74f..25a299c86f5f5 100755 --- a/go/parquet/pqarrow/properties.go +++ b/go/parquet/pqarrow/properties.go @@ -19,9 +19,9 @@ package pqarrow import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) // ArrowWriterProperties are used to determine how to manipulate the arrow data diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go index c573dbe43a562..31bd0eba84388 100644 --- a/go/parquet/pqarrow/reader_writer_test.go +++ b/go/parquet/pqarrow/reader_writer_test.go @@ -22,12 +22,12 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) diff --git a/go/parquet/pqarrow/schema.go b/go/parquet/pqarrow/schema.go index 8b3ea854b7a8f..ce5cc6f905084 100644 --- a/go/parquet/pqarrow/schema.go +++ b/go/parquet/pqarrow/schema.go @@ -22,15 +22,15 @@ import ( "math" "strconv" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" + 
"github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/schema_test.go b/go/parquet/pqarrow/schema_test.go index 3cbcb803fa68f..24b031c174bf2 100644 --- a/go/parquet/pqarrow/schema_test.go +++ b/go/parquet/pqarrow/schema_test.go @@ -20,15 +20,15 @@ import ( "encoding/base64" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/pqarrow" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/reader_properties.go b/go/parquet/reader_properties.go index 1b7a9ef5dcc17..a9db8efaffb23 100644 --- a/go/parquet/reader_properties.go +++ b/go/parquet/reader_properties.go @@ -21,8 +21,8 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // ReaderProperties are used to define how the file reader will handle buffering and allocating buffers diff --git a/go/parquet/reader_writer_properties_test.go b/go/parquet/reader_writer_properties_test.go index 784c644c13590..f07219c6463aa 100644 --- a/go/parquet/reader_writer_properties_test.go +++ b/go/parquet/reader_writer_properties_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/column.go b/go/parquet/schema/column.go index b4c169eff06a4..e3cd9f709cddb 100644 --- a/go/parquet/schema/column.go +++ b/go/parquet/schema/column.go @@ -20,8 +20,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // Column encapsulates the information necessary to interpret primitive diff --git a/go/parquet/schema/converted_types.go b/go/parquet/schema/converted_types.go index 681f96dfe6c88..5fc10f61cebc1 100644 --- a/go/parquet/schema/converted_types.go +++ b/go/parquet/schema/converted_types.go @@ -17,7 +17,7 @@ package schema import ( - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // ConvertedType corresponds to the ConvertedType in the parquet.Thrift, diff --git a/go/parquet/schema/converted_types_test.go b/go/parquet/schema/converted_types_test.go index 
bd15d24c5695c..074bcb823942a 100644 --- a/go/parquet/schema/converted_types_test.go +++ b/go/parquet/schema/converted_types_test.go @@ -19,7 +19,7 @@ package schema_test import ( "testing" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go index ae0bdd6307560..87022442128b1 100644 --- a/go/parquet/schema/helpers.go +++ b/go/parquet/schema/helpers.go @@ -17,7 +17,7 @@ package schema import ( - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/helpers_test.go b/go/parquet/schema/helpers_test.go index 8c19070df2789..a9127d155f213 100644 --- a/go/parquet/schema/helpers_test.go +++ b/go/parquet/schema/helpers_test.go @@ -21,8 +21,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/logical_types.go b/go/parquet/schema/logical_types.go index 94a7eaaf07b5b..e8adce1ca140e 100644 --- a/go/parquet/schema/logical_types.go +++ b/go/parquet/schema/logical_types.go @@ -20,10 +20,10 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // DecimalMetadata is a struct for managing scale and precision information between diff --git a/go/parquet/schema/logical_types_test.go b/go/parquet/schema/logical_types_test.go index da70b5a36139e..e33925966e178 100644 --- a/go/parquet/schema/logical_types_test.go +++ b/go/parquet/schema/logical_types_test.go @@ -19,9 +19,9 @@ package schema_test import ( "testing" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/node.go b/go/parquet/schema/node.go index 08eec33019278..c395caf8a26c8 100644 --- a/go/parquet/schema/node.go +++ b/go/parquet/schema/node.go @@ -19,8 +19,8 @@ package schema import ( "fmt" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "github.com/apache/thrift/lib/go/thrift" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go index 5b4f6b5e1cb98..0bec9eb599dc8 100644 --- a/go/parquet/schema/reflection.go +++ b/go/parquet/schema/reflection.go @@ -22,10 +22,10 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + 
"github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) type taggedInfo struct { diff --git a/go/parquet/schema/reflection_test.go b/go/parquet/schema/reflection_test.go index ca47459611611..6877f33c0169f 100644 --- a/go/parquet/schema/reflection_test.go +++ b/go/parquet/schema/reflection_test.go @@ -22,9 +22,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/schema.go b/go/parquet/schema/schema.go index 81dca82ecad00..c8d53e647d6f9 100644 --- a/go/parquet/schema/schema.go +++ b/go/parquet/schema/schema.go @@ -35,8 +35,8 @@ import ( "io" "strings" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/schema_element_test.go b/go/parquet/schema/schema_element_test.go index 4f57652be6c9e..7da55ce93abe6 100644 --- a/go/parquet/schema/schema_element_test.go +++ b/go/parquet/schema/schema_element_test.go @@ -19,8 +19,8 @@ package schema import ( "testing" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/schema/schema_flatten_test.go b/go/parquet/schema/schema_flatten_test.go index a128232d46309..e95d56f4d0617 100644 --- a/go/parquet/schema/schema_flatten_test.go +++ b/go/parquet/schema/schema_flatten_test.go @@ -19,8 +19,8 @@ package schema import ( "testing" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/schema/schema_test.go b/go/parquet/schema/schema_test.go index f0bd941bcb429..fa6c74492460f 100644 --- a/go/parquet/schema/schema_test.go +++ b/go/parquet/schema/schema_test.go @@ -20,9 +20,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" diff --git a/go/parquet/types.go b/go/parquet/types.go index c1ab3788ca577..ff696c1178e18 100644 --- a/go/parquet/types.go +++ b/go/parquet/types.go @@ -24,8 +24,8 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/arrow" + format 
"github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) const ( @@ -296,6 +296,7 @@ var ( DeltaByteArray Encoding DeltaBinaryPacked Encoding DeltaLengthByteArray Encoding + ByteStreamSplit Encoding }{ Plain: Encoding(format.Encoding_PLAIN), PlainDict: Encoding(format.Encoding_PLAIN_DICTIONARY), @@ -305,6 +306,7 @@ var ( DeltaByteArray: Encoding(format.Encoding_DELTA_BYTE_ARRAY), DeltaBinaryPacked: Encoding(format.Encoding_DELTA_BINARY_PACKED), DeltaLengthByteArray: Encoding(format.Encoding_DELTA_LENGTH_BYTE_ARRAY), + ByteStreamSplit: Encoding(format.Encoding_BYTE_STREAM_SPLIT), } // ColumnOrders contains constants for the Column Ordering fields diff --git a/go/parquet/writer_properties.go b/go/parquet/writer_properties.go index 9ff7e5a582215..c38bb3b81fa6d 100644 --- a/go/parquet/writer_properties.go +++ b/go/parquet/writer_properties.go @@ -17,8 +17,8 @@ package parquet import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/compress" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/compress" ) // Constants for default property values used for the default reader, writer and column props. @@ -46,7 +46,7 @@ const ( DefaultStatsEnabled = true // If the stats are larger than 4K the writer will skip writing them out anyways. DefaultMaxStatsSize int64 = 4096 - DefaultCreatedBy = "parquet-go version 17.0.0-SNAPSHOT" + DefaultCreatedBy = "parquet-go version 18.0.0-SNAPSHOT" DefaultRootName = "schema" ) diff --git a/java/.mvn/extensions.xml b/java/.mvn/extensions.xml index d6e80695e22d0..716e2f9e81c35 100644 --- a/java/.mvn/extensions.xml +++ b/java/.mvn/extensions.xml @@ -23,7 +23,7 @@ com.gradle develocity-maven-extension - 1.21.5 + 1.21.6 com.gradle diff --git a/java/README.md b/java/README.md index 25e35c10973e9..9f1b1c63c8f41 100644 --- a/java/README.md +++ b/java/README.md @@ -85,7 +85,7 @@ variable are set, the system property takes precedence. ## Java Properties - * For Java 9 or later, should set `-Dio.netty.tryReflectionSetAccessible=true`. + * `-Dio.netty.tryReflectionSetAccessible=true` should be set. This fixes `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available`. thrown by Netty. * To support duplicate fields in a `StructVector` enable `-Darrow.struct.conflict.policy=CONFLICT_APPEND`. Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 4dac64de1e7ab..cb4adccb76771 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 742dc9a82dc47..124cc535c25bf 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT ../../pom.xml @@ -59,7 +59,7 @@ under the License. com.h2database h2 - 2.2.224 + 2.3.230 test @@ -82,7 +82,6 @@ under the License. com.fasterxml.jackson.core jackson-annotations - test @@ -93,24 +92,30 @@ under the License. 
- - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.adapter.jdbc=com.fasterxml.jackson.dataformat.yaml --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED -Duser.timezone=UTC + + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + verify + + + com.fasterxml.jackson.core:jackson-annotations + - - - - - - + + + + + org.apache.maven.plugins + maven-surefire-plugin + + --add-reads=org.apache.arrow.adapter.jdbc=com.fasterxml.jackson.dataformat.yaml --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED -Duser.timezone=UTC + + + + diff --git a/java/adapter/jdbc/src/main/java/module-info.java b/java/adapter/jdbc/src/main/java/module-info.java index 5b59ce768472a..04977222c1530 100644 --- a/java/adapter/jdbc/src/main/java/module-info.java +++ b/java/adapter/jdbc/src/main/java/module-info.java @@ -20,6 +20,7 @@ exports org.apache.arrow.adapter.jdbc; exports org.apache.arrow.adapter.jdbc.binder; + requires com.fasterxml.jackson.annotation; requires com.fasterxml.jackson.databind; requires java.sql; requires jdk.unsupported; diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index b216ad5abeb14..ec8ddbbb780df 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT ../../pom.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 36e96a8d6ce5d..6971b53638e48 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-algorithm Arrow Algorithms diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 5fafbf38c7cdf..ad6532b1192bb 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -17,7 +17,7 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> - + 4.0.0 @@ -28,16 +28,61 @@ under the License. org.apache.arrow arrow-bom - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT pom + Arrow Bill of Materials Arrow Bill of Materials + https://arrow.apache.org/ + + + + Developer List + dev-subscribe@arrow.apache.org + dev-unsubscribe@arrow.apache.org + dev@arrow.apache.org + https://lists.apache.org/list.html?dev@arrow.apache.org + + + Commits List + commits-subscribe@arrow.apache.org + commits-unsubscribe@arrow.apache.org + commits@arrow.apache.org + https://lists.apache.org/list.html?commits@arrow.apache.org + + + Issues List + issues-subscribe@arrow.apache.org + issues-unsubscribe@arrow.apache.org + https://lists.apache.org/list.html?issues@arrow.apache.org + + + GitHub List + github-subscribe@arrow.apache.org + github-unsubscribe@arrow.apache.org + https://lists.apache.org/list.html?github@arrow.apache.org + + + + + scm:git:https://github.com/apache/arrow.git + scm:git:https://github.com/apache/arrow.git + main + https://github.com/apache/arrow/tree/${project.scm.tag} + + + + GitHub + https://github.com/apache/arrow/issues + - 1.8 - 1.8 + 11 + 11 + 11 + 11 3.12.0 3.2.5 0.16.1 @@ -169,6 +214,11 @@ under the License. spotless-maven-plugin 2.30.0 + + org.codehaus.mojo + versions-maven-plugin + 2.17.0 + diff --git a/java/c/pom.xml b/java/c/pom.xml index b5a995de1ba2f..52962354047b1 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-c-data diff --git a/java/compression/pom.xml b/java/compression/pom.xml index 561877bd5cd36..8774f7cabde94 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-compression Arrow Compression @@ -55,7 +55,7 @@ under the License. com.github.luben zstd-jni - 1.5.6-3 + 1.5.6-4 diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 00e812a4c6ae6..74071a6c305ad 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-dataset @@ -165,6 +165,7 @@ under the License. test + @@ -179,6 +180,7 @@ under the License. maven-surefire-plugin + --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED false ${project.basedir}/../../testing/data @@ -202,24 +204,4 @@ under the License. - - - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED - - - - - - diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index a60631dae01fa..d4083383a2f44 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-core @@ -144,82 +144,19 @@ under the License. test + maven-surefire-plugin + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED false ${project.basedir}/../../../testing/data - - org.apache.maven.plugins - maven-shade-plugin - - - shade-main - - shade - - package - - false - true - shaded - - - io.grpc:* - com.google.protobuf:* - - - - - com.google.protobuf - arrow.flight.com.google.protobuf - - - - - - - - - shade-ext - - shade - - package - - false - true - shaded-ext - - - io.grpc:* - com.google.protobuf:* - com.google.guava:* - - - - - com.google.protobuf - arrow.flight.com.google.protobuf - - - com.google.common - arrow.flight.com.google.common - - - - - - - - - org.xolstice.maven.plugins protobuf-maven-plugin @@ -261,65 +198,6 @@ under the License. 
- - - org.codehaus.mojo - build-helper-maven-plugin - - - add-generated-sources-to-classpath - - add-source - - generate-sources - - - ${project.build.directory}/generated-sources/protobuf - - - - - - - maven-assembly-plugin - - - jar-with-dependencies - - - - - make-assembly - - single - - package - - - - - - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - ${project.basedir}/../../../testing/data - - - - - - - diff --git a/java/flight/flight-core/src/main/java/module-info.java b/java/flight/flight-core/src/main/java/module-info.java index f6bf5b73b0972..e668fe6149fb9 100644 --- a/java/flight/flight-core/src/main/java/module-info.java +++ b/java/flight/flight-core/src/main/java/module-info.java @@ -31,6 +31,7 @@ requires io.grpc.netty; requires io.grpc.protobuf; requires io.grpc.stub; + requires io.netty.buffer; requires io.netty.common; requires io.netty.handler; requires io.netty.transport; @@ -38,5 +39,6 @@ requires org.apache.arrow.memory.core; requires org.apache.arrow.vector; requires protobuf.java; + requires protobuf.java.util; requires org.slf4j; } diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index c5612644422e9..a154062ba814d 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-integration-tests @@ -69,19 +69,29 @@ under the License. - maven-assembly-plugin - - - jar-with-dependencies - - + maven-shade-plugin make-assembly - single + shade package + + false + true + jar-with-dependencies + + + + **/module-info.class + + + + + + + diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 5f7bd63f8f935..502d866fcc0bd 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-sql-jdbc-core @@ -132,10 +132,8 @@ under the License. 
- com.google.code.findbugs - jsr305 - 3.0.2 - compile + org.checkerframework + checker-qual diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java index 845f5372d3f74..0e9c79a0907a5 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java @@ -29,7 +29,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import javax.annotation.Nullable; import org.apache.arrow.driver.jdbc.client.utils.ClientAuthenticationUtils; import org.apache.arrow.flight.CallOption; import org.apache.arrow.flight.CallStatus; @@ -61,6 +60,7 @@ import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.calcite.avatica.Meta.StatementType; +import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 23b2c9c7cbef4..3dfe3bcd33f50 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-sql-jdbc-driver @@ -161,6 +161,7 @@ under the License. META-INF/native/libio_grpc_netty* META-INF/native/io_grpc_netty_shaded* **/*.proto + **/module-info.class diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 9c7b5b956d354..81bdc1a25636a 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-sql @@ -87,7 +87,7 @@ under the License. org.apache.derby derby - 10.14.2.0 + 10.15.2.0 test @@ -120,24 +120,4 @@ under the License. 
true - - - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.flight.sql=org.slf4j --add-reads=org.apache.arrow.flight.core=ALL-UNNAMED --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - - - diff --git a/java/flight/flight-sql/src/main/java/module-info.java b/java/flight/flight-sql/src/main/java/module-info.java index 5514d5b870afd..cb3835117daf6 100644 --- a/java/flight/flight-sql/src/main/java/module-info.java +++ b/java/flight/flight-sql/src/main/java/module-info.java @@ -25,5 +25,6 @@ requires org.apache.arrow.flight.core; requires org.apache.arrow.memory.core; requires org.apache.arrow.vector; + requires org.apache.commons.cli; requires protobuf.java; } diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java index 8387834947283..2eb74adc5bc0e 100644 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java @@ -123,10 +123,10 @@ protected static void setUpExpectedResultsMap() { Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby"); GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE), - "10.14.2.0 - (1828579)"); + "10.15.2.0 - (1873585)"); GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE), - "10.14.2.0 - (1828579)"); + "10.15.2.0 - (1873585)"); GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false"); GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 38495fa356e51..55511eba82b3a 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-flight @@ -37,17 +37,4 @@ under the License. flight-sql-jdbc-driver flight-integration-tests - - - - pin-mockito-jdk8 - - 1.8 - - - 4.11.0 - 5.2.0 - - - diff --git a/java/format/pom.xml b/java/format/pom.xml index f1edfb86253c0..1121930da42d2 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 2a1e83f3e21fc..70bde084b4216 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT org.apache.arrow.gandiva diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml deleted file mode 100644 index b00c03a014980..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ /dev/null @@ -1,124 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow.maven.plugins - arrow-maven-plugins - 17.0.0-SNAPSHOT - - module-info-compiler-maven-plugin - maven-plugin - - Module Info Compiler Maven Plugin - - https://arrow.apache.org - - - ${maven.version} - - - - 3.8.7 - - - - - org.glavo - module-info-compiler - 2.0 - - - org.apache.maven - maven-plugin-api - ${maven.version} - provided - - - org.apache.maven - maven-core - ${maven.version} - provided - - - org.apache.maven - maven-artifact - ${maven.version} - provided - - - org.apache.maven - maven-model - ${maven.version} - provided - - - org.apache.maven.plugin-tools - maven-plugin-annotations - ${maven.plugin.tools.version} - provided - - - - - - - - com.gradle - develocity-maven-extension - - - - - - arrow-git.properties - - - - - - - - - - - org.apache.maven.plugins - maven-plugin-plugin - - true - - - - mojo-descriptor - - descriptor - - - - help-goal - - helpmojo - - - - - - - diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java deleted file mode 100644 index 4fc8fc46e6bcc..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.maven.plugins; - -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.List; -import java.util.Optional; -import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.glavo.mic.ModuleInfoCompiler; - -/** Compiles the first module-info.java file in the project purely syntactically. 
*/ -public abstract class BaseModuleInfoCompilerPlugin extends AbstractMojo { - protected abstract List getSourceRoots(); - - protected abstract boolean skip(); - - protected abstract String getOutputDirectory(); - - @Override - public void execute() throws MojoExecutionException { - if (skip()) { - getLog().info("Skipping module-info-compiler-maven-plugin"); - return; - } - - Optional moduleInfoFile = findFirstModuleInfo(getSourceRoots()); - if (moduleInfoFile.isPresent()) { - // The compiled module-info.class file goes into target/classes/module-info/main - Path outputDir = Paths.get(getOutputDirectory()); - - outputDir.toFile().mkdirs(); - Path targetPath = outputDir.resolve("module-info.class"); - - // Invoke the compiler, - ModuleInfoCompiler compiler = new ModuleInfoCompiler(); - try (Reader reader = - new InputStreamReader( - Files.newInputStream(moduleInfoFile.get().toPath()), StandardCharsets.UTF_8); - OutputStream output = Files.newOutputStream(targetPath)) { - compiler.compile(reader, output); - getLog().info("Successfully wrote module-info.class file."); - } catch (IOException ex) { - throw new MojoExecutionException("Error compiling module-info.java", ex); - } - } else { - getLog().info("No module-info.java file found. module-info.class file was not generated."); - } - } - - /** Finds the first module-info.java file in the set of source directories. */ - private Optional findFirstModuleInfo(List sourceDirectories) { - if (sourceDirectories == null) { - return Optional.empty(); - } - - return sourceDirectories.stream() - .map(Paths::get) - .map( - sourcePath -> - sourcePath.toFile().listFiles(file -> file.getName().equals("module-info.java"))) - .filter(matchingFiles -> matchingFiles != null && matchingFiles.length != 0) - .map(matchingFiles -> matchingFiles[0]) - .findAny(); - } -} diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java deleted file mode 100644 index e66a475dbf8be..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.maven.plugins; - -import java.util.ArrayList; -import java.util.List; -import org.apache.maven.plugins.annotations.LifecyclePhase; -import org.apache.maven.plugins.annotations.Mojo; -import org.apache.maven.plugins.annotations.Parameter; -import org.apache.maven.project.MavenProject; - -/** A maven plugin for compiler module-info files in main code with JDK8. 
*/ -@Mojo(name = "compile", defaultPhase = LifecyclePhase.COMPILE) -public class ModuleInfoCompilerPlugin extends BaseModuleInfoCompilerPlugin { - - @Parameter( - defaultValue = "${project.compileSourceRoots}", - property = "compileSourceRoots", - required = true) - private final List compileSourceRoots = new ArrayList<>(); - - @Parameter(defaultValue = "false", property = "skip", required = false) - private boolean skip = false; - - @Parameter(defaultValue = "${project}", readonly = true, required = true) - private MavenProject project; - - @Override - protected List getSourceRoots() { - return compileSourceRoots; - } - - @Override - protected boolean skip() { - return skip; - } - - @Override - protected String getOutputDirectory() { - return project.getBuild().getOutputDirectory(); - } -} diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java deleted file mode 100644 index f18ac9faac735..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.maven.plugins; - -import java.util.List; -import org.apache.maven.plugins.annotations.LifecyclePhase; -import org.apache.maven.plugins.annotations.Mojo; -import org.apache.maven.plugins.annotations.Parameter; -import org.apache.maven.project.MavenProject; - -/** A maven plugin for compiler module-info files in unit tests with JDK8. 
*/ -@Mojo(name = "testCompile", defaultPhase = LifecyclePhase.TEST_COMPILE) -public class ModuleInfoTestCompilerPlugin extends BaseModuleInfoCompilerPlugin { - - @Parameter(defaultValue = "false", property = "skip", required = false) - private boolean skip = false; - - @Parameter(defaultValue = "${project}", readonly = true, required = true) - private MavenProject project; - - @Override - protected List getSourceRoots() { - return project.getTestCompileSourceRoots(); - } - - @Override - protected boolean skip() { - return skip; - } - - @Override - protected String getOutputDirectory() { - return project.getBuild().getTestOutputDirectory(); - } -} diff --git a/java/maven/pom.xml b/java/maven/pom.xml deleted file mode 100644 index 54b7757e8cebf..0000000000000 --- a/java/maven/pom.xml +++ /dev/null @@ -1,371 +0,0 @@ - - - - 4.0.0 - - - org.apache - apache - 31 - - - - org.apache.arrow.maven.plugins - arrow-maven-plugins - 17.0.0-SNAPSHOT - pom - Arrow Maven Plugins - - - module-info-compiler-maven-plugin - - - - true - - 1.8 - 1.8 - 3.13.1 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.4 - 3.2.2 - 3.6.3 - 3.5.0 - - - - - - - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 - - - pl.project13.maven - git-commit-id-plugin - 4.9.10 - - - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 - - - - - - org.apache.rat - apache-rat-plugin - - false - - **/dependency-reduced-pom.xml - **/*.log - **/*.css - **/*.js - **/*.md - **/*.eps - **/*.json - **/*.seq - **/*.parquet - **/*.sql - **/arrow-git.properties - **/*.csv - **/*.csvh - **/*.csvh-test - **/*.tsv - **/*.txt - **/*.ssv - **/arrow-*.conf - **/.buildpath - **/*.proto - **/*.fmpp - **/target/** - **/*.tdd - **/*.project - **/TAGS - **/*.checkstyle - **/.classpath - **/.factorypath - **/.settings/** - .*/** - **/*.patch - **/*.pb.cc - **/*.pb.h - **/*.linux - **/client/build/** - **/*.tbl - **/*.iml - **/flight.properties - **/*.idea/** - - - - - rat-checks - - check - - validate - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - **/logging.properties - **/logback-test.xml - **/logback.out.xml - **/logback.xml - - - - org.apache.arrow - ${username} - https://arrow.apache.org/ - - - - - - - test-jar - - - true - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - 2048m - true - - - - maven-enforcer-plugin - - - avoid_bad_dependencies - - enforce - - verify - - - - - commons-logging - javax.servlet:servlet-api - org.mortbay.jetty:servlet-api - org.mortbay.jetty:servlet-api-2.5 - log4j:log4j - - - - - - - - - pl.project13.maven - git-commit-id-plugin - - dd.MM.yyyy '@' HH:mm:ss z - false - false - true - false - - false - false - 7 - -dirty - true - - - - - for-jars - - revision - - true - - target/classes/arrow-git.properties - - - - for-source-tarball - - revision - - false - - ./arrow-git.properties - - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - ../dev/checkstyle/checkstyle.xml - ../dev/license/asf-java.license - ../dev/checkstyle/suppressions.xml - true - UTF-8 - true - ${checkstyle.failOnViolation} - ${checkstyle.failOnViolation} - warning - xml - ${project.build.directory}/test/checkstyle-errors.xml - false - - - - com.puppycrawl.tools - checkstyle - 8.29 - - - org.slf4j - jcl-over-slf4j - 2.0.13 - - - - - validate - - check - - validate - - - - - org.cyclonedx - cyclonedx-maven-plugin - - - - makeBom - - package - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - - 
${maven.multiModuleProjectDirectory}/dev/license/asf-xml.license - (<configuration|<project) - - - - - - 1.7 - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license - package - - - - - - spotless-check - - check - - - - - - - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - - - - - apache-release - - - - org.apache.maven.plugins - maven-assembly-plugin - - - source-release-assembly - - - true - - - - - - - - - diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 729cc512ab111..b9fa8ab1a6942 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-core @@ -31,10 +31,6 @@ under the License. Core off-heap memory management libraries for Arrow ValueVectors. - - com.google.code.findbugs - jsr305 - org.slf4j slf4j-api @@ -47,14 +43,43 @@ under the License. org.checkerframework checker-qual + + com.google.errorprone + error_prone_annotations + + + org.apache.maven.plugins + maven-compiler-plugin + + + -Xmaxerrs + + 10000 + -Xmaxwarns + 10000 + -AskipDefs=.*Test + + -AatfDoNotCache + + + + + org.checkerframework + checker + ${checker.framework.version} + + + + org.apache.maven.plugins maven-surefire-plugin + --add-reads=org.apache.arrow.memory.core=ch.qos.logback.classic --add-opens=java.base/java.lang.reflect=org.apache.arrow.memory.core --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED **/TestOpens.java @@ -65,27 +90,6 @@ under the License. - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.memory.core=ch.qos.logback.classic --add-opens=java.base/java.lang.reflect=org.apache.arrow.memory.core --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - **/TestOpens.java - - - - - - opens-tests @@ -118,40 +122,5 @@ under the License. 
- - - checkerframework-jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Xmaxerrs - - 10000 - -Xmaxwarns - 10000 - -AskipDefs=.*Test - - -AatfDoNotCache - - - - - org.checkerframework - checker - ${checker.framework.version} - - - - - - - diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java index 52fcb52d014a5..0a607bdf2f43a 100644 --- a/java/memory/memory-core/src/main/java/module-info.java +++ b/java/memory/memory-core/src/main/java/module-info.java @@ -22,7 +22,10 @@ exports org.apache.arrow.memory.util.hash; exports org.apache.arrow.util; + requires java.compiler; requires transitive jdk.unsupported; - requires jsr305; + requires static org.checkerframework.checker.qual; + requires static org.immutables.value.annotations; + requires static com.google.errorprone.annotations; requires org.slf4j; } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java index 5a31f4cd1914a..5d052c2cdeeec 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java @@ -17,7 +17,6 @@ package org.apache.arrow.memory; import java.util.concurrent.atomic.AtomicLong; -import javax.annotation.concurrent.ThreadSafe; import org.apache.arrow.util.Preconditions; import org.checkerframework.checker.nullness.qual.Nullable; @@ -25,7 +24,6 @@ * Provides a concurrent way to manage account for memory usage without locking. Used as basis for * Allocators. All operations are threadsafe (except for close). */ -@ThreadSafe class Accountant implements AutoCloseable { /** The parent allocator. */ diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java index 3f4426d2c36e5..dd6375e910b92 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java @@ -16,6 +16,8 @@ */ package org.apache.arrow.memory; +import com.google.errorprone.annotations.FormatMethod; +import com.google.errorprone.annotations.FormatString; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -539,9 +541,8 @@ public String toVerboseString() { return sb.toString(); } - /* Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 */ - @SuppressWarnings("FormatStringAnnotation") - private void hist(String noteFormat, Object... args) { + @FormatMethod + private void hist(@FormatString String noteFormat, Object... 
args) { if (historicalLog != null) { historicalLog.recordEvent(noteFormat, args); } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java index 659ddde28df9b..5b1bdd8b7244c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java @@ -16,6 +16,8 @@ */ package org.apache.arrow.memory.util; +import com.google.errorprone.annotations.FormatMethod; +import com.google.errorprone.annotations.FormatString; import java.util.ArrayDeque; import java.util.Arrays; import java.util.Deque; @@ -42,9 +44,8 @@ public class HistoricalLog { * object instance is best. * @param args for the format string, or nothing if none are required */ - @SuppressWarnings("FormatStringAnnotation") - /* Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 */ - public HistoricalLog(final String idStringFormat, Object... args) { + @FormatMethod + public HistoricalLog(@FormatString final String idStringFormat, Object... args) { this(Integer.MAX_VALUE, idStringFormat, args); } @@ -65,9 +66,8 @@ public HistoricalLog(final String idStringFormat, Object... args) { * object instance is best. * @param args for the format string, or nothing if none are required */ - @SuppressWarnings("AnnotateFormatMethod") - public HistoricalLog(final int limit, final String idStringFormat, Object... args) { - // Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 + @FormatMethod + public HistoricalLog(final int limit, @FormatString final String idStringFormat, Object... args) { this.limit = limit; this.idString = String.format(idStringFormat, args); this.firstEvent = null; @@ -80,9 +80,8 @@ public HistoricalLog(final int limit, final String idStringFormat, Object... arg * @param noteFormat {@link String#format} format string that describes the event * @param args for the format string, or nothing if none are required */ - @SuppressWarnings("AnnotateFormatMethod") - public synchronized void recordEvent(final String noteFormat, Object... args) { - // Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 + @FormatMethod + public synchronized void recordEvent(@FormatString final String noteFormat, Object... args) { final String note = String.format(noteFormat, args); final Event event = new Event(note); if (firstEvent == null) { diff --git a/java/memory/memory-netty-buffer-patch/pom.xml b/java/memory/memory-netty-buffer-patch/pom.xml index 1f645472a3398..2e70279b23b00 100644 --- a/java/memory/memory-netty-buffer-patch/pom.xml +++ b/java/memory/memory-netty-buffer-patch/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-netty-buffer-patch diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index f66899151128a..e29ca3a4d053c 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-netty diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index c1a42e3ff0081..6bba222d89cb3 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-unsafe diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 9eaad8a467895..0ca357beaa781 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory pom diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 83b0a88da063b..f6d3a26b4f352 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-performance jar @@ -75,7 +75,7 @@ under the License. com.h2database h2 - 2.2.224 + 2.3.230 runtime diff --git a/java/pom.xml b/java/pom.xml index ace7a1e1f173e..7ba75af164eb6 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -17,7 +17,7 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> - + 4.0.0 @@ -28,7 +28,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT pom Apache Arrow Java Root POM @@ -41,25 +41,30 @@ under the License. dev-subscribe@arrow.apache.org dev-unsubscribe@arrow.apache.org dev@arrow.apache.org - https://mail-archives.apache.org/mod_mbox/arrow-dev/ + https://lists.apache.org/list.html?dev@arrow.apache.org Commits List commits-subscribe@arrow.apache.org commits-unsubscribe@arrow.apache.org commits@arrow.apache.org - https://mail-archives.apache.org/mod_mbox/arrow-commits/ + https://lists.apache.org/list.html?commits@arrow.apache.org Issues List issues-subscribe@arrow.apache.org issues-unsubscribe@arrow.apache.org - https://mail-archives.apache.org/mod_mbox/arrow-issues/ + https://lists.apache.org/list.html?issues@arrow.apache.org + + + GitHub List + github-subscribe@arrow.apache.org + github-unsubscribe@arrow.apache.org + https://lists.apache.org/list.html?github@arrow.apache.org - maven bom format memory @@ -73,44 +78,45 @@ under the License. compression - + scm:git:https://github.com/apache/arrow.git scm:git:https://github.com/apache/arrow.git - apache-arrow-2.0.0 - https://github.com/apache/arrow + main + https://github.com/apache/arrow/tree/${project.scm.tag} - Jira - https://issues.apache.org/jira/browse/arrow + GitHub + https://github.com/apache/arrow/issues ${project.build.directory}/generated-sources 1.9.0 - 5.10.2 + 5.10.3 2.0.13 33.2.1-jre 4.1.110.Final - 1.63.0 + 1.65.0 3.25.1 - 2.17.1 + 2.17.2 3.4.0 24.3.25 1.11.3 2 true - 9+181-r4173-1 - 2.28.0 + 2.29.2 5.11.0 5.2.0 - 3.44.0 + 3.45.0 none -Xdoclint:none - 1.8 - 1.8 + 11 + 11 + 11 + 11 3.12.0 3.2.5 0.16.1 @@ -149,9 +155,10 @@ under the License. ${dep.fbs.version} - com.google.code.findbugs - jsr305 - 3.0.2 + com.google.errorprone + error_prone_annotations + ${error_prone_core.version} + provided org.slf4j @@ -267,13 +274,13 @@ under the License. org.mockito mockito-junit-jupiter - 2.25.1 + 5.12.0 test ch.qos.logback logback-classic - 1.3.14 + 1.4.14 test @@ -292,8 +299,6 @@ under the License. maven-compiler-plugin true - **/module-info.java - **/module-info.java false @@ -307,6 +312,7 @@ under the License. maven-surefire-plugin + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED true true ${forkCount} @@ -319,11 +325,13 @@ under the License. which in turn can cause OOM. 
--> 1048576 + false maven-failsafe-plugin + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED ${project.build.directory} true @@ -438,13 +446,9 @@ under the License. **/module-info.java + arrow-memory-netty-buffer-patch,arrow-memory-netty,flight-sql-jdbc-core,flight-integration-tests,arrow-performance - - org.apache.arrow.maven.plugins - module-info-compiler-maven-plugin - ${project.version} - com.gradle develocity-maven-extension @@ -485,6 +489,7 @@ under the License. com.google.protobuf:protoc:${dep.protobuf-bom.version}:exe:${os.detected.classifier} grpc-java io.grpc:protoc-gen-grpc-java:${dep.grpc-bom.version}:exe:${os.detected.classifier} + @generated=omit @@ -507,6 +512,11 @@ under the License. exec-maven-plugin 3.3.0 + + org.codehaus.mojo + versions-maven-plugin + 2.17.0 + pl.project13.maven git-commit-id-plugin @@ -768,24 +778,6 @@ under the License. - - org.apache.arrow.maven.plugins - module-info-compiler-maven-plugin - - - default-compile - - compile - - - - default-testCompile - - testCompile - - - - org.apache.maven.plugins maven-project-info-reports-plugin @@ -845,6 +837,7 @@ under the License. **/module-info.java + arrow-memory-netty-buffer-patch,arrow-memory-netty,flight-sql-jdbc-core,flight-integration-tests,arrow-performance @@ -906,56 +899,13 @@ under the License. - error-prone-jdk8 + error-prone - 1.8 - - !m2e.version - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -XDcompilePolicy=simple - -Xplugin:ErrorProne - -J-Xbootclasspath/p:${settings.localRepository}/com/google/errorprone/javac/${errorprone.javac.version}/javac-${errorprone.javac.version}.jar - - - - com.google.errorprone - error_prone_core - - 2.10.0 - - - - - - - - - - error-prone-jdk11+ - - [11,] !m2e.version @@ -992,30 +942,6 @@ under the License. - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - org.apache.maven.plugins - maven-failsafe-plugin - - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - - code-coverage diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 07a768e24d5e2..94566495dff19 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-tools Arrow Tools @@ -59,7 +59,7 @@ under the License. ch.qos.logback logback-classic - 1.3.14 + 1.4.14 test -# arrow 16.1.0.9000 +# arrow 17.0.0.9000 -* R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) -* `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. -* The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358) -* R metadata, stored in the Arrow schema to support round-tripping data between R and Arrow/Parquet, is now serialized and deserialized more strictly. This makes it safer to load data from files from unknown sources into R data.frames. 
(#41969) +# arrow 17.0.0 + +## New features + +* R functions that users write that use functions that Arrow supports in dataset + queries now can be used in queries too. Previously, only functions that used + arithmetic operators worked. + For example, `time_hours <- function(mins) mins / 60` worked, + but `time_hours_rounded <- function(mins) round(mins / 60)` did not; + now both work. These are automatic translations rather than true user-defined + functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) +* `mutate()` expressions can now include aggregations, such as `x - mean(x)`. (#41350) +* `summarize()` supports more complex expressions, and correctly handles cases + where column names are reused in expressions. +* The `na_matches` argument to the `dplyr::*_join()` functions is now supported. + This argument controls whether `NA` values are considered equal when joining. (#41358) +* R metadata, stored in the Arrow schema to support round-tripping data between + R and Arrow/Parquet, is now serialized and deserialized more strictly. + This makes it safer to load data from files from unknown sources into R data.frames. (#41969) + +## Minor improvements and fixes +* Turn on the S3 and ZSTD features by default for macOS. (#42210) +* Fix bindings in Math group generics. (#43162) +* Fix a bug in our implementation of `pull` on grouped datasets; it now + returns the expected column. (#43172) +* The minimum version of the Arrow C++ library the Arrow R package can be built + with has been bumped to 15.0.0. (#42241) # arrow 16.1.0 diff --git a/r/PACKAGING.md b/r/PACKAGING.md index abe86a62fdc4f..7f6f80745bd96 100644 --- a/r/PACKAGING.md +++ b/r/PACKAGING.md @@ -32,7 +32,7 @@ For a high-level overview of the release process see the - [ ] Ensure the contents of the README are accurate and up to date. - [ ] Run `urlchecker::url_check()` on the R directory at the release candidate commit. Ignore any errors with badges as they will be removed in the CRAN release branch. -- [ ] [Polish NEWS](https://style.tidyverse.org/news.html#news-release) but do **not** update version numbers (this is done automatically later). You can find commits by, for example, `git log --oneline aa057d0..HEAD | grep "\[R\]"` +- [ ] [Polish NEWS](https://style.tidyverse.org/news.html#news-release) but do **not** update version numbers (this is done automatically later). You can find commits by, for example, `git log --oneline ..HEAD | grep "\[R\]"` - [ ] Run preliminary reverse dependency checks using `archery docker run r-revdepcheck`. - [ ] For major releases, prepare tweet thread highlighting new features. @@ -72,15 +72,10 @@ Wait for the release candidate to be cut: - [ ] Run `Rscript tools/update-checksums.R ` to download the checksums for the pre-compiled binaries from the ASF artifactory into the tools directory. - [ ] Regenerate arrow_X.X.X.tar.gz (i.e., `make build`) -Ensure linux binary packages are available: -- [ ] Ensure linux binaries are available in the artifactory: - https://apache.jfrog.io/ui/repos/tree/General/arrow/r - ## Check binary Arrow C++ distributions specific to the R package - [ ] Upload the .tar.gz to [win-builder](https://win-builder.r-project.org/upload.aspx) (r-devel only) - and confirm (with Nic, who will automatically receive an email about the results) that the check is clean. - This step cannot be completed before Jeroen has put the binaries in the MinGW repository, i.e. 
[here](https://ftp.opencpu.org/rtools/ucrt64/), [here](https://ftp.opencpu.org/rtools/mingw64/), and [here](https://ftp.opencpu.org/rtools/mingw32/). + and confirm (with Jon, who will automatically receive an email about the results) that the check is clean. - [ ] Upload the .tar.gz to [MacBuilder](https://mac.r-project.org/macbuilder/submit.html) and confirm that the check is clean - [ ] Check `install.packages("arrow_X.X.X.tar.gz")` on Ubuntu and ensure that the @@ -105,3 +100,4 @@ Wait for CRAN... [CRAN package page](https://cran.r-project.org/package=arrow) to reflect the new version - [ ] Tweet! + - Use Bryce's [script](https://gist.githubusercontent.com/amoeba/4e26c064d1a0d0227cd8c2260cf0072a/raw/bc0d983152bdde4820de9074d4caee9986624bc5/new_contributors.R) for contributor calculation. diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 4770b03b9ca48..ba513ef470cfb 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -115,19 +115,19 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { switch(.Generic, abs = eval_array_expression("abs_checked", x), ceiling = eval_array_expression("ceil", x), - sign = , - floor = , - trunc = , - acos = , - asin = , - atan = , - cos = , - sin = , - tan = { - eval_array_expression(.Generic, x) - }, + sign = eval_array_expression("sign", x), + floor = eval_array_expression("floor", x), + trunc = eval_array_expression("trunc", x), + acos = eval_array_expression("acos_checked", x), + asin = eval_array_expression("asin_checked", x), + atan = eval_array_expression("atan", x), + cos = eval_array_expression("cos_checked", x), + sin = eval_array_expression("sin_checked", x), + tan = eval_array_expression("tan_checked", x), log = eval_array_expression("logb_checked", x, base), log10 = eval_array_expression("log10_checked", x), + log2 = eval_array_expression("log2_checked", x), + log1p = eval_array_expression("log1p_checked", x), round = eval_array_expression( "round", x, @@ -135,9 +135,12 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { ), sqrt = eval_array_expression("sqrt_checked", x), exp = eval_array_expression("power_checked", exp(1), x), + cumsum = eval_array_expression("cumulative_sum_checked", x), + cumprod = eval_array_expression("cumulative_prod_checked", x), + cummax = eval_array_expression("cumulative_max", x), + cummin = eval_array_expression("cumulative_min", x), signif = , expm1 = , - log1p = , cospi = , sinpi = , tanpi = , @@ -151,10 +154,6 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { gamma = , digamma = , trigamma = , - cumsum = eval_array_expression("cumulative_sum_checked", x), - cumprod = , - cummax = , - cummin = , stop(paste0("Unsupported operation on `", class(x)[1L], "` : "), .Generic, call. 
= FALSE) ) } diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index c3232c6ff7230..08555cd9f3e6a 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -64,7 +64,7 @@ pull.Dataset <- function(.data, .data <- as_adq(.data) var <- vars_pull(names(.data), !!enquo(var)) .data$selected_columns <- set_names(.data$selected_columns[var], var) - out <- dplyr::compute(.data)[[1]] + out <- dplyr::compute(.data)[[var]] handle_pull_as_vector(out, as_vector) } pull.RecordBatchReader <- pull.arrow_dplyr_query <- pull.Dataset diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index fcb1cedbbb168..03659f5735708 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -77,12 +77,12 @@ mutate.arrow_dplyr_query <- function(.data, agg_query$aggregations <- mask$.aggregations agg_query <- collapse.arrow_dplyr_query(agg_query) if (length(grv)) { - out <- left_join(out, agg_query, by = grv) + out <- dplyr::left_join(out, agg_query, by = grv) } else { # If there are no group_by vars, add a scalar column to both and join on that agg_query$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) out$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) - out <- left_join(out, agg_query, by = "..tempjoin") + out <- dplyr::left_join(out, agg_query, by = "..tempjoin") } } diff --git a/r/R/parquet.R b/r/R/parquet.R index 0ee6c62601c1d..88ce1c77128f7 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -419,6 +419,7 @@ ParquetWriterProperties$create <- function(column_names, #' @section Methods: #' #' - `WriteTable` Write a [Table] to `sink` +#' - `WriteBatch` Write a [RecordBatch] to `sink` #' - `Close` Close the writer. Note: does not close the `sink`. #' [arrow::io::OutputStream][OutputStream] has its own `close()` method. #' @@ -428,8 +429,14 @@ ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = ArrowObject, public = list( WriteTable = function(table, chunk_size) { + assert_is(table, "Table") parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) }, + WriteBatch = function(batch, ...) { + assert_is(batch, "RecordBatch") + table <- Table$create(batch) + self$WriteTable(table, ...) + }, Close = function() parquet___arrow___FileWriter__Close(self) ) ) diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index ceb68d773bdb4..10a233356b684 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -76,7 +76,7 @@ home: [C GLib](https://arrow.apache.org/docs/c_glib)
[C++](https://arrow.apache.org/docs/cpp)
[C#](https://github.com/apache/arrow/blob/main/csharp/README.md)
- [Go](https://pkg.go.dev/github.com/apache/arrow/go/v17)
+ [Go](https://pkg.go.dev/github.com/apache/arrow/go/v18)
[Java](https://arrow.apache.org/docs/java)
[JavaScript](https://arrow.apache.org/docs/js)
[Julia](https://github.com/apache/arrow-julia/blob/main/README.md)
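
As context for the new `ParquetFileWriter$WriteBatch` method added in r/R/parquet.R above (and documented in the .Rd change that follows), here is a minimal usage sketch in R. It mirrors the test added to r/tests/testthat/test-parquet.R later in this patch; the temporary file path and single-column schema are illustrative only. As the diff shows, `WriteBatch` simply wraps the `RecordBatch` in a one-batch `Table` and delegates to `WriteTable`, so it forwards `chunk_size` unchanged and may be called repeatedly before `Close`:

    library(arrow)

    # Set up a writer for a one-column schema.
    tf <- tempfile(fileext = ".parquet")
    sink <- FileOutputStream$create(tf)
    sch <- schema(a = int32())
    props <- ParquetWriterProperties$create(column_names = names(sch))
    writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props)

    # Each batch is wrapped in a Table and forwarded to WriteTable.
    batch <- RecordBatch$create(data.frame(a = 1:10))
    writer$WriteBatch(batch, chunk_size = 10)
    writer$WriteBatch(batch, chunk_size = 10)
    writer$Close()

    nrow(read_parquet(tf))  # 20

Similarly, the expanded `Math.ArrowDatum` mappings in r/R/arrow-datum.R above mean these group generics now dispatch to the Arrow compute kernels named in that diff. A small sketch, with values chosen to stay inside each kernel's valid domain (the `_checked` kernels raise on invalid input rather than returning NaN):

    a <- Array$create(c(0.5, 1, 2, 4))
    log2(a)     # dispatches to log2_checked
    log1p(a)    # dispatches to log1p_checked
    cumprod(a)  # cumulative_prod_checked: 0.5, 0.5, 1, 4
    cummax(a)   # cumulative_max: 0.5, 1, 2, 4
    cummin(a)   # cumulative_min: 0.5, 0.5, 0.5, 0.5
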
diff --git a/r/man/ParquetFileWriter.Rd b/r/man/ParquetFileWriter.Rd index f36e85ab6c4ed..5779e574d46b1 100644 --- a/r/man/ParquetFileWriter.Rd +++ b/r/man/ParquetFileWriter.Rd @@ -24,6 +24,7 @@ takes the following arguments: \itemize{ \item \code{WriteTable} Write a \link{Table} to \code{sink} +\item \code{WriteBatch} Write a \link{RecordBatch} to \code{sink} \item \code{Close} Close the writer. Note: does not close the \code{sink}. \link[=OutputStream]{arrow::io::OutputStream} has its own \code{close()} method. } diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 43f0b3fac62a1..e8e26f22b05cd 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "16.1.0.9000 (dev)", + "name": "17.0.0.9000 (dev)", "version": "dev/" }, { - "name": "16.1.0 (release)", + "name": "17.0.0 (release)", "version": "" }, + { + "name": "16.1.0", + "version": "16.1/" + }, { "name": "15.0.2", "version": "15.0/" diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index ab60586628164..b2ed66b83c3d1 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -39,14 +39,6 @@ #define ARROW_R_DCHECK(EXPR) #endif -// borrowed from enc package -// because R does not make these macros available (i.e. from Defn.h) -#define UTF8_MASK (1 << 3) -#define ASCII_MASK (1 << 6) - -#define IS_ASCII(x) (LEVELS(x) & ASCII_MASK) -#define IS_UTF8(x) (LEVELS(x) & UTF8_MASK) - // For context, see: // https://github.com/r-devel/r-svn/blob/6418faeb6f5d87d3d9b92b8978773bc3856b4b6f/src/main/altrep.c#L37 #define ALTREP_CLASS_SERIALIZED_CLASS(x) ATTRIB(x) @@ -133,19 +125,11 @@ class complexs { // functions that need to be called from an unwind_protect() namespace unsafe { -inline const char* utf8_string(SEXP s) { - if (!IS_UTF8(s) && !IS_ASCII(s)) { - return Rf_translateCharUTF8(s); - } else { - return CHAR(s); - } -} +inline const char* utf8_string(SEXP s) { return Rf_translateCharUTF8(s); } inline R_xlen_t r_string_size(SEXP s) { if (s == NA_STRING) { return 0; - } else if (IS_ASCII(s) || IS_UTF8(s)) { - return XLENGTH(s); } else { return strlen(Rf_translateCharUTF8(s)); } @@ -164,7 +148,7 @@ inline SEXP utf8_strings(SEXP x) { for (R_xlen_t i = 0; i < n; i++, ++p_x) { SEXP s = *p_x; - if (s != NA_STRING && !IS_UTF8(s) && !IS_ASCII(s)) { + if (s != NA_STRING) { SET_STRING_ELT(x, i, Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8)); } } @@ -394,9 +378,17 @@ SEXP to_r6(const std::shared_ptr& ptr, const char* r6_class_name) { cpp11::external_pointer> xp(new std::shared_ptr(ptr)); SEXP r6_class = Rf_install(r6_class_name); +// R_existsVarInFrame doesn't exist before R 4.2, so we need to fall back to +// Rf_findVarInFrame3 if it is not defined. 
+#ifdef R_existsVarInFrame + if (!R_existsVarInFrame(arrow::r::ns::arrow, r6_class)) { + cpp11::stop("No arrow R6 class named '%s'", r6_class_name); + } +#else if (Rf_findVarInFrame3(arrow::r::ns::arrow, r6_class, FALSE) == R_UnboundValue) { cpp11::stop("No arrow R6 class named '%s'", r6_class_name); } +#endif // make call: $new() SEXP call = PROTECT(Rf_lang3(R_DollarSymbol, r6_class, arrow::r::symbols::new_)); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index a81210f0ad914..d2db11e14a787 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1050,7 +1050,6 @@ class RDictionaryConverter> template struct RConverterTrait; -#if ARROW_VERSION_MAJOR >= 15 template struct RConverterTrait< T, enable_if_t::value && !is_interval_type::value && @@ -1062,14 +1061,6 @@ template struct RConverterTrait> { // not implemented }; -#else -template -struct RConverterTrait< - T, enable_if_t::value && !is_interval_type::value && - !is_extension_type::value>> { - using type = RPrimitiveConverter; -}; -#endif template struct RConverterTrait> { diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R index 5cffafe41e668..bbdcb10a6b1c2 100644 --- a/r/tests/testthat/test-compute-arith.R +++ b/r/tests/testthat/test-compute-arith.R @@ -162,6 +162,8 @@ test_that("Math group generics work on Array objects", { Array$create(log(c(0.6, 2.1), base = 2)) ) expect_equal(log10(Array$create(c(0.6, 2.1))), Array$create(log10(c(0.6, 2.1)))) + expect_equal(log2(Array$create(c(0.6, 2.1))), Array$create(log2(c(0.6, 2.1)))) + expect_equal(log1p(Array$create(c(0.6, 2.1, 0))), Array$create(log1p(c(0.6, 2.1, 0)))) expect_equal(round(Array$create(c(0.6, 2.1))), Array$create(c(1, 2))) expect_equal( @@ -175,6 +177,7 @@ test_that("Math group generics work on Array objects", { round(exp(Array$create(c(2L, 1L))), digits = 10), Array$create(round(exp(c(2L, 1L)), 10)) ) + expect_as_vector( cumsum(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), c(2.3, 1.3, 9.2, NA_real_, NA_real_) @@ -186,8 +189,56 @@ test_that("Math group generics work on Array objects", { c(2L, 9L, 17L, 16L, 18L, 35L, NA_integer_, NA_integer_, NA_integer_) ) - expect_error( - cumprod(Array$create(c(4L, 1L))), - "Unsupported operation on `Array`" + expect_as_vector( + cumprod(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), + c(2.3, -2.3, -18.17, NA_real_, NA_real_) + ) + expect_equal(cumprod(Array$create(-10L)), Array$create(-10L)) + expect_equal(cumprod(Array$create(NA_integer_)), Array$create(NA_integer_)) + expect_as_vector( + cumprod(ChunkedArray$create(c(2L, 7L, 8L), c(-1L, 2L, 17L, NA_integer_, 3L), 18L)), + c(2L, 14L, 112L, -112L, -224L, -3808L, NA_integer_, NA_integer_, NA_integer_) + ) + + expect_as_vector( + cummax(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), + c(2.3, 2.3, 7.9, NA_real_, NA_real_) ) + expect_equal(cummax(Array$create(-10L)), Array$create(-10L)) + expect_equal(cummax(Array$create(NA_integer_)), Array$create(NA_integer_)) + expect_as_vector( + cummax(ChunkedArray$create(c(2L, 7L, 8L), c(-1L, 2L, 17L, NA_integer_, 3L), 18L)), + c(2L, 7L, 8L, 8L, 8L, 17L, NA_integer_, NA_integer_, NA_integer_) + ) + + expect_as_vector( + cummin(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), + c(2.3, -1, -1, NA_real_, NA_real_) + ) + expect_equal(cummin(Array$create(-10L)), Array$create(-10L)) + expect_equal(cummin(Array$create(NA_integer_)), Array$create(NA_integer_)) + expect_as_vector( + cummin(ChunkedArray$create(c(2L, 7L, 8L), c(-1L, 2L, 17L, NA_integer_, 3L), 18L)), + c(2L, 2L, 2L, -1L, -1L, -1L, 
NA_integer_, NA_integer_, NA_integer_) + ) + + expect_error(signif(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(expm1(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(cospi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(sinpi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(tanpi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(cosh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(sinh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(tanh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(acosh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(asinh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(atanh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(lgamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(gamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(digamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(trigamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") }) diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index bab81a463e9ee..7c75a84234bfc 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -87,6 +87,7 @@ test_that("pull", { .input %>% filter(int > 4) %>% rename(strng = chr) %>% + group_by(dbl) %>% pull(strng) %>% as.vector(), tbl diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index f2359116fdaf1..cc57022600f8d 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -530,3 +530,31 @@ test_that("thrift string and container size can be specified when reading Parque data <- reader_container$ReadTable() expect_identical(collect.ArrowTabular(data), example_data) }) + +test_that("We can use WriteBatch on ParquetFileWriter", { + tf <- tempfile() + on.exit(unlink(tf)) + sink <- FileOutputStream$create(tf) + sch <- schema(a = int32()) + props <- ParquetWriterProperties$create(column_names = names(sch)) + writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props) + + batch <- RecordBatch$create(data.frame(a = 1:10)) + writer$WriteBatch(batch, chunk_size = 10) + writer$WriteBatch(batch, chunk_size = 10) + writer$WriteBatch(batch, chunk_size = 10) + writer$Close() + + tbl <- read_parquet(tf) + expect_equal(nrow(tbl), 30) +}) + +test_that("WriteBatch on ParquetFileWriter errors when called on closed sink", { + sink <- FileOutputStream$create(tempfile()) + sch <- schema(a = int32()) + props <- ParquetWriterProperties$create(column_names = names(sch)) + writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props) + writer$Close() + batch <- RecordBatch$create(data.frame(a = 1:10)) + expect_error(writer$WriteBatch(batch, chunk_size = 10), "Operation on closed file") +}) diff --git a/r/tools/check-versions.R b/r/tools/check-versions.R index 34b2ef680c547..ea7fe93c52471 100644 --- a/r/tools/check-versions.R +++ b/r/tools/check-versions.R @@ -24,10 +24,10 @@ release_version_supported <- function(r_version, cpp_version) { r_version <- package_version(r_version) cpp_version <- package_version(cpp_version) major <- function(x) as.numeric(x[1, 1]) - minimum_cpp_version <- package_version("13.0.0") + 
diff --git a/r/tools/test-check-versions.R b/r/tools/test-check-versions.R
index f558648bed1e3..14c0bee3fd88a 100644
--- a/r/tools/test-check-versions.R
+++ b/r/tools/test-check-versions.R
@@ -61,16 +61,24 @@ test_that("check_versions without mismatch", {
 test_that("check_versions with mismatch", {
   withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "false"))
 
+  expect_true(
+    release_version_supported("15.0.0", "15.0.0")
+  )
+
   expect_false(
     release_version_supported("15.0.0", "13.0.0")
   )
 
   withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "true"))
 
-  expect_true(
+  expect_false(
     release_version_supported("15.0.0", "13.0.0")
   )
 
+  expect_true(
+    release_version_supported("16.0.0", "15.0.0")
+  )
+
   expect_false(
     release_version_supported("15.0.0", "16.0.0")
   )
diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
index daf40583ee7d3..1fbbe88bcc343 100644
--- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
+++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowCUDA
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
index 5091384212023..a9eae43d59b9f 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowDataset
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb
index 042029b2c40cf..f24688e9cb0f8 100644
--- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb
+++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowFlightSQL
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
index 8245a0f12e681..263f35986616b 100644
--- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb
+++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowFlight
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb
index 1d41ae77dda04..42708a65026ee 100644
--- a/ruby/red-arrow/lib/arrow/version.rb
+++ b/ruby/red-arrow/lib/arrow/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Arrow
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb
index 3d9b4d9d87fa6..4a9f27a7ef649 100644
--- a/ruby/red-gandiva/lib/gandiva/version.rb
+++ b/ruby/red-gandiva/lib/gandiva/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Gandiva
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb
index 6d92829c23b31..ba66b2cad16f3 100644
--- a/ruby/red-parquet/lib/parquet/version.rb
+++ b/ruby/red-parquet/lib/parquet/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Parquet
-  VERSION = "17.0.0-SNAPSHOT"
+  VERSION = "18.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/swift/Arrow/Sources/Arrow/ArrowArray.swift b/swift/Arrow/Sources/Arrow/ArrowArray.swift
index 32b6ba1704511..4fc1b8b9fc71c 100644
--- a/swift/Arrow/Sources/Arrow/ArrowArray.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowArray.swift
@@ -21,7 +21,7 @@ public protocol ArrowArrayHolder {
     var type: ArrowType {get}
     var length: UInt {get}
     var nullCount: UInt {get}
-    var array: Any {get}
+    var array: AnyArray {get}
     var data: ArrowData {get}
     var getBufferData: () -> [Data] {get}
     var getBufferDataSizes: () -> [Int] {get}
@@ -29,11 +29,11 @@ public protocol ArrowArrayHolder {
 }
 
 public class ArrowArrayHolderImpl: ArrowArrayHolder {
-    public let array: Any
     public let data: ArrowData
     public let type: ArrowType
    public let length: UInt
     public let nullCount: UInt
+    public let array: AnyArray
     public let getBufferData: () -> [Data]
     public let getBufferDataSizes: () -> [Int]
     public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
@@ -73,6 +73,50 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
             return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray<T>(arrays)))
         }
     }
+
+    public static func loadArray( // swiftlint:disable:this cyclomatic_complexity
+        _ arrowType: ArrowType, with: ArrowData) throws -> ArrowArrayHolder {
+        switch arrowType.id {
+        case .int8:
+            return try ArrowArrayHolderImpl(FixedArray<Int8>(with))
+        case .int16:
+            return try ArrowArrayHolderImpl(FixedArray<Int16>(with))
+        case .int32:
+            return try ArrowArrayHolderImpl(FixedArray<Int32>(with))
+        case .int64:
+            return try ArrowArrayHolderImpl(FixedArray<Int64>(with))
+        case .uint8:
+            return try ArrowArrayHolderImpl(FixedArray<UInt8>(with))
+        case .uint16:
+            return try ArrowArrayHolderImpl(FixedArray<UInt16>(with))
+        case .uint32:
+            return try ArrowArrayHolderImpl(FixedArray<UInt32>(with))
+        case .uint64:
+            return try ArrowArrayHolderImpl(FixedArray<UInt64>(with))
+        case .double:
+            return try ArrowArrayHolderImpl(FixedArray<Double>(with))
+        case .float:
+            return try ArrowArrayHolderImpl(FixedArray<Float>(with))
+        case .date32:
+            return try ArrowArrayHolderImpl(Date32Array(with))
+        case .date64:
+            return try ArrowArrayHolderImpl(Date64Array(with))
+        case .time32:
+            return try ArrowArrayHolderImpl(Time32Array(with))
+        case .time64:
+            return try ArrowArrayHolderImpl(Time64Array(with))
+        case .string:
+            return try ArrowArrayHolderImpl(StringArray(with))
+        case .boolean:
+            return try ArrowArrayHolderImpl(BoolArray(with))
+        case .binary:
+            return try ArrowArrayHolderImpl(BinaryArray(with))
+        case .strct:
+            return try ArrowArrayHolderImpl(StructArray(with))
+        default:
+            throw ArrowError.invalid("Array not found for type: \(arrowType)")
+        }
+    }
 }
 
 public class ArrowArray<T>: AsString, AnyArray {
@@ -81,7 +125,7 @@ public class ArrowArray<T>: AsString, AnyArray {
     public var nullCount: UInt {return self.arrowData.nullCount}
     public var length: UInt {return self.arrowData.length}
 
-    public required init(_ arrowData: ArrowData) {
+    public required init(_ arrowData: ArrowData) throws {
         self.arrowData = arrowData
     }
@@ -221,10 +265,7 @@ public class BinaryArray: ArrowArray<Data> {
     }
 
     public override func asString(_ index: UInt) -> String {
-        if self[index] == nil {
-            return ""
-        }
-
+        if self[index] == nil {return ""}
         let data = self[index]!
         if options.printAsHex {
             return data.hexEncodedString()
@@ -233,3 +274,54 @@ public class BinaryArray: ArrowArray<Data> {
         }
     }
 }
+
+public class StructArray: ArrowArray<[Any?]> {
+    public private(set) var arrowFields: [ArrowArrayHolder]?
+    public required init(_ arrowData: ArrowData) throws {
+        try super.init(arrowData)
+        var fields = [ArrowArrayHolder]()
+        for child in arrowData.children {
+            fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
+        }
+
+        self.arrowFields = fields
+    }
+
+    public override subscript(_ index: UInt) -> [Any?]? {
+        if self.arrowData.isNull(index) {
+            return nil
+        }
+
+        if let fields = arrowFields {
+            var result = [Any?]()
+            for field in fields {
+                result.append(field.array.asAny(index))
+            }
+
+            return result
+        }
+
+        return nil
+    }
+
+    public override func asString(_ index: UInt) -> String {
+        if self.arrowData.isNull(index) {
+            return ""
+        }
+
+        var output = "{"
+        if let fields = arrowFields {
+            for fieldIndex in 0..<fields.count {
+                if fieldIndex > 0 {
+                    output.append(",")
+                }
+
+                if let asStr = fields[fieldIndex].array as? AsString {
+                    output.append(asStr.asString(index))
+                }
+            }
+        }
+
+        output += "}"
+        return output
+    }
+}
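With the holder's array now typed as AnyArray and the new loadArray factory wiring up nested children, struct values can be read back without force casts. A minimal sketch of reading a struct column; the RecordBatch rb and the fact that column 0 is a struct column are assumptions for illustration, not part of this diff:

    import Arrow

    // Assumes `rb` is a RecordBatch whose column 0 is a struct column.
    func dumpStructColumn(_ rb: RecordBatch) {
        let holder = rb.column(0)  // ArrowArrayHolder
        if let structArray = holder.array as? StructArray {
            for index in 0..<structArray.length {
                // subscript returns [Any?]?; nil marks a null struct slot
                if let row = structArray[index] {
                    print(row)
                } else {
                    print("null")
                }
            }
        }
    }
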
diff --git a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
--- a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
@@ -56,8 +56,9 @@ public class ArrowArrayBuilder<T: ArrowBufferBuilder, U: ArrowArray<T.ItemType>>
     public func finish() throws -> ArrowArray<T.ItemType> {
         let buffers = self.bufferBuilder.finish()
         let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount)
-        return U(arrowData)
+        let array = try U(arrowData)
+        return array
     }
 
     public func getStride() -> Int {
@@ -118,6 +119,55 @@ public class Time64ArrayBuilder: ArrowArrayBuilder<Time64BufferBuilder, Time64Array> {
     }
 }
 
+public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
+    let builders: [any ArrowArrayHolderBuilder]
+    let fields: [ArrowField]
+    public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
+        self.fields = fields
+        self.builders = builders
+        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
+        self.bufferBuilder.initializeTypeInfo(fields)
+    }
+
+    public init(_ fields: [ArrowField]) throws {
+        self.fields = fields
+        var builders = [any ArrowArrayHolderBuilder]()
+        for field in fields {
+            builders.append(try ArrowArrayBuilders.loadBuilder(arrowType: field.type))
+        }
+
+        self.builders = builders
+        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
+    }
+
+    public override func append(_ values: [Any?]?) {
+        self.bufferBuilder.append(values)
+        if let anyValues = values {
+            for index in 0..<anyValues.count {
+                self.builders[index].appendAny(anyValues[index])
+            }
+        }
+    }
+
+    public override func finish() throws -> StructArray {
+        let buffers = self.bufferBuilder.finish()
+        var childData = [ArrowData]()
+        for builder in self.builders {
+            childData.append(try builder.toHolder().array.arrowData)
+        }
+
+        let arrowData = try ArrowData(self.type, buffers: buffers,
+                                      children: childData, nullCount: self.nullCount,
+                                      length: self.length)
+        let structArray = try StructArray(arrowData)
+        return structArray
+    }
+}
+
 public class ArrowArrayBuilders {
     public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
         _ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
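StructArrayBuilder takes a whole row as [Any?], with nil marking a null struct; each child builder receives its field's value. A minimal usage sketch; the field names and values are illustrative, and ArrowType.ArrowInt32 follows the pattern of the ArrowType.ArrowString and ArrowType.ArrowBinary constants used elsewhere in this diff:

    import Arrow

    func buildExampleStruct() throws -> StructArray {
        let fields = [
            ArrowField("id", type: ArrowType(ArrowType.ArrowInt32), isNullable: true),
            ArrowField("name", type: ArrowType(ArrowType.ArrowString), isNullable: true)
        ]
        let builder = try StructArrayBuilder(fields)  // child builders derived from field types
        builder.append([Int32(1), "a"])
        builder.append(nil)                           // null struct slot
        builder.append([Int32(2), "b"])
        return try builder.finish()
    }
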
@@ -168,6 +218,72 @@ public class ArrowArrayBuilders {
             type == Float.self || type == Date.self
     }
 
+    public static func loadStructArrayBuilderForType<T>(_ obj: T) throws -> StructArrayBuilder {
+        let mirror = Mirror(reflecting: obj)
+        var builders = [ArrowArrayHolderBuilder]()
+        var fields = [ArrowField]()
+        for (property, value) in mirror.children {
+            guard let propertyName = property else {
+                continue
+            }
+
+            let builderType = type(of: value)
+            let arrowType = ArrowType(ArrowType.infoForType(builderType))
+            fields.append(ArrowField(propertyName, type: arrowType, isNullable: true))
+            builders.append(try loadBuilder(arrowType: arrowType))
+        }
+
+        return try StructArrayBuilder(fields, builders: builders)
+    }
+
+    public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
+        arrowType: ArrowType) throws -> ArrowArrayHolderBuilder {
+        switch arrowType.id {
+        case .uint8:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt8>
+        case .uint16:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt16>
+        case .uint32:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt32>
+        case .uint64:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt64>
+        case .int8:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<Int8>
+        case .int16:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<Int16>
+        case .int32:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<Int32>
+        case .int64:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<Int64>
+        case .double:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<Double>
+        case .float:
+            return try loadNumberArrayBuilder() as NumberArrayBuilder<Float>
+        case .string:
+            return try StringArrayBuilder()
+        case .boolean:
+            return try BoolArrayBuilder()
+        case .binary:
+            return try BinaryArrayBuilder()
+        case .date32:
+            return try Date32ArrayBuilder()
+        case .date64:
+            return try Date64ArrayBuilder()
+        case .time32:
+            guard let timeType = arrowType as? ArrowTypeTime32 else {
+                throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
+            }
+            return try Time32ArrayBuilder(timeType.unit)
+        case .time64:
+            guard let timeType = arrowType as? ArrowTypeTime64 else {
+                throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
+            }
+            return try Time64ArrayBuilder(timeType.unit)
+        default:
+            throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
+        }
+    }
+
     public static func loadNumberArrayBuilder<T>() throws -> NumberArrayBuilder<T> {
         let type = T.self
         if type == Int8.self {
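loadStructArrayBuilderForType uses Swift's Mirror to turn an object's stored properties into struct fields, so a row type does not have to spell out its schema. A sketch under those assumptions; the Point type is illustrative only, and rows must be appended in the property order Mirror reports:

    import Arrow

    class Point {
        var x: Double = 0
        var y: Double = 0
    }

    func buildPoints() throws -> StructArray {
        let builder = try ArrowArrayBuilders.loadStructArrayBuilderForType(Point())
        builder.append([1.0, 2.0])   // one [Any?] entry per mirrored property
        builder.append([3.0, 4.0])
        return try builder.finish()  // StructArray with fields "x" and "y"
    }
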
diff --git a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
index e4c8036c327d1..47f9c40354b1b 100644
--- a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
@@ -30,18 +30,14 @@ public protocol ArrowBufferBuilder {
     func finish() -> [ArrowBuffer]
 }
 
-public class BaseBufferBuilder<T> {
-    var values: ArrowBuffer
+public class BaseBufferBuilder {
     var nulls: ArrowBuffer
-    var stride: Int
     public var offset: UInt = 0
-    public var capacity: UInt {return self.values.capacity}
+    public var capacity: UInt {return self.nulls.capacity}
     public var length: UInt = 0
     public var nullCount: UInt = 0
 
-    init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
-        self.stride = stride
-        self.values = values
+    init(_ nulls: ArrowBuffer) {
         self.nulls = nulls
     }
@@ -61,7 +57,19 @@ public class BaseBufferBuilder {
     }
 }
 
-public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
+public class ValuesBufferBuilder<T>: BaseBufferBuilder {
+    var values: ArrowBuffer
+    var stride: Int
+    public override var capacity: UInt {return self.values.capacity}
+
+    init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
+        self.stride = stride
+        self.values = values
+        super.init(nulls)
+    }
+}
+
+public class FixedBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
     public typealias ItemType = T
     private let defaultVal: ItemType
     public required init() throws {
        let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<T>.stride))
@@ -138,7 +146,7 @@ public class FixedBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
     }
 }
 
-public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
+public class BoolBufferBuilder: ValuesBufferBuilder<Bool>, ArrowBufferBuilder {
     public typealias ItemType = Bool
     public required init() throws {
         let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
@@ -190,7 +198,7 @@ public class BoolBufferBuilder: ValuesBufferBuilder<Bool>, ArrowBufferBuilder {
     }
 }
 
-public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
+public class VariableBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
     public typealias ItemType = T
     var offsets: ArrowBuffer
     let binaryStride = MemoryLayout<Int32>.stride
@@ -327,3 +335,47 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder<Date, Int64> {
         }
     }
 }
+
+public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
+    public typealias ItemType = [Any?]
+    var info: ArrowNestedType?
+    public init() throws {
+        let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
+        super.init(nulls)
+    }
+
+    public func initializeTypeInfo(_ fields: [ArrowField]) {
+        info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+    }
+
+    public func append(_ newValue: [Any?]?) {
+        let index = UInt(self.length)
+        self.length += 1
+        if length > self.nulls.length {
+            self.resize(length)
+        }
+
+        if newValue != nil {
+            BitUtility.setBit(index + self.offset, buffer: self.nulls)
+        } else {
+            self.nullCount += 1
+            BitUtility.clearBit(index + self.offset, buffer: self.nulls)
+        }
+    }
+
+    public func resize(_ length: UInt) {
+        if length > self.nulls.length {
+            let resizeLength = resizeLength(self.nulls)
+            var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
+            ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
+            self.nulls = nulls
+        }
+    }
+
+    public func finish() -> [ArrowBuffer] {
+        let length = self.length
+        var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
+        ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
+        return [nulls]
+    }
+}
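Unlike the ValuesBufferBuilder subclasses, StructBufferBuilder materializes only the validity bitmap; the per-field value buffers come from the child builders at finish() time, which is why finish() here returns a single buffer. A small sketch of that contract, built directly on the code above:

    import Arrow

    func structBufferSketch() throws {
        let bufferBuilder = try StructBufferBuilder()
        bufferBuilder.append([Int32(1), "a"])  // sets the validity bit; field values are not stored here
        bufferBuilder.append(nil)              // clears the bit and bumps nullCount
        let buffers = bufferBuilder.finish()
        assert(buffers.count == 1)             // just the validity bitmap
        assert(bufferBuilder.nullCount == 1)
    }
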
diff --git a/swift/Arrow/Sources/Arrow/ArrowCImporter.swift b/swift/Arrow/Sources/Arrow/ArrowCImporter.swift
index 8a4cf47fc0b43..f55077ef3dc95 100644
--- a/swift/Arrow/Sources/Arrow/ArrowCImporter.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowCImporter.swift
@@ -23,7 +23,7 @@ public class ImportArrayHolder: ArrowArrayHolder {
     public var type: ArrowType {self.holder.type}
     public var length: UInt {self.holder.length}
     public var nullCount: UInt {self.holder.nullCount}
-    public var array: Any {self.holder.array}
+    public var array: AnyArray {self.holder.array}
     public var data: ArrowData {self.holder.data}
     public var getBufferData: () -> [Data] {self.holder.getBufferData}
     public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes}
diff --git a/swift/Arrow/Sources/Arrow/ArrowData.swift b/swift/Arrow/Sources/Arrow/ArrowData.swift
index 5e23e60c164b5..2728b9fc8b6fe 100644
--- a/swift/Arrow/Sources/Arrow/ArrowData.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowData.swift
@@ -20,11 +20,18 @@ import Foundation
 public class ArrowData {
     public let type: ArrowType
     public let buffers: [ArrowBuffer]
+    public let children: [ArrowData]
     public let nullCount: UInt
     public let length: UInt
     public let stride: Int
 
-    init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt) throws {
+    convenience init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt) throws {
+        try self.init(arrowType, buffers: buffers,
+                      children: [ArrowData](), nullCount: nullCount,
+                      length: buffers[1].length)
+    }
+
+    init(_ arrowType: ArrowType, buffers: [ArrowBuffer], children: [ArrowData], nullCount: UInt, length: UInt) throws {
         let infoType = arrowType.info
         switch infoType {
         case let .primitiveInfo(typeId):
@@ -47,8 +54,9 @@ public class ArrowData {
 
         self.type = arrowType
         self.buffers = buffers
+        self.children = children
         self.nullCount = nullCount
-        self.length = buffers[1].length
+        self.length = length
         self.stride = arrowType.getStride()
     }
diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
index 7e684f360ac51..35dd4dcd1e899 100644
--- a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
@@ -96,11 +96,7 @@ public class ArrowDecoder: Decoder {
             throw ArrowError.invalid("Column for key \"\(name)\" not found")
         }
 
-        guard let anyArray = col.array as? AnyArray else {
-            throw ArrowError.invalid("Unable to convert array to AnyArray")
-        }
-
-        return anyArray
+        return col.array
     }
 
     func getCol(_ index: Int) throws -> AnyArray {
@@ -108,11 +104,7 @@ public class ArrowDecoder: Decoder {
             throw ArrowError.outOfBounds(index: Int64(index))
         }
 
-        guard let anyArray = self.columns[index].array as? AnyArray else {
-            throw ArrowError.invalid("Unable to convert array to AnyArray")
-        }
-
-        return anyArray
+        return self.columns[index].array
     }
 
     func doDecode<T>(_ key: CodingKey) throws -> T? {
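ArrowData now carries its child data, so a nested column's layout is reachable from the array itself through arrowData (exposed on the AnyArray protocol later in this diff). A sketch of walking a struct column's children; the structArray value is assumed:

    import Arrow

    func describeChildren(of structArray: StructArray) {
        // One child ArrowData per struct field; flat arrays have no children.
        for child in structArray.arrowData.children {
            print(child.type.id, "length:", child.length, "nulls:", child.nullCount)
        }
    }
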
diff --git a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
index c701653ecb2c9..22c0672b27eac 100644
--- a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift
@@ -23,7 +23,7 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer],
     do {
         let arrowType = ArrowType(ArrowType.ArrowBinary)
         let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolderImpl(BinaryArray(arrowData)))
+        return .success(ArrowArrayHolderImpl(try BinaryArray(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -36,7 +36,7 @@ private func makeStringHolder(_ buffers: [ArrowBuffer],
     do {
         let arrowType = ArrowType(ArrowType.ArrowString)
         let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolderImpl(StringArray(arrowData)))
+        return .success(ArrowArrayHolderImpl(try StringArray(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -51,11 +51,11 @@ private func makeDateHolder(_ field: ArrowField,
     do {
         if field.type.id == .date32 {
             let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
-            return .success(ArrowArrayHolderImpl(Date32Array(arrowData)))
+            return .success(ArrowArrayHolderImpl(try Date32Array(arrowData)))
         }
 
         let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolderImpl(Date64Array(arrowData)))
+        return .success(ArrowArrayHolderImpl(try Date64Array(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -71,7 +71,7 @@ private func makeTimeHolder(_ field: ArrowField,
         if field.type.id == .time32 {
             if let arrowType = field.type as? ArrowTypeTime32 {
                 let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-                return .success(ArrowArrayHolderImpl(FixedArray<Time32>(arrowData)))
+                return .success(ArrowArrayHolderImpl(try FixedArray<Time32>(arrowData)))
             } else {
                 return .failure(.invalid("Incorrect field type for time: \(field.type)"))
             }
@@ -79,7 +79,7 @@ private func makeTimeHolder(_ field: ArrowField,
 
         if let arrowType = field.type as? ArrowTypeTime64 {
             let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-            return .success(ArrowArrayHolderImpl(FixedArray<Time64>(arrowData)))
+            return .success(ArrowArrayHolderImpl(try FixedArray<Time64>(arrowData)))
         } else {
             return .failure(.invalid("Incorrect field type for time: \(field.type)"))
         }
@@ -95,7 +95,7 @@ private func makeBoolHolder(_ buffers: [ArrowBuffer],
     do {
         let arrowType = ArrowType(ArrowType.ArrowBool)
         let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolderImpl(BoolArray(arrowData)))
+        return .success(ArrowArrayHolderImpl(try BoolArray(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
@@ -109,7 +109,7 @@ private func makeFixedHolder<T>(
 ) -> Result<ArrowArrayHolder, ArrowError> {
     do {
         let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount)
-        return .success(ArrowArrayHolderImpl(FixedArray<T>(arrowData)))
+        return .success(ArrowArrayHolderImpl(try FixedArray<T>(arrowData)))
     } catch let error as ArrowError {
         return .failure(error)
     } catch {
diff --git a/swift/Arrow/Sources/Arrow/ArrowTable.swift b/swift/Arrow/Sources/Arrow/ArrowTable.swift
index b9d15154c4f94..dedf90f791cce 100644
--- a/swift/Arrow/Sources/Arrow/ArrowTable.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowTable.swift
@@ -185,7 +185,7 @@ public class RecordBatch {
     public func anyData(for columnIndex: Int) -> AnyArray {
         let arrayHolder = column(columnIndex)
-        return (arrayHolder.array as! AnyArray) // swiftlint:disable:this force_cast
+        return arrayHolder.array
     }
 
     public func column(_ index: Int) -> ArrowArrayHolder {
diff --git a/swift/Arrow/Sources/Arrow/ChunkedArray.swift b/swift/Arrow/Sources/Arrow/ChunkedArray.swift
index c5ccfe4aec0e6..fb5734f64b6ba 100644
--- a/swift/Arrow/Sources/Arrow/ChunkedArray.swift
+++ b/swift/Arrow/Sources/Arrow/ChunkedArray.swift
@@ -18,6 +18,7 @@
 import Foundation
 
 public protocol AnyArray {
+    var arrowData: ArrowData {get}
     func asAny(_ index: UInt) -> Any?
     var length: UInt {get}
 }
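Because a holder's array is now an AnyArray rather than Any, generic row access needs no force casts. A sketch of a cast-free dump over any RecordBatch; rb is assumed, and rb.columns is the same property the Codable tests below use:

    import Arrow

    func dumpBatch(_ rb: RecordBatch) {
        for holder in rb.columns {
            let col = holder.array  // AnyArray; previously required `as! AnyArray`
            for row in 0..<col.length {
                print(col.asAny(row) ?? "null", terminator: " ")
            }
            print()
        }
    }
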
diff --git a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift
index ed0cb1148e871..bfd7492064352 100644
--- a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift
@@ -212,6 +212,67 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
         XCTAssertEqual(microArray[2], 987654321)
     }
 
+    func testStructArray() throws { // swiftlint:disable:this function_body_length
+        class StructTest {
+            var fieldBool: Bool = false
+            var fieldInt8: Int8 = 0
+            var fieldInt16: Int16 = 0
+            var fieldInt32: Int32 = 0
+            var fieldInt64: Int64 = 0
+            var fieldUInt8: UInt8 = 0
+            var fieldUInt16: UInt16 = 0
+            var fieldUInt32: UInt32 = 0
+            var fieldUInt64: UInt64 = 0
+            var fieldDouble: Double = 0
+            var fieldFloat: Float = 0
+            var fieldString: String = ""
+            var fieldData = Data()
+            var fieldDate: Date = Date.now
+        }
+
+        enum STIndex: Int {
+            case bool, int8, int16, int32, int64
+            case uint8, uint16, uint32, uint64, double
+            case float, string, data, date
+        }
+
+        let testData = StructTest()
+        let dateNow = Date.now
+        let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType(testData)
+        structBuilder.append([true, Int8(1), Int16(2), Int32(3), Int64(4),
+                              UInt8(5), UInt16(6), UInt32(7), UInt64(8), Double(9.9),
+                              Float(10.10), "11", Data("12".utf8), dateNow])
+        structBuilder.append(nil)
+        structBuilder.append([true, Int8(13), Int16(14), Int32(15), Int64(16),
+                              UInt8(17), UInt16(18), UInt32(19), UInt64(20), Double(21.21),
+                              Float(22.22), "23", Data("24".utf8), dateNow])
+        XCTAssertEqual(structBuilder.length, 3)
+        let structArray = try structBuilder.finish()
+        XCTAssertEqual(structArray.length, 3)
+        XCTAssertNil(structArray[1])
+        XCTAssertEqual(structArray.arrowFields![0].length, 3)
+        XCTAssertNil(structArray.arrowFields![0].array.asAny(1))
+        XCTAssertEqual(structArray[0]![STIndex.bool.rawValue] as? Bool, true)
+        XCTAssertEqual(structArray[0]![STIndex.int8.rawValue] as? Int8, 1)
+        XCTAssertEqual(structArray[0]![STIndex.int16.rawValue] as? Int16, 2)
+        XCTAssertEqual(structArray[0]![STIndex.int32.rawValue] as? Int32, 3)
+        XCTAssertEqual(structArray[0]![STIndex.int64.rawValue] as? Int64, 4)
+        XCTAssertEqual(structArray[0]![STIndex.uint8.rawValue] as? UInt8, 5)
+        XCTAssertEqual(structArray[0]![STIndex.uint16.rawValue] as? UInt16, 6)
+        XCTAssertEqual(structArray[0]![STIndex.uint32.rawValue] as? UInt32, 7)
+        XCTAssertEqual(structArray[0]![STIndex.uint64.rawValue] as? UInt64, 8)
+        XCTAssertEqual(structArray[0]![STIndex.double.rawValue] as? Double, 9.9)
+        XCTAssertEqual(structArray[0]![STIndex.float.rawValue] as? Float, 10.10)
+        XCTAssertEqual(structArray[2]![STIndex.string.rawValue] as? String, "23")
+        XCTAssertEqual(
+            String(decoding: (structArray[0]![STIndex.data.rawValue] as? Data)!, as: UTF8.self), "12")
+        let dateFormatter = DateFormatter()
+        dateFormatter.timeStyle = .full
+        XCTAssertTrue(
+            dateFormatter.string(from: (structArray[0]![STIndex.date.rawValue] as? Date)!) ==
+            dateFormatter.string(from: dateNow))
+    }
+
     func checkHolderForType(_ checkType: ArrowType) throws {
         let buffers = [ArrowBuffer(length: 0, capacity: 0,
                                    rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)),
diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift
index a0c4e111e4360..b8f389a5e0089 100644
--- a/swift/Arrow/Tests/ArrowTests/CodableTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift
@@ -227,7 +227,7 @@ final class CodableTests: XCTestCase { // swiftlint:disable:this type_body_length
     }
 
     func getArrayValue<T>(_ rb: RecordBatch, colIndex: Int, rowIndex: UInt) -> T? {
-        let anyArray = rb.columns[colIndex].array as! AnyArray // swiftlint:disable:this force_cast
+        let anyArray = rb.columns[colIndex].array
         return anyArray.asAny(UInt(rowIndex)) as? T
     }
 
@@ -324,7 +324,7 @@ final class CodableTests: XCTestCase { // swiftlint:disable:this type_body_length
         XCTAssertEqual(rb.columns[0].type.id, ArrowTypeId.int32)
         for index in 0..<100 {
             if index == 10 {
-                let anyArray = rb.columns[0].array as! AnyArray // swiftlint:disable:this force_cast
+                let anyArray = rb.columns[0].array
                 XCTAssertNil(anyArray.asAny(UInt(index)))
             } else {
                 XCTAssertEqual(getArrayValue(rb, colIndex: 0, rowIndex: UInt(index)), Int32(index))