Skip to content

Commit

Permalink
Merge branch 'main' into more-comment-in-compute-row
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Sep 2, 2024
2 parents a71cdd2 + 4f91c8f commit 34ec45f
Show file tree
Hide file tree
Showing 64 changed files with 570 additions and 216 deletions.
4 changes: 2 additions & 2 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ CUDA=11.2.2
DASK=latest
DOTNET=8.0
GCC_VERSION=""
GO=1.21.8
STATICCHECK=v0.4.7
GO=1.22.6
STATICCHECK=v0.5.1
HDFS=3.2.1
JDK=11
KARTOTHEK=latest
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -465,15 +465,17 @@ jobs:
chmod +x /usr/local/bin/minio.exe
- name: Set up Python
uses: actions/[email protected]
id: python-install
with:
python-version: 3.9
- name: Install Google Cloud Storage Testbench
shell: bash
shell: msys2 {0}
env:
PIPX_BIN_DIR: /usr/local/bin
PIPX_PYTHON: ${{ steps.python-install.outputs.python-path }}
run: |
ci/scripts/install_gcs_testbench.sh default
echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV
- name: Test
shell: msys2 {0}
run: |
PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}"
ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build"
22 changes: 11 additions & 11 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ jobs:
{
"arch-label": "AMD64",
"arch": "amd64",
"go": "1.21",
"go": "1.22",
"runs-on": "ubuntu-latest"
},
{
"arch-label": "AMD64",
"arch": "amd64",
"go": "1.22",
"go": "1.23",
"runs-on": "ubuntu-latest"
}
JSON
Expand All @@ -78,13 +78,13 @@ jobs:
{
"arch-label": "ARM64",
"arch": "arm64v8",
"go": "1.21",
"go": "1.22",
"runs-on": ["self-hosted", "arm", "linux"]
},
{
"arch-label": "ARM64",
"arch": "arm64v8",
"go": "1.22",
"go": "1.23",
"runs-on": ["self-hosted", "arm", "linux"]
}
JSON
Expand Down Expand Up @@ -197,7 +197,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ['1.21', '1.22']
go: ['1.22', '1.23']
env:
GO: ${{ matrix.go }}
steps:
Expand Down Expand Up @@ -238,7 +238,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ['1.21', '1.22']
go: ['1.22', '1.23']
env:
GO: ${{ matrix.go }}
steps:
Expand Down Expand Up @@ -277,7 +277,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ['1.21', '1.22']
go: ['1.22', '1.23']
steps:
- name: Checkout Arrow
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
Expand Down Expand Up @@ -310,7 +310,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ['1.21', '1.22']
go: ['1.22', '1.23']
steps:
- name: Checkout Arrow
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
Expand All @@ -323,7 +323,7 @@ jobs:
go-version: ${{ matrix.go }}
cache: true
cache-dependency-path: go/go.sum
- name: Install staticcheck
- name: Install staticcheck
run: |
. .env
go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK}
Expand Down Expand Up @@ -368,7 +368,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ['1.21', '1.22']
go: ['1.22', '1.23']
env:
ARROW_GO_TESTCGO: "1"
steps:
Expand Down Expand Up @@ -439,7 +439,7 @@ jobs:
ci/scripts/msys2_setup.sh cgo
- name: Get required Go version
run: |
(. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV
(. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV
- name: Update CGO Env vars
shell: msys2 {0}
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ jobs:
ARROW_BUILD_TESTS: OFF
PYARROW_TEST_LARGE_MEMORY: ON
# Current oldest supported version according to https://endoflife.date/macos
MACOSX_DEPLOYMENT_TARGET: 10.15
MACOSX_DEPLOYMENT_TARGET: 12.0
steps:
- name: Checkout Arrow
uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ only_commits:
- appveyor.yml
- ci/appveyor*
- ci/conda*
- ci/scripts/*.bat
- cpp/
- format/
- python/
Expand Down
2 changes: 2 additions & 0 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ set ARROW_CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON
set ARROW_CXXFLAGS=/WX /MP

@rem Install GCS testbench
set PIPX_BIN_DIR=C:\Windows\
call %CD%\ci\scripts\install_gcs_testbench.bat
storage-testbench -h || exit /B

@rem
@rem Build and test Arrow C++ libraries (including Parquet)
Expand Down
12 changes: 7 additions & 5 deletions ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ RUN mamba install -q -y \
valgrind && \
mamba clean --all

# We want to install the GCS testbench using the Conda base environment's Python,
# because the test environment's Python may later change.
ENV PIPX_PYTHON=/opt/conda/bin/python3
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
# be on the path for the tests to run.
# be on the path for the tests to run.
ENV PATH=/opt/conda/envs/arrow/bin:$PATH

COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_azurite.sh

# We want to install the GCS testbench using the same Python binary that the Conda code will use.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

Expand Down
2 changes: 1 addition & 1 deletion ci/docker/conda-integration.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ARG maven=3.8.7
ARG node=16
ARG yarn=1.22
ARG jdk=11
ARG go=1.21.8
ARG go=1.22.6

# Install Archery and integration dependencies
COPY ci/conda_env_archery.txt /arrow/ci/
Expand Down
5 changes: 0 additions & 5 deletions ci/docker/conda-python.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ RUN mamba install -q -y \
nomkl && \
mamba clean --all

# XXX The GCS testbench was already installed in conda-cpp.dockerfile,
# but we changed the installed Python version above, so we need to reinstall it.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

ENV ARROW_ACERO=ON \
ARROW_BUILD_STATIC=OFF \
ARROW_BUILD_TESTS=OFF \
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/debian-12-go.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
# under the License.

ARG arch=amd64
ARG go=1.21
ARG staticcheck=v0.4.7
ARG go=1.22
ARG staticcheck=v0.5.1
FROM ${arch}/golang:${go}-bookworm

# FROM collects all the args, get back the staticcheck version arg
Expand Down
27 changes: 19 additions & 8 deletions ci/docker/python-wheel-windows-test-vs2019.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,27 @@ RUN setx path "%path%;C:\Program Files\Git\usr\bin"
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
rm -rf Python*

# Install the GCS testbench using a well-known Python version.
# NOTE: cannot use pipx's `--fetch-missing-python` because of
# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves.
RUN choco install -r -y --pre --no-progress python --version=3.11.9
ENV PIPX_BIN_DIR=C:\\Windows\\
ENV PIPX_PYTHON="C:\Python311\python.exe"
COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/
RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \
storage-testbench -h

# Define the full version number; otherwise choco falls back to patch number 0 (3.8 => 3.8.0)
ARG python=3.8
RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \
(if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \
(if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
(if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \
(if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts")
RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10") & \
(if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \
(if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \
(if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4") & \
(if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1")

# Install the requested Python version, plus archiver to extract xz archives
RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% & \
python -m pip install --no-cache-dir -U pip setuptools & \
RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \
choco install --no-progress -r -y archiver

ENV PYTHON=$python
1 change: 1 addition & 0 deletions ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
python3-venv \
tzdata \
wget && \
apt-get clean && \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
python3-venv \
tzdata \
wget && \
apt-get clean && \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-24.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
python3-venv \
tzdata \
tzdata-legacy \
wget && \
Expand Down
13 changes: 11 additions & 2 deletions ci/scripts/install_gcs_testbench.bat
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,18 @@

@echo on

set GCS_TESTBENCH_VERSION="v0.36.0"
set GCS_TESTBENCH_VERSION="v0.40.0"

set PIPX_FLAGS=--verbose
if NOT "%PIPX_PYTHON%"=="" (
set PIPX_FLAGS=--python %PIPX_PYTHON% %PIPX_FLAGS%
)

python -m pip install -U pipx || exit /B 1

@REM Install GCS testbench %GCS_TESTBENCH_VERSION%
python -m pip install ^
pipx install %PIPX_FLAGS% ^
"https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^
|| exit /B 1

pipx list --verbose
20 changes: 12 additions & 8 deletions ci/scripts/install_gcs_testbench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.

set -e
set -ex

if [ "$#" -ne 1 ]; then
echo "Usage: $0 <storage-testbench version>"
Expand All @@ -34,19 +34,23 @@ case "$(uname -m)" in
;;
esac

# On newer Pythons, installing into the system environment will fail, so override that
export PIP_BREAK_SYSTEM_PACKAGES=1

version=$1
if [[ "${version}" -eq "default" ]]; then
version="v0.39.0"
# Latest versions of Testbench require newer setuptools
python3 -m pip install --upgrade setuptools
fi

: ${PIPX_PYTHON:=$(which python3)}

export PIP_BREAK_SYSTEM_PACKAGES=1
${PIPX_PYTHON} -m pip install -U pipx

# This script is run with PYTHON_VERSION undefined in some places,
# but those only use older Pythons.
if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
python3 -m pip install \
"https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
pipx_flags=--verbose
if [[ $(id -un) == "root" ]]; then
# Install globally as /root/.local/bin is typically not in $PATH
pipx_flags="${pipx_flags} --global"
fi
${PIPX_PYTHON} -m pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
fi
2 changes: 1 addition & 1 deletion ci/scripts/python_wheel_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ rm -rf ${source_dir}/python/pyarrow/*.so.*

echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ==="
export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}"
export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.15}
export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-12.0}
export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}

if [ $arch = "arm64" ]; then
Expand Down
40 changes: 22 additions & 18 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,32 @@ set PYARROW_TEST_TENSORFLOW=ON
set ARROW_TEST_DATA=C:\arrow\testing\data
set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data

@REM Install testing dependencies
pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
@REM List installed Pythons
py -0p

set PYTHON_CMD=py -%PYTHON%

@REM Install GCS testbench
call "C:\arrow\ci\scripts\install_gcs_testbench.bat"
%PYTHON_CMD% -m pip install -U pip setuptools || exit /B 1

@REM Install testing dependencies
%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1

@REM Install the built wheels
python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1
%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1

@REM Test that the modules are importable
python -c "import pyarrow" || exit /B 1
python -c "import pyarrow._gcsfs" || exit /B 1
python -c "import pyarrow._hdfs" || exit /B 1
python -c "import pyarrow._s3fs" || exit /B 1
python -c "import pyarrow.csv" || exit /B 1
python -c "import pyarrow.dataset" || exit /B 1
python -c "import pyarrow.flight" || exit /B 1
python -c "import pyarrow.fs" || exit /B 1
python -c "import pyarrow.json" || exit /B 1
python -c "import pyarrow.orc" || exit /B 1
python -c "import pyarrow.parquet" || exit /B 1
python -c "import pyarrow.substrait" || exit /B 1
%PYTHON_CMD% -c "import pyarrow" || exit /B 1
%PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow._s3fs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.csv" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.dataset" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.flight" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.fs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.json" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.orc" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1

@rem Download IANA Timezone Database for ORC C++
curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B
Expand All @@ -67,4 +71,4 @@ arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata
set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo

@REM Execute unittest
pytest -r s --pyargs pyarrow || exit /B 1
%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1
Loading

0 comments on commit 34ec45f

Please sign in to comment.