From 10ef8ef537c7859e50316a24a740331fe6721a34 Mon Sep 17 00:00:00 2001
From: Jonathan Keane
Date: Fri, 23 Feb 2024 11:36:43 -0600
Subject: [PATCH 1/3] remove r_org variable

---
Review notes (text between "---" and the diffstat is ignored by git am):

 * What this patch does: adds ci/docker/ubuntu-24.04-cpp.dockerfile, exports
   PIP_BREAK_SYSTEM_PACKAGES=1 in ci/scripts/install_gcs_testbench.sh, adds
   "24.04" to the test-ubuntu-*-cpp loop plus a test-ubuntu-24.04-cpp-gcc-14
   task in dev/tasks/tasks.yml, and lists 24.04 in the docker-compose.yml
   comment for the ubuntu-cpp service.
 * NOTE(review): the subject line does not describe the diff below; consider
   rewording it before merging.
 * latest_system_llvm is 18 and the ubuntu-toolchain-r PPA is only added for
   gcc_version > 14, because Ubuntu 24.04 itself packages LLVM 18 and GCC 14.
   With the thresholds used by the older images (14 and 12), a request for
   LLVM 15-18 would add the apt.llvm.org repository and a request for GCC 13
   or 14 would add the PPA even though the distribution already provides them.
   The blob ids on the "index" lines are those of the original submission and
   were not recomputed.
 * Build args clang_tools and llvm have no defaults; the unquoted numeric
   tests in the LLVM step need both to be set by the caller.
 * Not changed here: "apt-key add" (deprecated in current apt) in the
   apt.llvm.org branch, and the context line
   `[[ "${version}" -eq "default" ]]` in install_gcs_testbench.sh, which uses
   an arithmetic operator on strings.

 ci/docker/ubuntu-24.04-cpp.dockerfile | 208 ++++++++++++++++++++++++++
 ci/scripts/install_gcs_testbench.sh   |   3 +
 dev/tasks/tasks.yml                   |  16 +-
 docker-compose.yml                    |   2 +-
 4 files changed, 226 insertions(+), 3 deletions(-)
 create mode 100644 ci/docker/ubuntu-24.04-cpp.dockerfile

diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile
new file mode 100644
index 0000000000000..a68c5ea8b2cb4
--- /dev/null
+++ b/ci/docker/ubuntu-24.04-cpp.dockerfile
@@ -0,0 +1,208 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:24.04
+FROM ${base}
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+        debconf-set-selections
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+ARG clang_tools
+ARG llvm
+RUN latest_system_llvm=18 && \
+    if [ ${llvm} -gt ${latest_system_llvm} -o \
+         ${clang_tools} -gt ${latest_system_llvm} ]; then \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          apt-transport-https \
+          ca-certificates \
+          gnupg \
+          lsb-release \
+          wget && \
+      wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+      code_name=$(lsb_release --codename --short) && \
+      if [ ${llvm} -gt 10 ]; then \
+        echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \
+           /etc/apt/sources.list.d/llvm.list; \
+      fi && \
+      if [ ${clang_tools} -ne ${llvm} -a \
+           ${clang_tools} -gt ${latest_system_llvm} ]; then \
+        echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${clang_tools} main" > \
+           /etc/apt/sources.list.d/clang-tools.list; \
+      fi; \
+    fi && \
+    apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        clang-${clang_tools} \
+        clang-${llvm} \
+        clang-format-${clang_tools} \
+        clang-tidy-${clang_tools} \
+        llvm-${llvm}-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        autoconf \
+        ca-certificates \
+        ccache \
+        cmake \
+        curl \
+        gdb \
+        git \
+        libbenchmark-dev \
+        libboost-filesystem-dev \
+        libboost-system-dev \
+        libbrotli-dev \
+        libbz2-dev \
+        libc-ares-dev \
+        libcurl4-openssl-dev \
+        libgflags-dev \
+        libgmock-dev \
+        libgoogle-glog-dev \
+        libgrpc++-dev \
+        libidn2-dev \
+        libkrb5-dev \
+        libldap-dev \
+        liblz4-dev \
+        libnghttp2-dev \
+        libprotobuf-dev \
+        libprotoc-dev \
+        libpsl-dev \
+        libre2-dev \
+        librtmp-dev \
+        libsnappy-dev \
+        libsqlite3-dev \
+        libssh-dev \
+        libssh2-1-dev \
+        libssl-dev \
+        libthrift-dev \
+        libutf8proc-dev \
+        libxml2-dev \
+        libzstd-dev \
+        make \
+        ninja-build \
+        nlohmann-json3-dev \
+        npm \
+        pkg-config \
+        protobuf-compiler \
+        protobuf-compiler-grpc \
+        python3-dev \
+        python3-pip \
+        python3-venv \
+        rapidjson-dev \
+        rsync \
+        tzdata \
+        wget && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+ARG gcc_version=""
+RUN if [ "${gcc_version}" = "" ]; then \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          g++ \
+          gcc; \
+    else \
+      if [ "${gcc_version}" -gt "14" ]; then \
+          apt-get update -y -q && \
+          apt-get install -y -q --no-install-recommends software-properties-common && \
+          add-apt-repository ppa:ubuntu-toolchain-r/volatile; \
+      fi; \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          g++-${gcc_version} \
+          gcc-${gcc_version} && \
+      update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \
+      update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \
+      update-alternatives --install \
+        /usr/bin/$(uname --machine)-linux-gnu-gcc \
+        $(uname --machine)-linux-gnu-gcc \
+        /usr/bin/$(uname --machine)-linux-gnu-gcc-${gcc_version} 100 && \
+      update-alternatives --install \
+        /usr/bin/$(uname --machine)-linux-gnu-g++ \
+        $(uname --machine)-linux-gnu-g++ \
+        /usr/bin/$(uname --machine)-linux-gnu-g++-${gcc_version} 100 && \
+      update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \
+      update-alternatives --set cc /usr/bin/gcc && \
+      update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \
+      update-alternatives --set c++ /usr/bin/g++; \
+    fi
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
+
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh default
+
+COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_azurite.sh
+
+COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - Abseil is old
+# - libc-ares-dev does not install CMake config files
+ENV absl_SOURCE=BUNDLED \
+    ARROW_ACERO=ON \
+    ARROW_AZURE=ON \
+    ARROW_BUILD_STATIC=ON \
+    ARROW_BUILD_TESTS=ON \
+    ARROW_DEPENDENCY_SOURCE=SYSTEM \
+    ARROW_DATASET=ON \
+    ARROW_FLIGHT=ON \
+    ARROW_FLIGHT_SQL=ON \
+    ARROW_GANDIVA=ON \
+    ARROW_GCS=ON \
+    ARROW_HDFS=ON \
+    ARROW_HOME=/usr/local \
+    ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_NO_DEPRECATED_API=ON \
+    ARROW_ORC=ON \
+    ARROW_PARQUET=ON \
+    ARROW_S3=ON \
+    ARROW_SUBSTRAIT=ON \
+    ARROW_USE_ASAN=OFF \
+    ARROW_USE_CCACHE=ON \
+    ARROW_USE_UBSAN=OFF \
+    ARROW_WITH_BROTLI=ON \
+    ARROW_WITH_BZ2=ON \
+    ARROW_WITH_LZ4=ON \
+    ARROW_WITH_OPENTELEMETRY=ON \
+    ARROW_WITH_SNAPPY=ON \
+    ARROW_WITH_ZLIB=ON \
+    ARROW_WITH_ZSTD=ON \
+    ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \
+    AWSSDK_SOURCE=BUNDLED \
+    Azure_SOURCE=BUNDLED \
+    google_cloud_cpp_storage_SOURCE=BUNDLED \
+    ORC_SOURCE=BUNDLED \
+    PARQUET_BUILD_EXAMPLES=ON \
+    PARQUET_BUILD_EXECUTABLES=ON \
+    PATH=/usr/lib/ccache/:$PATH \
+    PYTHON=python3 \
+    xsimd_SOURCE=BUNDLED
diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh
index 0aa6d20975b49..2090290c99322 100755
--- a/ci/scripts/install_gcs_testbench.sh
+++ b/ci/scripts/install_gcs_testbench.sh
@@ -34,6 +34,9 @@ case "$(uname -m)" in
     ;;
 esac
 
+# On newer pythons install into the system will fail, so override that
+export PIP_BREAK_SYSTEM_PACKAGES=1
+
 version=$1
 if [[ "${version}" -eq "default" ]]; then
   version="v0.39.0"
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index cfc333c6b22f5..56c93c095c870 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -231,7 +231,7 @@ tasks:
   #
   # * On conda-forge the `pyarrow` and `arrow-cpp` packages are built in
   #   the same feedstock as the dependency matrix is the same for them as
-  #   Python and the OS are the main dimension. 
+  #   Python and the OS are the main dimension.
   # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically
   #   generated and to be synced regularly from the feedstock. We have no way
   #   yet to generate them inside the arrow repository automatically.
@@ -1055,7 +1055,7 @@ tasks:
     params:
       image: conda-cpp-valgrind
 
-{% for ubuntu_version in ["20.04", "22.04"] %}
+{% for ubuntu_version in ["20.04", "22.04", "24.04"] %}
   test-ubuntu-{{ ubuntu_version }}-cpp:
     ci: github
     template: docker-tests/github.linux.yml
@@ -1073,6 +1073,18 @@ tasks:
         UBUNTU: 20.04
       image: ubuntu-cpp-bundled
 
+  test-ubuntu-24.04-cpp-gcc-14:
+    ci: github
+    template: docker-tests/github.linux.yml
+    params:
+      env:
+        UBUNTU: "24.04"
+        GCC_VERSION: 14
+      # rapidjson 1.1.0 has an error caught by gcc 14.
+      # https://github.com/Tencent/rapidjson/issues/718
+      flags: -e CC=gcc-14 -e CXX=g++-14 -e RapidJSON_SOURCE=BUNDLED
+      image: ubuntu-cpp
+
   test-skyhook-integration:
     ci: github
     template: docker-tests/github.linux.yml
diff --git a/docker-compose.yml b/docker-compose.yml
index aec685775aab1..26a42fa13947b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -373,7 +373,7 @@ services:
     #   docker-compose run --rm ubuntu-cpp
     # Parameters:
     #   ARCH: amd64, arm64v8, s390x, ...
-    #   UBUNTU: 20.04, 22.04
+    #   UBUNTU: 20.04, 22.04, 24.04
     image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
     build:
       context: .

From 4efaa60ebe3377e7c0060f59b639568899f49f59 Mon Sep 17 00:00:00 2001
From: Jonathan Keane
Date: Wed, 28 Feb 2024 22:40:10 -0600
Subject: [PATCH 2/3] tzdata-legacy

---
Review notes (text between "---" and the diffstat is ignored by git am):

 * Adds the tzdata-legacy package to the apt install list of the C++
   toolchain step, directly after tzdata.
 * NOTE(review): presumably needed because Ubuntu 24.04 ships the legacy
   time zone names in a separate package - confirm and say so in the
   commit message.

 ci/docker/ubuntu-24.04-cpp.dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile
index a68c5ea8b2cb4..38864b8186042 100644
--- a/ci/docker/ubuntu-24.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-24.04-cpp.dockerfile
@@ -114,6 +114,7 @@ RUN apt-get update -y -q && \
         rapidjson-dev \
         rsync \
         tzdata \
+        tzdata-legacy \
         wget && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists*

From 7659c2c3e43bfd92666dfaf89920bdc6f4ba3284 Mon Sep 17 00:00:00 2001
From: Jonathan Keane
Date: Thu, 29 Feb 2024 18:00:11 -0600
Subject: [PATCH 3/3] Remove old abseil / libc-ares-dev

---
Review notes (text between "---" and the diffstat is ignored by git am):

 * Removes the absl_SOURCE=BUNDLED override and the comment explaining why
   Abseil and c-ares were downloaded; the ENV instruction now starts at
   ARROW_ACERO=ON, so Abseil follows ARROW_DEPENDENCY_SOURCE=SYSTEM.
 * Only the comment about libc-ares-dev is removed; this hunk does not touch
   the apt package list.

 ci/docker/ubuntu-24.04-cpp.dockerfile | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile
index 38864b8186042..d56895a792f7c 100644
--- a/ci/docker/ubuntu-24.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-24.04-cpp.dockerfile
@@ -164,12 +164,7 @@ COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
 
 # Prioritize system packages and local installation
-# The following dependencies will be downloaded due to missing/invalid packages
-# provided by the distribution:
-# - Abseil is old
-# - libc-ares-dev does not install CMake config files
-ENV absl_SOURCE=BUNDLED \
-    ARROW_ACERO=ON \
+ENV ARROW_ACERO=ON \
     ARROW_AZURE=ON \
     ARROW_BUILD_STATIC=ON \
     ARROW_BUILD_TESTS=ON \