From 2308e40e6f8050ead064b0d672e94e0662543dc0 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 29 Feb 2024 20:20:36 -0600 Subject: [PATCH] GH-40212: [R][CI] Add a C++ with gcc 14 build (#40244) This is an attempt to make sure we are catching things like the issue we ran into in https://github.com/apache/arrow/issues/40009 in CI so that we could confirm that we don't run into this in the future. CRAN does runs using pre-release compilers, and we've hit this a time or two. We can wait for them to come and tell us we need to move in order to stay up, but it would be nice if we could detect this ourselves. And more importantly: it gives us a hopefully easier way to replicate the error and confirm we've fixed it so that we can have confidence when we submit. ``` /tmp/RtmpLtR2pg/R.INSTALL1d415a4f31ad3b/arrow/tools/cpp/src/arrow/filesystem/util_internal.cc:143:7: error: no matching function for call to 'find' 143 | if (std::find(supported_schemes.begin(), supported_schemes.end(), scheme) == | ^~~~~~~~~ /usr/bin/../lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/bits/streambuf_iterator.h:435:5: note: candidate template ignored: could not match 'istreambuf_iterator' against '__normal_iterator' 435 | find(istreambuf_iterator<_CharT> __first, | ^ 1 error generated. ``` https://github.com/apache/arrow/pull/40244#issuecomment-1968156808 is a run before our fix showing the same failure.
I've also downloaded + saved the log from CRAN since it will be overwritten soon now that we have a new release up: [Install log for 'arrow' with clang dev.txt](https://github.com/apache/arrow/files/14407630/Install.log.for.arrow.with.clang.dev.txt) * GitHub Issue: #40212 Authored-by: Jonathan Keane Signed-off-by: Sutou Kouhei --- ci/docker/ubuntu-24.04-cpp.dockerfile | 204 ++++++++++++++++++++++++++ ci/scripts/install_gcs_testbench.sh | 3 + dev/tasks/tasks.yml | 16 +- docker-compose.yml | 2 +- 4 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 ci/docker/ubuntu-24.04-cpp.dockerfile diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile new file mode 100644 index 0000000000000..d56895a792f7c --- /dev/null +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -0,0 +1,204 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base=amd64/ubuntu:24.04 +FROM ${base} + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. +ARG clang_tools +ARG llvm +RUN latest_system_llvm=14 && \ + if [ ${llvm} -gt ${latest_system_llvm} -o \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + lsb-release \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + code_name=$(lsb_release --codename --short) && \ + if [ ${llvm} -gt 10 ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list; \ + fi && \ + if [ ${clang_tools} -ne ${llvm} -a \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi; \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + curl \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgmock-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + libidn2-dev \ + libkrb5-dev \ + libldap-dev \ + 
liblz4-dev \ + libnghttp2-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libpsl-dev \ + libre2-dev \ + librtmp-dev \ + libsnappy-dev \ + libsqlite3-dev \ + libssh-dev \ + libssh2-1-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libxml2-dev \ + libzstd-dev \ + make \ + ninja-build \ + nlohmann-json3-dev \ + npm \ + pkg-config \ + protobuf-compiler \ + protobuf-compiler-grpc \ + python3-dev \ + python3-pip \ + python3-venv \ + rapidjson-dev \ + rsync \ + tzdata \ + tzdata-legacy \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +ARG gcc_version="" +RUN if [ "${gcc_version}" = "" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++ \ + gcc; \ + else \ + if [ "${gcc_version}" -gt "12" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/volatile; \ + fi; \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++-${gcc_version} \ + gcc-${gcc_version} && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \ + update-alternatives --install \ + /usr/bin/$(uname --machine)-linux-gnu-gcc \ + $(uname --machine)-linux-gnu-gcc \ + /usr/bin/$(uname --machine)-linux-gnu-gcc-${gcc_version} 100 && \ + update-alternatives --install \ + /usr/bin/$(uname --machine)-linux-gnu-g++ \ + $(uname --machine)-linux-gnu-g++ \ + /usr/bin/$(uname --machine)-linux-gnu-g++-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --set cc /usr/bin/gcc && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \ + update-alternatives --set c++ /usr/bin/g++; \ + fi + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh latest /usr/local + +COPY 
ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + +COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin + +# Prioritize system packages and local installation +ENV ARROW_ACERO=ON \ + ARROW_AZURE=ON \ + ARROW_BUILD_STATIC=ON \ + ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=ON \ + ARROW_FLIGHT_SQL=ON \ + ARROW_GANDIVA=ON \ + ARROW_GCS=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_S3=ON \ + ARROW_SUBSTRAIT=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_OPENTELEMETRY=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \ + AWSSDK_SOURCE=BUNDLED \ + Azure_SOURCE=BUNDLED \ + google_cloud_cpp_storage_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 \ + xsimd_SOURCE=BUNDLED diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 0aa6d20975b49..2090290c99322 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -34,6 +34,9 @@ case "$(uname -m)" in ;; esac +# On newer pythons install into the system will fail, so override that +export PIP_BREAK_SYSTEM_PACKAGES=1 + version=$1 if [[ "${version}" -eq "default" ]]; then version="v0.39.0" diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index cfc333c6b22f5..56c93c095c870 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -231,7 +231,7 @@ tasks: # # 
* On conda-forge the `pyarrow` and `arrow-cpp` packages are built in # the same feedstock as the dependency matrix is the same for them as - # Python and the OS are the main dimension. + # Python and the OS are the main dimension. # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically # generated and to be synced regularly from the feedstock. We have no way # yet to generate them inside the arrow repository automatically. @@ -1055,7 +1055,7 @@ tasks: params: image: conda-cpp-valgrind -{% for ubuntu_version in ["20.04", "22.04"] %} +{% for ubuntu_version in ["20.04", "22.04", "24.04"] %} test-ubuntu-{{ ubuntu_version }}-cpp: ci: github template: docker-tests/github.linux.yml @@ -1073,6 +1073,18 @@ tasks: UBUNTU: 20.04 image: ubuntu-cpp-bundled + test-ubuntu-24.04-cpp-gcc-14: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: "24.04" + GCC_VERSION: 14 + # rapidjson 1.1.0 has an error caught by gcc 14. + # https://github.com/Tencent/rapidjson/issues/718 + flags: -e CC=gcc-14 -e CXX=g++-14 -e RapidJSON_SOURCE=BUNDLED + image: ubuntu-cpp + test-skyhook-integration: ci: github template: docker-tests/github.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index aec685775aab1..26a42fa13947b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -373,7 +373,7 @@ services: # docker-compose run --rm ubuntu-cpp # Parameters: # ARCH: amd64, arm64v8, s390x, ... - # UBUNTU: 20.04, 22.04 + # UBUNTU: 20.04, 22.04, 24.04 image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp build: context: .