From 11f18a26416b7fd5690b7176612dcdb8045e0efb Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 10 Aug 2022 11:19:28 +0900 Subject: [PATCH 1/2] ARROW-17368: [C++] Add support for installing utilities --- cpp/src/arrow/ipc/CMakeLists.txt | 4 +++ .../apache-arrow/debian/control.in | 26 +++++++++++++++++ .../linux-packages/apache-arrow/debian/rules | 1 + .../apache-arrow/yum/arrow.spec.in | 29 +++++++++++++++++++ dev/tasks/tasks.yml | 12 ++++++++ 5 files changed, 72 insertions(+) diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 495018ec09651..4b62bdc3a77f2 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -63,6 +63,10 @@ if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) add_executable(arrow-stream-to-file stream_to_file.cc) target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) + if(ARROW_BUILD_UTILITIES) + install(TARGETS arrow-file-to-stream arrow-stream-to-file ${INSTALL_IS_OPTIONAL} + DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() if(ARROW_BUILD_INTEGRATION) add_dependencies(arrow-integration arrow-file-to-stream) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 12729cbf9f84e..1e479238a98c6 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -53,6 +53,19 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files. +Package: arrow-tools +Section: utils +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow1000 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides tools. + Package: libarrow-cuda1000 Section: libs Architecture: @CUDA_ARCHITECTURE@ @@ -310,6 +323,19 @@ Description: Apache Parquet is a columnar storage format . This package provides C++ library files to process Apache Parquet format. +Package: parquet-tools +Section: utils +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libparquet1000 (= ${binary:Version}) +Description: Apache Parquet is a columnar storage format + . + This package provides tools. + Package: libparquet-dev Section: libdevel Architecture: any diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 2f3b694d6806c..fdea8877ad25e 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -31,6 +31,7 @@ override_dh_auto_configure: --builddirectory=cpp_build \ --buildsystem=cmake+ninja \ -- \ + -DARROW_BUILD_UTILITIES=ON \ -DARROW_COMPUTE=ON \ -DARROW_CSV=ON \ -DARROW_CUDA=$${ARROW_CUDA} \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 5fe63284d0bc6..c2d05ef91a880 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -153,6 +153,7 @@ Apache Arrow is a data processing library for analysis. cpp_build_type=release cd cpp %arrow_cmake \ + -DARROW_BUILD_UTILITIES=ON \ -DARROW_CSV=ON \ -DARROW_DATASET=ON \ %if %{use_flight} @@ -263,6 +264,20 @@ This package contains the libraries for Apache Arrow C++. %license LICENSE.txt NOTICE.txt %{_libdir}/libarrow.so.* +%package tools +Summary: Tools for Apache Arrow C++ +License: Apache-2.0 +Requires: %{name}%{major_version}-libs = %{version}-%{release} + +%description tools +Tools for Apache Arrow C++. + +%files tools +%defattr(-,root,root,-) +%doc README.md +%license LICENSE.txt NOTICE.txt +%{_bindir}/arrow-* + %package devel Summary: Libraries and header files for Apache Arrow C++ License: Apache-2.0 @@ -633,6 +648,20 @@ This package contains the libraries for Apache Parquet C++. %license LICENSE.txt NOTICE.txt %{_libdir}/libparquet.so.* +%package -n parquet-tools +Summary: Tools for Apache Parquet C++ +License: Apache-2.0 +Requires: parquet%{major_version}-libs = %{version}-%{release} + +%description -n parquet-tools +Tools for Apache Parquet C++. + +%files -n parquet-tools +%defattr(-,root,root,-) +%doc README.md +%license LICENSE.txt NOTICE.txt +%{_bindir}/parquet-* + %package -n parquet-devel Summary: Libraries and header files for Apache Parquet C++ License: Apache-2.0 diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 00d41ec008e71..61f65910e7814 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -584,6 +584,7 @@ tasks: - .dsc - .orig.tar.gz artifacts: + - arrow-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz - apache-arrow-apt-source_{no_rc_version}-1.dsc @@ -647,6 +648,7 @@ tasks: - libparquet-glib1000_{no_rc_version}-1_[a-z0-9]+.deb - libparquet1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - libparquet1000_{no_rc_version}-1_[a-z0-9]+.deb + - parquet-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb @@ -755,6 +757,11 @@ tasks: {% endif %} {% if architecture == "amd64" %} - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm + {% endif %} + - arrow-tools-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-tools-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if not is_rhel7_based %} + - arrow-tools-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} {% if not is_rhel7_based and architecture == "amd64" %} - gandiva-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -776,6 +783,11 @@ tasks: - parquet[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} - parquet[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - parquet-tools-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - parquet-tools-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if not is_rhel7_based %} + - parquet-tools-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} - plasma-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - plasma-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - plasma-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm From 1b6deb6d6a67c3f84917f50ac6d45ad2ac81c3e6 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 12 Aug 2022 14:03:27 +0900 Subject: [PATCH 2/2] Enable PARQUET_BUILD_EXECUTABLES --- dev/tasks/linux-packages/apache-arrow/debian/rules | 1 + dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 1 + 2 files changed, 2 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index fdea8877ad25e..3889ddc42ed33 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -61,6 +61,7 @@ override_dh_auto_configure: -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCMAKE_UNITY_BUILD=ON \ -DCUDAToolkit_ROOT=/usr \ + -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ -DPythonInterp_FIND_VERSION=ON \ -DPythonInterp_FIND_VERSION_MAJOR=3 diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index c2d05ef91a880..96c4fcf4f4876 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -189,6 +189,7 @@ cd cpp -DARROW_WITH_ZLIB=ON \ -DARROW_WITH_ZSTD=ON \ -DCMAKE_BUILD_TYPE=$cpp_build_type \ + -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ %if %{use_python} -DPythonInterp_FIND_VERSION=ON \