diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml deleted file mode 100644 index 9f5eed6a2..000000000 --- a/.github/workflows/ci-nightly.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: GraphAr C++ CI Nightly - -on: - schedule: - # The notifications for scheduled workflows are sent to the user who - # last modified the cron syntax in the workflow file. - # Trigger the workflow at 03:00(CST) every day. 
- - cron: '00 19 * * *' -jobs: - GraphAr-ubuntu-arrow-from-source: - if: ${{ github.ref == 'refs/heads/main' && github.repository == 'apache/incubator-graphar' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - name: Cache for ccache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }} - restore-keys: | - ${{ matrix.os }}-build-ccache- - - - name: Install dependencies - run: | - - sudo apt-get update -y - sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev - - - name: CMake - run: | - mkdir build - pushd build - cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON - popd - - - name: Build GraphAr - run: | - pushd build - make -j$(nproc) - make graphar-ccache-stats - popd - - - name: Test - run: | - cd build - export GAR_TEST_DATA=$PWD/../testing/ - make test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3018b8f53..e81b49e4b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,8 @@ concurrency: cancel-in-progress: true jobs: - GraphAr-ubuntu-arrow-installed: + ubuntu: + name: Ubuntu 22.04 C++ runs-on: ubuntu-latest env: GAR_TEST_DATA: ${{ github.workspace }}/testing/ @@ -143,7 +144,8 @@ jobs: ./graph_info_benchmark ./arrow_chunk_reader_benchmark - GraphAr-centos-arrow-installed: + centos: + name: CentOS 7 C++ runs-on: ubuntu-latest container: image: centos:7 @@ -184,3 +186,38 @@ jobs: cmake .. 
make -j$(nproc) popd + + macos: + name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} C++ + runs-on: macos-${{ matrix.macos-version }} + strategy: + fail-fast: false + matrix: + include: + - architecture: AMD64 + macos-version: "12" + - architecture: ARM64 + macos-version: "14" + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Install dependencies + run: | + brew bundle --file=cpp/Brewfile + + - name: Build GraphAr + working-directory: "cpp" + run: | + mkdir build + pushd build + cmake .. -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DBUILD_BENCHMARKS=ON + make -j$(nproc) + popd + + - name: Running Test + working-directory: "cpp/build" + run: | + export ASAN_OPTIONS=detect_leaks=0 + ctest --output-on-failure diff --git a/cpp/Brewfile b/cpp/Brewfile new file mode 100644 index 000000000..229975a77 --- /dev/null +++ b/cpp/Brewfile @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +brew "cmake" +brew "google-benchmark" +brew "apache-arrow" +brew "boost" +brew "doxygen" +brew "git" +brew "ccache" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34fd51eae..6b8431161 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -182,34 +182,26 @@ endmacro() # ------------------------------------------------------------------------------ # building or find third party library # ------------------------------------------------------------------------------ -find_package(Threads REQUIRED) -find_package(OpenSSL QUIET) -find_package(CURL REQUIRED) -if(OPENSSL_FOUND) - if(OPENSSL_VERSION LESS "1.1.0") - message(ERROR "The OpenSSL must be greater than or equal to 1.1.0, current version is ${OPENSSL_VERSION}") - endif() -endif() - # check if arrow is installed find_package(Arrow QUIET) +if (NOT ${Arrow_FOUND}) + message(FATAL_ERROR "apache-arrow is required, please install it and retry") +endif() find_package(ArrowDataset QUIET) -find_package(ArrowAcero QUIET) -find_package(Parquet QUIET) - -if (NOT Arrow_FOUND OR NOT ArrowDataset_FOUND OR NOT ArrowAcero_FOUND OR NOT Parquet_FOUND) - message(STATUS "Arrow is not installed, will build from source.") - set(BUILD_ARROW_FROM_SOURCE ON) -else() - message(STATUS "-- Found Arrow: ${Arrow_VERSION}") - set(BUILD_ARROW_FROM_SOURCE OFF) +if (NOT ${ArrowDataset_FOUND}) + message(FATAL_ERROR "apache-arrow-dataset is required, please install it and retry") endif() - -if(BUILD_ARROW_FROM_SOURCE) - include(apache-arrow) - build_arrow() +if (${Arrow_VERSION} VERSION_GREATER_EQUAL "12.0.0") + # ArrowAcero is available in Arrow 12.0.0 and later + find_package(ArrowAcero QUIET) + if (NOT ${ArrowAcero_FOUND}) + message(FATAL_ERROR "apache-arrow-acero is required, please install it and retry") + endif() +endif() +find_package(Parquet QUIET) +if (NOT ${Parquet_FOUND}) + message(FATAL_ERROR "parquet is required, please install it and retry") endif() - macro(get_target_location var target) if(TARGET ${target}) @@ 
-234,46 +226,18 @@ macro(build_graphar) $ ) target_include_directories(graphar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty) - if(BUILD_ARROW_FROM_SOURCE) - target_include_directories(graphar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) - endif() target_link_libraries(graphar PRIVATE Threads::Threads ${CMAKE_DL_LIBS}) if(APPLE) - if(BUILD_ARROW_FROM_SOURCE) - target_link_libraries(graphar PRIVATE -Wl,-force_load gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_DATASET_STATIC_LIB}" - "${GAR_ACERO_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") - else() - target_link_libraries(graphar PRIVATE -Wl,-force_load Arrow::arrow_shared - Parquet::parquet_shared - ArrowDataset::arrow_dataset_shared - ArrowAcero::arrow_acero_shared) - endif() + target_link_libraries(graphar PRIVATE -Wl,-force_load Arrow::arrow_shared + Parquet::parquet_shared + ArrowDataset::arrow_dataset_shared + ArrowAcero::arrow_acero_shared) else() - if(BUILD_ARROW_FROM_SOURCE) - target_link_libraries(graphar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_DATASET_STATIC_LIB}" - "${GAR_ARROW_ACERO_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) - else() - target_link_libraries(graphar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive Arrow::arrow_shared - Parquet::parquet_shared - ArrowDataset::arrow_dataset_shared - ArrowAcero::arrow_acero_shared -Wl,--no-whole-archive) - endif() - endif() - - # if OpenSSL library exists, link the OpenSSL library. 
- # OpenSSL has to be linked after GAR_ARROW_BUNDLED_DEPS_STATIC_LIB - if(OPENSSL_FOUND) - target_link_libraries(graphar PRIVATE OpenSSL::SSL) - endif() - if (CURL_FOUND) - target_link_libraries(graphar PRIVATE ${CURL_LIBRARIES}) + target_link_libraries(graphar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive Arrow::arrow_shared + Parquet::parquet_shared + ArrowDataset::arrow_dataset_shared + ArrowAcero::arrow_acero_shared -Wl,--no-whole-archive) endif() endmacro() @@ -297,37 +261,13 @@ if (BUILD_EXAMPLES) $ ) target_include_directories(${E_NAME} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - if(BUILD_ARROW_FROM_SOURCE) - target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) - endif() target_link_libraries(${E_NAME} PRIVATE graphar ${Boost_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS}) if(APPLE) - if(BUILD_ARROW_FROM_SOURCE) - target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") - else() - target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared - Parquet::parquet_shared) - endif() + target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared + Parquet::parquet_shared) else() - if(BUILD_ARROW_FROM_SOURCE) - target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) - else() - target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared - Parquet::parquet_shared) - endif() - endif() - - # if OpenSSL library exists, link the OpenSSL library. 
- # OpenSSL has to be linked after GAR_ARROW_BUNDLED_DEPS_STATIC_LIB - if(OPENSSL_FOUND) - target_link_libraries(${E_NAME} PRIVATE OpenSSL::SSL) - endif() - if (CURL_FOUND) - target_link_libraries(${E_NAME} PRIVATE CURL::libcurl) + target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared + Parquet::parquet_shared) endif() endforeach() endif() @@ -379,43 +319,18 @@ if (BUILD_TESTS) add_executable(${target} ${add_test_SRCS}) target_compile_features(${target} PRIVATE cxx_std_17) target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty) - if(BUILD_ARROW_FROM_SOURCE) - target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) - endif() target_link_libraries(${target} PRIVATE Catch2::Catch2 graphar Threads::Threads ${CMAKE_DL_LIBS}) if(APPLE) - if(BUILD_ARROW_FROM_SOURCE) - target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") - else() - target_link_libraries(${target} PRIVATE Arrow::arrow_shared - Parquet::parquet_shared) - endif() + target_link_libraries(${target} PRIVATE Arrow::arrow_shared + Parquet::parquet_shared) else() - if(BUILD_ARROW_FROM_SOURCE) - target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) - else() - target_link_libraries(${target} PRIVATE Arrow::arrow_shared - Parquet::parquet_shared) - endif() + target_link_libraries(${target} PRIVATE Arrow::arrow_shared + Parquet::parquet_shared) endif() target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/include $) - target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) include(CTest) include(Catch) catch_discover_tests(${target}) - - # if OpenSSL library exists, link the OpenSSL library. 
- # OpenSSL has to be linked after GAR_ARROW_BUNDLED_DEPS_STATIC_LIB - if(OPENSSL_FOUND) - target_link_libraries(${target} PRIVATE OpenSSL::SSL) - endif() - if (CURL_FOUND) - target_link_libraries(${target} PRIVATE CURL::libcurl) - endif() endmacro() add_test(test_info SRCS test/test_info.cc) @@ -440,15 +355,6 @@ if (BUILD_BENCHMARKS) target_compile_features(${target} PRIVATE cxx_std_17) target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty) target_link_libraries(${target} PRIVATE benchmark::benchmark_main graphar Threads::Threads ${CMAKE_DL_LIBS}) - - # if OpenSSL library exists, link the OpenSSL library. - # OpenSSL has to be linked after GAR_ARROW_BUNDLED_DEPS_STATIC_LIB - if(OPENSSL_FOUND) - target_link_libraries(${target} PRIVATE OpenSSL::SSL) - endif() - if (CURL_FOUND) - target_link_libraries(${target} PRIVATE CURL::libcurl) - endif() endmacro() add_benchmark(arrow_chunk_reader_benchmark SRCS benchmarks/arrow_chunk_reader_benchmark.cc) add_benchmark(graph_info_benchmark SRCS benchmarks/graph_info_benchmark.cc) diff --git a/cpp/README.md b/cpp/README.md index df0e462c5..ec06146ba 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -24,25 +24,49 @@ Building requires: sufficient. For MacOS, at least clang 5 is required - CMake 3.5 or higher - On Linux and macOS, ``make`` build utilities -- curl-devel with SSL (Linux) or curl (macOS), for s3 filesystem support -- Apache Arrow C++ (>= 12.0.0, requires `arrow-dev`, `arrow-dataset`, `arrow-acero` and `parquet` modules) for Arrow filesystem support and can use `BUILD_ARROW_FROM_SOURCE` option to build with GraphAr automatically. You can refer to [Apache Arrow Installation](https://arrow.apache.org/install/) to install Arrow directly too. +- Apache Arrow C++ (>= 12.0.0, requires `arrow-dev`, `arrow-dataset`, `arrow-acero` and `parquet` modules) for Arrow filesystem support. You can refer to [Apache Arrow Installation](https://arrow.apache.org/install/) to install the required modules. 
Dependencies for optional features: - [Doxygen](https://www.doxygen.nl/index.html) (>= 1.8) for generating documentation +- `clang-format-8` for code formatting +- [BGL](https://www.boost.org/doc/libs/1_80_0/libs/graph/doc/index.html) (>= 1.58) +- [Google Benchmark](https://github.com/google/benchmark) (>= 1.6.0) for benchmarking -Extra dependencies are required by examples: +On Ubuntu/Debian, you can install the required packages with: -- [BGL](https://www.boost.org/doc/libs/1_80_0/libs/graph/doc/index.html) (>= 1.58) +```bash +sudo apt-get install \ + build-essential \ + cmake \ + libboost-graph-dev \ + doxygen + +# Arrow C++ dependencies +wget -c \ + https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \ + -P /tmp/ +sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb +sudo apt-get update +sudo apt-get install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev +``` + +On macOS, you can use [Homebrew](https://brew.sh) to install the required packages: + +```bash +git clone https://github.com/apache/graphar.git +cd graphar +brew update && brew bundle --file=cpp/Brewfile +``` ### Building All the instructions below assume that you have cloned the GraphAr git -repository and navigated to the ``cpp`` subdirectory: +repository and navigated to the ``cpp`` subdirectory with: ```bash - $ git clone https://github.com/apache/incubator-graphar.git - $ cd incubator-graphar + $ git clone https://github.com/apache/graphar.git + $ cd graphar $ git submodule update --init $ cd cpp ``` @@ -93,11 +117,21 @@ After the building, you can install the GraphAr C++ library with: You should build the project with `ENABLE_DOCS` option. Then run: ```bash - make docs + $ make docs ``` The API document is generated in the directory ``docs_doxygen``. 
+### Code formatting and linting + +To format and lint the code, run: + +```bash + $ cmake .. + $ make graphar-clformat # format the code + $ make graphar-cpplint # lint the code +``` + ## How to use Please refer to our [GraphAr C++ API Reference](https://graphar.apache.org/docs/libraries/cpp). diff --git a/cpp/benchmarks/benchmark_util.h b/cpp/benchmarks/benchmark_util.h index 4b843762e..8a6d9f8e2 100644 --- a/cpp/benchmarks/benchmark_util.h +++ b/cpp/benchmarks/benchmark_util.h @@ -30,25 +30,19 @@ namespace graphar { -// Return the value of the GAR_TEST_DATA environment variable or return error -// Status -Status GetTestResourceRoot(std::string* out) { - const char* c_root = std::getenv("GAR_TEST_DATA"); - if (!c_root) { - return Status::IOError( - "Test resources not found, set GAR_TEST_DATA to /testing"); - } - // FIXME(@acezen): This is a hack to get around the fact that the testing - *out = std::string(c_root); - return Status::OK(); -} +static const std::string TEST_DATA_DIR = // NOLINT + std::filesystem::path(__FILE__) + .parent_path() + .parent_path() + .parent_path() + .parent_path() + .string() + + "/testing"; class BenchmarkFixture : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State& state) override { - std::string root; - Status status = GetTestResourceRoot(&root); - path_ = root + "/ldbc_sample/parquet/ldbc_sample.graph.yml"; + path_ = TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml"; auto maybe_graph_info = GraphInfo::Load(path_); graph_info_ = maybe_graph_info.value(); }