From dde8e798e1ffb4ffdcda78960c6403183810a934 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 29 Sep 2023 19:12:52 +1100 Subject: [PATCH 1/3] add test for arrow feedstock --- tests/test_libboost.py | 2 + .../test_yaml/libboost_arrow_after_meta.yaml | 440 ++++++++++++++++++ .../test_yaml/libboost_arrow_before_meta.yaml | 440 ++++++++++++++++++ 3 files changed, 882 insertions(+) create mode 100644 tests/test_yaml/libboost_arrow_after_meta.yaml create mode 100644 tests/test_yaml/libboost_arrow_before_meta.yaml diff --git a/tests/test_libboost.py b/tests/test_libboost.py index 52c9fb51c..11a8ed0da 100644 --- a/tests/test_libboost.py +++ b/tests/test_libboost.py @@ -23,6 +23,8 @@ ("gudhi", "1.10.0"), # single output; with run-dep ("carve", "1.10.0"), + # multiple output; no run-dep + ("arrow", "1.10.0"), # multiple outputs, many don't depend on boost; comment trickiness ("fenics", "1.10.0"), # multiple outputs, jinja-style pinning diff --git a/tests/test_yaml/libboost_arrow_after_meta.yaml b/tests/test_yaml/libboost_arrow_after_meta.yaml new file mode 100644 index 000000000..b2b4f35a1 --- /dev/null +++ b/tests/test_yaml/libboost_arrow_after_meta.yaml @@ -0,0 +1,440 @@ +{% set version = "1.10.0" %} +{% set cuda_enabled = cuda_compiler_version != "None" %} +{% set build_ext_version = "4.0.0" %} +{% set build_ext = "cuda" if cuda_enabled else "cpu" %} +{% set proc_build_number = "0" %} +{% set llvm_version = "15" %} + +# see https://github.com/apache/arrow/blob/apache-arrow-10.0.1/cpp/CMakeLists.txt#L88-L90 +{% set so_version = (version.split(".")[0] | int * 100 + version.split(".")[1] | int) ~ "." ~ version.split(".")[2] ~ ".0" %} + +package: + name: apache-arrow + version: {{ version }} + +source: + # fake source url to get version migrator to pass + url: https://github.com/scipy/scipy/archive/refs/tags/v{{ version }}.tar.gz + sha256: 3f9e587a96844a9b4ee7f998cfe4dc3964dc95c4ca94d7de6a77bffb99f873da + # # arrow has the unfortunate habit of changing tags of X.0.0 in the + # # lead-up until release -> don't use github sources on main + # # - url: https://github.com/apache/arrow/archive/refs/tags/apache-arrow-{{ version }}.tar.gz + # - url: https://dist.apache.org/repos/dist/release/arrow/arrow-{{ version }}/apache-arrow-{{ version }}.tar.gz + # sha256: 35dfda191262a756be934eef8afee8d09762cad25021daa626eb249e251ac9e6 + # patches: + # - patches/0001-GH-15017-Python-Harden-test_memory.py-for-use-with-A.patch + # - patches/0002-GH-37480-Python-Bump-pandas-version-that-contains-re.patch + # # testing-submodule not part of release tarball + # - git_url: https://github.com/apache/arrow-testing.git + # git_rev: 47f7b56b25683202c1fd957668e13f2abafc0f12 + # folder: testing + +build: + number: 0 + # for cuda support, building with one version is enough to be compatible with + # all later versions, since arrow is only using libcuda, and not libcudart. + skip: true # [cuda_compiler_version not in ("None", cuda_compiler_version_min)] + # arrow promises API- & ABI-compatibility along SemVer, see #1096 + run_exports: + - {{ pin_subpackage("libarrow", max_pin="x") }} + +outputs: + - name: apache-arrow-proc + version: {{ build_ext_version }} + build: + number: {{ proc_build_number }} + string: {{ build_ext }} + test: + commands: + - exit 0 + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: A meta-package to select Arrow build variant + + # compat output for old mutex-package naming + - name: arrow-cpp-proc + version: {{ build_ext_version }} + build: + number: {{ proc_build_number }} + string: {{ build_ext }} + requirements: + run: + - apache-arrow-proc ={{ build_ext_version }}={{ build_ext }} + test: + commands: + - exit 0 + + - name: libarrow + script: build-arrow.sh # [unix] + script: build-arrow.bat # [win] + version: {{ version }} + build: + string: h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + run_exports: + - {{ pin_subpackage("libarrow", max_pin="x") }} + ignore_run_exports_from: + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + # arrow only uses headers, apparently + - gflags + # shared lib linked on unix, not on win + - glog # [win] + ignore_run_exports: + # we don't need all of brotli's run-exports + - libbrotlicommon + track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} + missing_dso_whitelist: + - '*/libcuda.so.*' # [linux] + - '*/nvcuda.dll' # [win] + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + # needs to run protoc & grpc_cpp_plugin + - libgrpc # [build_platform != target_platform] + - libprotobuf # [build_platform != target_platform] + # needed for gandiva + - clangdev {{ llvm_version }} # [build_platform != target_platform] + - llvmdev {{ llvm_version }} # [build_platform != target_platform] + - gnuconfig # [build_platform != target_platform] + - cmake + - ninja + # necessary for vendored jemalloc + - autoconf # [linux] + - make # [linux] + host: + # for required dependencies, see + # https://github.com/apache/arrow/blob/apache-arrow-11.0.0/cpp/cmake_modules/ThirdpartyToolchain.cmake#L46-L75 + - clangdev {{ llvm_version }} + - llvmdev {{ llvm_version }} + - aws-crt-cpp + - aws-sdk-cpp + - libboost-headers + - brotli + - bzip2 + # not yet: https://github.com/conda-forge/cpp-opentelemetry-sdk-feedstock/issues/38 + # - cpp-opentelemetry-sdk + # - proto-opentelemetry-proto =={{ cpp_opentelemetry_sdk }} + - gflags + - glog + - google-cloud-cpp + # arrow uses a customized jemalloc, see #944 + # - jemalloc + - libabseil + - libgrpc + - libprotobuf + - libutf8proc + - lz4-c + - nlohmann_json + # gandiva depends on openssl + - openssl + - orc + - rapidjson + - re2 + - snappy + - thrift-cpp + - ucx # [linux] + - xsimd + - zlib + - zstd + - __cuda >={{ cuda_compiler_version_min }} # [cuda_compiler_version != "None"] + # since libgoogle-cloud is static on windows, see + # https://github.com/conda-forge/google-cloud-cpp-feedstock/pull/108, + # its host deps (which aren't yet covered above) leak into the build here + - libcrc32c # [win] + - libcurl # [win] + run_constrained: + - apache-arrow-proc =*={{ build_ext }} + # make sure we don't co-install with old version of old package name + - arrow-cpp ={{ version }} + # old parquet lib output, now part of this feedstock + - parquet-cpp <0.0a0 + + test: + commands: + {% set headers = [ + "arrow/api.h", "arrow/acero/api.h", "arrow/flight/types.h", + "arrow/flight/sql/api.h", "gandiva/engine.h", "parquet/api/reader.h" + ] %} + {% for each_header in headers %} + # headers + - test -f $PREFIX/include/{{ each_header }} || (echo "{{ each_header }} not found" && exit 1) # [unix] + - if not exist %LIBRARY_INC%\{{ "\\".join(each_header.split("/")) }} exit 1 # [win] + {% endfor %} + + {% set libs = (cuda_compiler_version != "None") * ["arrow_cuda"] + [ + "arrow", "arrow_acero", "arrow_dataset", "arrow_flight", + "arrow_flight_sql", "arrow_substrait", "gandiva", "parquet" + ] %} + {% for each_lib in libs %} + # shared + - test -f $PREFIX/lib/lib{{ each_lib }}.so # [linux] + - test -f $PREFIX/lib/lib{{ each_lib }}.dylib # [osx] + - if not exist %LIBRARY_BIN%\{{ each_lib }}.dll exit 1 # [win] + - if not exist %LIBRARY_LIB%\{{ each_lib }}.lib exit 1 # [win] + + # absence of static libraries + - test ! -f $PREFIX/lib/lib{{ each_lib }}.a # [unix] + - if exist %LIBRARY_LIB%\{{ each_lib }}_static.lib exit 1 # [win] + {% endfor %} + + # absence of arrow_cuda for CPU builds + - test ! -f $PREFIX/lib/libarrow_cuda.so # [(cuda_compiler_version == "None") and linux] + - test ! -f $PREFIX/lib/libarrow_cuda.a # [(cuda_compiler_version == "None") and linux] + - if exist %LIBRARY_BIN%\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win] + - if exist %LIBRARY_LIB%\arrow_cuda.lib exit 1 # [(cuda_compiler_version == "None") and win] + - if exist %LIBRARY_LIB%\arrow_cuda_static.lib exit 1 # [(cuda_compiler_version == "None") and win] + + # gdb-wrapper (paths are stacked intentionally) + - test -f $PREFIX/share/gdb/auto-load/$PREFIX/lib/libarrow.so.{{ so_version }}-gdb.py # [linux] + - test -f $PREFIX/share/gdb/auto-load/$PREFIX/lib/libarrow.{{ so_version }}.dylib-gdb.py # [osx] + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: C++ libraries for Apache Arrow + + # compat output for old naming scheme; switched for 10.0.0; keep for a few versions + - name: arrow-cpp + version: {{ version }} + build: + string: h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + run_exports: + - {{ pin_subpackage("libarrow", max_pin="x.x.x") }} + requirements: + host: + - {{ pin_subpackage('libarrow', exact=True) }} + run: + - {{ pin_subpackage('libarrow', exact=True) }} + test: + commands: + - exit 0 + + - name: pyarrow + script: build-pyarrow.sh # [unix] + script: build-pyarrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + ignore_run_exports_from: + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} + rpaths: + - lib/ + - {{ SP_DIR }}/pyarrow + missing_dso_whitelist: + # not actually missing, but installed into SP_DIR, see tests + - '*/arrow_python.dll' # [win] + - '*/arrow_python_flight.dll' # [win] + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + # pyarrow does not require nvcc but it needs to link against libraries in libarrow=*=*cuda + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython <3 # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - cmake + - ninja + host: + - {{ pin_subpackage('libarrow', exact=True) }} + - clangdev {{ llvm_version }} + - llvmdev {{ llvm_version }} + - cython <3 + - numpy + - python + - setuptools + - setuptools_scm + run: + - {{ pin_subpackage('libarrow', exact=True) }} + - {{ pin_compatible('numpy') }} + - python + run_constrained: + - apache-arrow-proc =*={{ build_ext }} + + test: + files: + - test_read_parquet.py + imports: + - pyarrow + - pyarrow.dataset + - pyarrow.flight + - pyarrow.gandiva + - pyarrow.orc # [unix] + - pyarrow.parquet + - pyarrow.fs + - pyarrow._s3fs + - pyarrow._hdfs + # We can only test importing cuda package but cannot run when a + # CUDA device is not available, for instance, when building from CI. + # On Windows, we cannot even do that due to `nvcuda.dll` not being found, see + # https://conda-forge.org/docs/maintainer/knowledge_base.html#nvcuda-dll-cannot-be-found-on-windows + # However, we check below for (at least) the presence of a correctly-compiled module + - pyarrow.cuda # [cuda_compiler_version != "None" and not win] + commands: + # libraries that depend on python (and hence aren't in libarrow itself) + - test -f ${SP_DIR}/pyarrow/libarrow_python.so # [linux] + - test -f ${SP_DIR}/pyarrow/libarrow_python_flight.so # [linux] + - test -f ${SP_DIR}/pyarrow/libarrow_python.dylib # [osx] + - test -f ${SP_DIR}/pyarrow/libarrow_python_flight.dylib # [osx] + - if not exist %SP_DIR%\pyarrow\arrow_python.dll exit 1 # [win] + - if not exist %SP_DIR%\pyarrow\arrow_python_flight.dll exit 1 # [win] + + - test -f ${SP_DIR}/pyarrow/include/arrow/python/pyarrow.h # [unix] + - if not exist %SP_DIR%\pyarrow\include\arrow\python\pyarrow.h exit 1 # [win] + + - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix] + - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win] + # Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y" + - if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1 # [win and cuda_compiler_version != "None"] + - python test_read_parquet.py + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: Python libraries for Apache Arrow + + - name: pyarrow-tests + script: build-pyarrow.sh # [unix] + script: build-pyarrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + ignore_run_exports_from: + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + # pyarrow does not require nvcc but it needs to link against libraries in libarrow=*=*cuda + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython <3 # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - cmake + - ninja + host: + - {{ pin_subpackage('libarrow', exact=True) }} + - {{ pin_subpackage('pyarrow', exact=True) }} + - clangdev {{ llvm_version }} + - llvmdev {{ llvm_version }} + - cython <3 + - numpy + - python + - setuptools + - setuptools_scm + run: + - {{ pin_subpackage('pyarrow', exact=True) }} + - python + run_constrained: + - apache-arrow-proc =*={{ build_ext }} + + {% if not (aarch64 or ppc64le) or py == 311 %} + # only run the full test suite for one python version when in emulation + # (each run can take up to ~45min); there's essentially zero divergence + # in behaviour across python versions anyway + test: + requires: + # test_cpp_extension_in_python requires a compiler + - {{ compiler("cxx") }} # [linux] + - pytest + - pytest-lazy-fixture + - backports.zoneinfo # [py<39] + - boto3 + - cffi + - cloudpickle + - cython <3 + - fastparquet + - fsspec + - hypothesis + - minio-server + - pandas + - s3fs + - scipy + # these are generally (far) behind on migrating abseil/grpc/protobuf, + # and using them as test dependencies blocks the migrator unnecessarily + # - pytorch + # - tensorflow + # we're not building java bindings + # - jpype1 + # doesn't get picked up correctly + # - libhdfs3 + # causes segfaults + # - sparse + source_files: + - testing/data + commands: + - cd ${SP_DIR} # [unix] + - cd %SP_DIR% # [win] + - export ARROW_TEST_DATA="${SRC_DIR}/testing/data" # [unix] + - set "ARROW_TEST_DATA=%SRC_DIR%\testing\data" # [win] + + {% set tests_to_skip = "_not_a_real_test" %} + # we do not have GPUs in CI --> cannot test cuda + {% set tests_to_skip = tests_to_skip + " or test_cuda" %} + # skip tests that raise SIGINT and crash the test suite + {% set tests_to_skip = tests_to_skip + " or (test_csv and test_cancellation)" %} # [linux] + {% set tests_to_skip = tests_to_skip + " or (test_flight and test_interrupt)" %} # [linux] + # skip tests that make invalid(-for-conda) assumptions about the compilers setup + {% set tests_to_skip = tests_to_skip + " or test_cython_api" %} # [unix] + {% set tests_to_skip = tests_to_skip + " or test_visit_strings" %} # [unix] + # skip tests that cannot succeed in emulation + {% set tests_to_skip = tests_to_skip + " or test_debug_memory_pool_disabled" %} # [aarch64 or ppc64le] + {% set tests_to_skip = tests_to_skip + " or test_env_var_io_thread_count" %} # [aarch64 or ppc64le] + # XMinioInvalidObjectName on win: "Object name contains unsupported characters" + {% set tests_to_skip = tests_to_skip + " or test_write_to_dataset_with_partitions_s3fs" %} # [win] + # vvvvvvv TESTS THAT SHOULDN'T HAVE TO BE SKIPPED vvvvvvv + # new fsspec changed behaviour, see https://github.com/apache/arrow/issues/37555 + {% set tests_to_skip = tests_to_skip + " or test_get_file_info_with_selector" %} + # gandiva tests are segfaulting on ppc + {% set tests_to_skip = tests_to_skip + " or test_gandiva" %} # [ppc64le] + # test failures on ppc (both failing with: Float value was truncated converting to int32) + {% set tests_to_skip = tests_to_skip + " or test_safe_cast_from_float_with_nans_to_int" %} # [ppc64le] + {% set tests_to_skip = tests_to_skip + " or test_float_with_null_as_integer" %} # [ppc64le] + # ^^^^^^^ TESTS THAT SHOULDN'T HAVE TO BE SKIPPED ^^^^^^^ + - pytest pyarrow/ -rfEs -k "not ({{ tests_to_skip }})" + {% endif %} + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: Python test files for Apache Arrow + +about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: C++ and Python libraries for Apache Arrow + +extra: + recipe-maintainers: + - wesm + - xhochy + - leifwalsh + - jreback + - cpcloud + - pcmoritz + - robertnishihara + - siddharthteotia + - kou + - kszucs + - pitrou + - pearu + - nealrichardson + - jakirkham + - h-vetinari + feedstock-name: arrow-cpp diff --git a/tests/test_yaml/libboost_arrow_before_meta.yaml b/tests/test_yaml/libboost_arrow_before_meta.yaml new file mode 100644 index 000000000..77f4a8380 --- /dev/null +++ b/tests/test_yaml/libboost_arrow_before_meta.yaml @@ -0,0 +1,440 @@ +{% set version = "1.9.0" %} +{% set cuda_enabled = cuda_compiler_version != "None" %} +{% set build_ext_version = "4.0.0" %} +{% set build_ext = "cuda" if cuda_enabled else "cpu" %} +{% set proc_build_number = "0" %} +{% set llvm_version = "15" %} + +# see https://github.com/apache/arrow/blob/apache-arrow-10.0.1/cpp/CMakeLists.txt#L88-L90 +{% set so_version = (version.split(".")[0] | int * 100 + version.split(".")[1] | int) ~ "." ~ version.split(".")[2] ~ ".0" %} + +package: + name: apache-arrow + version: {{ version }} + +source: + # fake source url to get version migrator to pass + url: https://github.com/scipy/scipy/archive/refs/tags/v{{ version }}.tar.gz + sha256: b6d893dc7dcd4138b9e9df59a13c59695e50e80dc5c2cacee0674670693951a1 + # # arrow has the unfortunate habit of changing tags of X.0.0 in the + # # lead-up until release -> don't use github sources on main + # # - url: https://github.com/apache/arrow/archive/refs/tags/apache-arrow-{{ version }}.tar.gz + # - url: https://dist.apache.org/repos/dist/release/arrow/arrow-{{ version }}/apache-arrow-{{ version }}.tar.gz + # sha256: 35dfda191262a756be934eef8afee8d09762cad25021daa626eb249e251ac9e6 + # patches: + # - patches/0001-GH-15017-Python-Harden-test_memory.py-for-use-with-A.patch + # - patches/0002-GH-37480-Python-Bump-pandas-version-that-contains-re.patch + # # testing-submodule not part of release tarball + # - git_url: https://github.com/apache/arrow-testing.git + # git_rev: 47f7b56b25683202c1fd957668e13f2abafc0f12 + # folder: testing + +build: + number: 4 + # for cuda support, building with one version is enough to be compatible with + # all later versions, since arrow is only using libcuda, and not libcudart. + skip: true # [cuda_compiler_version not in ("None", cuda_compiler_version_min)] + # arrow promises API- & ABI-compatibility along SemVer, see #1096 + run_exports: + - {{ pin_subpackage("libarrow", max_pin="x") }} + +outputs: + - name: apache-arrow-proc + version: {{ build_ext_version }} + build: + number: {{ proc_build_number }} + string: {{ build_ext }} + test: + commands: + - exit 0 + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: A meta-package to select Arrow build variant + + # compat output for old mutex-package naming + - name: arrow-cpp-proc + version: {{ build_ext_version }} + build: + number: {{ proc_build_number }} + string: {{ build_ext }} + requirements: + run: + - apache-arrow-proc ={{ build_ext_version }}={{ build_ext }} + test: + commands: + - exit 0 + + - name: libarrow + script: build-arrow.sh # [unix] + script: build-arrow.bat # [win] + version: {{ version }} + build: + string: h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + run_exports: + - {{ pin_subpackage("libarrow", max_pin="x") }} + ignore_run_exports_from: + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + # arrow only uses headers, apparently + - gflags + # shared lib linked on unix, not on win + - glog # [win] + ignore_run_exports: + # we don't need all of brotli's run-exports + - libbrotlicommon + track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} + missing_dso_whitelist: + - '*/libcuda.so.*' # [linux] + - '*/nvcuda.dll' # [win] + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + # needs to run protoc & grpc_cpp_plugin + - libgrpc # [build_platform != target_platform] + - libprotobuf # [build_platform != target_platform] + # needed for gandiva + - clangdev {{ llvm_version }} # [build_platform != target_platform] + - llvmdev {{ llvm_version }} # [build_platform != target_platform] + - gnuconfig # [build_platform != target_platform] + - cmake + - ninja + # necessary for vendored jemalloc + - autoconf # [linux] + - make # [linux] + host: + # for required dependencies, see + # https://github.com/apache/arrow/blob/apache-arrow-11.0.0/cpp/cmake_modules/ThirdpartyToolchain.cmake#L46-L75 + - clangdev {{ llvm_version }} + - llvmdev {{ llvm_version }} + - aws-crt-cpp + - aws-sdk-cpp + - boost-cpp >=1.70 + - brotli + - bzip2 + # not yet: https://github.com/conda-forge/cpp-opentelemetry-sdk-feedstock/issues/38 + # - cpp-opentelemetry-sdk + # - proto-opentelemetry-proto =={{ cpp_opentelemetry_sdk }} + - gflags + - glog + - google-cloud-cpp + # arrow uses a customized jemalloc, see #944 + # - jemalloc + - libabseil + - libgrpc + - libprotobuf + - libutf8proc + - lz4-c + - nlohmann_json + # gandiva depends on openssl + - openssl + - orc + - rapidjson + - re2 + - snappy + - thrift-cpp + - ucx # [linux] + - xsimd + - zlib + - zstd + - __cuda >={{ cuda_compiler_version_min }} # [cuda_compiler_version != "None"] + # since libgoogle-cloud is static on windows, see + # https://github.com/conda-forge/google-cloud-cpp-feedstock/pull/108, + # its host deps (which aren't yet covered above) leak into the build here + - libcrc32c # [win] + - libcurl # [win] + run_constrained: + - apache-arrow-proc =*={{ build_ext }} + # make sure we don't co-install with old version of old package name + - arrow-cpp ={{ version }} + # old parquet lib output, now part of this feedstock + - parquet-cpp <0.0a0 + + test: + commands: + {% set headers = [ + "arrow/api.h", "arrow/acero/api.h", "arrow/flight/types.h", + "arrow/flight/sql/api.h", "gandiva/engine.h", "parquet/api/reader.h" + ] %} + {% for each_header in headers %} + # headers + - test -f $PREFIX/include/{{ each_header }} || (echo "{{ each_header }} not found" && exit 1) # [unix] + - if not exist %LIBRARY_INC%\{{ "\\".join(each_header.split("/")) }} exit 1 # [win] + {% endfor %} + + {% set libs = (cuda_compiler_version != "None") * ["arrow_cuda"] + [ + "arrow", "arrow_acero", "arrow_dataset", "arrow_flight", + "arrow_flight_sql", "arrow_substrait", "gandiva", "parquet" + ] %} + {% for each_lib in libs %} + # shared + - test -f $PREFIX/lib/lib{{ each_lib }}.so # [linux] + - test -f $PREFIX/lib/lib{{ each_lib }}.dylib # [osx] + - if not exist %LIBRARY_BIN%\{{ each_lib }}.dll exit 1 # [win] + - if not exist %LIBRARY_LIB%\{{ each_lib }}.lib exit 1 # [win] + + # absence of static libraries + - test ! -f $PREFIX/lib/lib{{ each_lib }}.a # [unix] + - if exist %LIBRARY_LIB%\{{ each_lib }}_static.lib exit 1 # [win] + {% endfor %} + + # absence of arrow_cuda for CPU builds + - test ! -f $PREFIX/lib/libarrow_cuda.so # [(cuda_compiler_version == "None") and linux] + - test ! -f $PREFIX/lib/libarrow_cuda.a # [(cuda_compiler_version == "None") and linux] + - if exist %LIBRARY_BIN%\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win] + - if exist %LIBRARY_LIB%\arrow_cuda.lib exit 1 # [(cuda_compiler_version == "None") and win] + - if exist %LIBRARY_LIB%\arrow_cuda_static.lib exit 1 # [(cuda_compiler_version == "None") and win] + + # gdb-wrapper (paths are stacked intentionally) + - test -f $PREFIX/share/gdb/auto-load/$PREFIX/lib/libarrow.so.{{ so_version }}-gdb.py # [linux] + - test -f $PREFIX/share/gdb/auto-load/$PREFIX/lib/libarrow.{{ so_version }}.dylib-gdb.py # [osx] + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: C++ libraries for Apache Arrow + + # compat output for old naming scheme; switched for 10.0.0; keep for a few versions + - name: arrow-cpp + version: {{ version }} + build: + string: h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + run_exports: + - {{ pin_subpackage("libarrow", max_pin="x.x.x") }} + requirements: + host: + - {{ pin_subpackage('libarrow', exact=True) }} + run: + - {{ pin_subpackage('libarrow', exact=True) }} + test: + commands: + - exit 0 + + - name: pyarrow + script: build-pyarrow.sh # [unix] + script: build-pyarrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + ignore_run_exports_from: + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} + rpaths: + - lib/ + - {{ SP_DIR }}/pyarrow + missing_dso_whitelist: + # not actually missing, but installed into SP_DIR, see tests + - '*/arrow_python.dll' # [win] + - '*/arrow_python_flight.dll' # [win] + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + # pyarrow does not require nvcc but it needs to link against libraries in libarrow=*=*cuda + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython <3 # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - cmake + - ninja + host: + - {{ pin_subpackage('libarrow', exact=True) }} + - clangdev {{ llvm_version }} + - llvmdev {{ llvm_version }} + - cython <3 + - numpy + - python + - setuptools + - setuptools_scm + run: + - {{ pin_subpackage('libarrow', exact=True) }} + - {{ pin_compatible('numpy') }} + - python + run_constrained: + - apache-arrow-proc =*={{ build_ext }} + + test: + files: + - test_read_parquet.py + imports: + - pyarrow + - pyarrow.dataset + - pyarrow.flight + - pyarrow.gandiva + - pyarrow.orc # [unix] + - pyarrow.parquet + - pyarrow.fs + - pyarrow._s3fs + - pyarrow._hdfs + # We can only test importing cuda package but cannot run when a + # CUDA device is not available, for instance, when building from CI. + # On Windows, we cannot even do that due to `nvcuda.dll` not being found, see + # https://conda-forge.org/docs/maintainer/knowledge_base.html#nvcuda-dll-cannot-be-found-on-windows + # However, we check below for (at least) the presence of a correctly-compiled module + - pyarrow.cuda # [cuda_compiler_version != "None" and not win] + commands: + # libraries that depend on python (and hence aren't in libarrow itself) + - test -f ${SP_DIR}/pyarrow/libarrow_python.so # [linux] + - test -f ${SP_DIR}/pyarrow/libarrow_python_flight.so # [linux] + - test -f ${SP_DIR}/pyarrow/libarrow_python.dylib # [osx] + - test -f ${SP_DIR}/pyarrow/libarrow_python_flight.dylib # [osx] + - if not exist %SP_DIR%\pyarrow\arrow_python.dll exit 1 # [win] + - if not exist %SP_DIR%\pyarrow\arrow_python_flight.dll exit 1 # [win] + + - test -f ${SP_DIR}/pyarrow/include/arrow/python/pyarrow.h # [unix] + - if not exist %SP_DIR%\pyarrow\include\arrow\python\pyarrow.h exit 1 # [win] + + - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix] + - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win] + # Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y" + - if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1 # [win and cuda_compiler_version != "None"] + - python test_read_parquet.py + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: Python libraries for Apache Arrow + + - name: pyarrow-tests + script: build-pyarrow.sh # [unix] + script: build-pyarrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + ignore_run_exports_from: + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + # pyarrow does not require nvcc but it needs to link against libraries in libarrow=*=*cuda + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython <3 # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - cmake + - ninja + host: + - {{ pin_subpackage('libarrow', exact=True) }} + - {{ pin_subpackage('pyarrow', exact=True) }} + - clangdev {{ llvm_version }} + - llvmdev {{ llvm_version }} + - cython <3 + - numpy + - python + - setuptools + - setuptools_scm + run: + - {{ pin_subpackage('pyarrow', exact=True) }} + - python + run_constrained: + - apache-arrow-proc =*={{ build_ext }} + + {% if not (aarch64 or ppc64le) or py == 311 %} + # only run the full test suite for one python version when in emulation + # (each run can take up to ~45min); there's essentially zero divergence + # in behaviour across python versions anyway + test: + requires: + # test_cpp_extension_in_python requires a compiler + - {{ compiler("cxx") }} # [linux] + - pytest + - pytest-lazy-fixture + - backports.zoneinfo # [py<39] + - boto3 + - cffi + - cloudpickle + - cython <3 + - fastparquet + - fsspec + - hypothesis + - minio-server + - pandas + - s3fs + - scipy + # these are generally (far) behind on migrating abseil/grpc/protobuf, + # and using them as test dependencies blocks the migrator unnecessarily + # - pytorch + # - tensorflow + # we're not building java bindings + # - jpype1 + # doesn't get picked up correctly + # - libhdfs3 + # causes segfaults + # - sparse + source_files: + - testing/data + commands: + - cd ${SP_DIR} # [unix] + - cd %SP_DIR% # [win] + - export ARROW_TEST_DATA="${SRC_DIR}/testing/data" # [unix] + - set "ARROW_TEST_DATA=%SRC_DIR%\testing\data" # [win] + + {% set tests_to_skip = "_not_a_real_test" %} + # we do not have GPUs in CI --> cannot test cuda + {% set tests_to_skip = tests_to_skip + " or test_cuda" %} + # skip tests that raise SIGINT and crash the test suite + {% set tests_to_skip = tests_to_skip + " or (test_csv and test_cancellation)" %} # [linux] + {% set tests_to_skip = tests_to_skip + " or (test_flight and test_interrupt)" %} # [linux] + # skip tests that make invalid(-for-conda) assumptions about the compilers setup + {% set tests_to_skip = tests_to_skip + " or test_cython_api" %} # [unix] + {% set tests_to_skip = tests_to_skip + " or test_visit_strings" %} # [unix] + # skip tests that cannot succeed in emulation + {% set tests_to_skip = tests_to_skip + " or test_debug_memory_pool_disabled" %} # [aarch64 or ppc64le] + {% set tests_to_skip = tests_to_skip + " or test_env_var_io_thread_count" %} # [aarch64 or ppc64le] + # XMinioInvalidObjectName on win: "Object name contains unsupported characters" + {% set tests_to_skip = tests_to_skip + " or test_write_to_dataset_with_partitions_s3fs" %} # [win] + # vvvvvvv TESTS THAT SHOULDN'T HAVE TO BE SKIPPED vvvvvvv + # new fsspec changed behaviour, see https://github.com/apache/arrow/issues/37555 + {% set tests_to_skip = tests_to_skip + " or test_get_file_info_with_selector" %} + # gandiva tests are segfaulting on ppc + {% set tests_to_skip = tests_to_skip + " or test_gandiva" %} # [ppc64le] + # test failures on ppc (both failing with: Float value was truncated converting to int32) + {% set tests_to_skip = tests_to_skip + " or test_safe_cast_from_float_with_nans_to_int" %} # [ppc64le] + {% set tests_to_skip = tests_to_skip + " or test_float_with_null_as_integer" %} # [ppc64le] + # ^^^^^^^ TESTS THAT SHOULDN'T HAVE TO BE SKIPPED ^^^^^^^ + - pytest pyarrow/ -rfEs -k "not ({{ tests_to_skip }})" + {% endif %} + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: Python test files for Apache Arrow + +about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: C++ and Python libraries for Apache Arrow + +extra: + recipe-maintainers: + - wesm + - xhochy + - leifwalsh + - jreback + - cpcloud + - pcmoritz + - robertnishihara + - siddharthteotia + - kou + - kszucs + - pitrou + - pearu + - nealrichardson + - jakirkham + - h-vetinari + feedstock-name: arrow-cpp From 2715d8319aefef97b240556bbd23c338304052c9 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 29 Sep 2023 20:03:42 +1100 Subject: [PATCH 2/3] fix dependency detection for boost-cpp with pins --- conda_forge_tick/migrators/libboost.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda_forge_tick/migrators/libboost.py b/conda_forge_tick/migrators/libboost.py index c99d1767e..68a6f9ada 100644 --- a/conda_forge_tick/migrators/libboost.py +++ b/conda_forge_tick/migrators/libboost.py @@ -71,9 +71,9 @@ def _process_section(name, attrs, lines): host_req = reqs.get("host", set()) or set() run_req = reqs.get("run", set()) or set() - is_boost_in_build = "boost-cpp" in build_req - is_boost_in_host = "boost-cpp" in host_req - is_boost_in_run = "boost-cpp" in run_req + is_boost_in_build = any(x.startswith("boost-cpp") for x in build_req) + is_boost_in_host = any(x.startswith("boost-cpp") for x in host_req) + is_boost_in_run = any(x.startswith("boost-cpp") for x in run_req) # anything behind a comment needs to get replaced first, so it # doesn't mess up the counts below From d55ff7c3910166ace791093d301382c1671d414d Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 29 Sep 2023 20:19:03 +1100 Subject: [PATCH 3/3] fix case of None appearing in requirements --- conda_forge_tick/migrators/libboost.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda_forge_tick/migrators/libboost.py b/conda_forge_tick/migrators/libboost.py index 68a6f9ada..03380ef9a 100644 --- a/conda_forge_tick/migrators/libboost.py +++ b/conda_forge_tick/migrators/libboost.py @@ -71,9 +71,9 @@ def _process_section(name, attrs, lines): host_req = reqs.get("host", set()) or set() run_req = reqs.get("run", set()) or set() - is_boost_in_build = any(x.startswith("boost-cpp") for x in build_req) - is_boost_in_host = any(x.startswith("boost-cpp") for x in host_req) - is_boost_in_run = any(x.startswith("boost-cpp") for x in run_req) + is_boost_in_build = any((x or "").startswith("boost-cpp") for x in build_req) + is_boost_in_host = any((x or "").startswith("boost-cpp") for x in host_req) + is_boost_in_run = any((x or "").startswith("boost-cpp") for x in run_req) # anything behind a comment needs to get replaced first, so it # doesn't mess up the counts below