Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into rewrite_read_json
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia authored Nov 8, 2024
2 parents a28a32a + 3c5f787 commit 181bf90
Show file tree
Hide file tree
Showing 644 changed files with 11,768 additions and 6,249 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ ENV PYTHONDONTWRITEBYTECODE="1"

ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-devs"
ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
ENV HISTFILE="/home/coder/.cache/._bash_history"
ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache"
17 changes: 17 additions & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: "Auto Assign PR"

on:
pull_request_target:
types:
- opened
- reopened
- synchronize

jobs:
add_assignees:
runs-on: ubuntu-latest
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
github_token: "${{ secrets.GITHUB_TOKEN }}"
assignees: ${{ github.actor }}
1 change: 1 addition & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: "Pull Request Labeler"

on:
- pull_request_target

Expand Down
14 changes: 2 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@ repos:
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
exclude: |
(?x)^(^python/cudf_polars/.*)
types_or: [python, cython, pyi]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
hooks:
Expand Down Expand Up @@ -150,6 +139,7 @@ repos:
rev: v0.4.8
hooks:
- id: ruff
args: ["--fix"]
files: python/.*$
- id: ruff-format
files: python/.*$
Expand All @@ -165,7 +155,7 @@ repos:
)
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
rev: v1.16.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
Expand Down
5 changes: 3 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ conduct. More information can be found at:
8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).
Fix if needed.
9. Wait for other developers to review your code and update code as needed.
Changes to any C++ files require at least 2 approvals from the cudf-cpp-codeowners before merging.
10. Once reviewed and approved, a RAPIDS developer will merge your pull request.

If you are unsure about anything, don't hesitate to comment on issues and ask for clarification!
Expand Down Expand Up @@ -293,8 +294,8 @@ In order to run doxygen as a linter on C++/CUDA code, run
./ci/checks/doxygen.sh
```

Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/),
[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/).
Python code runs several linters including [Ruff](https://docs.astral.sh/ruff/)
with its various rules for Black-like formatting or Isort.

cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling
mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes,
Expand Down
4 changes: 4 additions & 0 deletions ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ rapids-print-env

rapids-logger "Begin cpp build"

sccache --zero-stats

# With boa installed conda build forward to boa
RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \
conda/recipes/libcudf

sccache --show-adv-stats

rapids-upload-conda-to-s3 cpp
10 changes: 10 additions & 0 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ rapids-logger "Begin py build"

CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

sccache --zero-stats

# TODO: Remove `--no-test` flag once importing on a CPU
# node works correctly
# With boa installed conda build forwards to the boa builder
Expand All @@ -28,12 +30,18 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${CPP_CHANNEL}" \
conda/recipes/pylibcudf

sccache --show-adv-stats
sccache --zero-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf

sccache --show-adv-stats
sccache --zero-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
Expand All @@ -46,6 +54,8 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf_kafka

sccache --show-adv-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
Expand Down
15 changes: 13 additions & 2 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

set -euo pipefail

package_dir=$1
package_name=$1
package_dir=$2

source rapids-configure-sccache
source rapids-date-string
Expand All @@ -12,4 +13,14 @@ rapids-generate-version > ./VERSION

cd "${package_dir}"

python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check
sccache --zero-stats

rapids-logger "Building '${package_name}' wheel"
python -m pip wheel \
-w dist \
-v \
--no-deps \
--disable-pip-version-check \
.

sccache --show-adv-stats
3 changes: 2 additions & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf
echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh cudf ${package_dir}

python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf_polars"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh cudf-polars ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
4 changes: 2 additions & 2 deletions ci/build_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/dask_cudf"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh dask-cudf ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
25 changes: 23 additions & 2 deletions ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,38 @@

set -euo pipefail

package_name="libcudf"
package_dir="python/libcudf"

rapids-logger "Generating build requirements"

rapids-dependency-file-generator \
--output requirements \
--file-key "py_build_${package_name}" \
--file-key "py_rapids_build_${package_name}" \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
| tee /tmp/requirements-build.txt

rapids-logger "Installing build requirements"
python -m pip install \
-v \
--prefer-binary \
-r /tmp/requirements-build.txt

# build with '--no-build-isolation', for better sccache hit rate
# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
export PIP_NO_BUILD_ISOLATION=0

export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON"
./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh "${package_name}" "${package_dir}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair \
--exclude libnvcomp.so.4 \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
5 changes: 3 additions & 2 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh pylibcudf ${package_dir}

python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
11 changes: 2 additions & 9 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,8 @@ else
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
# `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]`
rapids-dependency-file-generator \
--output requirements \
--file-key test_python_cudf_pandas \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi
# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints test_python_cudf_pandas ./constraints.txt

python -m pip install \
-v \
Expand Down
11 changes: 2 additions & 9 deletions ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,8 @@ RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels

rapids-logger "Install cudf, pylibcudf, and test requirements"

# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi
# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints py_test_cudf ./constraints.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
Expand Down
12 changes: 3 additions & 9 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,9 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Installing cudf_polars and its dependencies"
# Constraint to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf_polars \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints py_test_cudf_polars ./constraints.txt

# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
Expand Down
12 changes: 3 additions & 9 deletions ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,9 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements"
# Constraint to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_dask_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints py_test_dask_cudf ./constraints.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
Expand Down
10 changes: 6 additions & 4 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
Expand All @@ -46,6 +46,7 @@ dependencies:
- librdkafka>=2.5.0,<2.6.0a0
- librmm==24.12.*,>=0.0.0a0
- make
- mmh3
- moto>=4.0.8
- msgpack-python
- myst-nb
Expand All @@ -58,24 +59,25 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==4.0.1
- nvcomp==4.1.0.6
- nvtx>=0.2.1
- openpyxl
- packaging
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.12
- polars>=1.11,<1.13
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<18.0.0a0
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pytest-benchmark
- pytest-cases>=3.8.2
- pytest-cov
- pytest-xdist
- pytest<8
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
Expand Down
Loading

0 comments on commit 181bf90

Please sign in to comment.