From 27b00d74169ac7756c48d7b6878d66fa5d678530 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 29 Nov 2024 23:34:39 -0600 Subject: [PATCH] [ci] [python-package] [R-package] adapt to scikit-learn check_sample_weight_equivalence changes, stop testing against R 3.6 on Linux (#6733) --- .ci/install-old-r-packages.R | 79 ------------------------------ .ci/test-r-package.sh | 21 ++------ .github/workflows/r_package.yml | 19 +------ python-package/lightgbm/sklearn.py | 18 ++++--- 4 files changed, 18 insertions(+), 119 deletions(-) delete mode 100644 .ci/install-old-r-packages.R diff --git a/.ci/install-old-r-packages.R b/.ci/install-old-r-packages.R deleted file mode 100644 index e402c4d5ca12..000000000000 --- a/.ci/install-old-r-packages.R +++ /dev/null @@ -1,79 +0,0 @@ -# [description] -# -# Installs a pinned set of packages that worked together -# as of the last R 3.6 release. -# - -.install_packages <- function(packages) { - install.packages( # nolint: undesirable_function - pkgs = paste( # nolint: paste - "https://cran.r-project.org/src/contrib/Archive" - , packages - , sep = "/" - ) - , dependencies = FALSE - , lib = Sys.getenv("R_LIBS") - , repos = NULL - ) -} - -# when confronted with a bunch of URLs like this, install.packages() sometimes -# struggles to determine install order... so install packages in batches here, -# starting from the root of the dependency graph and working up - -# there was only a single release of {praise}, so there is no contrib/Archive URL for it -install.packages( # nolint: undesirable_function - pkgs = "https://cran.r-project.org/src/contrib/praise_1.0.0.tar.gz" - , dependencies = FALSE - , lib = Sys.getenv("R_LIBS") - , repos = NULL -) - -.install_packages(c( - "brio/brio_1.1.4.tar.gz" # nolint: non_portable_path - , "cli/cli_3.6.2.tar.gz" # nolint: non_portable_path - , "crayon/crayon_1.5.2.tar.gz" # nolint: non_portable_path - , "digest/digest_0.6.36.tar.gz" # nolint: non_portable_path - , "evaluate/evaluate_0.23.tar.gz" # nolint: non_portable_path - , "fansi/fansi_1.0.5.tar.gz" # nolint: non_portable_path - , "fs/fs_1.6.4.tar.gz" # nolint: non_portable_path - , "glue/glue_1.7.0.tar.gz" # nolint: non_portable_path - , "jsonlite/jsonlite_1.8.8.tar.gz" # nolint: non_portable_path - , "lattice/lattice_0.20-41.tar.gz" # nolint: non_portable_path - , "magrittr/magrittr_2.0.2.tar.gz" # nolint: non_portable_path - , "pkgconfig/pkgconfig_2.0.2.tar.gz" # nolint: non_portable_path - , "ps/ps_1.8.0.tar.gz" # nolint: non_portable_path - , "R6/R6_2.5.0.tar.gz" # nolint: non_portable_path - , "rlang/rlang_1.1.3.tar.gz" # nolint: non_portable_path - , "rprojroot/rprojroot_2.0.3.tar.gz" # nolint: non_portable_path - , "utf8/utf8_1.2.3.tar.gz" # nolint: non_portable_path - , "withr/withr_3.0.1.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "desc/desc_1.4.2.tar.gz" # nolint: non_portable_path - , "diffobj/diffobj_0.3.4.tar.gz" # nolint: non_portable_path - , "lifecycle/lifecycle_1.0.3.tar.gz" # nolint: non_portable_path - , "processx/processx_3.8.3.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "callr/callr_3.7.5.tar.gz" # nolint: non_portable_path - , "vctrs/vctrs_0.6.4.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pillar/pillar_1.8.1.tar.gz" # nolint: non_portable_path - , "tibble/tibble_3.2.0.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pkgbuild/pkgbuild_1.4.4.tar.gz" # nolint: non_portable_path - , "rematch2/rematch2_2.1.1.tar.gz" # nolint: non_portable_path - , "waldo/waldo_0.5.3.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pkgload/pkgload_1.3.4.tar.gz" # nolint: non_portable_path - , "testthat/testthat_3.2.1.tar.gz" # nolint: non_portable_path -)) diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index a076fab0186c..2e414ec0d282 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -20,12 +20,7 @@ fi # Get details needed for installing R components R_MAJOR_VERSION="${R_VERSION%.*}" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - export R_MAC_VERSION=3.6.3 - export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/R-${R_MAC_VERSION}.nn.pkg - export R_LINUX_VERSION="3.6.3-1bionic" - export R_APT_REPO="bionic-cran35/" -elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then +if [[ "${R_MAJOR_VERSION}" == "4" ]]; then export R_MAC_VERSION=4.3.1 export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-${ARCH}/base/R-${R_MAC_VERSION}-${ARCH}.pkg export R_LINUX_VERSION="4.3.1-1.2204.0" @@ -108,16 +103,10 @@ if [[ $OS_NAME == "macos" ]]; then export R_TIDYCMD=/usr/local/bin/tidy fi -# fix for issue where CRAN was not returning {evaluate}, {lattice}, or {waldo} when using R 3.6 -# "Warning: dependency ‘lattice’ is not available" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla ./.ci/install-old-r-packages.R -else - # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. - # This should be unnecessary on R >=4.4.0 - # ref: https://github.com/microsoft/LightGBM/issues/6433 - Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" -fi +# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. +# This should be unnecessary on R >=4.4.0 +# ref: https://github.com/microsoft/LightGBM/issues/6433 +Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" # manually install {Matrix}, as {Matrix}=1.7-0 raised its R floor all the way to R 4.4.0 # ref: https://github.com/microsoft/LightGBM/issues/6433 diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 8811f53b61c0..66e05a18ba1f 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -14,10 +14,6 @@ concurrency: cancel-in-progress: true env: - # https://github.com/actions/checkout/issues/1590#issuecomment-2207052044 - # - # this could be removed (hopefully) when R 3.6 support is removed - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true # in CMake-driven builds, parallelize compilation CMAKE_BUILD_PARALLEL_LEVEL: 4 # on Debian-based images, avoid interactive prompts @@ -48,12 +44,6 @@ jobs: ################ # CMake builds # ################ - - os: ubuntu-latest - task: r-package - compiler: gcc - r_version: 3.6 - build_type: cmake - container: 'ubuntu:18.04' - os: ubuntu-latest task: r-package compiler: gcc @@ -174,19 +164,12 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true - name: Install pandoc uses: r-lib/actions/setup-pandoc@v2 - if: matrix.container != 'ubuntu:18.04' - # R 3.6 binary isn't easily available on Ubuntu 18.04, - # but setup-pandoc>=2.7.1 is uses a too-new glibc for it. - # ref: https://github.com/microsoft/LightGBM/issues/6298 - - name: Install pandoc - uses: r-lib/actions/setup-pandoc@v2.6.0 - if: matrix.container == 'ubuntu:18.04' - name: Install tinytex if: startsWith(matrix.os, 'windows') uses: r-lib/actions/setup-tinytex@v2 diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 614e3c3cbe7f..d730b66c3556 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -673,6 +673,15 @@ def __init__( # is >=1.6. # ref: https://github.com/microsoft/LightGBM/pull/6651 def _more_tags(self) -> Dict[str, Any]: + check_sample_weight_str = ( + "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " + "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " + "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " + "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." + ) + # "check_sample_weight_equivalence" can be removed when lightgbm's + # minimum supported scikit-learn version is at least 1.6 + # ref: https://github.com/scikit-learn/scikit-learn/pull/30137 return { "allow_nan": True, "X_types": ["2darray", "sparse", "1dlabels"], @@ -680,12 +689,9 @@ def _more_tags(self) -> Dict[str, Any]: "check_no_attributes_set_in_init": "scikit-learn incorrectly asserts that private attributes " "cannot be set in __init__: " "(see https://github.com/microsoft/LightGBM/issues/2628)", - "check_sample_weight_equivalence": ( - "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " - "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " - "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " - "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." - ), + "check_sample_weight_equivalence": check_sample_weight_str, + "check_sample_weight_equivalence_on_dense_data": check_sample_weight_str, + "check_sample_weight_equivalence_on_sparse_data": check_sample_weight_str, }, }