From 5f4500ec896229c1a138daf165b3751a2af042aa Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 24 Nov 2021 04:13:09 +0300 Subject: [PATCH 01/15] add C API function that returns all param names with aliases --- helpers/parameter_generator.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py index ccb6675f37af..bff37fed9284 100644 --- a/helpers/parameter_generator.py +++ b/helpers/parameter_generator.py @@ -6,6 +6,7 @@ along with parameters description in LightGBM/docs/Parameters.rst file from the information in LightGBM/include/LightGBM/config.h file. """ +from collections import defaultdict from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -291,6 +292,7 @@ def gen_parameter_code( keys, infos = get_parameter_infos(config_hpp) names = get_names(infos) alias = get_alias(infos) + names_with_aliases = defaultdict(list) str_to_write = r"""/*! * Copyright (c) 2018 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. 
@@ -306,6 +308,7 @@ def gen_parameter_code( for pair in alias: str_to_write += f' {{"{pair[0]}", "{pair[1]}"}},\n' + names_with_aliases[pair[1]].append(pair[0]) str_to_write += " });\n" str_to_write += " return aliases;\n" str_to_write += "}\n\n" @@ -353,6 +356,21 @@ def gen_parameter_code( # tails str_to_write += " return str_buf.str();\n" str_to_write += "}\n\n" + + str_to_write += "std::string Config::DumpAliases() const {\n" + str_to_write += " std::stringstream str_buf;\n" + str_to_write += ' str_buf << "{";\n' + for idx, name in enumerate(names): + if idx > 0: + str_to_write += ', ";\n' + aliases = '\\", \\"'.join([alias for alias in names_with_aliases[name]]) + aliases = f'[\\"{aliases}\\"]' if aliases else '[]' + str_to_write += f' str_buf << "\\"{name}\\": {aliases}' + str_to_write += '";\n' + str_to_write += ' str_buf << "}";\n' + str_to_write += " return str_buf.str();\n" + str_to_write += "}\n\n" + str_to_write += "} // namespace LightGBM\n" with open(config_out_cpp, "w") as config_out_cpp_file: config_out_cpp_file.write(str_to_write) From db64868e4a26b1a055ccb821b1a64fbd4985aeb5 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 24 Nov 2021 04:13:51 +0300 Subject: [PATCH 02/15] add C API function that returns all param names with aliases --- .vsts-ci.yml | 345 +----------------------- include/LightGBM/c_api.h | 11 + include/LightGBM/config.h | 1 + python-package/lightgbm/basic.py | 111 ++------ src/c_api.cpp | 12 + src/io/config_auto.cpp | 139 ++++++++++ tests/python_package_test/test_basic.py | 11 + 7 files changed, 196 insertions(+), 434 deletions(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 25dc240282a7..5230f789bd8f 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -1,7 +1,7 @@ trigger: branches: include: - - master + - param_aliases tags: include: - v* @@ -22,238 +22,6 @@ resources: image: wch1/r-debug jobs: ########################################### -- job: Linux -########################################### - variables: - COMPILER: gcc - 
SETUP_CONDA: 'false' - OS_NAME: 'linux' - PRODUCES_ARTIFACTS: 'true' - pool: sh-ubuntu - container: ubuntu1404 - strategy: - matrix: - regular: - TASK: regular - sdist: - TASK: sdist - PYTHON_VERSION: 3.7 - bdist: - TASK: bdist - inference: - TASK: if-else - mpi_source: - TASK: mpi - METHOD: source - PYTHON_VERSION: 3.8 - gpu_source: - TASK: gpu - METHOD: source - swig: - TASK: swig - steps: - - script: | - echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" - echo "##vso[task.prependpath]$CONDA/bin" - AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK - echo "##vso[task.setvariable variable=AMDAPPSDK_PATH]$AMDAPPSDK_PATH" - LD_LIBRARY_PATH=$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH - echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH" - echo "##vso[task.setvariable variable=OPENCL_VENDOR_PATH]$AMDAPPSDK_PATH/etc/OpenCL/vendors" - displayName: 'Set variables' - - bash: $(Build.SourcesDirectory)/.ci/setup.sh - displayName: Setup - - bash: $(Build.SourcesDirectory)/.ci/test.sh - displayName: Test - - task: PublishBuildArtifacts@1 - condition: and(succeeded(), in(variables['TASK'], 'regular', 'sdist', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) - inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - artifactName: PackageAssets - artifactType: container -########################################### -- job: Linux_latest -########################################### - variables: - COMPILER: clang - DEBIAN_FRONTEND: 'noninteractive' - IN_UBUNTU_LATEST_CONTAINER: 'true' - OS_NAME: 'linux' - SETUP_CONDA: 'true' - pool: sh-ubuntu - container: ubuntu-latest - strategy: - matrix: - regular: - TASK: regular - PYTHON_VERSION: 3.6 - sdist: - TASK: sdist - bdist: - TASK: bdist - PYTHON_VERSION: 3.8 - inference: - TASK: if-else - mpi_source: - TASK: mpi - METHOD: source - mpi_pip: - TASK: mpi - METHOD: pip - 
PYTHON_VERSION: 3.8 - mpi_wheel: - TASK: mpi - METHOD: wheel - PYTHON_VERSION: 3.7 - gpu_source: - TASK: gpu - METHOD: source - gpu_pip: - TASK: gpu - METHOD: pip - PYTHON_VERSION: 3.6 - gpu_wheel: - TASK: gpu - METHOD: wheel - PYTHON_VERSION: 3.7 - cpp_tests: - TASK: cpp-tests - METHOD: with-sanitizers - steps: - - script: | - echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" - CONDA=$HOME/miniconda - echo "##vso[task.setvariable variable=CONDA]$CONDA" - echo "##vso[task.prependpath]$CONDA/bin" - AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK - echo "##vso[task.setvariable variable=AMDAPPSDK_PATH]$AMDAPPSDK_PATH" - LD_LIBRARY_PATH=$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH - echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH" - echo "##vso[task.setvariable variable=OPENCL_VENDOR_PATH]$AMDAPPSDK_PATH/etc/OpenCL/vendors" - displayName: 'Set variables' - # https://github.com/microsoft/azure-pipelines-agent/issues/2043#issuecomment-687983301 - - script: | - /tmp/docker exec -t -u 0 ci-container \ - sh -c "apt-get update && apt-get -o Dpkg::Options::="--force-confold" -y install sudo" - displayName: 'Install sudo' - - bash: $(Build.SourcesDirectory)/.ci/setup.sh - displayName: Setup - - bash: $(Build.SourcesDirectory)/.ci/test.sh - displayName: Test -########################################### -- job: QEMU_multiarch -########################################### - variables: - COMPILER: gcc - OS_NAME: 'linux' - PRODUCES_ARTIFACTS: 'true' - pool: - vmImage: ubuntu-latest - timeoutInMinutes: 180 - strategy: - matrix: - bdist: - TASK: bdist - ARCH: aarch64 - steps: - - script: | - sudo apt-get update - sudo apt-get install --no-install-recommends -y \ - binfmt-support \ - qemu \ - qemu-user \ - qemu-user-static - displayName: 'Install QEMU' - - script: | - docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - displayName: 'Enable 
Docker multi-architecture support' - - script: | - export ROOT_DOCKER_FOLDER=/LightGBM - cat > docker.env < docker-script.sh <& parameter_set(); std::vector> auc_mu_weights_matrix; std::vector> interaction_constraints_vector; + static std::string DumpAliases(); private: void CheckParamConflict(); diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index d878d32f09ae..083295355a56 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -324,96 +324,27 @@ class LGBMDeprecationWarning(UserWarning): class _ConfigAliases: - aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt", - "subsample_for_bin"}, - "boosting": {"boosting", - "boosting_type", - "boost"}, - "categorical_feature": {"categorical_feature", - "cat_feature", - "categorical_column", - "cat_column", - "categorical_features"}, - "data_random_seed": {"data_random_seed", - "data_seed"}, - "early_stopping_round": {"early_stopping_round", - "early_stopping_rounds", - "early_stopping", - "n_iter_no_change"}, - "enable_bundle": {"enable_bundle", - "is_enable_bundle", - "bundle"}, - "eval_at": {"eval_at", - "ndcg_eval_at", - "ndcg_at", - "map_eval_at", - "map_at"}, - "group_column": {"group_column", - "group", - "group_id", - "query_column", - "query", - "query_id"}, - "header": {"header", - "has_header"}, - "ignore_column": {"ignore_column", - "ignore_feature", - "blacklist"}, - "is_enable_sparse": {"is_enable_sparse", - "is_sparse", - "enable_sparse", - "sparse"}, - "label_column": {"label_column", - "label"}, - "linear_tree": {"linear_tree", - "linear_trees"}, - "local_listen_port": {"local_listen_port", - "local_port", - "port"}, - "machines": {"machines", - "workers", - "nodes"}, - "max_bin": {"max_bin", - "max_bins"}, - "metric": {"metric", - "metrics", - "metric_types"}, - "num_class": {"num_class", - "num_classes"}, - "num_iterations": {"num_iterations", - "num_iteration", - "n_iter", - "num_tree", - "num_trees", - "num_round", - 
"num_rounds", - "nrounds", - "num_boost_round", - "n_estimators", - "max_iter"}, - "num_machines": {"num_machines", - "num_machine"}, - "num_threads": {"num_threads", - "num_thread", - "nthread", - "nthreads", - "n_jobs"}, - "objective": {"objective", - "objective_type", - "app", - "application", - "loss"}, - "pre_partition": {"pre_partition", - "is_pre_partition"}, - "tree_learner": {"tree_learner", - "tree", - "tree_type", - "tree_learner_type"}, - "two_round": {"two_round", - "two_round_loading", - "use_two_round_loading"}, - "weight_column": {"weight_column", - "weight"}} + buffer_len = 1 << 20 + tmp_out_len = ctypes.c_int64(0) + string_buffer = ctypes.create_string_buffer(buffer_len) + ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) + _safe_call(_LIB.LGBM_DumpParamAliases( + ctypes.c_int64(buffer_len), + ctypes.byref(tmp_out_len), + ptr_string_buffer)) + actual_len = tmp_out_len.value + # if buffer length is not long enough, re-allocate a buffer + if actual_len > buffer_len: + string_buffer = ctypes.create_string_buffer(actual_len) + ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) + _safe_call(_LIB.LGBM_DumpParamAliases( + ctypes.c_int64(actual_len), + ctypes.byref(tmp_out_len), + ptr_string_buffer)) + aliases = json.loads( + string_buffer.value.decode('utf-8'), + object_hook=lambda obj: {k: set(v) | {k} for k, v in obj.items()} + ) @classmethod def get(cls, *args): diff --git a/src/c_api.cpp b/src/c_api.cpp index 9a471815b799..d8a8deaf57b0 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -888,6 +888,18 @@ const char* LGBM_GetLastError() { return LastErrorMsg(); } +int LGBM_DumpParamAliases(int64_t buffer_len, + int64_t* out_len, + char* out_str) { + API_BEGIN(); + std::string aliases = Config::DumpAliases(); + *out_len = static_cast<int64_t>(aliases.size()) + 1; + if (*out_len <= buffer_len) { + std::memcpy(out_str, aliases.c_str(), *out_len); + } + API_END(); +} + int LGBM_RegisterLogCallback(void (*callback)(const
char*)) { API_BEGIN(); Log::ResetCallBack(callback); diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 682264358893..15d3e9e351c4 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -756,4 +756,143 @@ std::string Config::SaveMembersToString() const { return str_buf.str(); } +std::string Config::DumpAliases() { + std::stringstream str_buf; + str_buf << "{"; + str_buf << "\"config\": [\"config_file\"], "; + str_buf << "\"task\": [\"task_type\"], "; + str_buf << "\"objective\": [\"objective_type\", \"app\", \"application\", \"loss\"], "; + str_buf << "\"boosting\": [\"boosting_type\", \"boost\"], "; + str_buf << "\"data\": [\"train\", \"train_data\", \"train_data_file\", \"data_filename\"], "; + str_buf << "\"valid\": [\"test\", \"valid_data\", \"valid_data_file\", \"test_data\", \"test_data_file\", \"valid_filenames\"], "; + str_buf << "\"num_iterations\": [\"num_iteration\", \"n_iter\", \"num_tree\", \"num_trees\", \"num_round\", \"num_rounds\", \"nrounds\", \"num_boost_round\", \"n_estimators\", \"max_iter\"], "; + str_buf << "\"learning_rate\": [\"shrinkage_rate\", \"eta\"], "; + str_buf << "\"num_leaves\": [\"num_leaf\", \"max_leaves\", \"max_leaf\", \"max_leaf_nodes\"], "; + str_buf << "\"tree_learner\": [\"tree\", \"tree_type\", \"tree_learner_type\"], "; + str_buf << "\"num_threads\": [\"num_thread\", \"nthread\", \"nthreads\", \"n_jobs\"], "; + str_buf << "\"device_type\": [\"device\"], "; + str_buf << "\"seed\": [\"random_seed\", \"random_state\"], "; + str_buf << "\"deterministic\": [], "; + str_buf << "\"force_col_wise\": [], "; + str_buf << "\"force_row_wise\": [], "; + str_buf << "\"histogram_pool_size\": [\"hist_pool_size\"], "; + str_buf << "\"max_depth\": [], "; + str_buf << "\"min_data_in_leaf\": [\"min_data_per_leaf\", \"min_data\", \"min_child_samples\", \"min_samples_leaf\"], "; + str_buf << "\"min_sum_hessian_in_leaf\": [\"min_sum_hessian_per_leaf\", \"min_sum_hessian\", \"min_hessian\", \"min_child_weight\"], "; 
+ str_buf << "\"bagging_fraction\": [\"sub_row\", \"subsample\", \"bagging\"], "; + str_buf << "\"pos_bagging_fraction\": [\"pos_sub_row\", \"pos_subsample\", \"pos_bagging\"], "; + str_buf << "\"neg_bagging_fraction\": [\"neg_sub_row\", \"neg_subsample\", \"neg_bagging\"], "; + str_buf << "\"bagging_freq\": [\"subsample_freq\"], "; + str_buf << "\"bagging_seed\": [\"bagging_fraction_seed\"], "; + str_buf << "\"feature_fraction\": [\"sub_feature\", \"colsample_bytree\"], "; + str_buf << "\"feature_fraction_bynode\": [\"sub_feature_bynode\", \"colsample_bynode\"], "; + str_buf << "\"feature_fraction_seed\": [], "; + str_buf << "\"extra_trees\": [\"extra_tree\"], "; + str_buf << "\"extra_seed\": [], "; + str_buf << "\"early_stopping_round\": [\"early_stopping_rounds\", \"early_stopping\", \"n_iter_no_change\"], "; + str_buf << "\"first_metric_only\": [], "; + str_buf << "\"max_delta_step\": [\"max_tree_output\", \"max_leaf_output\"], "; + str_buf << "\"lambda_l1\": [\"reg_alpha\", \"l1_regularization\"], "; + str_buf << "\"lambda_l2\": [\"reg_lambda\", \"lambda\", \"l2_regularization\"], "; + str_buf << "\"linear_lambda\": [], "; + str_buf << "\"min_gain_to_split\": [\"min_split_gain\"], "; + str_buf << "\"drop_rate\": [\"rate_drop\"], "; + str_buf << "\"max_drop\": [], "; + str_buf << "\"skip_drop\": [], "; + str_buf << "\"xgboost_dart_mode\": [], "; + str_buf << "\"uniform_drop\": [], "; + str_buf << "\"drop_seed\": [], "; + str_buf << "\"top_rate\": [], "; + str_buf << "\"other_rate\": [], "; + str_buf << "\"min_data_per_group\": [], "; + str_buf << "\"max_cat_threshold\": [], "; + str_buf << "\"cat_l2\": [], "; + str_buf << "\"cat_smooth\": [], "; + str_buf << "\"max_cat_to_onehot\": [], "; + str_buf << "\"top_k\": [\"topk\"], "; + str_buf << "\"monotone_constraints\": [\"mc\", \"monotone_constraint\", \"monotonic_cst\"], "; + str_buf << "\"monotone_constraints_method\": [\"monotone_constraining_method\", \"mc_method\"], "; + str_buf << "\"monotone_penalty\": 
[\"monotone_splits_penalty\", \"ms_penalty\", \"mc_penalty\"], "; + str_buf << "\"feature_contri\": [\"feature_contrib\", \"fc\", \"fp\", \"feature_penalty\"], "; + str_buf << "\"forcedsplits_filename\": [\"fs\", \"forced_splits_filename\", \"forced_splits_file\", \"forced_splits\"], "; + str_buf << "\"refit_decay_rate\": [], "; + str_buf << "\"cegb_tradeoff\": [], "; + str_buf << "\"cegb_penalty_split\": [], "; + str_buf << "\"cegb_penalty_feature_lazy\": [], "; + str_buf << "\"cegb_penalty_feature_coupled\": [], "; + str_buf << "\"path_smooth\": [], "; + str_buf << "\"interaction_constraints\": [], "; + str_buf << "\"verbosity\": [\"verbose\"], "; + str_buf << "\"input_model\": [\"model_input\", \"model_in\"], "; + str_buf << "\"output_model\": [\"model_output\", \"model_out\"], "; + str_buf << "\"saved_feature_importance_type\": [], "; + str_buf << "\"snapshot_freq\": [\"save_period\"], "; + str_buf << "\"linear_tree\": [\"linear_trees\"], "; + str_buf << "\"max_bin\": [\"max_bins\"], "; + str_buf << "\"max_bin_by_feature\": [], "; + str_buf << "\"min_data_in_bin\": [], "; + str_buf << "\"bin_construct_sample_cnt\": [\"subsample_for_bin\"], "; + str_buf << "\"data_random_seed\": [\"data_seed\"], "; + str_buf << "\"is_enable_sparse\": [\"is_sparse\", \"enable_sparse\", \"sparse\"], "; + str_buf << "\"enable_bundle\": [\"is_enable_bundle\", \"bundle\"], "; + str_buf << "\"use_missing\": [], "; + str_buf << "\"zero_as_missing\": [], "; + str_buf << "\"feature_pre_filter\": [], "; + str_buf << "\"pre_partition\": [\"is_pre_partition\"], "; + str_buf << "\"two_round\": [\"two_round_loading\", \"use_two_round_loading\"], "; + str_buf << "\"header\": [\"has_header\"], "; + str_buf << "\"label_column\": [\"label\"], "; + str_buf << "\"weight_column\": [\"weight\"], "; + str_buf << "\"group_column\": [\"group\", \"group_id\", \"query_column\", \"query\", \"query_id\"], "; + str_buf << "\"ignore_column\": [\"ignore_feature\", \"blacklist\"], "; + str_buf << 
"\"categorical_feature\": [\"cat_feature\", \"categorical_column\", \"cat_column\", \"categorical_features\"], "; + str_buf << "\"forcedbins_filename\": [], "; + str_buf << "\"save_binary\": [\"is_save_binary\", \"is_save_binary_file\"], "; + str_buf << "\"precise_float_parser\": [], "; + str_buf << "\"parser_config_file\": [], "; + str_buf << "\"start_iteration_predict\": [], "; + str_buf << "\"num_iteration_predict\": [], "; + str_buf << "\"predict_raw_score\": [\"is_predict_raw_score\", \"predict_rawscore\", \"raw_score\"], "; + str_buf << "\"predict_leaf_index\": [\"is_predict_leaf_index\", \"leaf_index\"], "; + str_buf << "\"predict_contrib\": [\"is_predict_contrib\", \"contrib\"], "; + str_buf << "\"predict_disable_shape_check\": [], "; + str_buf << "\"pred_early_stop\": [], "; + str_buf << "\"pred_early_stop_freq\": [], "; + str_buf << "\"pred_early_stop_margin\": [], "; + str_buf << "\"output_result\": [\"predict_result\", \"prediction_result\", \"predict_name\", \"prediction_name\", \"pred_name\", \"name_pred\"], "; + str_buf << "\"convert_model_language\": [], "; + str_buf << "\"convert_model\": [\"convert_model_file\"], "; + str_buf << "\"objective_seed\": [], "; + str_buf << "\"num_class\": [\"num_classes\"], "; + str_buf << "\"is_unbalance\": [\"unbalance\", \"unbalanced_sets\"], "; + str_buf << "\"scale_pos_weight\": [], "; + str_buf << "\"sigmoid\": [], "; + str_buf << "\"boost_from_average\": [], "; + str_buf << "\"reg_sqrt\": [], "; + str_buf << "\"alpha\": [], "; + str_buf << "\"fair_c\": [], "; + str_buf << "\"poisson_max_delta_step\": [], "; + str_buf << "\"tweedie_variance_power\": [], "; + str_buf << "\"lambdarank_truncation_level\": [], "; + str_buf << "\"lambdarank_norm\": [], "; + str_buf << "\"label_gain\": [], "; + str_buf << "\"metric\": [\"metrics\", \"metric_types\"], "; + str_buf << "\"metric_freq\": [\"output_freq\"], "; + str_buf << "\"is_provide_training_metric\": [\"training_metric\", \"is_training_metric\", \"train_metric\"], "; 
+ str_buf << "\"eval_at\": [\"ndcg_eval_at\", \"ndcg_at\", \"map_eval_at\", \"map_at\"], "; + str_buf << "\"multi_error_top_k\": [], "; + str_buf << "\"auc_mu_weights\": [], "; + str_buf << "\"num_machines\": [\"num_machine\"], "; + str_buf << "\"local_listen_port\": [\"local_port\", \"port\"], "; + str_buf << "\"time_out\": [], "; + str_buf << "\"machine_list_filename\": [\"machine_list_file\", \"machine_list\", \"mlist\"], "; + str_buf << "\"machines\": [\"workers\", \"nodes\"], "; + str_buf << "\"gpu_platform_id\": [], "; + str_buf << "\"gpu_device_id\": [], "; + str_buf << "\"gpu_use_dp\": [], "; + str_buf << "\"num_gpu\": []"; + str_buf << "}"; + return str_buf.str(); +} + } // namespace LightGBM diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 40ad062fb8a7..486ee2692cca 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -569,3 +569,14 @@ def test_smoke_custom_parser(tmp_path): with pytest.raises(lgb.basic.LightGBMError, match="Cannot find parser class 'dummy', please register first or check config format"): data.construct() + + +def test_param_aliases(): + aliases = lgb.basic._ConfigAliases.aliases + assert isinstance(aliases, dict) + assert len(aliases) > 100 + assert all(isinstance(i, set) for i in aliases.values()) + assert all(len(i) >= 1 for i in aliases.values()) + assert all(k in v for k, v in aliases.items()) + assert aliases['config'] == {'config', 'config_file'} + assert aliases['task'] == {'task', 'task_type'} From cdff5e8fee9233cb1cb3e001a638aab3b01e9593 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 02:01:46 +0300 Subject: [PATCH 03/15] add R code --- R-package/R/aliases.R | 135 +++++---------------- R-package/src/lightgbm_R.cpp | 21 ++++ R-package/src/lightgbm_R.h | 6 + R-package/tests/testthat/test_parameters.R | 1 + 4 files changed, 59 insertions(+), 104 deletions(-) diff --git a/R-package/R/aliases.R 
b/R-package/R/aliases.R index 7cd3245727af..cbc76cd96f95 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -7,85 +7,30 @@ # [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character # vector of corresponding aliases. .DATASET_PARAMETERS <- function() { - return( - list( - "bin_construct_sample_cnt" = c( - "bin_construct_sample_cnt" - , "subsample_for_bin" - ) - , "categorical_feature" = c( - "categorical_feature" - , "cat_feature" - , "categorical_column" - , "cat_column" - , "categorical_features" - ) - , "data_random_seed" = c( - "data_random_seed" - , "data_seed" - ) - , "enable_bundle" = c( - "enable_bundle" - , "is_enable_bundle" - , "bundle" - ) - , "feature_pre_filter" = "feature_pre_filter" - , "forcedbins_filename" = "forcedbins_filename" - , "group_column" = c( - "group_column" - , "group" - , "group_id" - , "query_column" - , "query" - , "query_id" - ) - , "header" = c( - "header" - , "has_header" - ) - , "ignore_column" = c( - "ignore_column" - , "ignore_feature" - , "blacklist" - ) - , "is_enable_sparse" = c( - "is_enable_sparse" - , "is_sparse" - , "enable_sparse" - , "sparse" - ) - , "label_column" = c( - "label_column" - , "label" - ) - , "linear_tree" = c( - "linear_tree" - , "linear_trees" - ) - , "max_bin" = c( - "max_bin" - , "max_bins" - ) - , "max_bin_by_feature" = "max_bin_by_feature" - , "min_data_in_bin" = "min_data_in_bin" - , "pre_partition" = c( - "pre_partition" - , "is_pre_partition" - ) - , "precise_float_parser" = "precise_float_parser" - , "two_round" = c( - "two_round" - , "two_round_loading" - , "use_two_round_loading" - ) - , "use_missing" = "use_missing" - , "weight_column" = c( - "weight_column" - , "weight" - ) - , "zero_as_missing" = "zero_as_missing" - ) - ) + all_aliases <- .PARAMETER_ALIASES() + return(all_aliases[c( + "bin_construct_sample_cnt" + , "categorical_feature" + , "data_random_seed" + , "enable_bundle" + , "feature_pre_filter" + , 
"forcedbins_filename" + , "group_column" + , "header" + , "ignore_column" + , "is_enable_sparse" + , "label_column" + , "linear_tree" + , "max_bin" + , "max_bin_by_feature" + , "min_data_in_bin" + , "pre_partition" + , "precise_float_parser" + , "two_round" + , "use_missing" + , "weight_column" + , "zero_as_missing" + )]) } # [description] List of respected parameter aliases. Wrapped in a function to take advantage of @@ -93,33 +38,15 @@ # [return] A named list, where each key is a main LightGBM parameter and each value is a character # vector of corresponding aliases. .PARAMETER_ALIASES <- function() { - learning_params <- list( - "boosting" = c( - "boosting" - , "boost" - , "boosting_type" - ) - , "early_stopping_round" = c( - "early_stopping_round" - , "early_stopping_rounds" - , "early_stopping" - , "n_iter_no_change" - ) - , "num_iterations" = c( - "num_iterations" - , "num_iteration" - , "n_iter" - , "num_tree" - , "num_trees" - , "num_round" - , "num_rounds" - , "nrounds" - , "num_boost_round" - , "n_estimators" - , "max_iter" + aliases <- jsonlite::fromJSON( + .Call( + LGBM_DumpParamAliases_R ) ) - return(c(learning_params, .DATASET_PARAMETERS())) + for (alias in names(aliases)) { + aliases[[alias]] <- c(aliases[[alias]], alias) + } + return(aliases) } # [description] diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 2150e652841f..3a3582b12cf1 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -872,6 +872,26 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle, R_API_END(); } +SEXP LGBM_DumpParamAliases_R() { + SEXP cont_token = PROTECT(R_MakeUnwindCont()); + R_API_BEGIN(); + SEXP aliases_str; + int64_t out_len = 0; + int64_t buf_len = 1024 * 1024; + std::vector<char> inner_char_buf(buf_len); + CHECK_CALL(LGBM_DumpParamAliases(buf_len, &out_len, inner_char_buf.data())); + // if aliases string was larger than the initial buffer, allocate a bigger buffer and try again + if (out_len > buf_len) { +
inner_char_buf.resize(out_len); + CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data())); + } + aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token)); + SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token)); + UNPROTECT(2); + return aliases_str; + R_API_END(); +} + // .Call() calls static const R_CallMethodDef CallEntries[] = { {"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1}, @@ -916,6 +936,7 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, + {"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 41e2fbab13db..40032d7b44cb 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -596,4 +596,10 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R( SEXP feature_importance_type ); +/*!
+* \brief Dump parameter aliases to JSON +* \return R character vector (length=1) with aliases JSON +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R(); + #endif // LIGHTGBM_R_H_ diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index 16d1e4a5a5e2..ab8e6624851e 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -50,6 +50,7 @@ context("parameter aliases") test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", { param_aliases <- .PARAMETER_ALIASES() expect_identical(class(param_aliases), "list") + expect_true(length(param_aliases) > 100) expect_true(is.character(names(param_aliases))) expect_true(is.character(param_aliases[["boosting"]])) expect_true(is.character(param_aliases[["early_stopping_round"]])) From 620cb94aee44cbc2bee4a2a09010a86f13a7a3f1 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 02:02:09 +0300 Subject: [PATCH 04/15] test R code --- .github/workflows/r_package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index bff33548ce1d..93ac87eea5cb 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -3,7 +3,7 @@ name: R-package on: push: branches: - - master + - param_aliases pull_request: branches: - master From 0cbb6dcfb69c0bed17cd52e2839c30496da5ea0f Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 02:27:11 +0300 Subject: [PATCH 05/15] remove debug CI --- .github/workflows/r_package.yml | 2 +- .vsts-ci.yml | 345 +++++++++++++++++++++++- tests/python_package_test/test_basic.py | 3 +- 3 files changed, 346 insertions(+), 4 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 93ac87eea5cb..bff33548ce1d 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -3,7 +3,7 @@ name: 
R-package on: push: branches: - - param_aliases + - master pull_request: branches: - master diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 5230f789bd8f..25dc240282a7 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -1,7 +1,7 @@ trigger: branches: include: - - param_aliases + - master tags: include: - v* @@ -22,6 +22,238 @@ resources: image: wch1/r-debug jobs: ########################################### +- job: Linux +########################################### + variables: + COMPILER: gcc + SETUP_CONDA: 'false' + OS_NAME: 'linux' + PRODUCES_ARTIFACTS: 'true' + pool: sh-ubuntu + container: ubuntu1404 + strategy: + matrix: + regular: + TASK: regular + sdist: + TASK: sdist + PYTHON_VERSION: 3.7 + bdist: + TASK: bdist + inference: + TASK: if-else + mpi_source: + TASK: mpi + METHOD: source + PYTHON_VERSION: 3.8 + gpu_source: + TASK: gpu + METHOD: source + swig: + TASK: swig + steps: + - script: | + echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" + echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" + echo "##vso[task.prependpath]$CONDA/bin" + AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK + echo "##vso[task.setvariable variable=AMDAPPSDK_PATH]$AMDAPPSDK_PATH" + LD_LIBRARY_PATH=$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH + echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH" + echo "##vso[task.setvariable variable=OPENCL_VENDOR_PATH]$AMDAPPSDK_PATH/etc/OpenCL/vendors" + displayName: 'Set variables' + - bash: $(Build.SourcesDirectory)/.ci/setup.sh + displayName: Setup + - bash: $(Build.SourcesDirectory)/.ci/test.sh + displayName: Test + - task: PublishBuildArtifacts@1 + condition: and(succeeded(), in(variables['TASK'], 'regular', 'sdist', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) + inputs: + pathtoPublish: '$(Build.ArtifactStagingDirectory)' + artifactName: PackageAssets + artifactType: container +########################################### +- job: Linux_latest 
+########################################### + variables: + COMPILER: clang + DEBIAN_FRONTEND: 'noninteractive' + IN_UBUNTU_LATEST_CONTAINER: 'true' + OS_NAME: 'linux' + SETUP_CONDA: 'true' + pool: sh-ubuntu + container: ubuntu-latest + strategy: + matrix: + regular: + TASK: regular + PYTHON_VERSION: 3.6 + sdist: + TASK: sdist + bdist: + TASK: bdist + PYTHON_VERSION: 3.8 + inference: + TASK: if-else + mpi_source: + TASK: mpi + METHOD: source + mpi_pip: + TASK: mpi + METHOD: pip + PYTHON_VERSION: 3.8 + mpi_wheel: + TASK: mpi + METHOD: wheel + PYTHON_VERSION: 3.7 + gpu_source: + TASK: gpu + METHOD: source + gpu_pip: + TASK: gpu + METHOD: pip + PYTHON_VERSION: 3.6 + gpu_wheel: + TASK: gpu + METHOD: wheel + PYTHON_VERSION: 3.7 + cpp_tests: + TASK: cpp-tests + METHOD: with-sanitizers + steps: + - script: | + echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" + echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" + CONDA=$HOME/miniconda + echo "##vso[task.setvariable variable=CONDA]$CONDA" + echo "##vso[task.prependpath]$CONDA/bin" + AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK + echo "##vso[task.setvariable variable=AMDAPPSDK_PATH]$AMDAPPSDK_PATH" + LD_LIBRARY_PATH=$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH + echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH" + echo "##vso[task.setvariable variable=OPENCL_VENDOR_PATH]$AMDAPPSDK_PATH/etc/OpenCL/vendors" + displayName: 'Set variables' + # https://github.com/microsoft/azure-pipelines-agent/issues/2043#issuecomment-687983301 + - script: | + /tmp/docker exec -t -u 0 ci-container \ + sh -c "apt-get update && apt-get -o Dpkg::Options::="--force-confold" -y install sudo" + displayName: 'Install sudo' + - bash: $(Build.SourcesDirectory)/.ci/setup.sh + displayName: Setup + - bash: $(Build.SourcesDirectory)/.ci/test.sh + displayName: Test +########################################### +- job: QEMU_multiarch +########################################### + 
variables: + COMPILER: gcc + OS_NAME: 'linux' + PRODUCES_ARTIFACTS: 'true' + pool: + vmImage: ubuntu-latest + timeoutInMinutes: 180 + strategy: + matrix: + bdist: + TASK: bdist + ARCH: aarch64 + steps: + - script: | + sudo apt-get update + sudo apt-get install --no-install-recommends -y \ + binfmt-support \ + qemu \ + qemu-user \ + qemu-user-static + displayName: 'Install QEMU' + - script: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + displayName: 'Enable Docker multi-architecture support' + - script: | + export ROOT_DOCKER_FOLDER=/LightGBM + cat > docker.env < docker-script.sh <= 1 for i in aliases.values()) assert all(k in v for k, v in aliases.items()) - assert aliases['config'] == {'config', 'config_file'} - assert aliases['task'] == {'task', 'task_type'} + assert lgb.basic._ConfigAliases.get('config', 'task') == {'config', 'config_file', 'task', 'task_type'} From f25b7a16db0b2b65462c360f81231aed3c454af3 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 02:40:42 +0300 Subject: [PATCH 06/15] fix R lint --- R-package/tests/testthat/test_parameters.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index ab8e6624851e..f379c96e17b3 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -50,7 +50,7 @@ context("parameter aliases") test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", { param_aliases <- .PARAMETER_ALIASES() expect_identical(class(param_aliases), "list") - expect_true(length(param_aliases) > 100) + expect_true(length(param_aliases) > 100L) expect_true(is.character(names(param_aliases))) expect_true(is.character(param_aliases[["boosting"]])) expect_true(is.character(param_aliases[["early_stopping_round"]])) From d35233a0d7d46b4d4fa56fb95e37c26eea9ee5fd Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 
27 Nov 2021 17:53:52 +0300 Subject: [PATCH 07/15] refactor --- python-package/lightgbm/basic.py | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 083295355a56..6066e22703e6 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -324,27 +324,31 @@ class LGBMDeprecationWarning(UserWarning): class _ConfigAliases: - buffer_len = 1 << 20 - tmp_out_len = ctypes.c_int64(0) - string_buffer = ctypes.create_string_buffer(buffer_len) - ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) - _safe_call(_LIB.LGBM_DumpParamAliases( - ctypes.c_int64(buffer_len), - ctypes.byref(tmp_out_len), - ptr_string_buffer)) - actual_len = tmp_out_len.value - # if buffer length is not long enough, re-allocate a buffer - if actual_len > buffer_len: - string_buffer = ctypes.create_string_buffer(actual_len) + def _get_all_param_aliases(): + buffer_len = 1 << 20 + tmp_out_len = ctypes.c_int64(0) + string_buffer = ctypes.create_string_buffer(buffer_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_DumpParamAliases( - ctypes.c_int64(actual_len), + ctypes.c_int64(buffer_len), ctypes.byref(tmp_out_len), ptr_string_buffer)) - aliases = json.loads( - string_buffer.value.decode('utf-8'), - object_hook=lambda obj: {k: set(v) | {k} for k, v in obj.items()} - ) + actual_len = tmp_out_len.value + # if buffer length is not long enough, re-allocate a buffer + if actual_len > buffer_len: + string_buffer = ctypes.create_string_buffer(actual_len) + ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) + _safe_call(_LIB.LGBM_DumpParamAliases( + ctypes.c_int64(actual_len), + ctypes.byref(tmp_out_len), + ptr_string_buffer)) + aliases = json.loads( + string_buffer.value.decode('utf-8'), + object_hook=lambda obj: {k: set(v) | {k} for k, v in obj.items()} + ) + return aliases + + 
aliases = _get_all_param_aliases() @classmethod def get(cls, *args): From a80f2f66a81ddb7e6883789ea590695010a5777c Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 17:54:43 +0300 Subject: [PATCH 08/15] run CI --- .github/workflows/static_analysis.yml | 2 +- docs/conf.py | 30 +++++++++++---------------- python-package/lightgbm/basic.py | 8 +++++-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 123a93e2462b..356f3ba13fbf 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -5,7 +5,7 @@ name: Static Analysis on: push: branches: - - master + - dev pull_request: branches: - master diff --git a/docs/conf.py b/docs/conf.py index 13751145d422..adfbef507626 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,6 @@ from shutil import copytree from subprocess import PIPE, Popen from typing import Any, List -from unittest.mock import Mock import sphinx from docutils.nodes import reference @@ -40,22 +39,6 @@ INTERNAL_REF_REGEX = compile(r"(?P\.\/.+)(?P\.rst)(?P$|#)") -# -- mock out modules -MOCK_MODULES = [ - 'dask', - 'dask.distributed', - 'datatable', - 'graphviz', - 'matplotlib', - 'numpy', - 'pandas', - 'scipy', - 'scipy.sparse', - 'sklearn' -] -for mod_name in MOCK_MODULES: - sys.modules[mod_name] = Mock() - class InternalRefTransform(Transform): """Replaces '.rst' with '.html' in all internal links like './[Something].rst[#anchor]'.""" @@ -109,7 +92,18 @@ def run(self) -> List: "inherited-members": True, "show-inheritance": True, } - +autodoc_mock_imports = [ + 'dask', + 'dask.distributed', + 'datatable', + 'graphviz', + 'matplotlib', + 'numpy', + 'pandas', + 'scipy', + 'scipy.sparse', + 'sklearn', +] # hide type hints in API docs autodoc_typehints = "none" diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 6066e22703e6..c2e647886fb0 100644 --- 
a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -324,6 +324,10 @@ class LGBMDeprecationWarning(UserWarning): class _ConfigAliases: + # lazy evaluation to allow import without dynamic library, e.g., for docs generation + aliases = None + + @staticmethod def _get_all_param_aliases(): buffer_len = 1 << 20 tmp_out_len = ctypes.c_int64(0) @@ -348,10 +352,10 @@ def _get_all_param_aliases(): ) return aliases - aliases = _get_all_param_aliases() - @classmethod def get(cls, *args): + if cls.aliases is None: + cls.aliases = cls._get_all_param_aliases() ret = set() for i in args: ret |= cls.aliases.get(i, {i}) From d0e689a8d437462745720d8ff0da811b6e109245 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 18:59:06 +0300 Subject: [PATCH 09/15] fix R --- .github/workflows/r_package.yml | 2 +- R-package/R/aliases.R | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index bff33548ce1d..e0c3f0e45419 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -3,7 +3,7 @@ name: R-package on: push: branches: - - master + - dev pull_request: branches: - master diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index cbc76cd96f95..980c723f52e7 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -44,7 +44,8 @@ ) ) for (alias in names(aliases)) { - aliases[[alias]] <- c(aliases[[alias]], alias) + aliases_with_main_name <- if (!length(aliases[[alias]])) alias else c(aliases[[alias]], alias) + aliases[[alias]] <- aliases_with_main_name } return(aliases) } From 270ffa01e03ba88d8a71d9daab0d3a628f19a925 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 19:15:14 +0300 Subject: [PATCH 10/15] fix --- helpers/parameter_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py index bff37fed9284..a90ce22648fb 100644 --- 
a/helpers/parameter_generator.py +++ b/helpers/parameter_generator.py @@ -357,7 +357,7 @@ def gen_parameter_code( str_to_write += " return str_buf.str();\n" str_to_write += "}\n\n" - str_to_write += "std::string Config::DumpAliases() const {\n" + str_to_write += "std::string Config::DumpAliases() {\n" str_to_write += " std::stringstream str_buf;\n" str_to_write += ' str_buf << "{";\n' for idx, name in enumerate(names): From 3a8c7c1bd8750b22ee7ce09448a8dfd5d12a169b Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 19:15:46 +0300 Subject: [PATCH 11/15] revert CI checks --- .github/workflows/r_package.yml | 2 +- .github/workflows/static_analysis.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index e0c3f0e45419..bff33548ce1d 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -3,7 +3,7 @@ name: R-package on: push: branches: - - dev + - master pull_request: branches: - master diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 356f3ba13fbf..123a93e2462b 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -5,7 +5,7 @@ name: Static Analysis on: push: branches: - - dev + - master pull_request: branches: - master From ae38093a39b4aea1ada84280d677e0845e88f7b9 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 27 Nov 2021 19:17:10 +0300 Subject: [PATCH 12/15] revert changes in docs --- docs/conf.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index adfbef507626..13751145d422 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,6 +25,7 @@ from shutil import copytree from subprocess import PIPE, Popen from typing import Any, List +from unittest.mock import Mock import sphinx from docutils.nodes import reference @@ -39,6 +40,22 @@ INTERNAL_REF_REGEX = 
compile(r"(?P\.\/.+)(?P\.rst)(?P$|#)") +# -- mock out modules +MOCK_MODULES = [ + 'dask', + 'dask.distributed', + 'datatable', + 'graphviz', + 'matplotlib', + 'numpy', + 'pandas', + 'scipy', + 'scipy.sparse', + 'sklearn' +] +for mod_name in MOCK_MODULES: + sys.modules[mod_name] = Mock() + class InternalRefTransform(Transform): """Replaces '.rst' with '.html' in all internal links like './[Something].rst[#anchor]'.""" @@ -92,18 +109,7 @@ def run(self) -> List: "inherited-members": True, "show-inheritance": True, } -autodoc_mock_imports = [ - 'dask', - 'dask.distributed', - 'datatable', - 'graphviz', - 'matplotlib', - 'numpy', - 'pandas', - 'scipy', - 'scipy.sparse', - 'sklearn', -] + # hide type hints in API docs autodoc_typehints = "none" From a0745f2e6085743c56b110267b8c98e949605a75 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 1 Dec 2021 03:40:58 +0300 Subject: [PATCH 13/15] Try to make function `const` Co-authored-by: James Lamb --- include/LightGBM/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 74e0def65606..481dd6638f9f 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -1040,7 +1040,7 @@ struct Config { static const std::unordered_set& parameter_set(); std::vector> auc_mu_weights_matrix; std::vector> interaction_constraints_vector; - static std::string DumpAliases(); + static const std::string DumpAliases(); private: void CheckParamConflict(); From 9a73cb4de65004b9d5740ebf4f19997f1b84f986 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 2 Dec 2021 04:42:38 +0300 Subject: [PATCH 14/15] add `const` in cpp file --- helpers/parameter_generator.py | 2 +- src/io/config_auto.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py index a90ce22648fb..bcc6d34bfa9a 100644 --- a/helpers/parameter_generator.py +++ b/helpers/parameter_generator.py @@ -357,7 +357,7 @@ 
def gen_parameter_code( str_to_write += " return str_buf.str();\n" str_to_write += "}\n\n" - str_to_write += "std::string Config::DumpAliases() {\n" + str_to_write += "const std::string Config::DumpAliases() {\n" str_to_write += " std::stringstream str_buf;\n" str_to_write += ' str_buf << "{";\n' for idx, name in enumerate(names): diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 15d3e9e351c4..9f3dd7a188f1 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -756,7 +756,7 @@ std::string Config::SaveMembersToString() const { return str_buf.str(); } -std::string Config::DumpAliases() { +const std::string Config::DumpAliases() { std::stringstream str_buf; str_buf << "{"; str_buf << "\"config\": [\"config_file\"], "; From 25467f3ac31e795a953dbe4be9f8f6d6cf75de9e Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 2 Dec 2021 23:13:32 +0300 Subject: [PATCH 15/15] address review comments and sync with `master` --- R-package/R/aliases.R | 10 +++++----- R-package/tests/testthat/test_parameters.R | 1 + python-package/lightgbm/basic.py | 8 +++++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index 980c723f52e7..77fe74ef2af0 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -38,16 +38,16 @@ # [return] A named list, where each key is a main LightGBM parameter and each value is a character # vector of corresponding aliases. 
.PARAMETER_ALIASES <- function() { - aliases <- jsonlite::fromJSON( + params_to_aliases <- jsonlite::fromJSON( .Call( LGBM_DumpParamAliases_R ) ) - for (alias in names(aliases)) { - aliases_with_main_name <- if (!length(aliases[[alias]])) alias else c(aliases[[alias]], alias) - aliases[[alias]] <- aliases_with_main_name + for (main_name in names(params_to_aliases)) { + aliases_with_main_name <- c(main_name, unlist(params_to_aliases[[main_name]])) + params_to_aliases[[main_name]] <- aliases_with_main_name } - return(aliases) + return(params_to_aliases) } # [description] diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index f379c96e17b3..0e790e29bace 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -59,6 +59,7 @@ test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where expect_true(length(names(param_aliases)) == length(param_aliases)) expect_true(all(sapply(param_aliases, is.character))) expect_true(length(unique(names(param_aliases))) == length(param_aliases)) + expect_equal(sort(param_aliases[["task"]]), c("task", "task_type")) }) test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", { diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index befc692e825a..1b0824bc692c 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -329,7 +329,7 @@ class _ConfigAliases: aliases = None @staticmethod - def _get_all_param_aliases(): + def _get_all_param_aliases() -> Dict[str, Set[str]]: buffer_len = 1 << 20 tmp_out_len = ctypes.c_int64(0) string_buffer = ctypes.create_string_buffer(buffer_len) @@ -354,7 +354,7 @@ def _get_all_param_aliases(): return aliases @classmethod - def get(cls, *args): + def get(cls, *args) -> Set[str]: if cls.aliases is None: cls.aliases = cls._get_all_param_aliases() ret = set() @@ -363,7 +363,9 @@ def get(cls, 
*args): return ret @classmethod - def get_by_alias(cls, *args): + def get_by_alias(cls, *args) -> Set[str]: + if cls.aliases is None: + cls.aliases = cls._get_all_param_aliases() ret = set(args) for arg in args: for aliases in cls.aliases.values():