From 953cd393f05ef2c470f04bbc804bafb7af5624c7 Mon Sep 17 00:00:00 2001 From: guolinke Date: Fri, 28 Feb 2020 17:42:09 +0800 Subject: [PATCH 1/7] don't cache `num_thread`, to avoid change outside --- include/LightGBM/dataset.h | 5 +---- include/LightGBM/utils/array_args.h | 7 +------ include/LightGBM/utils/common.h | 12 ++---------- include/LightGBM/utils/openmp_wrapper.h | 9 +++++++++ include/LightGBM/utils/threading.h | 10 ++-------- src/application/predictor.hpp | 6 +----- src/c_api.cpp | 7 +------ src/io/dataset.cpp | 15 +++------------ src/io/multi_val_sparse_bin.hpp | 5 +---- src/io/sparse_bin.hpp | 7 +------ src/metric/map_metric.hpp | 13 +++---------- src/metric/rank_metric.hpp | 13 +++---------- src/treelearner/data_parallel_tree_learner.cpp | 5 +++-- src/treelearner/serial_tree_learner.cpp | 10 +++------- src/treelearner/serial_tree_learner.h | 1 - src/treelearner/voting_parallel_tree_learner.cpp | 5 +++-- 16 files changed, 37 insertions(+), 93 deletions(-) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 0ba7dd71764b..8846d7ade3af 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -292,10 +292,7 @@ struct TrainingTempState { return; } multi_val_bin.reset(bin); - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_get_num_threads(); num_bin_aligned = (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize; size_t new_size = static_cast(num_bin_aligned) * 2 * num_threads; diff --git a/include/LightGBM/utils/array_args.h b/include/LightGBM/utils/array_args.h index 49beb077823f..2935f1619397 100644 --- a/include/LightGBM/utils/array_args.h +++ b/include/LightGBM/utils/array_args.h @@ -21,12 +21,7 @@ template class ArrayArgs { public: inline static size_t ArgMaxMT(const std::vector& array) { - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { - num_threads = omp_get_num_threads(); - } + int num_threads = omp_get_num_threads(); std::vector arg_maxs(num_threads, 0); int n_blocks = Threading::For( 0, array.size(), 1024, diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 1e447e973c2e..8886fe2ec226 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -727,12 +727,7 @@ template inline static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) { size_t len = _Last - _First; const size_t kMinInnerLen = 1024; - int num_threads = 1; - #pragma omp parallel - #pragma omp master - { - num_threads = omp_get_num_threads(); - } + int num_threads = omp_num_threads(); if (len <= kMinInnerLen || num_threads <= 1) { std::sort(_First, _Last, _Pred); return; @@ -1032,10 +1027,7 @@ class Timer { public: Timer() { #ifdef TIMETAG - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_num_threads(); start_time_.resize(num_threads); stats_.resize(num_threads); #endif // TIMETAG diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 20c90e063291..4bb93463d74b 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -15,6 +15,14 @@ #include #include +inline int omp_num_threads() { + int ret = 1; +#pragma omp parallel +#pragma omp master + { ret = omp_get_num_threads(); } + return ret; +} + class ThreadExceptionHelper { public: ThreadExceptionHelper() { @@ -70,6 +78,7 @@ class ThreadExceptionHelper { inline void omp_set_num_threads(int) {} inline int omp_get_num_threads() {return 1;} inline int omp_get_thread_num() {return 0;} + inline int omp_num_threads() { return 1; } #ifdef __cplusplus }; // extern "C" #endif diff --git a/include/LightGBM/utils/threading.h b/include/LightGBM/utils/threading.h index c4cb57a49d73..4a0acedc0cb0 100644 --- a/include/LightGBM/utils/threading.h +++ b/include/LightGBM/utils/threading.h @@ -21,10 +21,7 @@ class Threading { template static inline void BlockInfo(INDEX_T cnt, INDEX_T min_cnt_per_block, int* out_nblock, INDEX_T* block_size) { - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_num_threads(); BlockInfo(num_threads, cnt, min_cnt_per_block, out_nblock, block_size); } @@ -84,10 +81,7 @@ class ParallelPartitionRunner { public: ParallelPartitionRunner(INDEX_T num_data, INDEX_T min_block_size) : min_block_size_(min_block_size) { - num_threads_ = 1; -#pragma omp parallel -#pragma omp master - { num_threads_ = omp_get_num_threads(); } + num_threads_ = omp_get_num_threads(); left_.resize(num_data); if (TWO_BUFFER) { right_.resize(num_data); diff --git a/src/application/predictor.hpp b/src/application/predictor.hpp index 6580186cb396..65c2fc88df7a 100644 --- a/src/application/predictor.hpp +++ b/src/application/predictor.hpp @@ -56,16 +56,13 @@ class Predictor { } } -#pragma omp parallel -#pragma omp master - { num_threads_ = omp_get_num_threads(); } boosting->InitPredict(num_iteration, predict_contrib); boosting_ = boosting; num_pred_one_row_ = boosting_->NumPredictOneRow( num_iteration, predict_leaf_index, predict_contrib); num_feature_ = boosting_->MaxFeatureIdx() + 1; predict_buf_.resize( - num_threads_, + omp_get_num_threads(), std::vector>( num_feature_, 0.0f)); const int kFeatureThreshold = 100000; @@ -281,7 +278,6 @@ class Predictor { PredictionEarlyStopInstance early_stop_; int num_feature_; int num_pred_one_row_; - int num_threads_; std::vector>> predict_buf_; }; diff --git a/src/c_api.cpp b/src/c_api.cpp index 60c32ce30968..763996d43c5b 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -1529,12 +1529,7 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle, if (config.num_threads > 0) { omp_set_num_threads(config.num_threads); } - int num_threads = 1; - #pragma omp parallel - #pragma omp master - { - num_threads = omp_get_num_threads(); - } + int num_threads = omp_get_num_threads(); int ncol = static_cast(ncol_ptr - 1); std::vector> iterators(num_threads, std::vector()); for (int i = 0; i < num_threads; ++i) { diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index 888bb22be963..ab4e53b8acf7 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -506,10 +506,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { } const auto& offsets = feature_groups_[multi_group_id]->bin_offsets_; const int num_feature = feature_groups_[multi_group_id]->num_feature_; - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_get_num_threads(); std::vector>> iters(num_threads); std::vector most_freq_bins; @@ -539,10 +536,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { Common::FunctionTimer fun_time("Dataset::GetMultiBinFromAllFeatures", global_timer); - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_get_num_threads(); double sum_dense_ratio = 0; std::unique_ptr ret; @@ -1185,10 +1179,7 @@ void Dataset::ConstructHistogramsMultiVal( if (multi_val_bin == nullptr) { return; } - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_get_num_threads(); global_timer.Start("Dataset::sparse_bin_histogram"); const int num_bin = multi_val_bin->num_bin(); diff --git a/src/io/multi_val_sparse_bin.hpp b/src/io/multi_val_sparse_bin.hpp index 7af83c92cbbd..47259ac9d3d5 100644 --- a/src/io/multi_val_sparse_bin.hpp +++ b/src/io/multi_val_sparse_bin.hpp @@ -27,10 +27,7 @@ class MultiValSparseBin : public MultiValBin { INDEX_T estimate_num_data = static_cast(estimate_element_per_row_ * 1.1) * static_cast(num_data_); - int num_threads = 1; -#pragma omp parallel -#pragma omp master - { num_threads = omp_get_num_threads(); } + int num_threads = omp_get_num_threads(); if (num_threads > 1) { t_data_.resize(num_threads - 1); for (size_t i = 0; i < t_data_.size(); ++i) { diff --git a/src/io/sparse_bin.hpp b/src/io/sparse_bin.hpp index 591add46f0ed..476e2cacd62a 100644 --- a/src/io/sparse_bin.hpp +++ b/src/io/sparse_bin.hpp @@ -73,12 +73,7 @@ class SparseBin: public Bin { explicit SparseBin(data_size_t num_data) : num_data_(num_data) { - int num_threads = 1; - #pragma omp parallel - #pragma omp master - { - num_threads = omp_get_num_threads(); - } + int num_threads = omp_get_num_threads(); push_buffers_.resize(num_threads); } diff --git a/src/metric/map_metric.hpp b/src/metric/map_metric.hpp index f6f79e1a4678..7d9c27fe1f36 100644 --- a/src/metric/map_metric.hpp +++ b/src/metric/map_metric.hpp @@ -23,12 +23,6 @@ class MapMetric:public Metric { // get eval position eval_at_ = config.eval_at; DCGCalculator::DefaultEvalAt(&eval_at_); - // get number of threads - #pragma omp parallel - #pragma omp master - { - num_threads_ = omp_get_num_threads(); - } } ~MapMetric() { @@ -110,8 +104,9 @@ class MapMetric:public Metric { } std::vector Eval(const double* score, const ObjectiveFunction*) const override { // some buffers for multi-threading sum up + int num_thread = omp_get_num_threads(); std::vector> result_buffer_; - for (int i = 0; i < num_threads_; ++i) { + for (int i = 0; i < num_thread; ++i) { result_buffer_.emplace_back(eval_at_.size(), 0.0f); } std::vector tmp_map(eval_at_.size(), 0.0f); @@ -139,7 +134,7 @@ class MapMetric:public Metric { // Get final average MAP std::vector result(eval_at_.size(), 0.0f); for (size_t j = 0; j < result.size(); ++j) { - for (int i = 0; i < num_threads_; ++i) { + for (int i = 0; i < num_thread; ++i) { result[j] += result_buffer_[i][j]; } result[j] /= sum_query_weights_; @@ -162,8 +157,6 @@ class MapMetric:public Metric { double sum_query_weights_; /*! \brief Evaluate position of Nmap */ std::vector eval_at_; - /*! \brief Number of threads */ - int num_threads_; std::vector name_; std::vector npos_per_query_; }; diff --git a/src/metric/rank_metric.hpp b/src/metric/rank_metric.hpp index 9c258dde6201..2c08256a2bb5 100644 --- a/src/metric/rank_metric.hpp +++ b/src/metric/rank_metric.hpp @@ -26,12 +26,6 @@ class NDCGMetric:public Metric { DCGCalculator::DefaultLabelGain(&label_gain); // initialize DCG calculator DCGCalculator::Init(label_gain); - // get number of threads - #pragma omp parallel - #pragma omp master - { - num_threads_ = omp_get_num_threads(); - } } ~NDCGMetric() { @@ -89,9 +83,10 @@ class NDCGMetric:public Metric { } std::vector Eval(const double* score, const ObjectiveFunction*) const override { + int num_threads = omp_num_threads(); // some buffers for multi-threading sum up std::vector> result_buffer_; - for (int i = 0; i < num_threads_; ++i) { + for (int i = 0; i < num_threads; ++i) { result_buffer_.emplace_back(eval_at_.size(), 0.0f); } std::vector tmp_dcg(eval_at_.size(), 0.0f); @@ -139,7 +134,7 @@ class NDCGMetric:public Metric { // Get final average NDCG std::vector result(eval_at_.size(), 0.0f); for (size_t j = 0; j < result.size(); ++j) { - for (int i = 0; i < num_threads_; ++i) { + for (int i = 0; i < num_threads; ++i) { result[j] += result_buffer_[i][j]; } result[j] /= sum_query_weights_; @@ -166,8 +161,6 @@ class NDCGMetric:public Metric { std::vector eval_at_; /*! \brief Cache the inverse max dcg for all queries */ std::vector> inverse_max_dcgs_; - /*! \brief Number of threads */ - int num_threads_; }; } // namespace LightGBM diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 7c8c0d4c5cda..6e063fa7b579 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -165,8 +165,9 @@ void DataParallelTreeLearner::FindBestSplits() { template void DataParallelTreeLearner::FindBestSplitsFromHistograms(const std::vector&, bool) { - std::vector smaller_bests_per_thread(this->num_threads_, SplitInfo()); - std::vector larger_bests_per_thread(this->num_threads_, SplitInfo()); + int num_threads = omp_num_threads(); + std::vector smaller_bests_per_thread(num_threads, SplitInfo()); + std::vector larger_bests_per_thread(num_threads, SplitInfo()); std::vector smaller_node_used_features(this->num_features_, 1); std::vector larger_node_used_features(this->num_features_, 1); if (this->config_->feature_fraction_bynode < 1.0f) { diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 6f9e65397853..b275fa6345b1 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -21,11 +21,6 @@ namespace LightGBM { SerialTreeLearner::SerialTreeLearner(const Config* config) :config_(config) { random_ = Random(config_->feature_fraction_seed); - #pragma omp parallel - #pragma omp master - { - num_threads_ = omp_get_num_threads(); - } } SerialTreeLearner::~SerialTreeLearner() { @@ -400,8 +395,9 @@ void SerialTreeLearner::FindBestSplitsFromHistograms( const std::vector& is_feature_used, bool use_subtract) { Common::FunctionTimer fun_timer( "SerialTreeLearner::FindBestSplitsFromHistograms", global_timer); - std::vector smaller_best(num_threads_); - std::vector larger_best(num_threads_); + int num_threads = omp_num_threads(); + std::vector smaller_best(num_threads); + std::vector larger_best(num_threads); std::vector smaller_node_used_features(num_features_, 1); std::vector larger_node_used_features(num_features_, 1); if (config_->feature_fraction_bynode < 1.0f) { diff --git a/src/treelearner/serial_tree_learner.h b/src/treelearner/serial_tree_learner.h index dd267208eadd..197ed032c484 100644 --- a/src/treelearner/serial_tree_learner.h +++ b/src/treelearner/serial_tree_learner.h @@ -188,7 +188,6 @@ class SerialTreeLearner: public TreeLearner { HistogramPool histogram_pool_; /*! \brief config of tree learner*/ const Config* config_; - int num_threads_; std::vector ordered_bin_indices_; bool is_constant_hessian_; std::unique_ptr temp_state_; diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index 71ebbf9a97e3..412992a389e1 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -349,8 +349,9 @@ void VotingParallelTreeLearner::FindBestSplits() { template void VotingParallelTreeLearner::FindBestSplitsFromHistograms(const std::vector&, bool) { - std::vector smaller_bests_per_thread(this->num_threads_); - std::vector larger_best_per_thread(this->num_threads_); + int num_threads = omp_num_threads(); + std::vector smaller_bests_per_thread(num_threads); + std::vector larger_best_per_thread(num_threads); std::vector smaller_node_used_features(this->num_features_, 1); std::vector larger_node_used_features(this->num_features_, 1); if (this->config_->feature_fraction_bynode < 1.0f) { From 8bc8510dab162700ce98976ea352469f5ca3eadc Mon Sep 17 00:00:00 2001 From: guolinke Date: Fri, 28 Feb 2020 17:47:15 +0800 Subject: [PATCH 2/7] rename --- include/LightGBM/dataset.h | 2 +- include/LightGBM/utils/array_args.h | 2 +- include/LightGBM/utils/common.h | 4 ++-- include/LightGBM/utils/openmp_wrapper.h | 4 ++-- include/LightGBM/utils/threading.h | 4 ++-- src/application/predictor.hpp | 2 +- src/c_api.cpp | 2 +- src/io/dataset.cpp | 6 +++--- src/io/multi_val_sparse_bin.hpp | 2 +- src/io/sparse_bin.hpp | 2 +- src/metric/map_metric.hpp | 2 +- src/metric/rank_metric.hpp | 2 +- src/treelearner/data_parallel_tree_learner.cpp | 2 +- src/treelearner/serial_tree_learner.cpp | 2 +- src/treelearner/voting_parallel_tree_learner.cpp | 2 +- 15 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 8846d7ade3af..d63f1c966960 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -292,7 +292,7 @@ struct TrainingTempState { return; } multi_val_bin.reset(bin); - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); num_bin_aligned = (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize; size_t new_size = static_cast(num_bin_aligned) * 2 * num_threads; diff --git a/include/LightGBM/utils/array_args.h b/include/LightGBM/utils/array_args.h index 2935f1619397..0183ecc22ddb 100644 --- a/include/LightGBM/utils/array_args.h +++ b/include/LightGBM/utils/array_args.h @@ -21,7 +21,7 @@ template class ArrayArgs { public: inline static size_t ArgMaxMT(const std::vector& array) { - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); std::vector arg_maxs(num_threads, 0); int n_blocks = Threading::For( 0, array.size(), 1024, diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 8886fe2ec226..fffed09fc288 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -727,7 +727,7 @@ template inline static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) { size_t len = _Last - _First; const size_t kMinInnerLen = 1024; - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); if (len <= kMinInnerLen || num_threads <= 1) { std::sort(_First, _Last, _Pred); return; @@ -1027,7 +1027,7 @@ class Timer { public: Timer() { #ifdef TIMETAG - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); start_time_.resize(num_threads); stats_.resize(num_threads); #endif // TIMETAG diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 4bb93463d74b..123ca37c1a05 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -15,7 +15,7 @@ #include #include -inline int omp_num_threads() { +inline int OMP_NUM_THREADS() { int ret = 1; #pragma omp parallel #pragma omp master @@ -78,7 +78,7 @@ class ThreadExceptionHelper { inline void omp_set_num_threads(int) {} inline int omp_get_num_threads() {return 1;} inline int omp_get_thread_num() {return 0;} - inline int omp_num_threads() { return 1; } + inline int OMP_NUM_THREADS() { return 1; } #ifdef __cplusplus }; // extern "C" #endif diff --git a/include/LightGBM/utils/threading.h b/include/LightGBM/utils/threading.h index 4a0acedc0cb0..7202b47b85ad 100644 --- a/include/LightGBM/utils/threading.h +++ b/include/LightGBM/utils/threading.h @@ -21,7 +21,7 @@ class Threading { template static inline void BlockInfo(INDEX_T cnt, INDEX_T min_cnt_per_block, int* out_nblock, INDEX_T* block_size) { - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); BlockInfo(num_threads, cnt, min_cnt_per_block, out_nblock, block_size); } @@ -81,7 +81,7 @@ class ParallelPartitionRunner { public: ParallelPartitionRunner(INDEX_T num_data, INDEX_T min_block_size) : min_block_size_(min_block_size) { - num_threads_ = omp_get_num_threads(); + num_threads_ = OMP_NUM_THREADS(); left_.resize(num_data); if (TWO_BUFFER) { right_.resize(num_data); diff --git a/src/application/predictor.hpp b/src/application/predictor.hpp index 65c2fc88df7a..a38e872de5b3 100644 --- a/src/application/predictor.hpp +++ b/src/application/predictor.hpp @@ -62,7 +62,7 @@ class Predictor { num_iteration, predict_leaf_index, predict_contrib); num_feature_ = boosting_->MaxFeatureIdx() + 1; predict_buf_.resize( - omp_get_num_threads(), + OMP_NUM_THREADS(), std::vector>( num_feature_, 0.0f)); const int kFeatureThreshold = 100000; diff --git a/src/c_api.cpp b/src/c_api.cpp index 763996d43c5b..bb8585502073 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -1529,7 +1529,7 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle, if (config.num_threads > 0) { omp_set_num_threads(config.num_threads); } - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); int ncol = static_cast(ncol_ptr - 1); std::vector> iterators(num_threads, std::vector()); for (int i = 0; i < num_threads; ++i) { diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index ab4e53b8acf7..7cf179038948 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -506,7 +506,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { } const auto& offsets = feature_groups_[multi_group_id]->bin_offsets_; const int num_feature = feature_groups_[multi_group_id]->num_feature_; - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); std::vector>> iters(num_threads); std::vector most_freq_bins; @@ -536,7 +536,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { Common::FunctionTimer fun_time("Dataset::GetMultiBinFromAllFeatures", global_timer); - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); double sum_dense_ratio = 0; std::unique_ptr ret; @@ -1179,7 +1179,7 @@ void Dataset::ConstructHistogramsMultiVal( if (multi_val_bin == nullptr) { return; } - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); global_timer.Start("Dataset::sparse_bin_histogram"); const int num_bin = multi_val_bin->num_bin(); diff --git a/src/io/multi_val_sparse_bin.hpp b/src/io/multi_val_sparse_bin.hpp index 47259ac9d3d5..b2d06175bfb2 100644 --- a/src/io/multi_val_sparse_bin.hpp +++ b/src/io/multi_val_sparse_bin.hpp @@ -27,7 +27,7 @@ class MultiValSparseBin : public MultiValBin { INDEX_T estimate_num_data = static_cast(estimate_element_per_row_ * 1.1) * static_cast(num_data_); - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); if (num_threads > 1) { t_data_.resize(num_threads - 1); for (size_t i = 0; i < t_data_.size(); ++i) { diff --git a/src/io/sparse_bin.hpp b/src/io/sparse_bin.hpp index 476e2cacd62a..57e507b6bf76 100644 --- a/src/io/sparse_bin.hpp +++ b/src/io/sparse_bin.hpp @@ -73,7 +73,7 @@ class SparseBin: public Bin { explicit SparseBin(data_size_t num_data) : num_data_(num_data) { - int num_threads = omp_get_num_threads(); + int num_threads = OMP_NUM_THREADS(); push_buffers_.resize(num_threads); } diff --git a/src/metric/map_metric.hpp b/src/metric/map_metric.hpp index 7d9c27fe1f36..ec0f5f2d8179 100644 --- a/src/metric/map_metric.hpp +++ b/src/metric/map_metric.hpp @@ -104,7 +104,7 @@ class MapMetric:public Metric { } std::vector Eval(const double* score, const ObjectiveFunction*) const override { // some buffers for multi-threading sum up - int num_thread = omp_get_num_threads(); + int num_thread = OMP_NUM_THREADS(); std::vector> result_buffer_; for (int i = 0; i < num_thread; ++i) { result_buffer_.emplace_back(eval_at_.size(), 0.0f); diff --git a/src/metric/rank_metric.hpp b/src/metric/rank_metric.hpp index 2c08256a2bb5..3b3afb547eb9 100644 --- a/src/metric/rank_metric.hpp +++ b/src/metric/rank_metric.hpp @@ -83,7 +83,7 @@ class NDCGMetric:public Metric { } std::vector Eval(const double* score, const ObjectiveFunction*) const override { - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); // some buffers for multi-threading sum up std::vector> result_buffer_; for (int i = 0; i < num_threads; ++i) { diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 6e063fa7b579..d0b732380763 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -165,7 +165,7 @@ void DataParallelTreeLearner::FindBestSplits() { template void DataParallelTreeLearner::FindBestSplitsFromHistograms(const std::vector&, bool) { - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); std::vector smaller_bests_per_thread(num_threads, SplitInfo()); std::vector larger_bests_per_thread(num_threads, SplitInfo()); std::vector smaller_node_used_features(this->num_features_, 1); diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index b275fa6345b1..e2ae3038fe21 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -395,7 +395,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms( const std::vector& is_feature_used, bool use_subtract) { Common::FunctionTimer fun_timer( "SerialTreeLearner::FindBestSplitsFromHistograms", global_timer); - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); std::vector smaller_best(num_threads); std::vector larger_best(num_threads); std::vector smaller_node_used_features(num_features_, 1); diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index 412992a389e1..d34c31ff5d4f 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -349,7 +349,7 @@ void VotingParallelTreeLearner::FindBestSplits() { template void VotingParallelTreeLearner::FindBestSplitsFromHistograms(const std::vector&, bool) { - int num_threads = omp_num_threads(); + int num_threads = OMP_NUM_THREADS(); std::vector smaller_bests_per_thread(num_threads); std::vector larger_best_per_thread(num_threads); std::vector smaller_node_used_features(this->num_features_, 1); From ae3f216f028e5bc6c8a9e7e8e25b399f74ec9d3e Mon Sep 17 00:00:00 2001 From: guolinke Date: Fri, 28 Feb 2020 18:29:44 +0800 Subject: [PATCH 3/7] update document --- docs/Parameters.rst | 2 ++ include/LightGBM/config.h | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 62f7f184609e..c811af0bd47b 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -171,6 +171,8 @@ Core Parameters - for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication + - **Note**: Please **don't** change this during training, especifically when running multiple jobs simultaneously by external packages, otherwise may cause undesible errors. + - ``device_type`` :raw-html:`🔗︎`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, aliases: ``device`` - device for the tree learning, you can use GPU to achieve the faster learning diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 6e5dd1b3ea51..243b11880f0f 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -192,6 +192,7 @@ struct Config { // desc = do not set it too large if your dataset is small (for instance, do not use 64 threads for a dataset with 10,000 rows) // desc = be aware a task manager or any similar CPU monitoring tool might report that cores not being fully utilized. **This is normal** // desc = for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication + // desc = **Note**: Please **don't** change this during training, especifically when running multiple jobs simultaneously by external packages, otherwise may cause undesible errors. int num_threads = 0; // [doc-only] From bc4a26bc47e1e24e71a57bf1a6bb2962aae0b543 Mon Sep 17 00:00:00 2001 From: Guolin Ke Date: Fri, 28 Feb 2020 18:39:05 +0800 Subject: [PATCH 4/7] Update docs/Parameters.rst --- docs/Parameters.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index c811af0bd47b..4c2c852e7c68 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -171,7 +171,7 @@ Core Parameters - for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication - - **Note**: Please **don't** change this during training, especifically when running multiple jobs simultaneously by external packages, otherwise may cause undesible errors. + - **Note**: Please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors. - ``device_type`` :raw-html:`🔗︎`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, aliases: ``device`` From 77854ac399bdce5a4293200c784b0eeb98780344 Mon Sep 17 00:00:00 2001 From: Guolin Ke Date: Fri, 28 Feb 2020 18:39:41 +0800 Subject: [PATCH 5/7] Update include/LightGBM/config.h --- include/LightGBM/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 243b11880f0f..beab1bd45afb 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -192,7 +192,7 @@ struct Config { // desc = do not set it too large if your dataset is small (for instance, do not use 64 threads for a dataset with 10,000 rows) // desc = be aware a task manager or any similar CPU monitoring tool might report that cores not being fully utilized. **This is normal** // desc = for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication - // desc = **Note**: Please **don't** change this during training, especifically when running multiple jobs simultaneously by external packages, otherwise may cause undesible errors. + // desc = **Note**: Please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors. int num_threads = 0; // [doc-only] From 76cec0e670cf954da6682c5ba05fe79a70f92313 Mon Sep 17 00:00:00 2001 From: Guolin Ke Date: Sat, 29 Feb 2020 10:32:44 +0800 Subject: [PATCH 6/7] Apply suggestions from code review Co-Authored-By: Nikita Titov --- docs/Parameters.rst | 2 +- include/LightGBM/config.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 4c2c852e7c68..573602c47f61 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -171,7 +171,7 @@ Core Parameters - for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication - - **Note**: Please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors. + - **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors - ``device_type`` :raw-html:`🔗︎`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, aliases: ``device`` diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index beab1bd45afb..8ca2066e141b 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -192,7 +192,7 @@ struct Config { // desc = do not set it too large if your dataset is small (for instance, do not use 64 threads for a dataset with 10,000 rows) // desc = be aware a task manager or any similar CPU monitoring tool might report that cores not being fully utilized. **This is normal** // desc = for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication - // desc = **Note**: Please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors. + // desc = **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors int num_threads = 0; // [doc-only] From 560f652c6b8e70ac5d6c7b06ab8cf22081449e84 Mon Sep 17 00:00:00 2001 From: Guolin Ke Date: Sat, 29 Feb 2020 23:16:47 +0800 Subject: [PATCH 7/7] Apply suggestions from code review Co-Authored-By: Nikita Titov --- src/metric/map_metric.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/metric/map_metric.hpp b/src/metric/map_metric.hpp index ec0f5f2d8179..18539ee44ee0 100644 --- a/src/metric/map_metric.hpp +++ b/src/metric/map_metric.hpp @@ -104,9 +104,9 @@ class MapMetric:public Metric { } std::vector Eval(const double* score, const ObjectiveFunction*) const override { // some buffers for multi-threading sum up - int num_thread = OMP_NUM_THREADS(); + int num_threads = OMP_NUM_THREADS(); std::vector> result_buffer_; - for (int i = 0; i < num_thread; ++i) { + for (int i = 0; i < num_threads; ++i) { result_buffer_.emplace_back(eval_at_.size(), 0.0f); } std::vector tmp_map(eval_at_.size(), 0.0f); @@ -134,7 +134,7 @@ class MapMetric:public Metric { // Get final average MAP std::vector result(eval_at_.size(), 0.0f); for (size_t j = 0; j < result.size(); ++j) { - for (int i = 0; i < num_thread; ++i) { + for (int i = 0; i < num_threads; ++i) { result[j] += result_buffer_[i][j]; } result[j] /= sum_query_weights_;