diff --git a/src/objective/cuda/cuda_binary_objective.cpp b/src/objective/cuda/cuda_binary_objective.cpp
index 35889c488ce5..5670e458bccc 100644
--- a/src/objective/cuda/cuda_binary_objective.cpp
+++ b/src/objective/cuda/cuda_binary_objective.cpp
@@ -69,7 +69,7 @@ double CUDABinaryLogloss::BoostFromScore(int) const {
   CopyFromCUDADeviceToHost<double>(&boost_from_score, cuda_boost_from_score_, 1, __FILE__, __LINE__);
   double pavg = 0.0f;
   CopyFromCUDADeviceToHost<double>(&pavg, cuda_sum_weights_, 1, __FILE__, __LINE__);
-  Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, boost_from_score);
+  Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, boost_from_score);
   return boost_from_score;
 }
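Note: the "pavg -> initscore" message logged above reports the standard binary-logloss prior: the init score is the logit of the (weighted) average positive rate, divided by the sigmoid parameter. A minimal standalone sketch of that conversion (plain C++; the function name and example numbers are hypothetical, not LightGBM code):

    #include <cmath>
    #include <cstdio>

    // Weighted positive rate -> logit init score, mirroring what
    // BoostFromScore logs as "pavg=... -> initscore=...".
    double InitScoreSketch(double sum_labels, double sum_weights, double sigmoid) {
      const double pavg = sum_labels / sum_weights;    // average positive rate
      return std::log(pavg / (1.0 - pavg)) / sigmoid;  // inverse of the sigmoid
    }

    int main() {
      // 30% positives with sigmoid = 1.0 gives initscore = log(0.3 / 0.7) ~ -0.847
      std::printf("initscore=%f\n", InitScoreSketch(30.0, 100.0, 1.0));
      return 0;
    }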
diff --git a/src/objective/cuda/cuda_binary_objective.cu b/src/objective/cuda/cuda_binary_objective.cu
index 6f1711d64629..334ca6f2c963 100644
--- a/src/objective/cuda/cuda_binary_objective.cu
+++ b/src/objective/cuda/cuda_binary_objective.cu
@@ -12,9 +12,9 @@

 namespace LightGBM {

-template <bool USE_WEIGHT, bool IS_OVA>
+template <bool USE_WEIGHT>
 __global__ void BoostFromScoreKernel_1_BinaryLogloss(const label_t* cuda_labels, const data_size_t num_data, double* out_cuda_sum_labels,
-  double* out_cuda_sum_weights, const label_t* cuda_weights, const int ova_class_id) {
+  double* out_cuda_sum_weights, const label_t* cuda_weights) {
   __shared__ double shared_buffer[32];
   const uint32_t mask = 0xffffffff;
   const uint32_t warpLane = threadIdx.x % warpSize;
@@ -27,12 +27,12 @@ __global__ void BoostFromScoreKernel_1_BinaryLogloss(const label_t* cuda_labels,
     if (USE_WEIGHT) {
       const label_t cuda_label = cuda_labels[index];
       const double sample_weight = cuda_weights[index];
-      const label_t label = IS_OVA ? (static_cast<int>(cuda_label) == ova_class_id ? 1 : 0) : (cuda_label > 0 ? 1 : 0);
+      const label_t label = cuda_label > 0 ? 1 : 0;
       label_value = label * sample_weight;
       weight_value = sample_weight;
     } else {
       const label_t cuda_label = cuda_labels[index];
-      label_value = IS_OVA ? (static_cast<int>(cuda_label) == ova_class_id ? 1 : 0) : (cuda_label > 0 ? 1 : 0);
+      label_value = cuda_label > 0 ? 1 : 0;
     }
   }
   for (uint32_t offset = warpSize / 2; offset >= 1; offset >>= 1) {
@@ -88,22 +88,13 @@ __global__ void BoostFromScoreKernel_2_BinaryLogloss(double* out_cuda_sum_labels

 void CUDABinaryLogloss::LaunchBoostFromScoreKernel() const {
   const int num_blocks = (num_data_ + CALC_INIT_SCORE_BLOCK_SIZE_BINARY - 1) / CALC_INIT_SCORE_BLOCK_SIZE_BINARY;
-  if (ova_class_id_ == -1) {
-    if (cuda_weights_ == nullptr) {
-      BoostFromScoreKernel_1_BinaryLogloss<false, false><<<num_blocks, CALC_INIT_SCORE_BLOCK_SIZE_BINARY>>>
-        (cuda_label_, num_data_, cuda_boost_from_score_, cuda_sum_weights_, cuda_weights_, ova_class_id_);
-    } else {
-      BoostFromScoreKernel_1_BinaryLogloss<true, false><<<num_blocks, CALC_INIT_SCORE_BLOCK_SIZE_BINARY>>>
-        (cuda_label_, num_data_, cuda_boost_from_score_, cuda_sum_weights_, cuda_weights_, ova_class_id_);
-    }
+  SetCUDAMemory<double>(cuda_boost_from_score_, 0, 1, __FILE__, __LINE__);
+  if (cuda_weights_ == nullptr) {
+    BoostFromScoreKernel_1_BinaryLogloss<false><<<num_blocks, CALC_INIT_SCORE_BLOCK_SIZE_BINARY>>>
+      (cuda_label_, num_data_, cuda_boost_from_score_, cuda_sum_weights_, cuda_weights_);
   } else {
-    if (cuda_weights_ == nullptr) {
-      BoostFromScoreKernel_1_BinaryLogloss<false, true><<<num_blocks, CALC_INIT_SCORE_BLOCK_SIZE_BINARY>>>
-        (cuda_label_, num_data_, cuda_boost_from_score_, cuda_sum_weights_, cuda_weights_, ova_class_id_);
-    } else {
-      BoostFromScoreKernel_1_BinaryLogloss<true, true><<<num_blocks, CALC_INIT_SCORE_BLOCK_SIZE_BINARY>>>
-        (cuda_label_, num_data_, cuda_boost_from_score_, cuda_sum_weights_, cuda_weights_, ova_class_id_);
-    }
+    BoostFromScoreKernel_1_BinaryLogloss<true><<<num_blocks, CALC_INIT_SCORE_BLOCK_SIZE_BINARY>>>
+      (cuda_label_, num_data_, cuda_boost_from_score_, cuda_sum_weights_, cuda_weights_);
   }
   SynchronizeCUDADevice(__FILE__, __LINE__);
   if (cuda_weights_ == nullptr) {
@@ -114,15 +105,15 @@ void CUDABinaryLogloss::LaunchBoostFromScoreKernel() const {
   SynchronizeCUDADevice(__FILE__, __LINE__);
 }

-template <bool USE_WEIGHT, bool USE_LABEL_WEIGHT, bool IS_OVA>
+template <bool USE_WEIGHT, bool USE_LABEL_WEIGHT>
 __global__ void GetGradientsKernel_BinaryLogloss(const double* cuda_scores, const label_t* cuda_labels,
-  const double* cuda_label_weights, const label_t* cuda_weights, const int ova_class_id,
+  const double* cuda_label_weights, const label_t* cuda_weights,
   const double sigmoid, const data_size_t num_data, score_t* cuda_out_gradients, score_t* cuda_out_hessians) {
   const data_size_t data_index = static_cast<data_size_t>(blockDim.x * blockIdx.x + threadIdx.x);
   if (data_index < num_data) {
     const label_t cuda_label = static_cast<label_t>(cuda_labels[data_index]);
-    const int label = IS_OVA ? (cuda_label == ova_class_id ? 1 : -1) : (cuda_label > 0 ? 1 : -1);
+    const int label = cuda_label > 0 ? 1 : -1;
     const double response = -label * sigmoid / (1.0f + exp(label * sigmoid * cuda_scores[data_index]));
     const double abs_response = fabs(response);
     if (!USE_WEIGHT) {
@@ -153,7 +144,6 @@ __global__ void GetGradientsKernel_BinaryLogloss(const double* cuda_scores, cons
       cuda_label_, \
       cuda_label_weights_, \
       cuda_weights_, \
-      ova_class_id_, \
       sigmoid_, \
       num_data_, \
       gradients, \
@@ -161,33 +151,17 @@ __global__ void GetGradientsKernel_BinaryLogloss(const double* cuda_scores, cons

 void CUDABinaryLogloss::LaunchGetGradientsKernel(const double* scores, score_t* gradients, score_t* hessians) const {
   const int num_blocks = (num_data_ + GET_GRADIENTS_BLOCK_SIZE_BINARY - 1) / GET_GRADIENTS_BLOCK_SIZE_BINARY;
-  if (ova_class_id_ == -1) {
-    if (cuda_label_weights_ == nullptr) {
-      if (cuda_weights_ == nullptr) {
-        GetGradientsKernel_BinaryLogloss<false, false, false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      } else {
-        GetGradientsKernel_BinaryLogloss<true, false, false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      }
+  if (cuda_label_weights_ == nullptr) {
+    if (cuda_weights_ == nullptr) {
+      GetGradientsKernel_BinaryLogloss<false, false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
     } else {
-      if (cuda_weights_ == nullptr) {
-        GetGradientsKernel_BinaryLogloss<false, true, false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      } else {
-        GetGradientsKernel_BinaryLogloss<true, true, false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      }
+      GetGradientsKernel_BinaryLogloss<true, false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
     }
   } else {
-    if (cuda_label_weights_ == nullptr) {
-      if (cuda_weights_ == nullptr) {
-        GetGradientsKernel_BinaryLogloss<false, false, true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      } else {
-        GetGradientsKernel_BinaryLogloss<true, false, true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      }
+    if (cuda_weights_ == nullptr) {
+      GetGradientsKernel_BinaryLogloss<false, true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
     } else {
-      if (cuda_weights_ == nullptr) {
-        GetGradientsKernel_BinaryLogloss<false, true, true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      } else {
-        GetGradientsKernel_BinaryLogloss<true, true, true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
-      }
+      GetGradientsKernel_BinaryLogloss<true, true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_BINARY>>>(GetGradientsKernel_BinaryLogloss_ARGS);
     }
   }
 }
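Note: dropping the IS_OVA template parameter halves the number of kernel instantiations (8 -> 4 for the gradient kernel, 4 -> 2 for the boost-from-score kernel); the one-vs-all label handling moves into per-class CUDABinaryLogloss instances constructed with an explicit class id (see the next file). The launch code keeps the usual pattern of turning runtime nullptr checks into compile-time bools. A minimal standalone sketch of that pattern (plain C++ stand-in for the kernel launches; all names hypothetical):

    #include <cstdio>

    // Runtime nullptr check selects a compile-time bool, so the per-element
    // weight branch is resolved at compile time inside the hot loop.
    template <bool USE_WEIGHT>
    void GradientsSketch(const float* labels, const float* weights, int n) {
      for (int i = 0; i < n; ++i) {
        const int label = labels[i] > 0 ? 1 : -1;
        float grad = static_cast<float>(-label);  // placeholder gradient
        if (USE_WEIGHT) {                         // constant-folded per instantiation
          grad *= weights[i];
        }
        std::printf("grad[%d]=%f\n", i, grad);
      }
    }

    void Dispatch(const float* labels, const float* weights, int n) {
      if (weights == nullptr) {
        GradientsSketch<false>(labels, nullptr, n);
      } else {
        GradientsSketch<true>(labels, weights, n);
      }
    }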
diff --git a/src/objective/cuda/cuda_multiclass_objective.cpp b/src/objective/cuda/cuda_multiclass_objective.cpp
index 168e2a0b8807..3d65c5458fe1 100644
--- a/src/objective/cuda/cuda_multiclass_objective.cpp
+++ b/src/objective/cuda/cuda_multiclass_objective.cpp
@@ -36,6 +36,37 @@ void CUDAMulticlassSoftmax::ConvertOutputCUDA(const data_size_t num_data, const
 }

+CUDAMulticlassOVA::CUDAMulticlassOVA(const Config& config): MulticlassOVA(config) {
+  for (int i = 0; i < num_class_; ++i) {
+    cuda_binary_loss_.emplace_back(new CUDABinaryLogloss(config, i));
+  }
+}
+
+CUDAMulticlassOVA::CUDAMulticlassOVA(const std::vector<std::string>& strs): MulticlassOVA(strs) {}
+
+CUDAMulticlassOVA::~CUDAMulticlassOVA() {}
+
+void CUDAMulticlassOVA::Init(const Metadata& metadata, data_size_t num_data) {
+  MulticlassOVA::Init(metadata, num_data);
+  for (int i = 0; i < num_class_; ++i) {
+    cuda_binary_loss_[i]->Init(metadata, num_data);
+  }
+}
+
+void CUDAMulticlassOVA::GetGradients(const double* score, score_t* gradients, score_t* hessians) const {
+  for (int i = 0; i < num_class_; ++i) {
+    int64_t offset = static_cast<int64_t>(num_data_) * i;
+    cuda_binary_loss_[i]->GetGradients(score + offset, gradients + offset, hessians + offset);
+  }
+}
+
+void CUDAMulticlassOVA::ConvertOutputCUDA(const data_size_t num_data, const double* input, double* output) const {
+  for (int i = 0; i < num_class_; ++i) {
+    cuda_binary_loss_[i]->ConvertOutputCUDA(num_data, input + i * num_data, output + i * num_data);
+  }
+}
+
+
 }  // namespace LightGBM

 #endif  // USE_CUDA_EXP
diff --git a/src/objective/cuda/cuda_multiclass_objective.hpp b/src/objective/cuda/cuda_multiclass_objective.hpp
index 8c5265f315c5..1e0aa3c8f05e 100644
--- a/src/objective/cuda/cuda_multiclass_objective.hpp
+++ b/src/objective/cuda/cuda_multiclass_objective.hpp
@@ -9,9 +9,12 @@

 #include <LightGBM/cuda/cuda_objective_function.hpp>

+#include <memory>
 #include <string>
 #include <vector>

+#include "cuda_binary_objective.hpp"
+
 #include "../multiclass_objective.hpp"

 #define GET_GRADIENTS_BLOCK_SIZE_MULTICLASS (1024)
@@ -54,6 +57,35 @@ class CUDAMulticlassSoftmax: public CUDAObjectiveInterface, public MulticlassSof
 };

+class CUDAMulticlassOVA: public CUDAObjectiveInterface, public MulticlassOVA {
+ public:
+  explicit CUDAMulticlassOVA(const Config& config);
+
+  explicit CUDAMulticlassOVA(const std::vector<std::string>& strs);
+
+  void Init(const Metadata& metadata, data_size_t num_data) override;
+
+  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override;
+
+  void ConvertOutputCUDA(const data_size_t num_data, const double* input, double* output) const override;
+
+  double BoostFromScore(int class_id) const override {
+    return cuda_binary_loss_[class_id]->BoostFromScore(0);
+  }
+
+  bool ClassNeedTrain(int class_id) const override {
+    return cuda_binary_loss_[class_id]->ClassNeedTrain(0);
+  }
+
+  ~CUDAMulticlassOVA();
+
+  bool IsCUDAObjective() const override { return true; }
+
+ private:
+  std::vector<std::unique_ptr<CUDABinaryLogloss>> cuda_binary_loss_;
+};
+
+
 }  // namespace LightGBM

 #endif  // USE_CUDA_EXP
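Note: CUDAMulticlassOVA stores scores, gradients, and hessians class-major, so class i owns the contiguous block [i * num_data, (i + 1) * num_data) and each CUDABinaryLogloss works on its own slice. The offset is widened to 64 bits before multiplying so num_data * num_class cannot overflow a 32-bit index. A standalone sketch of that layout (hypothetical names, not LightGBM code):

    #include <cstdint>
    #include <vector>

    // Class-major layout: the flat buffer holds num_class blocks of
    // num_data doubles, one block per one-vs-all binary problem.
    struct OvaLayoutSketch {
      int32_t num_data;
      int num_class;

      const double* ClassBlock(const std::vector<double>& flat, int i) const {
        const int64_t offset = static_cast<int64_t>(num_data) * i;  // 64-bit to avoid overflow
        return flat.data() + offset;
      }
    };

    int main() {
      OvaLayoutSketch layout{4, 3};                          // 4 rows, 3 classes
      std::vector<double> scores(4 * 3, 0.0);                // flat class-major buffer
      const double* class1 = layout.ClassBlock(scores, 1);   // rows owned by class 1
      return class1 == scores.data() + 4 ? 0 : 1;
    }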
diff --git a/src/objective/multiclass_objective.hpp b/src/objective/multiclass_objective.hpp
index 1848680631ac..1d9c8da17556 100644
--- a/src/objective/multiclass_objective.hpp
+++ b/src/objective/multiclass_objective.hpp
@@ -266,7 +266,7 @@ class MulticlassOVA: public ObjectiveFunction {
     return binary_loss_[class_id]->ClassNeedTrain(0);
   }

- private:
+ protected:
   /*! \brief Number of data */
   data_size_t num_data_;
   /*! \brief Number of classes */
diff --git a/src/objective/objective_function.cpp b/src/objective/objective_function.cpp
index 360a978ba5b5..d13711775ccf 100644
--- a/src/objective/objective_function.cpp
+++ b/src/objective/objective_function.cpp
@@ -43,8 +43,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
     } else if (type == std::string("multiclass")) {
       return new CUDAMulticlassSoftmax(config);
     } else if (type == std::string("multiclassova")) {
-      Log::Warning("Objective multiclassova is not implemented in cuda_exp version. Fall back to boosting on CPU.");
-      return new MulticlassOVA(config);
+      return new CUDAMulticlassOVA(config);
     } else if (type == std::string("cross_entropy")) {
       Log::Warning("Objective cross_entropy is not implemented in cuda_exp version. Fall back to boosting on CPU.");
       return new CrossEntropy(config);
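Note: with this factory change, a cuda_exp build no longer warns and falls back to the CPU objective when objective=multiclassova is selected; the GPU implementation is constructed directly. A minimal sketch of the dispatch idea (hypothetical types, not the actual LightGBM factory):

    #include <memory>
    #include <string>

    struct Objective { virtual ~Objective() = default; };
    struct CpuOVA : Objective {};
    struct GpuOVA : Objective {};

    // "multiclassova" maps to the GPU objective when CUDA is enabled,
    // instead of warning and constructing the CPU fallback.
    std::unique_ptr<Objective> CreateObjective(const std::string& type, bool use_cuda) {
      if (type == "multiclassova") {
        return use_cuda ? std::unique_ptr<Objective>(new GpuOVA())
                        : std::unique_ptr<Objective>(new CpuOVA());
      }
      return nullptr;  // other objectives elided
    }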