[CUDA] Add L2 metric for new CUDA version #5633

Merged: 13 commits, Dec 27, 2022
2 changes: 2 additions & 0 deletions include/LightGBM/cuda/cuda_tree.hpp
@@ -77,6 +77,8 @@ class CUDATree : public Tree {
      const data_size_t* used_data_indices,
      data_size_t num_data, double* score) const override;

+  inline void AsConstantTree(double val) override;
+
  const int* cuda_leaf_parent() const { return cuda_leaf_parent_; }

  const int* cuda_left_child() const { return cuda_left_child_; }
3 changes: 3 additions & 0 deletions include/LightGBM/cuda/cuda_utils.h
@@ -119,6 +119,9 @@ class CUDAVector {
  }

  void Resize(size_t size) {
+    if (size == size_) {
+      return;
+    }
    if (size == 0) {
      Clear();
    }
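Why this guard matters: the metric's Eval (later in this diff) calls Resize on score_convert_buffer_ on every evaluation round, so returning early when the size is unchanged makes every round after the first skip device reallocation. A minimal sketch of the pattern, assuming CUDAVector keeps a device pointer data_ and an element count size_ (a toy stand-in, not LightGBM's actual private members):

#include <cstddef>
#include <cuda_runtime.h>

template <typename T>
class ToyDeviceVector {
 public:
  void Resize(size_t size) {
    if (size == size_) {
      return;  // the new fast path: same size, keep the existing allocation
    }
    T* new_data = nullptr;
    if (size > 0) {
      cudaMalloc(&new_data, size * sizeof(T));
    }
    if (data_ != nullptr) {
      cudaFree(data_);  // toy version drops old contents; a growable buffer would copy first
    }
    data_ = new_data;
    size_ = size;
  }

 private:
  T* data_ = nullptr;   // device allocation
  size_t size_ = 0;     // element count
};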
5 changes: 4 additions & 1 deletion include/LightGBM/objective_function.h
@@ -101,9 +101,12 @@ class ObjectiveFunction {
  /*!
   * \brief Convert output for CUDA version
   */
-  const double* ConvertOutputCUDA(data_size_t /*num_data*/, const double* input, double* /*output*/) const {
+  virtual const double* ConvertOutputCUDA(data_size_t /*num_data*/, const double* input, double* /*output*/) const {
    return input;
  }

+  virtual bool NeedConvertOutputCUDA() const { return false; }
+
#endif  // USE_CUDA_EXP
};

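Taken together, these two virtuals let a caller skip both the conversion kernel and the buffer it writes into. A caller-side sketch of the intended pattern, mirroring the metric change later in this diff (raw_scores and buffer are illustrative names, not LightGBM API):

// Only pay for a device-side conversion when the objective actually
// transforms its raw scores (e.g. the sqrt-transformed L2 objective).
const double* scores_for_metric = raw_scores;
if (objective != nullptr && objective->NeedConvertOutputCUDA()) {
  buffer.Resize(static_cast<size_t>(num_data));  // device buffer, illustrative
  scores_for_metric = objective->ConvertOutputCUDA(num_data, raw_scores, buffer.RawData());
}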
2 changes: 1 addition & 1 deletion include/LightGBM/tree.h
@@ -228,7 +228,7 @@ class Tree {
    shrinkage_ = 1.0f;
  }

-  inline void AsConstantTree(double val) {
+  virtual inline void AsConstantTree(double val) {
    num_leaves_ = 1;
    shrinkage_ = 1.0f;
    leaf_value_[0] = val;
4 changes: 4 additions & 0 deletions src/io/cuda/cuda_tree.cpp
@@ -330,6 +330,10 @@ void CUDATree::SyncLeafOutputFromCUDAToHost() {
  CopyFromCUDADeviceToHost<double>(leaf_value_.data(), cuda_leaf_value_, leaf_value_.size(), __FILE__, __LINE__);
}

+void CUDATree::AsConstantTree(double val) {
+  Tree::AsConstantTree(val);
+  CopyFromHostToCUDADevice<double>(cuda_leaf_value_, &val, 1, __FILE__, __LINE__);
+}
+
}  // namespace LightGBM

8 changes: 7 additions & 1 deletion src/metric/cuda/cuda_regression_metric.cpp
@@ -31,13 +31,19 @@ void CUDARegressionMetricInterface<HOST_METRIC, CUDA_METRIC>::Init(const Metadat

template <typename HOST_METRIC, typename CUDA_METRIC>
std::vector<double> CUDARegressionMetricInterface<HOST_METRIC, CUDA_METRIC>::Eval(const double* score, const ObjectiveFunction* objective) const {
-  const double* score_convert = objective->ConvertOutputCUDA(this->num_data_, score, score_convert_buffer_.RawData());
+  const double* score_convert = score;
+  if (objective != nullptr && objective->NeedConvertOutputCUDA()) {
+    score_convert_buffer_.Resize(static_cast<size_t>(this->num_data_) * static_cast<size_t>(this->num_class_));
+    score_convert = objective->ConvertOutputCUDA(this->num_data_, score, score_convert_buffer_.RawData());
+  }
  const double eval_score = LaunchEvalKernel(score_convert);
  return std::vector<double>{eval_score};
}

CUDARMSEMetric::CUDARMSEMetric(const Config& config): CUDARegressionMetricInterface<RMSEMetric, CUDARMSEMetric>(config) {}

+CUDAL2Metric::CUDAL2Metric(const Config& config): CUDARegressionMetricInterface<L2Metric, CUDAL2Metric>(config) {}
+
}  // namespace LightGBM

#endif // USE_CUDA_EXP
13 changes: 10 additions & 3 deletions src/metric/cuda/cuda_regression_metric.cu
@@ -19,16 +19,22 @@ __global__ void EvalKernel(const data_size_t num_data, const label_t* labels, co
  const data_size_t index = static_cast<data_size_t>(threadIdx.x + blockIdx.x * blockDim.x);
  double point_metric = 0.0;
  if (index < num_data) {
-    point_metric = CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]);
+    point_metric = USE_WEIGHTS ?
+      CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]) * weights[index] :
+      CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]);
  }
  const double block_sum_point_metric = ShuffleReduceSum<double>(point_metric, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD);
-  reduce_block_buffer[blockIdx.x] = block_sum_point_metric;
+  if (threadIdx.x == 0) {
+    reduce_block_buffer[blockIdx.x] = block_sum_point_metric;
+  }
  if (USE_WEIGHTS) {
    double weight = 0.0;
    if (index < num_data) {
      weight = static_cast<double>(weights[index]);
      const double block_sum_weight = ShuffleReduceSum<double>(weight, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD);
-      reduce_block_buffer[blockIdx.x + blockDim.x] = block_sum_weight;
+      if (threadIdx.x == 0) {
+        reduce_block_buffer[blockIdx.x + gridDim.x] = block_sum_weight;
+      }
    }
  }
}
@@ -55,6 +61,7 @@ double CUDARegressionMetricInterface<HOST_METRIC, CUDA_METRIC>::LaunchEvalKernel
}

template double CUDARegressionMetricInterface<RMSEMetric, CUDARMSEMetric>::LaunchEvalKernel(const double* score) const;
+template double CUDARegressionMetricInterface<L2Metric, CUDAL2Metric>::LaunchEvalKernel(const double* score) const;

} // namespace LightGBM

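Two things appear to change in this kernel besides weighting the per-point metric. First, only thread 0 now writes the block's partial sum; before, every thread in the block redundantly stored the same value. Second, the weight sums move from offset blockDim.x (threads per block) to gridDim.x (number of blocks): with the old offset, block b's weight sum landed at b + blockDim.x and could collide with block (b + blockDim.x)'s metric sum whenever gridDim.x > blockDim.x. A self-contained sketch of the intended buffer layout, with a plain shared-memory reduction standing in for ShuffleReduceSum (illustrative only; assumes blockDim.x is a power of two):

#include <cuda_runtime.h>

// Stand-in for LightGBM's ShuffleReduceSum: every thread gets the block sum.
__device__ double BlockReduceSum(double val, double* shared) {
  __syncthreads();  // make sure any previous use of `shared` has finished
  shared[threadIdx.x] = val;
  __syncthreads();
  for (unsigned int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (threadIdx.x < stride) {
      shared[threadIdx.x] += shared[threadIdx.x + stride];
    }
    __syncthreads();
  }
  return shared[0];
}

// Layout of block_buffer: metric partial sums in [0, gridDim.x),
// weight partial sums in [gridDim.x, 2 * gridDim.x).
__global__ void ToyEvalKernel(const int num_data, const double* values,
                              const double* weights, double* block_buffer) {
  extern __shared__ double shared[];  // launch with blockDim.x * sizeof(double)
  const int index = threadIdx.x + blockIdx.x * blockDim.x;
  const double v = index < num_data ? values[index] : 0.0;
  const double w = index < num_data ? weights[index] : 0.0;
  const double v_sum = BlockReduceSum(v, shared);
  const double w_sum = BlockReduceSum(w, shared);
  if (threadIdx.x == 0) {  // exactly one store per block
    block_buffer[blockIdx.x] = v_sum;
    block_buffer[blockIdx.x + gridDim.x] = w_sum;  // gridDim.x, not blockDim.x
  }
}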
20 changes: 16 additions & 4 deletions src/metric/cuda/cuda_regression_metric.hpp
@@ -23,7 +23,7 @@ namespace LightGBM {
template <typename HOST_METRIC, typename CUDA_METRIC>
class CUDARegressionMetricInterface: public CUDAMetricInterface<HOST_METRIC> {
 public:
-  explicit CUDARegressionMetricInterface(const Config& config): CUDAMetricInterface<HOST_METRIC>(config) {}
+  explicit CUDARegressionMetricInterface(const Config& config): CUDAMetricInterface<HOST_METRIC>(config), num_class_(config.num_class) {}

  virtual ~CUDARegressionMetricInterface() {}
@@ -34,9 +34,10 @@ class CUDARegressionMetricInterface: public CUDAMetricInterface<HOST_METRIC> {
 protected:
  double LaunchEvalKernel(const double* score_convert) const;

-  CUDAVector<double> score_convert_buffer_;
+  mutable CUDAVector<double> score_convert_buffer_;
  CUDAVector<double> reduce_block_buffer_;
  CUDAVector<double> reduce_block_buffer_inner_;
+  const int num_class_;
};

class CUDARMSEMetric: public CUDARegressionMetricInterface<RMSEMetric, CUDARMSEMetric> {
@@ -45,8 +46,19 @@ class CUDARMSEMetric: public CUDARegressionMetricInterface<RMSEMetric, CUDARMSEM

  virtual ~CUDARMSEMetric() {}

-  __device__ static double MetricOnPointCUDA(label_t label, double score) {
-    return (score - static_cast<double>(label));
+  __device__ inline static double MetricOnPointCUDA(label_t label, double score) {
+    return (score - label) * (score - label);
  }
};

+class CUDAL2Metric : public CUDARegressionMetricInterface<L2Metric, CUDAL2Metric> {
+ public:
+  explicit CUDAL2Metric(const Config& config);
+
+  virtual ~CUDAL2Metric() {}
+
+  __device__ inline static double MetricOnPointCUDA(label_t label, double score) {
+    return (score - label) * (score - label);
+  }
+};

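Both classes now return the squared error per point, so RMSE and L2 share the same device kernel; what presumably differs is the host-side aggregation inherited from each HOST_METRIC. A sketch following LightGBM's regression metric definitions (free-function signatures are illustrative, not the real member API):

#include <cmath>

// L2 reports the weighted mean of the squared errors ...
static double L2AverageLoss(double sum_loss, double sum_weights) {
  return sum_loss / sum_weights;
}

// ... while RMSE takes the square root of that same aggregate.
static double RMSEAverageLoss(double sum_loss, double sum_weights) {
  return std::sqrt(sum_loss / sum_weights);
}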
3 changes: 1 addition & 2 deletions src/metric/metric.cpp
@@ -19,8 +19,7 @@ Metric* Metric::CreateMetric(const std::string& type, const Config& config) {
#ifdef USE_CUDA_EXP
  if (config.device_type == std::string("cuda_exp")) {
    if (type == std::string("l2")) {
-      Log::Warning("Metric l2 is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
-      return new L2Metric(config);
+      return new CUDAL2Metric(config);
    } else if (type == std::string("rmse")) {
      return new CUDARMSEMetric(config);
    } else if (type == std::string("l1")) {
8 changes: 6 additions & 2 deletions src/objective/cuda/cuda_regression_objective.cu
@@ -70,8 +70,12 @@ __global__ void ConvertOutputCUDAKernel_Regression(const bool sqrt, const data_s

const double* CUDARegressionL2loss::LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const {
  const int num_blocks = (num_data + GET_GRADIENTS_BLOCK_SIZE_REGRESSION - 1) / GET_GRADIENTS_BLOCK_SIZE_REGRESSION;
-  ConvertOutputCUDAKernel_Regression<<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(sqrt_, num_data, input, output);
-  return output;
+  if (sqrt_) {
+    ConvertOutputCUDAKernel_Regression<<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(sqrt_, num_data, input, output);
+    return output;
+  } else {
+    return input;
+  }
}

template <bool USE_WEIGHT>
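For context on what the now-conditional kernel does when sqrt_ is set: LightGBM's CPU-side RegressionL2loss::ConvertOutput undoes the sqrt label transform by squaring with the sign preserved, and the CUDA kernel presumably mirrors that element-wise. A minimal sketch under that assumption (the real ConvertOutputCUDAKernel_Regression body is not part of this diff):

#include <cuda_runtime.h>

// Sign-preserving square: a score s stored in sqrt space maps back to
// sign(s) * s * s. Illustrative stand-in, not LightGBM's actual kernel.
__global__ void ConvertOutputSketch(const int num_data, const double* input, double* output) {
  const int index = threadIdx.x + blockIdx.x * blockDim.x;
  if (index < num_data) {
    const double s = input[index];
    output[index] = (s >= 0.0 ? 1.0 : -1.0) * s * s;
  }
}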
4 changes: 4 additions & 0 deletions src/objective/cuda/cuda_regression_objective.hpp
@@ -50,6 +50,8 @@ class CUDARegressionL2loss : public CUDARegressionObjectiveInterface<RegressionL
  void LaunchGetGradientsKernel(const double* score, score_t* gradients, score_t* hessians) const override;

  const double* LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const override;
+
+  bool NeedConvertOutputCUDA() const override { return sqrt_; }
};


@@ -123,6 +125,8 @@ class CUDARegressionPoissonLoss : public CUDARegressionObjectiveInterface<Regres

  const double* LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const override;

+  bool NeedConvertOutputCUDA() const override { return true; }
+
  double LaunchCalcInitScoreKernel(const int class_id) const override;

  void LaunchCheckLabelKernel() const;
1 change: 0 additions & 1 deletion src/objective/regression_objective.hpp
@@ -398,7 +398,6 @@ class RegressionFairLoss: public RegressionL2loss {
class RegressionPoissonLoss: public RegressionL2loss {
 public:
  explicit RegressionPoissonLoss(const Config& config): RegressionL2loss(config) {
-    Log::Warning("RegressionPoissonLoss is created again");
    max_delta_step_ = static_cast<double>(config.poisson_max_delta_step);
    if (sqrt_) {
      Log::Warning("Cannot use sqrt transform in %s Regression, will auto disable it", GetName());