use CUDAVector for cuda tree and column data
shiyu1994 committed Oct 25, 2024
1 parent 6db879a commit b43f88b
Showing 7 changed files with 228 additions and 448 deletions.
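This commit swaps the manually managed raw device pointers inside CUDAColumnData and CUDATree for a CUDAVector<T> wrapper, and the public accessors keep their raw-pointer return types by delegating to RawData(). The wrapper itself is defined elsewhere in the repository and is not part of this diff; as a rough mental model, a minimal wrapper of that shape might look like the sketch below, where everything except RawData() is an assumption rather than LightGBM's actual implementation:

```cpp
// Hedged sketch of a CUDAVector<T>-style owner of a device buffer.
// Only RawData() is exercised by the headers in this commit; the constructor,
// destructor, and Resize() are assumed typical RAII behavior, not real code.
#include <cuda_runtime.h>
#include <cstddef>

template <typename T>
class CUDAVectorSketch {
 public:
  CUDAVectorSketch() : data_(nullptr), size_(0) {}
  ~CUDAVectorSketch() { cudaFree(data_); }  // device buffer released automatically

  // Unique ownership of the device allocation, so no copying.
  CUDAVectorSketch(const CUDAVectorSketch&) = delete;
  CUDAVectorSketch& operator=(const CUDAVectorSketch&) = delete;

  // Reallocates the device buffer (simplified: existing contents are dropped).
  void Resize(std::size_t size) {
    cudaFree(data_);
    data_ = nullptr;
    if (size > 0) {
      cudaMalloc(reinterpret_cast<void**>(&data_), size * sizeof(T));
    }
    size_ = size;
  }

  // The accessors in this diff call RawData() so kernels still receive plain pointers.
  T* RawData() const { return data_; }
  std::size_t Size() const { return size_; }

 private:
  T* data_;
  std::size_t size_;
};
```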
68 changes: 37 additions & 31 deletions include/LightGBM/cuda/cuda_column_data.hpp
@@ -38,13 +38,11 @@ class CUDAColumnData {
const std::vector<uint8_t>& feature_mfb_is_na,
const std::vector<int>& feature_to_column);

- const void* GetColumnData(const int column_index) const { return data_by_column_[column_index]; }
+ const void* GetColumnData(const int column_index) const { return data_by_column_[column_index]->RawData(); }

void CopySubrow(const CUDAColumnData* full_set, const data_size_t* used_indices, const data_size_t num_used_indices);

- void* const* cuda_data_by_column() const { return cuda_data_by_column_; }
-
- void* const* data_by_column() const { return data_by_column_.data(); }
+ void* const* cuda_data_by_column() const { return cuda_data_by_column_.RawData(); }

uint32_t feature_min_bin(const int feature_index) const { return feature_min_bin_[feature_index]; }

@@ -64,42 +62,50 @@

uint8_t feature_mfb_is_na(const int feature_index) const { return feature_mfb_is_na_[feature_index]; }

- const uint32_t* cuda_feature_min_bin() const { return cuda_feature_min_bin_; }
+ const uint32_t* cuda_feature_min_bin() const { return cuda_feature_min_bin_.RawData(); }

- const uint32_t* cuda_feature_max_bin() const { return cuda_feature_max_bin_; }
+ const uint32_t* cuda_feature_max_bin() const { return cuda_feature_max_bin_.RawData(); }

- const uint32_t* cuda_feature_offset() const { return cuda_feature_offset_; }
+ const uint32_t* cuda_feature_offset() const { return cuda_feature_offset_.RawData(); }

- const uint32_t* cuda_feature_most_freq_bin() const { return cuda_feature_most_freq_bin_; }
+ const uint32_t* cuda_feature_most_freq_bin() const { return cuda_feature_most_freq_bin_.RawData(); }

- const uint32_t* cuda_feature_default_bin() const { return cuda_feature_default_bin_; }
+ const uint32_t* cuda_feature_default_bin() const { return cuda_feature_default_bin_.RawData(); }

- const uint8_t* cuda_feature_missing_is_zero() const { return cuda_feature_missing_is_zero_; }
+ const uint8_t* cuda_feature_missing_is_zero() const { return cuda_feature_missing_is_zero_.RawData(); }

- const uint8_t* cuda_feature_missing_is_na() const { return cuda_feature_missing_is_na_; }
+ const uint8_t* cuda_feature_missing_is_na() const { return cuda_feature_missing_is_na_.RawData(); }

- const uint8_t* cuda_feature_mfb_is_zero() const { return cuda_feature_mfb_is_zero_; }
+ const uint8_t* cuda_feature_mfb_is_zero() const { return cuda_feature_mfb_is_zero_.RawData(); }

- const uint8_t* cuda_feature_mfb_is_na() const { return cuda_feature_mfb_is_na_; }
+ const uint8_t* cuda_feature_mfb_is_na() const { return cuda_feature_mfb_is_na_.RawData(); }

- const int* cuda_feature_to_column() const { return cuda_feature_to_column_; }
+ const int* cuda_feature_to_column() const { return cuda_feature_to_column_.RawData(); }

- const uint8_t* cuda_column_bit_type() const { return cuda_column_bit_type_; }
+ const uint8_t* cuda_column_bit_type() const { return cuda_column_bit_type_.RawData(); }

int feature_to_column(const int feature_index) const { return feature_to_column_[feature_index]; }

uint8_t column_bit_type(const int column_index) const { return column_bit_type_[column_index]; }

private:
template <bool IS_SPARSE, bool IS_4BIT, typename BIN_TYPE>
- void InitOneColumnData(const void* in_column_data, BinIterator* bin_iterator, void** out_column_data_pointer);
+ void InitOneColumnData(const void* in_column_data, BinIterator* bin_iterator, CUDAVector<void>* out_column_data_pointer);

void LaunchCopySubrowKernel(void* const* in_cuda_data_by_column);

void InitColumnMetaInfo();

void ResizeWhenCopySubrow(const data_size_t num_used_indices);

+ std::vector<void*> GetDataByColumnPointers(const std::vector<std::unique_ptr<CUDAVector<void>>>& data_by_column) const {
+   std::vector<void*> data_by_column_pointers(data_by_column.size(), nullptr);
+   for (size_t i = 0; i < data_by_column.size(); ++i) {
+     data_by_column_pointers[i] = data_by_column[i].get();
+   }
+   return data_by_column_pointers;
+ }
+
int gpu_device_id_;
int num_threads_;
data_size_t num_data_;
@@ -114,24 +120,24 @@
std::vector<uint8_t> feature_missing_is_na_;
std::vector<uint8_t> feature_mfb_is_zero_;
std::vector<uint8_t> feature_mfb_is_na_;
- void** cuda_data_by_column_;
+ CUDAVector<void*> cuda_data_by_column_;
std::vector<int> feature_to_column_;
- std::vector<void*> data_by_column_;
-
- uint8_t* cuda_column_bit_type_;
- uint32_t* cuda_feature_min_bin_;
- uint32_t* cuda_feature_max_bin_;
- uint32_t* cuda_feature_offset_;
- uint32_t* cuda_feature_most_freq_bin_;
- uint32_t* cuda_feature_default_bin_;
- uint8_t* cuda_feature_missing_is_zero_;
- uint8_t* cuda_feature_missing_is_na_;
- uint8_t* cuda_feature_mfb_is_zero_;
- uint8_t* cuda_feature_mfb_is_na_;
- int* cuda_feature_to_column_;
+ std::vector<std::unique_ptr<CUDAVector<void>>> data_by_column_;
+
+ CUDAVector<uint8_t> cuda_column_bit_type_;
+ CUDAVector<uint32_t> cuda_feature_min_bin_;
+ CUDAVector<uint32_t> cuda_feature_max_bin_;
+ CUDAVector<uint32_t> cuda_feature_offset_;
+ CUDAVector<uint32_t> cuda_feature_most_freq_bin_;
+ CUDAVector<uint32_t> cuda_feature_default_bin_;
+ CUDAVector<uint8_t> cuda_feature_missing_is_zero_;
+ CUDAVector<uint8_t> cuda_feature_missing_is_na_;
+ CUDAVector<uint8_t> cuda_feature_mfb_is_zero_;
+ CUDAVector<uint8_t> cuda_feature_mfb_is_na_;
+ CUDAVector<int> cuda_feature_to_column_;

// used when bagging with subset
- data_size_t* cuda_used_indices_;
+ CUDAVector<data_size_t> cuda_used_indices_;
data_size_t num_used_indices_;
data_size_t cur_subset_buffer_size_;
};
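Since each accessor above now returns the wrapped buffer through RawData(), device code keeps receiving plain pointers and existing kernel launch sites need no changes. The following consumer is purely hypothetical (CheckMinBinKernel and its arguments are illustrations, not LightGBM code):

```cpp
#include <cuda_runtime.h>
#include <cstdint>

// Toy kernel that reads the per-feature min-bin array exposed by
// CUDAColumnData::cuda_feature_min_bin(); single thread, illustration only.
__global__ void CheckMinBinKernel(const uint32_t* feature_min_bin,
                                  int num_features,
                                  uint32_t* out_max) {
  if (threadIdx.x == 0 && blockIdx.x == 0) {
    uint32_t max_min_bin = 0;
    for (int i = 0; i < num_features; ++i) {
      if (feature_min_bin[i] > max_min_bin) max_min_bin = feature_min_bin[i];
    }
    *out_max = max_min_bin;
  }
}

// Hypothetical launch; the accessor's signature is identical before and after
// this commit, whether the member is a raw pointer or a CUDAVector:
//   CheckMinBinKernel<<<1, 1>>>(column_data->cuda_feature_min_bin(),
//                               num_features, d_out_max);
```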
52 changes: 26 additions & 26 deletions include/LightGBM/cuda/cuda_tree.hpp
@@ -79,25 +79,25 @@ class CUDATree : public Tree {

inline void AsConstantTree(double val, int count) override;

- const int* cuda_leaf_parent() const { return cuda_leaf_parent_; }
+ const int* cuda_leaf_parent() const { return cuda_leaf_parent_.RawData(); }

- const int* cuda_left_child() const { return cuda_left_child_; }
+ const int* cuda_left_child() const { return cuda_left_child_.RawData(); }

- const int* cuda_right_child() const { return cuda_right_child_; }
+ const int* cuda_right_child() const { return cuda_right_child_.RawData(); }

- const int* cuda_split_feature_inner() const { return cuda_split_feature_inner_; }
+ const int* cuda_split_feature_inner() const { return cuda_split_feature_inner_.RawData(); }

- const int* cuda_split_feature() const { return cuda_split_feature_; }
+ const int* cuda_split_feature() const { return cuda_split_feature_.RawData(); }

- const uint32_t* cuda_threshold_in_bin() const { return cuda_threshold_in_bin_; }
+ const uint32_t* cuda_threshold_in_bin() const { return cuda_threshold_in_bin_.RawData(); }

- const double* cuda_threshold() const { return cuda_threshold_; }
+ const double* cuda_threshold() const { return cuda_threshold_.RawData(); }

- const int8_t* cuda_decision_type() const { return cuda_decision_type_; }
+ const int8_t* cuda_decision_type() const { return cuda_decision_type_.RawData(); }

- const double* cuda_leaf_value() const { return cuda_leaf_value_; }
+ const double* cuda_leaf_value() const { return cuda_leaf_value_.RawData(); }

- double* cuda_leaf_value_ref() { return cuda_leaf_value_; }
+ double* cuda_leaf_value_ref() { return cuda_leaf_value_.RawData(); }

inline void Shrinkage(double rate) override;

@@ -140,22 +140,22 @@
const int right_leaf_index,
const int real_feature_index);

- int* cuda_left_child_;
- int* cuda_right_child_;
- int* cuda_split_feature_inner_;
- int* cuda_split_feature_;
- int* cuda_leaf_depth_;
- int* cuda_leaf_parent_;
- uint32_t* cuda_threshold_in_bin_;
- double* cuda_threshold_;
- double* cuda_internal_weight_;
- double* cuda_internal_value_;
- int8_t* cuda_decision_type_;
- double* cuda_leaf_value_;
- data_size_t* cuda_leaf_count_;
- double* cuda_leaf_weight_;
- data_size_t* cuda_internal_count_;
- float* cuda_split_gain_;
+ CUDAVector<int> cuda_left_child_;
+ CUDAVector<int> cuda_right_child_;
+ CUDAVector<int> cuda_split_feature_inner_;
+ CUDAVector<int> cuda_split_feature_;
+ CUDAVector<int> cuda_leaf_depth_;
+ CUDAVector<int> cuda_leaf_parent_;
+ CUDAVector<uint32_t> cuda_threshold_in_bin_;
+ CUDAVector<double> cuda_threshold_;
+ CUDAVector<double> cuda_internal_weight_;
+ CUDAVector<double> cuda_internal_value_;
+ CUDAVector<int8_t> cuda_decision_type_;
+ CUDAVector<double> cuda_leaf_value_;
+ CUDAVector<data_size_t> cuda_leaf_count_;
+ CUDAVector<double> cuda_leaf_weight_;
+ CUDAVector<data_size_t> cuda_internal_count_;
+ CUDAVector<float> cuda_split_gain_;
CUDAVector<uint32_t> cuda_bitset_;
CUDAVector<uint32_t> cuda_bitset_inner_;
CUDAVector<int> cuda_cat_boundaries_;
10 changes: 5 additions & 5 deletions src/boosting/gbdt.cpp
@@ -104,12 +104,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU

- tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
-   config_.get(), boosting_on_gpu_));
+ tree_learner_ = nullptr; // std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
+   // config_.get(), boosting_on_gpu_));

// init tree learner
- tree_learner_->Init(train_data_, is_constant_hessian_);
- tree_learner_->SetForcedSplit(&forced_splits_json_);
+ // tree_learner_->Init(train_data_, is_constant_hessian_);
+ // tree_learner_->SetForcedSplit(&forced_splits_json_);

// push training metrics
training_metrics_.clear();
@@ -227,7 +227,7 @@ void GBDT::Boosting() {
if (config_->bagging_by_query) {
data_sample_strategy_->Bagging(iter_, tree_learner_.get(), gradients_.data(), hessians_.data());
objective_function_->
-     GetGradients(GetTrainingScore(&num_score), data_sample_strategy_->num_sampled_queries(), data_sample_strategy_->sampled_query_indices(), gradients_pointer_, hessians_pointer_);
+     GetGradientsWithSampledQueries(GetTrainingScore(&num_score), data_sample_strategy_->num_sampled_queries(), data_sample_strategy_->sampled_query_indices(), gradients_pointer_, hessians_pointer_);
} else {
objective_function_->
GetGradients(GetTrainingScore(&num_score), gradients_pointer_, hessians_pointer_);
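In the bagging_by_query branch above, the call moves from GetGradients to GetGradientsWithSampledQueries, so gradients and hessians are refreshed only for documents belonging to the sampled queries. The real implementation lives in LightGBM's objective functions and is not part of this diff; the self-contained sketch below only illustrates that intent, with a toy gradient formula and an assumed query_boundaries layout standing in for the real code:

```cpp
#include <cstdint>
#include <vector>

typedef int32_t data_size_t;  // matches LightGBM's data_size_t
typedef float score_t;        // assumption: score_t is float in this build

// Illustrative only: update gradients/hessians just for the sampled queries.
// query_boundaries has num_queries + 1 entries; query q owns documents
// [query_boundaries[q], query_boundaries[q + 1]).
void GetGradientsWithSampledQueriesSketch(
    const double* scores,
    data_size_t num_sampled_queries,
    const data_size_t* sampled_query_indices,
    const std::vector<data_size_t>& query_boundaries,
    score_t* gradients, score_t* hessians) {
  for (data_size_t q = 0; q < num_sampled_queries; ++q) {
    const data_size_t query = sampled_query_indices[q];
    for (data_size_t i = query_boundaries[query]; i < query_boundaries[query + 1]; ++i) {
      // Toy gradient formula (as if minimizing 0.5 * score^2), only to keep
      // the sketch compilable; the actual objective is ranking-specific.
      gradients[i] = static_cast<score_t>(scores[i]);
      hessians[i] = 1.0f;
    }
  }
}
```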
(Diffs for the remaining four changed files are not shown.)
