Skip to content

Commit

Permalink
[GpuGraph] fix kernel overflow (PaddlePaddle#138)
Browse files Browse the repository at this point in the history
* optimize mem in  uniq slot feature

* cherry-pick var slot_feature

* fix kernel overflow && add max feature num flag

Co-authored-by: huwei02 <[email protected]>
  • Loading branch information
Thunderbrook and huwei02 authored Oct 18, 2022
1 parent 4e8b290 commit dde1573
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 6 deletions.
5 changes: 3 additions & 2 deletions paddle/fluid/distributed/ps/table/common_graph_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
DECLARE_bool(graph_load_in_parallel);
DECLARE_bool(graph_get_neighbor_id);
DECLARE_int32(gpugraph_storage_mode);
DECLARE_uint64(gpugraph_slot_feasign_max_num);

namespace paddle {
namespace distributed {
Expand Down Expand Up @@ -2010,8 +2011,8 @@ int GraphTable::parse_feature(int idx,
thread_local std::vector<paddle::string::str_ptr> fea_fields;
fea_fields.clear();
c = feature_separator_.at(0);
paddle::string::split_string_ptr(fields[1].ptr, fields[1].len, c, &fea_fields);

paddle::string::split_string_ptr(fields[1].ptr, fields[1].len, c, &fea_fields, FLAGS_gpugraph_slot_feasign_max_num);
std::string name = fields[0].to_string();
auto it = feat_id_map[idx].find(name);
if (it != feat_id_map[idx].end()) {
Expand Down
8 changes: 4 additions & 4 deletions paddle/fluid/framework/fleet/heter_ps/feature_value.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ __global__ void PullDedupCopy(const size_t N,
const int64_t* slot_lens,
uint64_t max_val_size,
const int* slot_dims,
const int hidden,
const size_t hidden,
const int* key2slot,
const uint32_t* restore_idx,
TAccess accessor) {
CUDA_KERNEL_LOOP(idx, N) {
CUDA_KERNEL_LOOP_TYPE(idx, N, size_t) {
int i = idx / hidden;
int off = idx % hidden;

Expand Down Expand Up @@ -158,7 +158,7 @@ __global__ void PushMergeCopyAtomic(const size_t N,
const uint32_t* d_restore_idx,
size_t grad_value_size,
TAccess accessor) {
CUDA_KERNEL_LOOP(idx, N) {
CUDA_KERNEL_LOOP_TYPE(idx, N, size_t) {
int i = idx / hidden;
int off = idx % hidden;
// filter 0 keys
Expand Down Expand Up @@ -224,7 +224,7 @@ __global__ void PushMergeCopy(const size_t N,
const uint32_t* d_sort_cnt,
size_t grad_value_size,
TAccess accessor) {
CUDA_KERNEL_LOOP(idx, N) {
CUDA_KERNEL_LOOP_TYPE(idx, N, size_t) {
int i = idx / hidden;
int off = idx % hidden;
// filter 0 keys
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/platform/flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,10 @@ PADDLE_DEFINE_EXPORTED_uint64(
gpugraph_merge_grads_segment_size,
128,
"segment size with segment gradient merge, default 128");
PADDLE_DEFINE_EXPORTED_uint64(
gpugraph_slot_feasign_max_num,
5,
"max feasign number in one slot, default 5");
PADDLE_DEFINE_EXPORTED_int32(
gpugraph_dedup_pull_push_mode,
0,
Expand Down
36 changes: 36 additions & 0 deletions paddle/utils/string/string_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,42 @@ inline int split_string_ptr(const char* str,
return num;
}

// Splits the buffer [str, str + len) on `delim`, appending each token to
// `values` as a non-owning (pointer, length) view into the original buffer,
// and stops once `max_num` tokens have been produced (bounds memory use for
// pathologically long inputs, e.g. oversized slot-feature lists).
//
// Observable semantics of this implementation:
//   - a delimiter at the very start of the buffer yields one empty token;
//   - after a token, a run of consecutive delimiters is collapsed;
//   - trailing delimiters do not produce a trailing empty token.
//
// Returns the number of tokens appended to `values`.
inline int split_string_ptr(const char* str,
                            size_t len,
                            char delim,
                            std::vector<str_ptr>* values,
                            int max_num) {
  if (len == 0) {  // `len` is unsigned, so "<= 0" was just "== 0"
    return 0;
  }

  int num = 0;
  const char* p = str;
  const char* end = str + len;
  const char* last = str;
  while (p < end) {
    if (*p != delim) {
      ++p;
      continue;
    }
    values->emplace_back(last, static_cast<size_t>(p - last));
    ++num;
    ++p;
    if (num >= max_num) {
      return num;
    }
    // Skip a run of consecutive delimiters. The scan MUST stay bounded by
    // `end`: the input is a (ptr, len) span with no NUL-terminator
    // guarantee, so dereferencing `*p` at or past `end` is an
    // out-of-bounds read (previously triggered whenever the input ended
    // with one or more delimiters).
    while (p < end && *p == delim) {
      ++p;
    }
    last = p;
  }
  if (p > last) {
    values->emplace_back(last, static_cast<size_t>(p - last));
    ++num;
  }
  return num;
}
// A helper class for reading lines from file. A line buffer is maintained. It
// doesn't need to know the maximum possible length of a line.

Expand Down

0 comments on commit dde1573

Please sign in to comment.