Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Caching device_info in device_ext to restore TG performance #8301

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions ggml/src/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ bool ggml_backend_is_sycl(ggml_backend_t backend);
int ggml_backend_sycl_get_device(ggml_backend_t backend);
static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer);
static inline int get_sycl_env(const char *env_name, int default_val);
static inline int get_work_group_size(const sycl::device& device);

void dev2dev_memcpy(sycl::queue &q_dst, sycl::queue &q_src, void *ptr_dst,
const void *ptr_src, size_t size) {
Expand Down Expand Up @@ -1914,7 +1913,7 @@ static void soft_max_f32_sycl(const float * x, const float * mask,
const int nrows_y, const float scale, const float max_bias,
queue_ptr stream) {
int nth = WARP_SIZE;
int max_block_size = get_work_group_size(stream->get_device());
int max_block_size = dpct::dev_mgr::instance().get_work_group_size(stream->get_device());
while (nth < ncols_x && nth < max_block_size) nth *= 2;
if (nth>max_block_size) nth = max_block_size;

Expand Down
9 changes: 0 additions & 9 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,15 +295,6 @@ struct ggml_backend_sycl_context {
}
};

// common host functions

static inline int get_work_group_size(const sycl::device& device) {
dpct::device_info prop;
dpct::get_device_info(prop, device);
return prop.get_max_work_group_size();
}


// common device functions

static __dpct_inline__ float warp_reduce_sum(float x,
Expand Down
25 changes: 19 additions & 6 deletions ggml/src/ggml-sycl/dpct/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,13 +672,16 @@ namespace dpct
}

void get_device_info(device_info &out) const {
dpct::get_device_info(out, *this);
out = this->get_device_info();
}

device_info get_device_info() const {
device_info prop;
dpct::get_device_info(prop, *this);
return prop;
const device_info& get_device_info() const {
std::lock_guard<std::mutex> lock(m_mutex);
if (!_dev_info) {
_dev_info = device_info{};
dpct::get_device_info(*_dev_info, *this);
}
return *_dev_info;
}

void reset() {
Expand Down Expand Up @@ -801,6 +804,7 @@ namespace dpct
sycl::queue _saved_queue;
std::vector<sycl::queue> _queues;
mutable mutex_type m_mutex;
mutable std::optional<device_info> _dev_info;
};


Expand Down Expand Up @@ -852,7 +856,7 @@ namespace dpct
}
unsigned int device_count() { return _devs.size(); }

unsigned int get_device_id(const sycl::device &dev)
unsigned int get_device_id(const sycl::device &dev) const
{
unsigned int id = 0;
for (auto dev_item : _devs)
Expand Down Expand Up @@ -882,6 +886,15 @@ namespace dpct
static dev_mgr d_m;
return d_m;
}

int get_work_group_size(unsigned int id) const {
return get_device(id).get_max_work_group_size();
}

int get_work_group_size(const sycl::device &dev) const {
return get_work_group_size(get_device_id(dev));
}

dev_mgr(const dev_mgr &) = delete;
dev_mgr &operator=(const dev_mgr &) = delete;
dev_mgr(dev_mgr &&) = delete;
Expand Down
6 changes: 3 additions & 3 deletions ggml/src/ggml-sycl/norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ static void norm_f32_sycl(const float* x, float* dst, const int ncols,
});
}
else {
const int work_group_size = get_work_group_size(stream->get_device());
const int work_group_size = dpct::dev_mgr::instance().get_work_group_size(stream->get_device());
const sycl::range<3> block_dims(1, 1, work_group_size);
/*
DPCT1049:17: The work-group size passed to the SYCL kernel may exceed
Expand Down Expand Up @@ -240,7 +240,7 @@ static void group_norm_f32_sycl(const float* x, float* dst,
});
}
else {
const int work_group_size = get_work_group_size(stream->get_device());
const int work_group_size = dpct::dev_mgr::instance().get_work_group_size(stream->get_device());
const sycl::range<3> block_dims(1, 1, work_group_size);
/*
DPCT1049:18: The work-group size passed to the SYCL kernel may exceed
Expand Down Expand Up @@ -286,7 +286,7 @@ static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols,
});
}
else {
const int work_group_size = get_work_group_size(stream->get_device());
const int work_group_size = dpct::dev_mgr::instance().get_work_group_size(stream->get_device());
const sycl::range<3> block_dims(1, 1, work_group_size);
/*
DPCT1049:19: The work-group size passed to the SYCL kernel may exceed
Expand Down
Loading