Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workgroups for Python #116

Merged
merged 1 commit into from
Jan 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 53 additions & 13 deletions python/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,24 @@ PYBIND11_MODULE(kp, m) {
.value("storage", kp::Tensor::TensorTypes::eStorage, "Tensor with host visible gpu memory.")
.export_values();





py::class_<kp::Tensor, std::shared_ptr<kp::Tensor>>(m, "Tensor", DOC(kp, Tensor))
.def(py::init(
[](const std::vector<float>& data) {
return std::unique_ptr<kp::Tensor>(new kp::Tensor(data));
}), DOC(kp, Tensor, Tensor, 2))
.def(py::init(
[](const std::vector<float>& data, kp::Tensor::TensorTypes tensorTypes) {
return std::unique_ptr<kp::Tensor>(new kp::Tensor(data, tensorTypes));
}), "Initialiser with list of data components and tensor GPU memory type.")
[np](const py::array_t<float> data, kp::Tensor::TensorTypes tensor_type) {
axsaucedo marked this conversation as resolved.
Show resolved Hide resolved
const py::array_t<float> flatdata = np.attr("ravel")(data);
const py::buffer_info info = flatdata.request();
const float* ptr = (float*) info.ptr;
return std::unique_ptr<kp::Tensor>(
new kp::Tensor(std::vector<float>(ptr, ptr+flatdata.size()), tensor_type)
);
}),
"Construct Tensor with an array as initial data and an optional kp.TensorType (default:device).",
py::arg("data"),
py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice
)
.def("data", &kp::Tensor::data, DOC(kp, Tensor, data))
.def("numpy", [](kp::Tensor& self) {
return py::array(self.data().size(), self.data().data());
Expand Down Expand Up @@ -82,19 +91,27 @@ PYBIND11_MODULE(kp, m) {
.def("map_data_from_host", &kp::Tensor::mapDataFromHostMemory, "Maps data into GPU memory from tensor local data.")
.def("map_data_into_host", &kp::Tensor::mapDataIntoHostMemory, "Maps data from GPU memory into tensor local data.");





py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
.def("init", &kp::Sequence::init, "Initialises Vulkan resources within sequence using provided device.")

// record
.def("begin", &kp::Sequence::begin, "Clears previous commands and starts recording commands in sequence which can be run in batch.")
.def("end", &kp::Sequence::end, "Stops listening and recording for new commands.")

// eval
.def("eval", &kp::Sequence::eval, "Executes the currently recorded commands synchronously by waiting on Vulkan Fence.")
.def("eval_async", &kp::Sequence::evalAsync, "Executes the currently recorded commands asynchronously.")
.def("eval_await", &kp::Sequence::evalAwait, "Waits until the execution finishes using Vulkan Fence.")

// status
.def("is_running", &kp::Sequence::isRunning, "Checks whether the Sequence operations are currently still executing.")
.def("is_rec", &kp::Sequence::isRecording, "Checks whether the Sequence is currently in recording mode.")
.def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized")

// record
.def("record_tensor_create", &kp::Sequence::record<kp::OpTensorCreate>,
"Records operation to create and initialise tensor GPU memory and buffer")
Expand All @@ -106,23 +123,41 @@ PYBIND11_MODULE(kp, m) {
"Records operation to sync tensor(s) from GPU memory to local memory using staging tensors")
.def("record_algo_mult", &kp::Sequence::record<kp::OpMult>,
"Records operation to run multiplication compute shader to two input tensors and an output tensor")
.def("record_algo_file", &kp::Sequence::record<kp::OpAlgoBase, std::string>,
"Records an operation using a custom shader provided from a shader path")
.def("record_algo_file", [](kp::Sequence &self,
std::vector<std::shared_ptr<kp::Tensor>> tensors,
const std::string& file_path,
std::tuple<uint32_t,uint32_t,uint32_t> work_group) -> bool {
axsaucedo marked this conversation as resolved.
Show resolved Hide resolved
const kp::OpAlgoBase::KomputeWorkgroup wgroup{
std::get<0>(work_group), std::get<1>(work_group), std::get<2>(work_group),
};
return self.record<kp::OpAlgoBase>(tensors, file_path, wgroup);
},
"Records an operation using a custom shader provided from a shader path",
py::arg("tensors"), py::arg("file_path"), py::arg("work_group") = std::make_tuple(0,0,0) )
.def("record_algo_data", [](kp::Sequence &self,
std::vector<std::shared_ptr<kp::Tensor>> tensors,
py::bytes &bytes) -> float {
py::bytes &bytes,
std::tuple<uint32_t,uint32_t,uint32_t> work_group) -> bool {
axsaucedo marked this conversation as resolved.
Show resolved Hide resolved
// Bytes have to be converted into std::vector
py::buffer_info info(py::buffer(bytes).request());
const char *data = reinterpret_cast<const char *>(info.ptr);
size_t length = static_cast<size_t>(info.size);
const kp::OpAlgoBase::KomputeWorkgroup wgroup{
std::get<0>(work_group), std::get<1>(work_group), std::get<2>(work_group),
};
return self.record<kp::OpAlgoBase>(
tensors,
std::vector<char>(data, data + length));
tensors, std::vector<char>(data, data + length), wgroup
);
},
"Records an operation using a custom shader provided as raw string or spirv bytes")
"Records an operation using a custom shader provided as spirv bytes",
py::arg("tensors"), py::arg("bytes"), py::arg("work_group") = std::make_tuple(0,0,0) )
.def("record_algo_lro", &kp::Sequence::record<kp::OpAlgoLhsRhsOut>,
"Records operation to run left right out operation with custom shader");





py::class_<kp::Manager>(m, "Manager")
.def(py::init(), "Default initializer uses device 0 and first compute compatible GPU queueFamily")
.def(py::init(
Expand All @@ -139,12 +174,14 @@ PYBIND11_MODULE(kp, m) {
.def("build_tensor", &kp::Manager::buildTensor,
py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice,
"Build and initialise tensor")

// Await functions
.def("eval_await", &kp::Manager::evalOpAwait,
py::arg("sequenceName"), py::arg("waitFor") = UINT64_MAX,
"Awaits for asynchronous operation on a named Sequence")
.def("eval_await_def", &kp::Manager::evalOpAwaitDefault,
py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created")

// eval default
.def("eval_tensor_create_def", &kp::Manager::evalOpDefault<kp::OpTensorCreate>,
"Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence")
Expand Down Expand Up @@ -181,6 +218,7 @@ PYBIND11_MODULE(kp, m) {
"Evaluates an operation using a custom shader provided as spirv bytes with new anonymous Sequence")
.def("eval_algo_lro_def", &kp::Manager::evalOpDefault<kp::OpAlgoLhsRhsOut>,
"Evaluates operation to run left right out operation with custom shader with new anonymous Sequence")

// eval
.def("eval_tensor_create", &kp::Manager::evalOp<kp::OpTensorCreate>,
"Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
Expand Down Expand Up @@ -220,6 +258,7 @@ PYBIND11_MODULE(kp, m) {
"Evaluates an operation using a custom shader provided as spirv bytes with explicitly named Sequence")
.def("eval_algo_lro", &kp::Manager::evalOp<kp::OpAlgoLhsRhsOut>,
"Evaluates operation to run left right out operation with custom shader with explicitly named Sequence")

// eval async default
.def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault<kp::OpTensorCreate>,
"Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence")
Expand Down Expand Up @@ -256,6 +295,7 @@ PYBIND11_MODULE(kp, m) {
"Evaluates asynchronously an operation using a custom shader provided as raw string or spirv bytes with anonymous Sequence")
.def("eval_async_algo_lro_def", &kp::Manager::evalOpAsyncDefault<kp::OpAlgoLhsRhsOut>,
"Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence")

// eval async
.def("eval_async_tensor_create", &kp::Manager::evalOpAsync<kp::OpTensorCreate>,
"Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence")
Expand Down
37 changes: 37 additions & 0 deletions python/test/test_kompute.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,40 @@ def test_sequence():

assert tensor_out.data() == [2.0, 4.0, 6.0]
assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])



def test_workgroup():
    """
    Record a custom shader with an explicit (x, y, z) work group and check
    the values it writes into two tensors.

    Two 16x8 zero-initialised tensors are created and bound to the shader as
    bindings 0 and 1. The shader is recorded with the work group (16, 8, 1).
    Each invocation computes a flat index from its work group id
    (x * num_groups_y + y) and stores gl_GlobalInvocationID.x in the first
    tensor and gl_GlobalInvocationID.y in the second one. After syncing the
    tensors back to the host, the first tensor is expected to hold every x
    index repeated 8 times and the second the sequence 0..7 repeated 16 times.
    """
    mgr = kp.Manager(0)

    tensor_a = kp.Tensor(np.zeros([16, 8]))
    tensor_b = kp.Tensor(np.zeros([16, 8]))
    mgr.eval_tensor_create_def([tensor_a, tensor_b])

    # The shader text is kept at column 0 so that the bytes handed to the
    # sequence are exactly the ones in the literal below.
    shader_src = """
#version 450

layout (local_size_x = 1) in;

// The input tensors bind index is relative to index in parameter passed
layout(set = 0, binding = 0) writeonly buffer bout { float toutx[]; };
layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; };

void main() {
uint index = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y;

toutx[index] = gl_GlobalInvocationID.x;
touty[index] = gl_GlobalInvocationID.y;
}
"""
    shader_src = bytes(shader_src, encoding='utf8')

    seq = mgr.create_sequence()
    seq.begin()
    # Third argument is the work group tuple (x, y, z).
    seq.record_algo_data([tensor_a, tensor_b], shader_src, (16, 8, 1))
    seq.end()
    seq.eval()

    mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])

    # Flat index is x * 8 + y, so x changes every 8 elements and y cycles 0..7.
    expected_x = np.repeat(np.arange(16), 8)
    expected_y = np.tile(np.arange(8), 16)
    assert np.all(tensor_a.numpy() == expected_x)
    assert np.all(tensor_b.numpy() == expected_y)
2 changes: 1 addition & 1 deletion src/OpAlgoBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
// If at least the x value is provided we use mainly the parameters
// provided
this->mKomputeWorkgroup = {
0,
komputeWorkgroup.x,
axsaucedo marked this conversation as resolved.
Show resolved Hide resolved
komputeWorkgroup.y > 0 ? komputeWorkgroup.y : 1,
komputeWorkgroup.z > 0 ? komputeWorkgroup.z : 1
};
Expand Down