add_share_external_data_interface
JZZ-NOTE committed Feb 23, 2022
1 parent 2820241 commit ea660ee
Showing 2 changed files with 70 additions and 1 deletion.
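
In short, this commit adds a zero-copy ShareExternalData interface to paddle_infer::Tensor: instead of copying input data into the tensor, a caller binds an existing host or device buffer directly. A minimal usage sketch follows (not part of the commit; the header name, model path, and input name "x" are illustrative placeholders):

// Sketch only: assumes a model whose input "x" takes four int64 values.
#include "paddle_inference_api.h"

#include <vector>

int main() {
  paddle_infer::Config config;
  config.SetModel("/path/to/model");  // placeholder path
  auto predictor = paddle_infer::CreatePredictor(config);

  std::vector<int64_t> input = {0, 1, 2, 3};
  auto handle = predictor->GetInputHandle("x");  // hypothetical input name
  // Zero-copy: the tensor aliases input, so the buffer must outlive Run().
  handle->ShareExternalData<int64_t>(input.data(), {4, 1},
                                     paddle_infer::PlaceType::kCPU);
  predictor->Run();
  return 0;
}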
66 changes: 66 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <cuda_runtime.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <thread> // NOLINT
@@ -405,4 +406,69 @@ TEST(Predictor, Run) {
  predictor->TryShrinkMemory();
}

TEST(Tensor, CpuShareExternalData) {
  Config config;
  config.SetModel(FLAGS_dirname);

  auto predictor = CreatePredictor(config);

  auto w0 = predictor->GetInputHandle("firstw");
  auto w1 = predictor->GetInputHandle("secondw");
  auto w2 = predictor->GetInputHandle("thirdw");
  auto w3 = predictor->GetInputHandle("forthw");

  std::vector<std::vector<int64_t>> input_data(4, {0, 1, 2, 3});
  w0->ShareExternalData<int64_t>(input_data[0].data(), {4, 1}, PlaceType::kCPU);
  w1->ShareExternalData<int64_t>(input_data[1].data(), {4, 1}, PlaceType::kCPU);
  w2->ShareExternalData<int64_t>(input_data[2].data(), {4, 1}, PlaceType::kCPU);
  w3->ShareExternalData<int64_t>(input_data[3].data(), {4, 1}, PlaceType::kCPU);

  predictor->Run();

  auto out = predictor->GetOutputHandle("fc_1.tmp_2");
  PlaceType place;
  int size = 0;
  out->data<float>(&place, &size);
  LOG(INFO) << "output size: " << size / sizeof(float);
  predictor->TryShrinkMemory();
}

#if defined(PADDLE_WITH_CUDA)
TEST(Tensor, GpuShareExternalData) {
  Config config;
  config.SetModel(FLAGS_dirname);
  config.EnableUseGpu(100, 0);

  auto predictor = CreatePredictor(config);

  auto w0 = predictor->GetInputHandle("firstw");
  auto w1 = predictor->GetInputHandle("secondw");
  auto w2 = predictor->GetInputHandle("thirdw");
  auto w3 = predictor->GetInputHandle("forthw");

  std::vector<std::vector<int64_t>> input_data(4, {0, 1, 2, 3});
  std::vector<int64_t*> input_gpu(4, nullptr);

  // Stage each input on the device; the tensors below alias these buffers
  // rather than copying them.
  for (size_t i = 0; i < 4; ++i) {
    cudaMalloc(reinterpret_cast<void**>(&input_gpu[i]), 4 * sizeof(int64_t));
    cudaMemcpy(input_gpu[i], input_data[i].data(), 4 * sizeof(int64_t),
               cudaMemcpyHostToDevice);
  }

  w0->ShareExternalData<int64_t>(input_gpu[0], {4, 1}, PlaceType::kGPU);
  w1->ShareExternalData<int64_t>(input_gpu[1], {4, 1}, PlaceType::kGPU);
  w2->ShareExternalData<int64_t>(input_gpu[2], {4, 1}, PlaceType::kGPU);
  w3->ShareExternalData<int64_t>(input_gpu[3], {4, 1}, PlaceType::kGPU);

  predictor->Run();

  auto out = predictor->GetOutputHandle("fc_1.tmp_2");
  PlaceType place;
  int size = 0;
  out->data<float>(&place, &size);
  LOG(INFO) << "output size: " << size / sizeof(float);
  predictor->TryShrinkMemory();

  // Free the device buffers; the predictor does not own them.
  for (size_t i = 0; i < 4; ++i) {
    cudaFree(input_gpu[i]);
  }
}
#endif
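
Note that ShareExternalData is zero-copy in both tests: the input tensors alias input_data and input_gpu rather than owning them, so the buffers must stay alive until Run() returns, and the GPU test frees its device buffers only after inference completes.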

} // namespace paddle_infer
5 changes: 4 additions & 1 deletion paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -262,12 +262,15 @@ void Tensor::ShareExternalData(const T *data, const std::vector<int> &shape,
             paddle::platform::CPUPlace()),
         meta);
     *tensor = std::move(dtensor);
-  } else {
+  } else if (place == PlaceType::kGPU) {
     pten::DenseTensor dtensor(
         std::make_shared<pten::Allocation>(
             const_cast<T *>(data), size, paddle::platform::CUDAPlace(device_)),
         meta);
     *tensor = std::move(dtensor);
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "PlaceType must be PlaceType::kCPU or PlaceType::kGPU."));
   }
 }
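
This hunk also tightens the place dispatch: previously every non-CPU place fell through to the CUDA branch, whereas now only PlaceType::kGPU does, and any other place type raises an InvalidArgument error.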
