Merge pull request #1420 from ShiningZhang/dev-fp16
support datatype of fp16
HexToString authored Oct 20, 2021
2 parents 9baf65e + a3324ba commit 4699f0b
Showing 6 changed files with 49 additions and 40 deletions.
6 changes: 3 additions & 3 deletions core/general-client/src/client.cpp
@@ -23,8 +23,7 @@ using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
@@ -431,7 +430,8 @@ int PredictorOutputs::ParseProto(const Response& res,
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (fetch_name_to_type[name] == P_UINT8
|| fetch_name_to_type[name] == P_INT8) {
|| fetch_name_to_type[name] == P_INT8
|| fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var [" << name << "]type="
<< fetch_name_to_type[name];
string_data_map[name] = output.tensor(idx).tensor_content();
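Note on this change (commentary, not part of the diff): FLOAT16 fetch tensors now take the same path as UINT8/INT8, i.e. the values travel as raw bytes in tensor_content instead of a typed repeated field, and the client reinterprets those bytes later (the Python client does this with np.fromstring(..., dtype=np.float16)). A minimal Python sketch of that byte round-trip, with hypothetical values and names:

import numpy as np

# Illustrative only: the array below is made up, not from the commit.
values = np.array([0.5, 1.25, -2.0], dtype=np.float16)   # fp16 payload on the sender side
tensor_content = values.tobytes()                         # 2 bytes per element, sent as raw bytes

# Receiving side: reinterpret the bytes as fp16 (np.frombuffer is the
# non-deprecated equivalent of the np.fromstring call used in the client).
restored = np.frombuffer(tensor_content, dtype=np.float16)
assert np.array_equal(values, restored)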
3 changes: 1 addition & 2 deletions core/general-client/src/general_model.cpp
@@ -25,8 +25,7 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
13 changes: 6 additions & 7 deletions core/general-server/op/general_reader_op.cpp
@@ -31,8 +31,7 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
@@ -130,11 +129,11 @@ int GeneralReaderOp::inference() {
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_FP16) {
// paddle inference will support FLOAT16
// elem_size = 1;
// paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
// data_len = tensor.tensor_content().size();
// src_ptr = tensor.tensor_content().data();
// copy bytes from tensor content to TensorVector
elem_size = 1;
paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_STRING) {
// use paddle::PaddleDType::UINT8 as for String.
elem_size = sizeof(char);
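Note on this change (commentary, not part of the diff): the new P_FP16 branch copies tensor_content into the TensorVector byte-for-byte, and data_len comes straight from tensor_content().size(), so it is already a byte count. A small Python sketch of the feed-side packing this assumes (shape and names are hypothetical):

import numpy as np

feed = np.random.rand(4, 16).astype(np.float16)        # hypothetical fp16 feed tensor
tensor_content = feed.tobytes()                         # what the request would carry

# What the reader op sees: a byte length plus the shape from the request.
data_len = len(tensor_content)                          # 4 * 16 * 2 = 128 bytes
elem_count = int(np.prod(feed.shape))                   # 64 fp16 elements
assert data_len == elem_count * np.dtype(np.float16).itemsize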
14 changes: 6 additions & 8 deletions core/general-server/op/general_response_op.cpp
@@ -178,14 +178,12 @@ int GeneralResponseOp::inference() {
VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
}
// inference will support fp16
// else if (dtype == paddle::PaddleDType::FLOAT16) {
// tensor->set_elem_type(5);
// VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
// << model_config->_fetch_name[idx] << "].";
// tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
// }
} else if (dtype == paddle::PaddleDType::FLOAT16) {
tensor->set_elem_type(5);
VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
}

VLOG(2) << "(logid=" << log_id << ") fetch var ["
<< model_config->_fetch_name[idx] << "] ready";
37 changes: 17 additions & 20 deletions core/predictor/framework/infer.h
@@ -31,6 +31,7 @@
#include "core/predictor/framework/infer_data.h"
#include "core/predictor/framework/memory.h"
#include "paddle_inference_api.h" // NOLINT
#include "experimental/float16.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
@@ -541,19 +542,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
paddle::PaddleDType::INT8) {
int8_t* data = static_cast<int8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::FLOAT16) {
paddle::platform::float16* data =
static_cast<paddle::platform::float16*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else {
LOG(ERROR) << "Inference not support type["
<< (*tensorVector_in_pointer)[i].dtype << "],name["
<< (*tensorVector_in_pointer)[i].name << "]"
<< " copy into core failed!";
}
// Paddle inference will support FP16 in next version.
// else if ((*tensorVector_in_pointer)[i].dtype ==
// paddle::PaddleDType::FLOAT16) {
// paddle::platform::float16* data =
// static_cast<paddle::platform::float16*>(origin_data);
// lod_tensor_in->CopyFromCpu(data);
// }
VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
<< ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
<< ";tensor_dtype=" << lod_tensor_in->type();
@@ -641,20 +640,18 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::FLOAT16) {
databuf_size = out_num * sizeof(paddle::platform::float16);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
paddle::platform::float16* data_out =
reinterpret_cast<paddle::platform::float16*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
// Inference will support FP16 in next version
// else if (dataType == paddle::PaddleDType::FLOAT16) {
// using float16 = paddle::platform::float16;
// databuf_size = out_num * sizeof(float16);
// databuf_data = MempoolWrapper::instance().malloc(databuf_size);
// if (!databuf_data) {
// LOG(ERROR) << "Malloc failed, size: " << databuf_size;
// return -1;
// }
// float16* data_out = reinterpret_cast<float16*>(databuf_data);
// lod_tensor_out->CopyToCpu(data_out);
// databuf_char = reinterpret_cast<char*>(data_out);
// }

// Because task scheduling requires OPs to use 'Channel'
// (which is a data structure) to transfer data between OPs.
16 changes: 16 additions & 0 deletions python/paddle_serving_client/client.py
@@ -551,6 +551,22 @@ def predict(self,
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float16_type:
# result_map[name] will be py::array(numpy array)
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
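For context, a minimal end-to-end usage sketch of the new fetch path (commentary, not part of the diff): the server address, client config path, and the feed/fetch variable names "x" and "price" are placeholders, not taken from this commit, and it assumes the deployed model declares its fetch variable as float16.

import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")  # placeholder path
client.connect(["127.0.0.1:9393"])                          # placeholder endpoint

feed_data = {"x": np.random.rand(1, 13).astype(np.float32)}
fetch_map = client.predict(feed=feed_data, fetch=["price"], batch=False)

# With this commit, a float16 fetch variable comes back as a numpy array
# of dtype np.float16, reshaped to the fetch shape reported by the server.
print(fetch_map["price"].dtype)   # expected: float16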
