Merge pull request PaddlePaddle#45 from jiweibo/lite_engine
fluid-lite subgraph support content-dnn
Shixiaowei02 authored Dec 26, 2019
2 parents 3dfb181 + 20fa772 commit 1a7715d
Showing 6 changed files with 44 additions and 2 deletions.
5 changes: 3 additions & 2 deletions paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
@@ -221,9 +221,9 @@ void LiteSubgraphPass::SetUpEngine(
 
   bool use_gpu = Get<bool>("use_gpu");
   bool enable_int8 = Get<bool>("enable_int8");
-  lite_api::TargetType target_type = use_gpu ? TARGET(kCUDA) : TARGET(kHost);
+  lite_api::TargetType target_type = use_gpu ? TARGET(kCUDA) : TARGET(kX86);
   paddle::lite_api::PrecisionType precision_type =
-      enable_int8 ? PRECISION(kInt8) : PRECISION(kFloat);
+      enable_int8 ? PRECISION(kInt8) : PRECISION(kInt64);
   std::set<std::string> param_names_set(repetitive_params.begin(),
                                         repetitive_params.end());
   const_cast<std::vector<std::string>&>(repetitive_params)
@@ -232,6 +232,7 @@ void LiteSubgraphPass::SetUpEngine(
   config.model = program->Proto()->SerializeAsString();
   config.valid_places = {
       paddle::lite::Place({target_type, precision_type}),
+      paddle::lite::Place({target_type, PRECISION(kFloat)}),
       paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
   };
   if (dump_model) {
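Note: valid_places is a preference-ordered list that Lite's kernel picker walks front to back, so the added kFloat entry gives every op without an int64 kernel a float fallback before landing on generic host kernels. A minimal sketch of the same fallback chain through Paddle-Lite's public API; this is an illustration only (the model directory, include path, and build setup are assumptions, and the pass itself uses the internal paddle::lite::Place rather than this public type):

#include <memory>
#include <vector>
#include "paddle_api.h"  // Paddle-Lite public API header; exact path depends on the install layout

int main() {
  using namespace paddle::lite_api;
  CxxConfig config;
  config.set_model_dir("./content_dnn_model");  // hypothetical model directory
  // Places are tried in order: x86 int64 kernels first, then x86 float
  // kernels, then generic host kernels -- the same chain the pass builds.
  config.set_valid_places({
      Place{TARGET(kX86), PRECISION(kInt64)},
      Place{TARGET(kX86), PRECISION(kFloat)},
      Place{TARGET(kHost), PRECISION(kFloat)},
  });
  auto predictor = CreatePaddlePredictor<CxxConfig>(config);
  return 0;
}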
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -46,6 +46,7 @@ struct AnalysisConfig {
   enum class Precision {
     kFloat32 = 0,
     kInt8,
+    kInt64,
     kHalf,
   };

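The new kInt64 member extends the user-facing precision choices to match the lite-subgraph change above. A hedged sketch of how a config precision might translate to a Lite precision; the ToLitePrecision helper and the include paths are hypothetical, and the real translation happens inside LiteSubgraphPass::SetUpEngine:

#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "lite/api/paddle_place.h"  // assumed header for PRECISION() / PrecisionType

// Hypothetical helper, shown only to illustrate how the enum extension maps
// onto Paddle-Lite's PrecisionType; not part of this commit.
paddle::lite_api::PrecisionType ToLitePrecision(paddle::AnalysisConfig::Precision p) {
  switch (p) {
    case paddle::AnalysisConfig::Precision::kInt8:  return PRECISION(kInt8);
    case paddle::AnalysisConfig::Precision::kInt64: return PRECISION(kInt64);
    case paddle::AnalysisConfig::Precision::kHalf:  return PRECISION(kFP16);
    case paddle::AnalysisConfig::Precision::kFloat32:
    default:                                        return PRECISION(kFloat);
  }
}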
8 changes: 8 additions & 0 deletions paddle/fluid/inference/lite/engine.cc
@@ -12,7 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#ifdef PADDLE_WITH_CUDA
+#define LITE_WITH_CUDA 1
+#endif
+
 #include "paddle/fluid/inference/lite/engine.h"
+#include "lite/core/context.h"
+#include "lite/core/device_info.h"
 
 namespace paddle {
 namespace inference {
@@ -34,7 +40,9 @@ paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
 paddle::lite::Predictor* EngineManager::Create(const std::string& name,
                                                const EngineConfig& cfg) {
   auto* p = new paddle::lite::Predictor();
+#ifdef PADDLE_WITH_CUDA
   paddle::lite::Env<TARGET(kCUDA)>::Init();
+#endif
   p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes,
            cfg.model_type, cfg.model_from_memory);
   engines_[name].reset(p);
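Guarding Env<TARGET(kCUDA)>::Init() behind PADDLE_WITH_CUDA keeps CPU-only builds of the engine manager compilable. A hedged usage sketch of the manager; the engine key, config values, and singleton access pattern are assumptions based on how the TensorRT engine manager is used elsewhere in fluid:

#include <string>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

void BuildEngine(const std::string& serialized_program) {
  paddle::inference::lite::EngineConfig cfg;
  cfg.model = serialized_program;  // a serialized ProgramDesc, as the pass produces
  cfg.valid_places = {
      paddle::lite::Place({TARGET(kX86), PRECISION(kInt64)}),
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
  };
  // Create() now guards Env<kCUDA>::Init() with PADDLE_WITH_CUDA internally.
  paddle::inference::Singleton<paddle::inference::lite::EngineManager>::Global()
      .Create("content_dnn_engine", cfg);  // hypothetical engine key
}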
16 changes: 16 additions & 0 deletions paddle/fluid/inference/lite/op_teller.cc
@@ -123,6 +123,22 @@ struct SimpleOpTeller : public Teller {
     ops_.insert("fusion_elementwise_max_activation");
     ops_.insert("fusion_elementwise_div_activation");
     ops_.insert("pad2d");
+    ops_.insert("sequence_reverse");
+    ops_.insert("lookup_table");
+    ops_.insert("search_seq_arithmetic");
+    ops_.insert("search_grnn");
+    ops_.insert("sequence_pool");
+    ops_.insert("search_group_padding");
+    ops_.insert("search_seq_fc");
+    ops_.insert("search_aligned_mat_mul");
+    ops_.insert("search_attention_padding_mask");
+    ops_.insert("search_seq_softmax");
+    ops_.insert("search_seq_depadding");
+    ops_.insert("match_matrix_tensor");
+    ops_.insert("var_conv_2d");
+    ops_.insert("sequence_concat");
+    ops_.insert("sequence_topk_avg_pooling");
+    ops_.insert("search_fc");
   }
 
   bool operator()(const std::string& op_type,
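The sixteen new entries are the sequence/search ops that content-dnn models use; listing them here is what lets the subgraph pass offload those ops to Lite. The teller itself is plain set membership over op type names. A stand-alone, runnable sketch of the pattern, with a hypothetical MiniOpTeller standing in for SimpleOpTeller:

#include <iostream>
#include <set>
#include <string>

// An op is handed to the Lite subgraph only if its type is in the supported set.
struct MiniOpTeller {
  MiniOpTeller() {
    ops_.insert("lookup_table");
    ops_.insert("sequence_pool");
    ops_.insert("search_fc");
  }
  bool operator()(const std::string& op_type) const {
    return ops_.count(op_type) > 0;
  }
  std::set<std::string> ops_;
};

int main() {
  MiniOpTeller teller;
  std::cout << teller("lookup_table") << " " << teller("conv2d") << "\n";  // prints: 1 0
  return 0;
}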
6 changes: 6 additions & 0 deletions paddle/fluid/inference/lite/tensor_utils.cc
@@ -29,6 +29,7 @@ using paddle::lite_api::DataLayoutType;
 template <typename DstLoD, typename SrcLoD>
 void SetLoD(DstLoD* dst, const SrcLoD& src) {
   dst->reserve(src.size());
+  dst->clear();
   for (auto&& v : src) {
     dst->emplace_back(v);
   }
@@ -41,6 +42,7 @@ template void SetLoD<framework::LoD, paddle::lite::LoD>(
 platform::Place GetNativePlace(const TargetType& type, int id = 0) {
   switch (type) {
     case TargetType::kHost:
+    case TargetType::kX86:
       return platform::CPUPlace();
     case TargetType::kCUDA:
       return platform::CUDAPlace(id);
@@ -65,6 +67,8 @@ PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) {
       return PrecisionType::kInt8;
     case framework::proto::VarType_Type_INT32:
       return PrecisionType::kInt32;
+    case framework::proto::VarType_Type_INT64:
+      return PrecisionType::kInt64;
     default:
       LOG(FATAL) << "Error precision type.";
       return PrecisionType::kUnk;
@@ -80,6 +84,8 @@ framework::proto::VarType::Type GetNativePrecisionType(
       return framework::proto::VarType_Type_INT8;
     case PrecisionType::kInt32:
       return framework::proto::VarType_Type_INT32;
+    case PrecisionType::kInt64:
+      return framework::proto::VarType_Type_INT64;
     default:
       LOG(FATAL) << "Error precision type.";
       return static_cast<framework::proto::VarType::Type>(-1);
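Note that the precision map must be extended in both directions (fluid -> Lite and Lite -> fluid), or an int64 tensor copy falls through to the LOG(FATAL) default branch on one side. A stand-alone, runnable sketch of that round-trip property; the enum and function names are hypothetical stand-ins for the real types above:

#include <cassert>

// Every enum pair must appear in both direction maps, otherwise a
// round-trip falls through to the fatal default branch.
enum class NativeType { kFloat32, kInt8, kInt32, kInt64 };
enum class LitePrec { kFloat, kInt8, kInt32, kInt64 };

LitePrec ToLite(NativeType t) {
  switch (t) {
    case NativeType::kInt8:  return LitePrec::kInt8;
    case NativeType::kInt32: return LitePrec::kInt32;
    case NativeType::kInt64: return LitePrec::kInt64;  // the pairing this commit adds
    default:                 return LitePrec::kFloat;
  }
}

NativeType ToNative(LitePrec t) {
  switch (t) {
    case LitePrec::kInt8:  return NativeType::kInt8;
    case LitePrec::kInt32: return NativeType::kInt32;
    case LitePrec::kInt64: return NativeType::kInt64;
    default:               return NativeType::kFloat32;
  }
}

int main() {
  assert(ToNative(ToLite(NativeType::kInt64)) == NativeType::kInt64);
  return 0;
}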
10 changes: 10 additions & 0 deletions paddle/fluid/operators/lite/lite_engine_op.h
@@ -77,6 +77,7 @@ class LiteEngineOp : public framework::OperatorBase {
           inference::analysis::GetFromScope<framework::LoDTensor>(scope,
                                                                   in_names_[i]);
       paddle::lite::Tensor *dst_t = engine_->GetInput(i);
+      VLOG(3) << "fluid -> lite: " << in_names_[i];
       inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
     }
 #ifdef PADDLE_WITH_CUDA
@@ -85,14 +86,23 @@ class LiteEngineOp : public framework::OperatorBase {
           static_cast<const platform::CUDADeviceContext *>(ctx)->stream());
     }
 #endif
+    VLOG(3) << "lite engine run";
     engine_->Run();
+    VLOG(3) << "lite engine run done";
     for (size_t i = 0; i < out_names_.size(); i++) {
       const paddle::lite::Tensor &src_t = *(engine_->GetOutput(i));
       framework::LoDTensor *dst_t =
           &inference::analysis::GetFromScope<framework::LoDTensor>(
               scope, out_names_[i]);
+      VLOG(3) << "lite -> fluid: " << out_names_[i];
       inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
     }
+#ifdef PADDLE_WITH_CUDA
+    if (platform::is_gpu_place(dev_place)) {
+      platform::GpuStreamSync(
+          static_cast<const platform::CUDADeviceContext *>(ctx)->stream());
+    }
+#endif
   }
 };

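The added GpuStreamSync after the output loop is the key correctness fix: TensorCopyAsync only enqueues the copies on the stream, so without a sync the downstream fluid op could read output buffers before the device-to-host transfers finish. A generic CUDA-runtime sketch of the same rule, using plain cudaMemcpyAsync rather than Paddle's wrappers:

#include <cuda_runtime.h>

// An async device-to-host copy is not guaranteed to have completed until the
// stream is synchronized, so the host must not read host_buf before the sync.
void CopyOutput(const float* dev_buf, float* host_buf, size_t n,
                cudaStream_t stream) {
  cudaMemcpyAsync(host_buf, dev_buf, n * sizeof(float),
                  cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // mirrors platform::GpuStreamSync(stream)
  // host_buf is now safe to read.
}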
