Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
zhupengyang committed Apr 26, 2023
1 parent 55a8a33 commit c819b61
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,8 @@ int FusedMultiTransformerXPUQuantPass::ApplyImpl(ir::Graph* graph,
with_time_step,
with_seq_lengths,
with_src_mask);
bool quant_weight_only =
Has("quant_weight_only") && Get<bool>("quant_weight_only");
int quant_weight_bits =
Has("quant_weight_bits") ? Get<int>("quant_weight_bits") : -1;

int found_subgraph_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Expand Down Expand Up @@ -334,7 +334,7 @@ int FusedMultiTransformerXPUQuantPass::ApplyImpl(ir::Graph* graph,
w_node,
nullptr,
platform::errors::Fatal("w node should not be nullptr"));
if (quant_weight_only) {
if (quant_weight_bits == 8) {
PrepareWeight<int8_t>(
graph, scope, block, w_node, &w_intx, &w_max, need_transpose);
} else {
Expand Down
9 changes: 6 additions & 3 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,12 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
DECL_ARGUMENT_FIELD(xpu_device_id, XpuDeviceId, int);
DECL_ARGUMENT_FIELD(xpu_enable_multi_stream, XpuEnableMultiStream, bool);
DECL_ARGUMENT_FIELD(xpu_enable_quant_weight_only,
XpuEnableQuantWeightOnly,
bool);
DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_weight_bits,
XpuQuantPostDynamicWeightBits,
int);
DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
XpuQuantPostDynamicOpTypss,
std::vector<std::string>);

DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);

Expand Down
9 changes: 7 additions & 2 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,13 @@ void IRPassManager::CreatePasses(Argument *argument,
bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
pass->Set("use_fc_padding", new bool(use_fc_padding));
} else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
pass->Set("quant_weight_only",
new bool(argument->xpu_enable_quant_weight_only()));
auto op_types = argument->xpu_quant_post_dynamic_op_types();
if (std::count(op_types.begin(),
op_types.end(),
"fused_multi_transformer") > 0) {
pass->Set("quant_weight_bits",
new int(argument->xpu_quant_post_dynamic_weight_bits()));
}
}
pre_pass = pass_name;

Expand Down
24 changes: 18 additions & 6 deletions paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,11 @@ void AnalysisConfig::SetXpuDeviceId(int device_id) {
Update();
}

void AnalysisConfig::SetXpuConfig(bool xpu_enable_quant_weight_only) {
xpu_enable_quant_weight_only_ = xpu_enable_quant_weight_only;
void AnalysisConfig::SetXpuConfig(
int quant_post_dynamic_weight_bits,
const std::vector<std::string> &quant_post_dynamic_op_types) {
xpu_quant_post_dynamic_weight_bits_ = quant_post_dynamic_weight_bits;
xpu_quant_post_dynamic_op_types_ = quant_post_dynamic_op_types;
Update();
}

Expand Down Expand Up @@ -494,7 +497,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(xpu_precision_);
CP_MEMBER(xpu_adaptive_seqlen_);
CP_MEMBER(xpu_enable_multi_stream_);
CP_MEMBER(xpu_enable_quant_weight_only_);
CP_MEMBER(xpu_quant_post_dynamic_weight_bits_);
CP_MEMBER(xpu_quant_post_dynamic_op_types_);

// Lite OpenCL Related
CP_MEMBER(use_opencl_);
Expand Down Expand Up @@ -1097,7 +1101,10 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << xpu_precision_;
ss << xpu_adaptive_seqlen_;
ss << xpu_enable_multi_stream_;
ss << xpu_enable_quant_weight_only_;
ss << xpu_quant_post_dynamic_weight_bits_;
for (auto op_type : xpu_quant_post_dynamic_op_types_) {
ss << op_type;
}

ss << use_npu_;
ss << npu_device_id_;
Expand Down Expand Up @@ -1338,8 +1345,13 @@ std::string AnalysisConfig::Summary() {
os.InsertRow({"xpu_device_id", std::to_string(xpu_device_id_)});
os.InsertRow(
{"xpu_l3_workspace_size", std::to_string(xpu_l3_workspace_size_)});
os.InsertRow({"xpu_enable_quant_weight_only",
std::to_string(xpu_enable_quant_weight_only_)});
os.InsertRow({"xpu_quant_post_dynamic_weight_bits",
std::to_string(xpu_quant_post_dynamic_weight_bits_)});
std::vector<std::string> op_types{"xpu_quant_post_dynamic_op_types"};
for (auto op_type : xpu_quant_post_dynamic_op_types_) {
op_types.push_back(op_type);
}
os.InsertRow(op_types);
}
os.InsetDivider();

Expand Down
5 changes: 4 additions & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,10 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
argument_->SetXpuDeviceId(config_.xpu_device_id_);
argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
argument_->SetXpuEnableQuantWeightOnly(config_.xpu_enable_quant_weight_only_);
argument_->SetXpuQuantPostDynamicWeightBits(
config_.xpu_quant_post_dynamic_weight_bits_);
argument_->SetXpuQuantPostDynamicOpTypss(
config_.xpu_quant_post_dynamic_op_types_);
#endif

auto *pass_builder = config_.pass_builder();
Expand Down
13 changes: 9 additions & 4 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
/// \brief configs of XPU
///
/// \param xpu_enable_quant_weight_only Whether to enable weight only optimize
/// on fused_multi_transformer.
/// \param quant_post_dynamic_weight_bits Weight bits used in dynamic post
/// quantization. Valid values: -1, 8, 16. The default value, -1, means the
/// recommended setting is used.
/// \param quant_post_dynamic_op_types Ops used in dynamic post
/// quantization.
///
void SetXpuConfig(bool xpu_enable_quant_weight_only = false);
void SetXpuConfig(
int quant_post_dynamic_weight_bits = -1,
const std::vector<std::string>& quant_post_dynamic_op_types = {});

///
/// \brief configs of IPU
Expand Down Expand Up @@ -1189,7 +1193,8 @@ struct PD_INFER_DECL AnalysisConfig {
std::string xpu_precision_;
bool xpu_adaptive_seqlen_;
bool xpu_enable_multi_stream_;
bool xpu_enable_quant_weight_only_{false};
int xpu_quant_post_dynamic_weight_bits_{-1};
std::vector<std::string> xpu_quant_post_dynamic_op_types_;

// LITE OPENCL SETTINGS
bool use_opencl_{false};
Expand Down
8 changes: 5 additions & 3 deletions paddle/fluid/pybind/inference_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -767,9 +767,11 @@ void BindAnalysisConfig(py::module *m) {
.def("set_xpu_device_id",
&AnalysisConfig::SetXpuDeviceId,
py::arg("device_id") = 0)
.def("set_xpu_config",
&AnalysisConfig::SetXpuConfig,
py::arg("xpu_enable_quant_weight_only") = false)
.def(
"set_xpu_config",
&AnalysisConfig::SetXpuConfig,
py::arg("quant_post_dynamic_weight_bits") = -1,
py::arg("quant_post_dynamic_op_types") = std::vector<std::string>({}))
.def("enable_custom_device",
&AnalysisConfig::EnableCustomDevice,
py::arg("device_type"),
Expand Down

0 comments on commit c819b61

Please sign in to comment.