【Hackathon 6th Fundable Projects 3 No.162】 [fluid_ops] fused_multi_transformer_int8 #67523

Merged (1 commit) on Aug 21, 2024
660 changes: 0 additions & 660 deletions paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu

This file was deleted.

853 changes: 0 additions & 853 deletions paddle/fluid/operators/fused/fused_multi_transformer_op.cu

This file was deleted.

@@ -0,0 +1,56 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

KernelSignature FusedMultiTransformerInt8OpArgumentMapping(
const ArgumentMappingContext& ctx) {
paddle::small_vector<const char*> inputs{
"X", "LnScale", "LnBias", "QKVW",
"QKVBias", "CacheKV", "TimeStep", "SrcMask",
"OutLinearW", "OutLinearBias", "FFNLnScale", "FFNLnBias",
"FFN1Weight", "FFN1Bias", "FFN2Weight", "FFN2Bias",
"QKVOutScale", "OutLinearOutScale", "FFN1OutScale", "FFN2OutScale"};
paddle::small_vector<const char*> attrs;
attrs.emplace_back("pre_layer_norm");
attrs.emplace_back("epsilon");
attrs.emplace_back("dropout_rate");
attrs.emplace_back("is_test");
attrs.emplace_back("dropout_implementation");
attrs.emplace_back("act_method");
attrs.emplace_back("trans_qkvw");
attrs.emplace_back("ring_id");
attrs.emplace_back("num_head");
attrs.emplace_back("dim_head");
attrs.emplace_back("dim_ffn");
attrs.emplace_back("qkv_in_scale");
attrs.emplace_back("out_linear_in_scale");
attrs.emplace_back("ffn1_in_scale");
attrs.emplace_back("ffn2_in_scale");
attrs.emplace_back("quant_round_type");
attrs.emplace_back("quant_max_bound");
attrs.emplace_back("quant_min_bound");
paddle::small_vector<const char*> outputs{"CacheKVOut", "Out"};
return KernelSignature("fused_multi_transformer_int8",
std::move(inputs),
std::move(attrs),
std::move(outputs));
}

} // namespace phi

PD_REGISTER_ARG_MAPPING_FN(fused_multi_transformer_int8,
phi::FusedMultiTransformerInt8OpArgumentMapping);
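For context, this new argument-mapping file tells the PHI layer how the legacy fluid op's named inputs, attributes, and outputs line up with the parameters of the `fused_multi_transformer_int8` PHI kernel. A minimal sketch of the same pattern for a simpler, hypothetical op (the op name `my_fused_add` and its parameter names are illustrative only, not part of this PR):

```cpp
#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

// Hypothetical: forward the legacy op's inputs X/Y, attribute "axis",
// and output Out to a PHI kernel of the same name.
KernelSignature MyFusedAddOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature("my_fused_add", {"X", "Y"}, {"axis"}, {"Out"});
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(my_fused_add, phi::MyFusedAddOpArgumentMapping);
```

The mapping above for `fused_multi_transformer_int8` follows exactly this shape, just with many more inputs, attributes, and per-layer quantization scale parameters.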
1 change: 1 addition & 0 deletions paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -194,6 +194,7 @@
'fused_elementwise_mul',
'fused_elementwise_sub',
'fused_embedding_fc_lstm',
'fused_multi_transformer_int8',
'fusion_group',
'fusion_lstm',
'fusion_seqpool_cvm_concat',
142 changes: 142 additions & 0 deletions paddle/phi/infermeta/fusion.cc
@@ -2140,6 +2140,148 @@ void FusedMultiTransformerInt8XpuInferMeta(
out->set_layout(x.layout());
}

void FusedMultiTransformerInt8InferMeta(
const MetaTensor& x,
const std::vector<const MetaTensor*>& ln_scale,
const std::vector<const MetaTensor*>& ln_bias,
const std::vector<const MetaTensor*>& qkv_w,
const paddle::optional<std::vector<const MetaTensor*>>& qkv_bias,
const paddle::optional<std::vector<const MetaTensor*>>& cache_kv,
const MetaTensor& time_step,
const MetaTensor& src_mask,
const std::vector<const MetaTensor*>& out_linear_w,
const paddle::optional<std::vector<const MetaTensor*>>& out_linear_bias,
const std::vector<const MetaTensor*>& ffn_ln_scale,
const std::vector<const MetaTensor*>& ffn_ln_bias,
const std::vector<const MetaTensor*>& ffn1_weight,
const paddle::optional<std::vector<const MetaTensor*>>& ffn1_bias,
const std::vector<const MetaTensor*>& ffn2_weight,
const paddle::optional<std::vector<const MetaTensor*>>& ffn2_bias,
const paddle::optional<std::vector<const MetaTensor*>>& qkv_out_scale,
const paddle::optional<std::vector<const MetaTensor*>>&
out_linear_out_scale,
const paddle::optional<std::vector<const MetaTensor*>>& ffn1_out_scale,
const paddle::optional<std::vector<const MetaTensor*>>& ffn2_out_scale,
bool pre_layer_norm,
float epsilon,
float dropout_rate,
bool is_test,
const std::string& dropout_implementation,
const std::string& act_method,
bool trans_qkvw,
int ring_id,
int num_head,
int dim_head,
int dim_ffn,
const std::vector<float>& qkv_in_scale,
const std::vector<float>& out_linear_in_scale,
const std::vector<float>& ffn1_in_scale,
const std::vector<float>& ffn2_in_scale,
int quant_round_type,
float quant_max_bound,
float quant_min_bound,
std::vector<MetaTensor*> cache_kv_out,
MetaTensor* out) {
// x: qkv's input [batch_size, seq_len, dim_embed]
// y: qkv's weight: [3, num_head, dim_head, dim_embed]
const auto& x_dim = x.dims();
const auto& y_dim = qkv_w[0]->dims();
PADDLE_ENFORCE_EQ(
x_dim.size(),
3,
common::errors::InvalidArgument("The dimensions of x must be 3"
"(batch_size, seq_len, dim_embed),"
"but received dimensions of"
"Input is [%d]",
x_dim.size()));
PADDLE_ENFORCE_EQ(
y_dim.size(),
4,
common::errors::InvalidArgument("The dimensions of qkv_weight must be 4"
"(3, num_head, dim_head, dim_embed),"
"but received dimensions of"
"Input is [%d]",
y_dim.size()));
PADDLE_ENFORCE_EQ(
x_dim[2],
trans_qkvw ? y_dim[3] : y_dim[0],
common::errors::InvalidArgument(
"ShapeError: the dimension of x_dim[2] and y_dim[3](trans_qkvw is "
"true) or y_dim[0](trans_qkvw is false)"
"must be equal. But received: the shape "
"of input x = [%s], and the shape of "
"input qkv_weight = [%s]",
x_dim,
y_dim));

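// When ring_id != -1 the op presumably runs under tensor model parallelism
// with the qkv weight sharded across ranks, so num_head * dim_head need not
// equal dim_embed locally; hence the consistency checks below apply only to
// the single-rank case (rationale inferred from the surrounding code).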
if (ring_id == -1) {
if (trans_qkvw) {
PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2],
y_dim[3],
common::errors::InvalidArgument(
"The dimensions of qkv_weight must be 4"
"(3, num_head, dim_head, dim_embed),"
"and must satisfy the limitations: "
"(num_head * dim_head == dim_embed)"));

} else {
PADDLE_ENFORCE_EQ(y_dim[2] * y_dim[3],
y_dim[0],
common::errors::InvalidArgument(
"The dimensions of qkv_weight must be 4"
"(dim_embed, 3, num_head, dim_head),"
"and must satisfy the limitations: "
"(num_head * dim_head == dim_embed)"));
}
}

if (cache_kv && cache_kv.get().size() > 0) {
// [2, batch_size, num_head, max_seq_len, head_size]
const auto& c_dim = cache_kv.get()[0]->dims();

PADDLE_ENFORCE_EQ(
c_dim.size(),
5,
common::errors::InvalidArgument(
"The CacheKV must be 5 dims, but got %d", c_dim.size()));
PADDLE_ENFORCE_EQ(c_dim[0],
2,
common::errors::InvalidArgument(
"The first dim of CacheKV must be 2, but got %d",
c_dim[0])); // 2
PADDLE_ENFORCE_EQ(c_dim[1],
x_dim[0],
common::errors::InvalidArgument(
"The second dim of CacheKV must be equal with "
"batch size %d, but got %d",
x_dim[0],
c_dim[1])); // batch_size
PADDLE_ENFORCE_EQ(c_dim[2],
trans_qkvw ? y_dim[1] : y_dim[2],
common::errors::InvalidArgument(
"The third dim of CacheKV must be equal with num "
"head %d, but got %d",
trans_qkvw ? y_dim[1] : y_dim[2],
c_dim[2])); // num_head
PADDLE_ENFORCE_GT(
c_dim[3],
0,
common::errors::InvalidArgument(
"The forth dim of CacheKV must be greater than 0, but got %d",
c_dim[3])); // cache_seq_len
PADDLE_ENFORCE_EQ(c_dim[4],
trans_qkvw ? y_dim[2] : y_dim[3],
common::errors::InvalidArgument(
"The fifth dim of CacheKV must be equal with head "
"size %d, but got %d",
trans_qkvw ? y_dim[2] : y_dim[3],
c_dim[4])); // head_size
}

out->set_dims(x.dims());
out->set_dtype(x.dtype());
}
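To make the shape contract above concrete, here is a small self-contained sketch that checks the same invariants the InferMeta enforces; the concrete sizes are illustrative (not from this PR) and assume `trans_qkvw == true`:

```cpp
#include <cassert>
#include <vector>

int main() {
  // Illustrative sizes, with trans_qkvw == true.
  const int batch_size = 2, seq_len = 8;
  const int num_head = 4, dim_head = 16;
  const int dim_embed = num_head * dim_head;  // 64
  const int max_seq_len = 32;

  std::vector<int> x_dim = {batch_size, seq_len, dim_embed};               // x
  std::vector<int> y_dim = {3, num_head, dim_head, dim_embed};             // qkv_w
  std::vector<int> c_dim = {2, batch_size, num_head, max_seq_len, dim_head};  // cache_kv

  assert(x_dim.size() == 3 && y_dim.size() == 4 && c_dim.size() == 5);
  assert(x_dim[2] == y_dim[3]);             // dim_embed matches qkv_w's last dim
  assert(y_dim[1] * y_dim[2] == y_dim[3]);  // num_head * dim_head == dim_embed
  assert(c_dim[0] == 2);                    // stacked K and V
  assert(c_dim[1] == x_dim[0]);             // batch size
  assert(c_dim[2] == y_dim[1]);             // num_head
  assert(c_dim[3] > 0);                     // cache_seq_len
  assert(c_dim[4] == y_dim[2]);             // head_size
  // The output keeps the input's shape and dtype:
  // out dims == x dims == [batch_size, seq_len, dim_embed].
  return 0;
}
```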

void YoloBoxXPUInferMeta(const MetaTensor& x,
const MetaTensor& x_max,
const MetaTensor& grid,
43 changes: 43 additions & 0 deletions paddle/phi/infermeta/fusion.h
@@ -611,6 +611,49 @@ void FusedMultiTransformerInt8XpuInferMeta(
MetaTensor* out,
std::vector<MetaTensor*> cache_kv_out);

void FusedMultiTransformerInt8InferMeta(
const MetaTensor& x,
const std::vector<const MetaTensor*>& ln_scale,
const std::vector<const MetaTensor*>& ln_bias,
const std::vector<const MetaTensor*>& qkv_w,
const paddle::optional<std::vector<const MetaTensor*>>& qkv_bias,
const paddle::optional<std::vector<const MetaTensor*>>& cache_kv,
const MetaTensor& time_step,
const MetaTensor& src_mask,
const std::vector<const MetaTensor*>& out_linear_w,
const paddle::optional<std::vector<const MetaTensor*>>& out_linear_bias,
const std::vector<const MetaTensor*>& ffn_ln_scale,
const std::vector<const MetaTensor*>& ffn_ln_bias,
const std::vector<const MetaTensor*>& ffn1_weight,
const paddle::optional<std::vector<const MetaTensor*>>& ffn1_bias,
const std::vector<const MetaTensor*>& ffn2_weight,
const paddle::optional<std::vector<const MetaTensor*>>& ffn2_bias,
const paddle::optional<std::vector<const MetaTensor*>>& qkv_out_scale,
const paddle::optional<std::vector<const MetaTensor*>>&
out_linear_out_scale,
const paddle::optional<std::vector<const MetaTensor*>>& ffn1_out_scale,
const paddle::optional<std::vector<const MetaTensor*>>& ffn2_out_scale,
bool pre_layer_norm,
float epsilon,
float dropout_rate,
bool is_test,
const std::string& dropout_implementation,
const std::string& act_method,
bool trans_qkvw,
int ring_id,
int num_head,
int dim_head,
int dim_ffn,
const std::vector<float>& qkv_in_scale,
const std::vector<float>& out_linear_in_scale,
const std::vector<float>& ffn1_in_scale,
const std::vector<float>& ffn2_in_scale,
int quant_round_type,
float quant_max_bound,
float quant_min_bound,
std::vector<MetaTensor*> cache_kv_out,
MetaTensor* out);

void YoloBoxXPUInferMeta(const MetaTensor& x,
const MetaTensor& x_max,
const MetaTensor& grid,
2 changes: 2 additions & 0 deletions paddle/phi/kernels/CMakeLists.txt
@@ -223,6 +223,8 @@ if(WITH_ROCM)
"fusion/gpu/fused_bn_add_activation_kernel.cu"
"fusion/gpu/fused_feedforward_kernel.cu"
"fusion/gpu/fused_feedforward_grad_kernel.cu"
"fusion/gpu/fused_multi_transformer_int8_kernel.cu"
"fusion/gpu/fused_multi_transformer_kernel.cu"
"fusion/gpu/fusion_transpose_flatten_concat_kernel.cu")
list(
REMOVE_ITEM