Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
co63oc committed Aug 19, 2024
1 parent 93b8aa6 commit e596800
Show file tree
Hide file tree
Showing 9 changed files with 963 additions and 660 deletions.
660 changes: 0 additions & 660 deletions paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

// Maps the legacy fused_multi_transformer_int8 fluid operator onto its PHI
// kernel signature: lists the op's input tensors, attributes, and outputs in
// the exact order the kernel expects them.
KernelSignature FusedMultiTransformerInt8OpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  // Input tensor names, in kernel-argument order.
  paddle::small_vector<const char*> input_names{
      "X",           "LnScale",           "LnBias",       "QKVW",
      "QKVBias",     "CacheKV",           "TimeStep",     "SrcMask",
      "OutLinearW",  "OutLinearBias",     "FFNLnScale",   "FFNLnBias",
      "FFN1Weight",  "FFN1Bias",          "FFN2Weight",   "FFN2Bias",
      "QKVOutScale", "OutLinearOutScale", "FFN1OutScale", "FFN2OutScale"};
  // Attribute names, in kernel-argument order.
  paddle::small_vector<const char*> attr_names{"pre_layer_norm",
                                               "epsilon",
                                               "dropout_rate",
                                               "is_test",
                                               "dropout_implementation",
                                               "act_method",
                                               "trans_qkvw",
                                               "ring_id",
                                               "num_head",
                                               "dim_head",
                                               "dim_ffn",
                                               "qkv_in_scale",
                                               "out_linear_in_scale",
                                               "ffn1_in_scale",
                                               "ffn2_in_scale",
                                               "quant_round_type",
                                               "quant_max_bound",
                                               "quant_min_bound"};
  // Output tensor names, in kernel-argument order.
  paddle::small_vector<const char*> output_names{"CacheKVOut", "Out"};
  return KernelSignature("fused_multi_transformer_int8",
                         std::move(input_names),
                         std::move(attr_names),
                         std::move(output_names));
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(fused_multi_transformer_int8,
                           phi::FusedMultiTransformerInt8OpArgumentMapping);
1 change: 1 addition & 0 deletions paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@
'fused_elementwise_mul',
'fused_elementwise_sub',
'fused_embedding_fc_lstm',
'fused_multi_transformer_int8',
'fusion_group',
'fusion_lstm',
'fusion_seqpool_cvm_concat',
Expand Down
142 changes: 142 additions & 0 deletions paddle/phi/infermeta/fusion.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2140,6 +2140,148 @@ void FusedMultiTransformerInt8XpuInferMeta(
out->set_layout(x.layout());
}

// Shape/dtype inference for the fused_multi_transformer_int8 op.
//
// Validates the input activation against the first QKV weight tensor and the
// (optional) KV cache, then propagates x's dims/dtype to `out`.
//   x:     qkv's input, [batch_size, seq_len, dim_embed]
//   qkv_w: qkv's weight, [3, num_head, dim_head, dim_embed] when trans_qkvw
//          is true, otherwise [dim_embed, 3, num_head, dim_head]
// All remaining parameters mirror the kernel signature; they are accepted here
// so the InferMeta signature matches the op registration, even though only
// x, qkv_w, cache_kv, trans_qkvw, and ring_id participate in the checks.
void FusedMultiTransformerInt8InferMeta(
    const MetaTensor& x,
    const std::vector<const MetaTensor*>& ln_scale,
    const std::vector<const MetaTensor*>& ln_bias,
    const std::vector<const MetaTensor*>& qkv_w,
    const paddle::optional<std::vector<const MetaTensor*>>& qkv_bias,
    const paddle::optional<std::vector<const MetaTensor*>>& cache_kv,
    const MetaTensor& time_step,
    const MetaTensor& src_mask,
    const std::vector<const MetaTensor*>& out_linear_w,
    const paddle::optional<std::vector<const MetaTensor*>>& out_linear_bias,
    const std::vector<const MetaTensor*>& ffn_ln_scale,
    const std::vector<const MetaTensor*>& ffn_ln_bias,
    const std::vector<const MetaTensor*>& ffn1_weight,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn1_bias,
    const std::vector<const MetaTensor*>& ffn2_weight,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn2_bias,
    const paddle::optional<std::vector<const MetaTensor*>>& qkv_out_scale,
    const paddle::optional<std::vector<const MetaTensor*>>&
        out_linear_out_scale,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn1_out_scale,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn2_out_scale,
    bool pre_layer_norm,
    float epsilon,
    float dropout_rate,
    bool is_test,
    const std::string& dropout_implementation,
    const std::string& act_method,
    bool trans_qkvw,
    int ring_id,
    int num_head,
    int dim_head,
    int dim_ffn,
    const std::vector<float>& qkv_in_scale,
    const std::vector<float>& out_linear_in_scale,
    const std::vector<float>& ffn1_in_scale,
    const std::vector<float>& ffn2_in_scale,
    int quant_round_type,
    float quant_max_bound,
    float quant_min_bound,
    std::vector<MetaTensor*> cache_kv_out,
    MetaTensor* out) {
  // x: qkv's input [batch_size, seq_len, dim_embed]
  // y: qkv's weight: [3, num_head, dim_head, dim_embed]
  const auto& x_dim = x.dims();
  const auto& y_dim = qkv_w[0]->dims();
  // NOTE: adjacent string literals are concatenated by the compiler; each
  // fragment below ends with a space so the assembled message reads correctly.
  PADDLE_ENFORCE_EQ(
      x_dim.size(),
      3,
      common::errors::InvalidArgument("The dimensions of x must be 3 "
                                      "(batch_size, seq_len, dim_embed), "
                                      "but received dimensions of "
                                      "Input is [%d]",
                                      x_dim.size()));
  PADDLE_ENFORCE_EQ(
      y_dim.size(),
      4,
      common::errors::InvalidArgument("The dimensions of qkv_weight must be 4 "
                                      "(3, num_head, dim_head, dim_embed), "
                                      "but received dimensions of "
                                      "Input is [%d]",
                                      y_dim.size()));
  // dim_embed of x must match the weight's embedding dimension; which axis of
  // the weight carries dim_embed depends on trans_qkvw.
  PADDLE_ENFORCE_EQ(
      x_dim[2],
      trans_qkvw ? y_dim[3] : y_dim[0],
      common::errors::InvalidArgument(
          "ShapeError: the dimension of x_dim[2] and y_dim[3] (trans_qkvw is "
          "true) or y_dim[0] (trans_qkvw is false) "
          "must be equal. But received: the shape "
          "of input x = [%s], and the shape of "
          "input qkv_weight = [%s]",
          x_dim,
          y_dim));

  // ring_id == -1 means no tensor-model parallelism, so the full embedding
  // dimension must be reconstructible as num_head * dim_head.
  if (ring_id == -1) {
    if (trans_qkvw) {
      PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2],
                        y_dim[3],
                        common::errors::InvalidArgument(
                            "The dimensions of qkv_weight must be 4 "
                            "(3, num_head, dim_head, dim_embed), "
                            "and must satisfy the limitations: "
                            "(num_head * dim_head == dim_embed)"));

    } else {
      PADDLE_ENFORCE_EQ(y_dim[2] * y_dim[3],
                        y_dim[0],
                        common::errors::InvalidArgument(
                            "The dimensions of qkv_weight must be 4 "
                            "(dim_embed, 3, num_head, dim_head), "
                            "and must satisfy the limitations: "
                            "(num_head * dim_head == dim_embed)"));
    }
  }

  if (cache_kv && cache_kv.get().size() > 0) {
    // [2, batch_size, num_head, max_seq_len, head_size]
    const auto& c_dim = cache_kv.get()[0]->dims();

    PADDLE_ENFORCE_EQ(
        c_dim.size(),
        5,
        common::errors::InvalidArgument(
            "The CacheKV must be 5 dims, but got %d", c_dim.size()));
    PADDLE_ENFORCE_EQ(c_dim[0],
                      2,
                      common::errors::InvalidArgument(
                          "The first dim of CacheKV must be 2, but got %d",
                          c_dim[0]));  // 2
    PADDLE_ENFORCE_EQ(c_dim[1],
                      x_dim[0],
                      common::errors::InvalidArgument(
                          "The second dim of CacheKV must be equal with "
                          "batch size %d, but got %d",
                          x_dim[0],
                          c_dim[1]));  // batch_size
    PADDLE_ENFORCE_EQ(c_dim[2],
                      trans_qkvw ? y_dim[1] : y_dim[2],
                      common::errors::InvalidArgument(
                          "The third dim of CacheKV must be equal with num "
                          "head %d, but got %d",
                          trans_qkvw ? y_dim[1] : y_dim[2],
                          c_dim[2]));  // num_head
    PADDLE_ENFORCE_GT(
        c_dim[3],
        0,
        common::errors::InvalidArgument(
            "The forth dim of CacheKV must be greater than 0, but got %d",
            c_dim[3]));  // cache_seq_len
    PADDLE_ENFORCE_EQ(c_dim[4],
                      trans_qkvw ? y_dim[2] : y_dim[3],
                      common::errors::InvalidArgument(
                          "The fifth dim of CacheKV must be equal with head "
                          "size %d, but got %d",
                          trans_qkvw ? y_dim[2] : y_dim[3],
                          c_dim[4]));  // head_size
  }

  // The fused transformer stack preserves the input shape and dtype.
  out->set_dims(x.dims());
  out->set_dtype(x.dtype());
}

void YoloBoxXPUInferMeta(const MetaTensor& x,
const MetaTensor& x_max,
const MetaTensor& grid,
Expand Down
43 changes: 43 additions & 0 deletions paddle/phi/infermeta/fusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,49 @@ void FusedMultiTransformerInt8XpuInferMeta(
MetaTensor* out,
std::vector<MetaTensor*> cache_kv_out);

// Shape/dtype inference for the fused_multi_transformer_int8 op.
// Validates x ([batch_size, seq_len, dim_embed]) against the first QKV weight
// and the optional KV cache, then sets `out` to x's dims and dtype. The
// parameter list mirrors the kernel signature one-to-one; see the definition
// in fusion.cc for the shape constraints that are actually enforced.
void FusedMultiTransformerInt8InferMeta(
    const MetaTensor& x,
    const std::vector<const MetaTensor*>& ln_scale,
    const std::vector<const MetaTensor*>& ln_bias,
    const std::vector<const MetaTensor*>& qkv_w,
    const paddle::optional<std::vector<const MetaTensor*>>& qkv_bias,
    const paddle::optional<std::vector<const MetaTensor*>>& cache_kv,
    const MetaTensor& time_step,
    const MetaTensor& src_mask,
    const std::vector<const MetaTensor*>& out_linear_w,
    const paddle::optional<std::vector<const MetaTensor*>>& out_linear_bias,
    const std::vector<const MetaTensor*>& ffn_ln_scale,
    const std::vector<const MetaTensor*>& ffn_ln_bias,
    const std::vector<const MetaTensor*>& ffn1_weight,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn1_bias,
    const std::vector<const MetaTensor*>& ffn2_weight,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn2_bias,
    const paddle::optional<std::vector<const MetaTensor*>>& qkv_out_scale,
    const paddle::optional<std::vector<const MetaTensor*>>&
        out_linear_out_scale,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn1_out_scale,
    const paddle::optional<std::vector<const MetaTensor*>>& ffn2_out_scale,
    bool pre_layer_norm,
    float epsilon,
    float dropout_rate,
    bool is_test,
    const std::string& dropout_implementation,
    const std::string& act_method,
    bool trans_qkvw,
    int ring_id,
    int num_head,
    int dim_head,
    int dim_ffn,
    const std::vector<float>& qkv_in_scale,
    const std::vector<float>& out_linear_in_scale,
    const std::vector<float>& ffn1_in_scale,
    const std::vector<float>& ffn2_in_scale,
    int quant_round_type,
    float quant_max_bound,
    float quant_min_bound,
    std::vector<MetaTensor*> cache_kv_out,
    MetaTensor* out);

void YoloBoxXPUInferMeta(const MetaTensor& x,
const MetaTensor& x_max,
const MetaTensor& grid,
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ if(WITH_ROCM)
"fusion/gpu/fused_bn_add_activation_kernel.cu"
"fusion/gpu/fused_feedforward_kernel.cu"
"fusion/gpu/fused_feedforward_grad_kernel.cu"
"fusion/gpu/fused_multi_transformer_int8_kernel.cu"
"fusion/gpu/fusion_transpose_flatten_concat_kernel.cu")
list(
REMOVE_ITEM
Expand Down
Loading

0 comments on commit e596800

Please sign in to comment.