Commit b56d976

Fix

co63oc committed May 30, 2024
2 parents 4c53647 + d3964c5 commit b56d976
Showing 109 changed files with 2,330 additions and 1,842 deletions.
2 changes: 1 addition & 1 deletion .clang-tidy
@@ -70,7 +70,7 @@ clang-analyzer-core.uninitialized.Assign,
clang-analyzer-cplusplus.InnerPointer,
-clang-analyzer-cplusplus.Move,
-clang-analyzer-cplusplus.NewDelete,
-clang-analyzer-cplusplus.NewDeleteLeaks,
clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-cplusplus.PureVirtualCall,
-clang-analyzer-cplusplus.SelfAssignment,
-clang-analyzer-cplusplus.SmartPtr,
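In a .clang-tidy Checks list a leading '-' disables a check, so this hunk re-enables clang-analyzer-cplusplus.NewDeleteLeaks. A minimal example of the kind of code that check reports (illustrative only, not taken from the repository):

#include <string>

void MakeLabel() {
  // Allocated with new but never deleted before going out of scope:
  // clang-analyzer-cplusplus.NewDeleteLeaks reports a potential memory leak.
  std::string* label = new std::string("tile_config");
  (void)label;
}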
8 changes: 8 additions & 0 deletions paddle/cinn/hlir/framework/pir/op_lowering_impl.cc
@@ -101,6 +101,14 @@ std::shared_ptr<GroupInfo> OpLowererImpl::GetGroupInfo(
for (auto& val : group->output_values()) {
group_info->direct_output_var_names.insert(ValueName(val));
}

group->WalkOps([&group_info](::pir::Operation* op) {
if (CompatibleInfo::OpKind(*op) == OpPatternKind::kReduction) {
group_info->raw_reduce_axis = cinn::fusion::GetReduceAxisIdx(op);
group_info->raw_data_rank =
cinn::fusion::GetCompitableRank(op->operand_source(0));
}
});
return group_info;
}

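The new WalkOps callback records, for any reduction op in the group, its reduce axes exactly as written and the rank of its first operand. A schematic standalone analogue with plain structs (Op, Group, and CollectReduceInfo below are illustrative stand-ins, not the pir API):

#include <cstdint>
#include <functional>
#include <vector>

enum class OpKind { kElementWise, kReduction };

struct Op {
  OpKind kind;
  std::vector<int64_t> reduce_axis;  // axes exactly as written on the op
  int64_t input_rank;                // rank of operand 0
};

struct Group {
  std::vector<Op> ops;
  void WalkOps(const std::function<void(const Op&)>& fn) const {
    for (const Op& op : ops) fn(op);
  }
};

struct GroupInfo {
  std::vector<int64_t> raw_reduce_axis;
  int64_t raw_data_rank = 0;
};

GroupInfo CollectReduceInfo(const Group& group) {
  GroupInfo info;
  group.WalkOps([&info](const Op& op) {
    if (op.kind == OpKind::kReduction) {
      info.raw_reduce_axis = op.reduce_axis;
      info.raw_data_rank = op.input_rank;
    }
  });
  return info;
}

int main() {
  Group g;
  g.ops = {{OpKind::kElementWise, {}, 3}, {OpKind::kReduction, {1, 2}, 3}};
  GroupInfo info = CollectReduceInfo(g);
  return (info.raw_data_rank == 3 &&
          info.raw_reduce_axis == std::vector<int64_t>({1, 2})) ? 0 : 1;
}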
2 changes: 2 additions & 0 deletions paddle/cinn/hlir/framework/pir/op_lowering_impl.h
@@ -51,6 +51,8 @@ typedef bool (OpLowererImpl::*ScheduleDetermineFunction)(::pir::Operation*);
struct GroupInfo {
std::vector<int64_t> data_space;
std::vector<int64_t> reduce_axis;
int64_t raw_data_rank;
std::vector<int64_t> raw_reduce_axis;
std::set<std::string> reduce_var_names;
std::set<std::string> shared_var_names;
std::set<std::string> direct_output_var_names;
12 changes: 12 additions & 0 deletions paddle/cinn/ir/group_schedule/config/filedatabase.cc
@@ -14,13 +14,25 @@

#include "paddle/cinn/ir/group_schedule/config/filedatabase.h"

#include <sys/stat.h>

#include <google/protobuf/text_format.h>
#include <google/protobuf/util/json_util.h>
#include <fstream>

#include "paddle/cinn/utils/multi_threading.h"

#define MKDIR(path) mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)
PD_DECLARE_string(cinn_tile_config_filename_label);
static bool PathExists(const std::string& path) {
struct stat statbuf;
if (stat(path.c_str(), &statbuf) != -1) {
if (S_ISDIR(statbuf.st_mode)) {
return true;
}
}
return false;
}

namespace cinn {
namespace ir {
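The new PathExists helper and MKDIR macro follow the usual POSIX pattern for checking a directory before creating it. A minimal standalone sketch of how the two would typically combine (EnsureDir is an illustrative name, not part of the commit):

#include <sys/stat.h>
#include <sys/types.h>

#include <string>

#define MKDIR(path) mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)

// Returns true only when 'path' exists and is a directory.
static bool PathExists(const std::string& path) {
  struct stat statbuf;
  if (stat(path.c_str(), &statbuf) != -1) {
    return S_ISDIR(statbuf.st_mode);
  }
  return false;
}

// Illustrative helper: create the directory only when it is missing.
static bool EnsureDir(const std::string& path) {
  if (PathExists(path)) return true;
  return MKDIR(path.c_str()) == 0;
}

int main() { return EnsureDir("/tmp/cinn_tile_config_demo") ? 0 : 1; }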
1 change: 0 additions & 1 deletion paddle/cinn/ir/group_schedule/config/filedatabase.h
@@ -16,7 +16,6 @@

#include "paddle/cinn/ir/group_schedule/config/database.h"
#include "paddle/cinn/ir/group_schedule/config/tileconfig_desc.pb.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace cinn {
namespace ir {

10 changes: 9 additions & 1 deletion paddle/cinn/ir/group_schedule/config/group_tile_config.cc
@@ -37,16 +37,24 @@ std::shared_ptr<ScheduleConfig::BaseInfo> InitBasicInfo(
base_info->broadcast_info = group_info->broadcast_info;
base_info->broadcast_to_elementwise = group_info->broadcast_to_elementwise;
base_info->data_rank = group_info->data_space.size();
base_info->raw_data_rank = group_info->raw_data_rank;

std::set<int64_t> reduce_dim_loc;
for (auto dim : group_info->reduce_axis) {
for (int64_t dim : group_info->reduce_axis) {
if (dim < 0) {
dim += base_info->data_rank;
}
base_info->reduce_axis.push_back(dim);
reduce_dim_loc.insert(dim);
}

for (int64_t dim : group_info->raw_reduce_axis) {
if (dim < 0) {
dim += base_info->data_rank;
}
base_info->raw_reduce_axis.push_back(dim);
}

base_info->spatial_numel = 1;
base_info->reduce_numel = 1;
for (int64_t i = 0; i < base_info->data_rank; ++i) {
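Both loops normalize negative axes against the rank before storing them, e.g. axis -1 on a rank-3 tensor becomes 2. A tiny standalone version of that normalization:

#include <cstdint>
#include <vector>

std::vector<int64_t> NormalizeAxes(std::vector<int64_t> axes, int64_t rank) {
  for (int64_t& dim : axes) {
    if (dim < 0) dim += rank;  // -1 -> rank - 1, -2 -> rank - 2, ...
  }
  return axes;
}

int main() {
  std::vector<int64_t> axes = NormalizeAxes({-1, 0}, 3);
  return (axes[0] == 2 && axes[1] == 0) ? 0 : 1;
}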
2 changes: 2 additions & 0 deletions paddle/cinn/ir/group_schedule/config/group_tile_config.h
@@ -29,7 +29,9 @@ namespace ir {
struct ScheduleConfig {
struct BaseInfo {
std::vector<int64_t> reduce_axis;
std::vector<int64_t> raw_reduce_axis;
int64_t data_rank;
int64_t raw_data_rank;
int64_t reduce_numel;
int64_t spatial_numel;
bool has_dynamic_spatial{false};
4 changes: 4 additions & 0 deletions paddle/common/flags.cc
@@ -1724,3 +1724,7 @@ PHI_DEFINE_EXPORTED_string(cusolver_dir, // NOLINT
PHI_DEFINE_EXPORTED_string(cusparse_dir, // NOLINT
"",
"Specify path for loading libcusparse.so.*.");
PHI_DEFINE_EXPORTED_string(
win_cuda_bin_dir, // NOLINT
"",
"Specify path for loading *.dll about cuda on windows");
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/table/common_graph_table.cc
@@ -14,7 +14,7 @@

#include "paddle/fluid/distributed/ps/table/common_graph_table.h"

#include <time.h>
#include <ctime>

#include <algorithm>
#include <chrono>
7 changes: 6 additions & 1 deletion paddle/fluid/distributed/ps/table/graph/graph_node.cc
@@ -74,7 +74,12 @@ void GraphNode::build_sampler(std::string sample_type) {
} else if (sample_type == "weighted") {
sampler = new WeightedSampler();
}
sampler->build(edges);
if (sampler != nullptr) {
sampler->build(edges);
} else {
throw std::runtime_error("Failed to create a sampler of type: " +
sample_type);
}
}
void FeatureNode::to_buffer(char* buffer, bool need_feature) {
memcpy(buffer, &id, id_size);
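build_sampler only constructs a sampler for the sample types it recognizes, so an unknown type previously left 'sampler' null and crashed on sampler->build(edges); the new guard turns that into an exception with the offending type in the message. A condensed standalone sketch of the pattern (the Sampler classes and edge type here are stand-ins, not the real ones):

#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

struct Sampler {
  virtual ~Sampler() = default;
  virtual void build(const std::vector<int>& edges) = 0;
};

struct RandomSampler : Sampler {
  void build(const std::vector<int>&) override {}
};

struct WeightedSampler : Sampler {
  void build(const std::vector<int>&) override {}
};

std::unique_ptr<Sampler> MakeSampler(const std::string& sample_type) {
  if (sample_type == "random") return std::make_unique<RandomSampler>();
  if (sample_type == "weighted") return std::make_unique<WeightedSampler>();
  return nullptr;  // unknown type: caller must not dereference blindly
}

void BuildSampler(const std::string& sample_type,
                  const std::vector<int>& edges) {
  auto sampler = MakeSampler(sample_type);
  if (sampler != nullptr) {
    sampler->build(edges);
  } else {
    throw std::runtime_error("Failed to create a sampler of type: " +
                             sample_type);
  }
}

int main() {
  try {
    BuildSampler("weighted", {1, 2, 3});
    BuildSampler("unknown", {});
  } catch (const std::runtime_error&) {
    return 0;  // the unknown type is reported instead of crashing
  }
  return 1;
}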
@@ -144,6 +144,10 @@ void DependencyBuilder::ShareDependencyFrom(const DependencyBuilder& src) {
is_build_ = true;
}

const std::string& DependencyBuilder::GetInstructionName(size_t op_idx) const {
return (*instructions_)[op_idx].OpBase()->Type();
}

const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
const {
PADDLE_ENFORCE_EQ(
@@ -340,6 +344,13 @@ void DependencyBuilder::AddDependencyForReadOp() {
void DependencyBuilder::AddDependencyForSequentialRun() {
size_t dependence_op_idx = ULLONG_MAX;
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
if (this->GetInstructionName(op_idx) == "pd_op.full_int_array") {
VLOG(8) << "Skip adding dependency for sequential run: "
<< dependence_op_idx << "->" << op_idx << " "
<< this->GetInstructionName(dependence_op_idx) << "->"
<< this->GetInstructionName(op_idx);
continue;
}
if (dependence_op_idx != ULLONG_MAX) {
AddDownstreamOp(dependence_op_idx, op_idx);
}
@@ -571,6 +582,11 @@ PirDependencyBuilder::PirDependencyBuilder() : instructions_() {
op_happens_before_ = std::make_shared<std::vector<std::vector<bool>>>();
}

const std::string& PirDependencyBuilder::GetInstructionName(
size_t op_idx) const {
return (instructions_)[op_idx]->Name();
}

void PirDependencyBuilder::AddDependencyForCommunicationOp() {
size_t dependence_op_idx = ULLONG_MAX;
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
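GetInstructionName is added as a virtual hook: the shared dependency-building logic in the base DependencyBuilder asks for an op's name, and the legacy and PIR builders each answer from their own instruction container; the sequential-run pass then uses the name to leave pd_op.full_int_array ops out of the chain. A compact standalone sketch of that shape (class names and the chaining logic are simplified, not the real interfaces):

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class DependencyBuilderBase {
 public:
  virtual ~DependencyBuilderBase() = default;
  virtual const std::string& GetInstructionName(size_t op_idx) const = 0;

  // Shared logic: chain ops one after another, but leave ops such as
  // pd_op.full_int_array out of the sequential chain.
  void AddDependencyForSequentialRun(size_t op_num) {
    for (size_t op_idx = 0; op_idx < op_num; ++op_idx) {
      if (GetInstructionName(op_idx) == "pd_op.full_int_array") continue;
      std::cout << "chain -> " << GetInstructionName(op_idx) << "\n";
    }
  }
};

class PirLikeBuilder : public DependencyBuilderBase {
 public:
  explicit PirLikeBuilder(std::vector<std::string> names)
      : names_(std::move(names)) {}
  const std::string& GetInstructionName(size_t op_idx) const override {
    return names_[op_idx];
  }

 private:
  std::vector<std::string> names_;
};

int main() {
  PirLikeBuilder builder({"pd_op.matmul", "pd_op.full_int_array", "pd_op.add"});
  builder.AddDependencyForSequentialRun(3);  // prints matmul and add only
  return 0;
}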
@@ -63,6 +63,8 @@ class DependencyBuilder {
&((*instructions_)[op2].DeviceContext());
}

virtual const std::string& GetInstructionName(size_t op_idx) const;

protected:
void AddDependencyForCoalesceTensorOp();
virtual void AddDependencyForCommunicationOp();
@@ -127,6 +129,8 @@ class PirDependencyBuilder : public DependencyBuilder {
&((instructions_)[op2]->DeviceContext());
}

const std::string& GetInstructionName(size_t op_idx) const override;

private:
void AddDependencyForCommunicationOp() override;

5 changes: 3 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -114,6 +114,7 @@

#include "paddle/common/flags.h"
#include "paddle/fluid/ir_adaptor/translator/translate.h"
#include "paddle/fluid/pir/transforms/general/common_subexpression_elimination_pass.h"
#include "paddle/fluid/pir/transforms/general/constant_folding_pass.h"
#include "paddle/fluid/pir/transforms/general/dead_code_elimination_pass.h"
#include "paddle/fluid/pir/transforms/general/inplace_pass.h"
@@ -906,7 +907,7 @@ bool AnalysisPredictor::PrepareExecutor() {
ctx->GetOrRegisterDialect<cinn::dialect::OperatorDialect>();
ctx->GetOrRegisterDialect<pir::shape::ShapeDialect>();
auto pass_manager = std::make_shared<::pir::PassManager>(
::pir::IrContext::Instance(), 2);
::pir::IrContext::Instance(), config_.pm_opt_level_);
if (!config_.glog_info_disabled()) {
pass_manager->EnablePrintStatistics();
}
@@ -999,7 +1000,7 @@ bool AnalysisPredictor::PrepareExecutor() {
// Apply some basic passes required by the framework
::pir::PassManager basic_pass_pm(::pir::IrContext::Instance(),
config_.pm_opt_level_);

basic_pass_pm.AddPass(::pir::CreateCommonSubexpressionEliminationPass());
auto params_sync_among_devices_pass =
::pir::CreateParamsSyncAmongDevicesPass();
params_sync_among_devices_pass->SetNotOwned(pir::Pass::kPlaceAttr,
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -610,6 +610,7 @@ const std::vector<std::string> kPirGpuPasses{
"embedding_eltwise_layernorm_fuse_pass",
"fused_flash_attn_pass",
"multihead_matmul_fuse_pass",
"fused_weight_only_linear_pass",
"matmul_add_act_fuse_pass",
"fc_elementwise_layernorm_fuse_pass",
"matmul_scale_fuse_pass",
@@ -49,7 +49,11 @@ class BilinearInterpolateV2OpConverter : public OpConverter {

auto layer = TRT_ENGINE_ADD_LAYER(engine_, Resize, *input);
if (align_mode == 0) {
#if IS_TRT_VERSION_GE(8600)
layer->setResizeMode(nvinfer1::InterpolationMode::kLINEAR);
#else
layer->setResizeMode(nvinfer1::ResizeMode::kLINEAR);
#endif
}
#if IS_TRT_VERSION_GE(8000)
if (align_corners == true) {
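Several converters in this commit branch on IS_TRT_VERSION_GE(8600): on TensorRT 8.6 and newer they use the nvinfer1::InterpolationMode spelling (and, in the multihead converters below, MatrixMultiply instead of FullyConnected), while older releases keep nvinfer1::ResizeMode. The macro is conventionally assembled from the TensorRT version macros roughly as below; the exact definition lives in Paddle's TensorRT helper headers, so treat this sketch as an assumption for orientation:

#include <NvInfer.h>  // brings in NV_TENSORRT_MAJOR / MINOR / PATCH / BUILD

// Assumed shape of the helper: 8600 corresponds to TensorRT 8.6.0.0.
#define TRT_VERSION_NUM                                   \
  (NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 +  \
   NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD)
#define IS_TRT_VERSION_GE(version) (TRT_VERSION_NUM >= (version))

#if IS_TRT_VERSION_GE(8600)
using ResizeModeT = nvinfer1::InterpolationMode;  // spelling on 8.6+
#else
using ResizeModeT = nvinfer1::ResizeMode;  // spelling on older releases
#endif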
4 changes: 2 additions & 2 deletions paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -221,7 +221,7 @@ class Conv2dOpConverter : public OpConverter {
return layer;
},
[](nvinfer1::IConvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
layer->setDilation(dilations);
layer->setDilationNd(dilations);
},
"conv2d");
}
@@ -245,7 +245,7 @@ class Deconv2dOpConverter : public OpConverter {
TensorRTEngine::Weight& weight,
TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* {
auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
Deconvolution,
DeconvolutionNd,
*inputs,
n_output,
ksize,
@@ -83,10 +83,21 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
nvinfer1::Weights weight_q{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_q_data),
static_cast<int32_t>(weight_q_t->numel())};
nvinfer1::ITensor* input_q_shape_tensor = Shape(input_q);
#if IS_TRT_VERSION_GE(8600)
auto* fc_q_weight_layer = TRT_ENGINE_ADD_LAYER(
engine_, Constant, nvinfer1::Dims3(1, n_q, hidden_in_q), weight_q);
auto* fc_q_layer =
TRT_ENGINE_ADD_LAYER(engine_,
MatrixMultiply,
*input_q,
nvinfer1::MatrixOperation::kNONE,
*fc_q_weight_layer->getOutput(0),
nvinfer1::MatrixOperation::kTRANSPOSE);
#else
nvinfer1::Weights bias_q{};
// add shuffle for FullyConnected layer
std::vector<nvinfer1::ITensor*> reshape_before_fc_q_shape_tensor;
nvinfer1::ITensor* input_q_shape_tensor = Shape(input_q);
for (int i = 0; i < 5; i++) {
reshape_before_fc_q_shape_tensor.push_back(Add1DConstantLayer(1));
}
@@ -109,6 +120,7 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
n_q,
weight_q,
bias_q);
#endif
fc_q_layer->setName(
("multihead_matmul_fc_q(Output: " + output_name + ")").c_str());

@@ -184,11 +196,22 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
nvinfer1::Weights weight_kv{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_kv_data),
static_cast<int32_t>(weight_kv_t->numel())};
nvinfer1::Weights bias_kv{};

nvinfer1::ITensor* input_shape_tensor = Shape(input_kv);
#if IS_TRT_VERSION_GE(8600)
auto* fc_weight_layer = TRT_ENGINE_ADD_LAYER(
engine_, Constant, nvinfer1::Dims3(1, n, hidden_in), weight_kv);
auto* fc_layer =
TRT_ENGINE_ADD_LAYER(engine_,
MatrixMultiply,
*input_q,
nvinfer1::MatrixOperation::kNONE,
*fc_weight_layer->getOutput(0),
nvinfer1::MatrixOperation::kTRANSPOSE);
#else
nvinfer1::Weights bias_kv{};
// add shuffle for FullyConnected layer
std::vector<nvinfer1::ITensor*> reshape_before_fc_shape_tensor;
nvinfer1::ITensor* input_shape_tensor = Shape(input_kv);
for (int i = 0; i < 5; i++) {
reshape_before_fc_shape_tensor.push_back(Add1DConstantLayer(1));
}
@@ -211,6 +234,7 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
n,
weight_kv,
bias_kv);
#endif
fc_layer->setName(
("multihead_matmul_fc(Output: " + output_name + ")").c_str());

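On TensorRT 8.6 and newer the converter builds the Q and K/V projections from a Constant weight layer plus a MatrixMultiply with the weight side transposed (kTRANSPOSE), rather than the older FullyConnected layer; for a weight stored as [n, hidden_in] both compute y = x * W^T. A small plain-C++ check of that equivalence (no TensorRT needed, purely illustrative):

#include <cassert>
#include <cstddef>
#include <vector>

// Weight W is stored row-major as [n][hidden_in], the layout a FullyConnected
// layer expects; the MatrixMultiply path multiplies by W transposed instead.
std::vector<float> FullyConnected(const std::vector<float>& x,  // [hidden_in]
                                  const std::vector<float>& W,  // [n*hidden_in]
                                  size_t n, size_t hidden_in) {
  std::vector<float> y(n, 0.0f);
  for (size_t i = 0; i < n; ++i)
    for (size_t k = 0; k < hidden_in; ++k) y[i] += W[i * hidden_in + k] * x[k];
  return y;
}

std::vector<float> MatMulTransposedW(const std::vector<float>& x,
                                     const std::vector<float>& W,
                                     size_t n, size_t hidden_in) {
  // Materialize W^T as [hidden_in][n], then compute x (1 x hidden_in) * W^T.
  std::vector<float> Wt(hidden_in * n);
  for (size_t i = 0; i < n; ++i)
    for (size_t k = 0; k < hidden_in; ++k) Wt[k * n + i] = W[i * hidden_in + k];
  std::vector<float> y(n, 0.0f);
  for (size_t i = 0; i < n; ++i)
    for (size_t k = 0; k < hidden_in; ++k) y[i] += x[k] * Wt[k * n + i];
  return y;
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f};  // hidden_in = 3
  std::vector<float> W = {1, 0, 1, 0, 2, 0};  // n = 2 rows of length 3
  assert(FullyConnected(x, W, 2, 3) == MatMulTransposedW(x, W, 2, 3));
  return 0;
}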
@@ -110,6 +110,17 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
nvinfer1::Weights weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
static_cast<int32_t>(weight_t->numel())};
#if IS_TRT_VERSION_GE(8600)
auto* fc_weight_layer = TRT_ENGINE_ADD_LAYER(
engine_, Constant, nvinfer1::Dims3(1, n, hidden_in), weight);
auto* fc_layer =
TRT_ENGINE_ADD_LAYER(engine_,
MatrixMultiply,
*input,
nvinfer1::MatrixOperation::kNONE,
*fc_weight_layer->getOutput(0),
nvinfer1::MatrixOperation::kTRANSPOSE);
#else
nvinfer1::Weights bias{};
// add shuffle for FullyConnected layer
std::vector<nvinfer1::ITensor*> reshape_before_fc_shape_tensor;
@@ -138,6 +149,7 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
n,
weight,
bias);
#endif
fc_layer->setName(
("multihead_matmul_fc(Output: " + output_name + ")").c_str());
// add shuffle for fc layer
@@ -299,6 +311,20 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
nvinfer1::Weights weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
static_cast<int32_t>(weight_tensor->numel())};
#if IS_TRT_VERSION_GE(8600)
auto* qkv_fc_weight_layer =
TRT_ENGINE_ADD_LAYER(engine_,
Constant,
nvinfer1::Dims3(1, hidden_out, hidden_out),
weight);
qkv_fc_layers[i] =
TRT_ENGINE_ADD_LAYER(engine_,
MatrixMultiply,
*input,
nvinfer1::MatrixOperation::kNONE,
*qkv_fc_weight_layer->getOutput(0),
nvinfer1::MatrixOperation::kTRANSPOSE);
#else
nvinfer1::Weights bias{};
qkv_fc_layers[i] =
TRT_ENGINE_ADD_LAYER(engine_,
@@ -307,6 +333,7 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
hidden_out,
weight,
bias);
#endif
qkv_fc_layers[i]->setName(("multihead_matmul_fc_" + std::to_string(i) +
"_(Output: " + output_name + ")")
.c_str());
8 changes: 8 additions & 0 deletions paddle/fluid/inference/tensorrt/convert/grid_sampler_op.cc
@@ -48,9 +48,17 @@ class GridSamplerOpConverter : public OpConverter {
nvinfer1::InterpolationMode interpolationMode{
nvinfer1::InterpolationMode::kNEAREST};
if (mode == "nearest") {
#if IS_TRT_VERSION_GE(8600)
interpolationMode = nvinfer1::InterpolationMode::kNEAREST;
#else
interpolationMode = nvinfer1::ResizeMode::kNEAREST;
#endif
} else if (mode == "bilinear") {
#if IS_TRT_VERSION_GE(8600)
interpolationMode = nvinfer1::InterpolationMode::kLINEAR;
#else
interpolationMode = nvinfer1::ResizeMode::kLINEAR;
#endif
}

nvinfer1::SampleMode sampleMode{nvinfer1::SampleMode::kFILL};