diff --git a/cinn/frontend/syntax.cc b/cinn/frontend/syntax.cc index adb725b661df5..7e2ac9196c30f 100644 --- a/cinn/frontend/syntax.cc +++ b/cinn/frontend/syntax.cc @@ -45,7 +45,7 @@ Variable Program::conv2d(const Variable& a, instr.SetAttr(iter.first, iter.second); } AppendInstruction(instr); - return instr.GetOutput(2); + return instr.GetOutput(0); } Variable Program::depthwise_conv2d(const Variable& a, @@ -57,7 +57,7 @@ Variable Program::depthwise_conv2d(const Variable& a, instr.SetAttr(iter.first, iter.second); } AppendInstruction(instr); - return instr.GetOutput(1); + return instr.GetOutput(0); } Variable Program::pool2d(const Variable& a, const std::unordered_map& attr_store) { @@ -67,7 +67,7 @@ Variable Program::pool2d(const Variable& a, const std::unordered_map inputs; std::vector cinn_inputs; std::vector> output_shapes; + LOG(INFO) << "GetOpFunc of op " << node->id(); for (auto& i : node->inlinks_in_order()) { std::string input_id = i->source()->as()->id(); auto in_shape = shape_dict.at(input_id); diff --git a/cinn/hlir/op/CMakeLists.txt b/cinn/hlir/op/CMakeLists.txt index 340a9922493ee..3a97d1f7ac9b9 100644 --- a/cinn/hlir/op/CMakeLists.txt +++ b/cinn/hlir/op/CMakeLists.txt @@ -11,4 +11,3 @@ foreach(cpp ${srcs}) endforeach() cc_test(test_op_broadcast SRCS op_broadcast_test.cc DEPS core) -cc_test(test_op_nn SRCS op_nn_test.cc DEPS core) diff --git a/cinn/hlir/op/nn.cc b/cinn/hlir/op/nn.cc index caefe8a4cc1ab..53213b81526dc 100644 --- a/cinn/hlir/op/nn.cc +++ b/cinn/hlir/op/nn.cc @@ -6,6 +6,7 @@ #include "cinn/hlir/pe/broadcast.h" #include "cinn/hlir/pe/elementwise.h" #include "cinn/ir/node.h" +#include "cinn/poly/stage.h" namespace cinn { namespace hlir { @@ -141,7 +142,6 @@ std::shared_ptr StrategyForConv2d(const framework::NodeAttr &attrs, stride[1], dilation[0], dilation[1], - output_shapes, UniqName("Conv2d_nchw_out")); } else if (data_format == "NHWC") { // A is input: [N, H, W, C], B is filter: [C_out, C_in/group, filter_h, filter_w] @@ -153,7 +153,6 @@ std::shared_ptr StrategyForConv2d(const framework::NodeAttr &attrs, stride[1], dilation[0], dilation[1], - output_shapes, UniqName("Conv2d_nhwc_out")); } else { LOG(FATAL) << "Only support NCHW and NHWC data layout\n"; @@ -177,8 +176,14 @@ std::shared_ptr StrategyForConv2d(const framework::NodeAttr &attrs, CHECK(!args.empty()) << "The input argument of conv2d schedule is empty! 
Please check.\n"; CINNValuePack arg_pack = args[0]; CHECK_EQ(arg_pack.size(), 4UL); - Expr A [[maybe_unused]] = arg_pack[0]; - *ret = arg_pack; + poly::StageMap stages = arg_pack[3]; + Expr input_pad = arg_pack[0]; + CHECK(input_pad.as_tensor()); + stages[input_pad.as_tensor_ref()]->ComputeInline(); + Expr weights_dilation = arg_pack[1]; + CHECK(weights_dilation.as_tensor()); + stages[weights_dilation.as_tensor_ref()]->ComputeInline(); + *ret = CINNValuePack{{arg_pack[2], CINNValue(stages)}}; }); auto strategy = std::make_shared(); @@ -220,30 +225,14 @@ std::vector InferShapeForConv2d(const std::vector &inputs_shap (inputs_shape[0][2] - ((inputs_shape[1][2] - 1) * dilation[0] + 1) + 2 * padding[0]) / stride[0] + 1; int out_shape_w = (inputs_shape[0][3] - ((inputs_shape[1][3] - 1) * dilation[1] + 1) + 2 * padding[1]) / stride[1] + 1; - res = {{inputs_shape[0][0], - inputs_shape[0][1], - inputs_shape[0][2] + 2 * padding[0], - inputs_shape[0][3] + 2 * padding[1]}, - {inputs_shape[1][0], - inputs_shape[1][1], - (inputs_shape[1][2] - 1) * dilation[0] + 1, - (inputs_shape[1][3] - 1) * dilation[1] + 1}, - {inputs_shape[0][0], inputs_shape[1][0], out_shape_h, out_shape_w}}; + res = {{inputs_shape[0][0], inputs_shape[1][0], out_shape_h, out_shape_w}}; } else if (data_format == "NHWC") { // A is input: [N, H, W, C], B is filter: [C_out, C_in/group, filter_h, filter_w] int out_shape_h = (inputs_shape[0][1] - ((inputs_shape[1][2] - 1) * dilation[0] + 1) + 2 * padding[0]) / stride[0] + 1; int out_shape_w = (inputs_shape[0][2] - ((inputs_shape[1][3] - 1) * dilation[1] + 1) + 2 * padding[1]) / stride[1] + 1; - res = {{inputs_shape[0][0], - inputs_shape[0][1] + 2 * padding[0], - inputs_shape[0][2] + 2 * padding[1], - inputs_shape[0][3]}, - {inputs_shape[1][0], - inputs_shape[1][1], - (inputs_shape[1][2] - 1) * dilation[0] + 1, - (inputs_shape[1][3] - 1) * dilation[1] + 1}, - {inputs_shape[0][0], out_shape_h, out_shape_w, inputs_shape[1][0]}}; + res = {{inputs_shape[0][0], out_shape_h, out_shape_w, inputs_shape[1][0]}}; } else { LOG(FATAL) << "Only support NCHW and NHWC data layout\n"; } @@ -252,7 +241,7 @@ std::vector InferShapeForConv2d(const std::vector &inputs_shap std::vector InferDtypeForConv2d(const std::vector &inputs_type, const framework::NodeAttr &attrs) { CHECK(!inputs_type.empty()) << "The input's type size is 0! 
Please check again."; - std::vector res{inputs_type[0], inputs_type[1], inputs_type[0]}; + std::vector res{inputs_type[0]}; return res; } @@ -293,7 +282,6 @@ std::shared_ptr StrategyForDepthwiseConv2d(const framework::NodeAttr padding[1], stride[0], stride[1], - output_shapes, UniqName("T_depthwise_conv2d_nchw_out")); } else if (data_format == "NHWC") { out = pe::Depthwise_Conv2d_NHWC(A.as_tensor_ref(), @@ -302,7 +290,6 @@ std::shared_ptr StrategyForDepthwiseConv2d(const framework::NodeAttr padding[1], stride[0], stride[1], - output_shapes, UniqName("T_depthwise_conv2d_nhwc_out")); } else { LOG(FATAL) << "Only support NCHW and NHWC data layout\n"; @@ -314,8 +301,9 @@ std::shared_ptr StrategyForDepthwiseConv2d(const framework::NodeAttr stages->InsertLazily(t); res.push_back(CINNValue(t)); } - CHECK_EQ(out.size(), 2U) << "The output tensor sizes of depthwise_conv op in depthwise_conv op should be 2\n"; - out[1]->InitReduction(stages, make_const(out[1]->type(), 0)); // res + CHECK(out.size() == 2U || out.size() == 1U) + << "The output tensor sizes of depthwise_conv op in depthwise_conv op should be 1 or 2\n"; + out.back()->InitReduction(stages, make_const(out.back()->type(), 0)); // res res.push_back(CINNValue(stages)); *ret = CINNValuePack{res}; }); @@ -323,9 +311,16 @@ std::shared_ptr StrategyForDepthwiseConv2d(const framework::NodeAttr framework::CINNSchedule depthwise_conv2d_schedule([](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of depthwise_conv schedule is empty! Please check.\n"; CINNValuePack arg_pack = args[0]; - CHECK_EQ(arg_pack.size(), 3UL); - Expr A [[maybe_unused]] = arg_pack[0]; - *ret = arg_pack; + CHECK(arg_pack.size() == 2UL || arg_pack.size() == 3UL); + if (arg_pack.size() == 3UL) { + poly::StageMap stages = arg_pack[2]; + Expr input_pad = arg_pack[0]; + CHECK(input_pad.as_tensor()); + stages[input_pad.as_tensor_ref()]->ComputeInline(); + *ret = CINNValuePack{{arg_pack[1], CINNValue(stages)}}; + } else { + *ret = arg_pack; + } }); auto strategy = std::make_shared(); @@ -362,20 +357,12 @@ std::vector InferShapeForDepthwiseConv2d(const std::vector &in // A is input: [N, C, H, W], and B is filter: [C_in, channel_multiplier, f_h, f_w] int out_shape_h = (inputs_shape[0][2] - inputs_shape[1][2] + 2 * padding[0]) / stride[0] + 1; int out_shape_w = (inputs_shape[0][3] - inputs_shape[1][3] + 2 * padding[1]) / stride[1] + 1; - res = {{inputs_shape[0][0], - inputs_shape[0][1], - inputs_shape[0][2] + 2 * padding[0], - inputs_shape[0][3] + 2 * padding[1]}, - {inputs_shape[0][0], inputs_shape[1][1] * inputs_shape[0][1], out_shape_h, out_shape_w}}; + res = {{inputs_shape[0][0], inputs_shape[1][1] * inputs_shape[0][1], out_shape_h, out_shape_w}}; } else if (data_format == "NHWC") { // A is input: [N, H, W, C], and B is filter: [C_in, channel_multiplier, f_h, f_w] int out_shape_h = (inputs_shape[0][1] - inputs_shape[1][1] + 2 * padding[0]) / stride[0] + 1; int out_shape_w = (inputs_shape[0][2] - inputs_shape[1][2] + 2 * padding[1]) / stride[1] + 1; - res = {{inputs_shape[0][0], - inputs_shape[0][1] + 2 * padding[0], - inputs_shape[0][2] + 2 * padding[1], - inputs_shape[0][3]}, - {inputs_shape[0][0], out_shape_h, out_shape_w, inputs_shape[1][1] * inputs_shape[0][3]}}; + res = {{inputs_shape[0][0], out_shape_h, out_shape_w, inputs_shape[1][1] * inputs_shape[0][3]}}; } else { LOG(FATAL) << "Only support NCHW and NHWC data layout\n"; } @@ -384,7 +371,7 @@ std::vector InferShapeForDepthwiseConv2d(const std::vector &in std::vector 
InferDtypeForDepthwiseConv2d(const std::vector &inputs_type, const framework::NodeAttr &attrs) { CHECK(!inputs_type.empty()) << "The input's type size is 0! Please check again."; - std::vector res{inputs_type[0], inputs_type[0]}; + std::vector res{inputs_type[0]}; return res; } @@ -507,9 +494,9 @@ std::shared_ptr StrategyForPool1d(const framework::NodeAttr &attrs, UniqName("T_Pool1d_out")); auto stages = CreateStages(out); - CHECK_EQ(out.size(), 2U) << "The size of pe::Pool1d's output should be 2."; + CHECK(out.size() == 1U || out.size() == 2U) << "The size of pe::Pool1d's output should be 1 or 2."; CHECK(!out_type.empty()) << "Output type of Pool1d is empty! Please check.\n"; - out[1]->InitReduction(stages, ir::Zero(out_type[0])); + out.back()->InitReduction(stages, ir::Zero(out_type[0])); std::vector res; for (auto &t : out) { res.push_back(CINNValue(Expr(t.get()))); @@ -521,9 +508,16 @@ std::shared_ptr StrategyForPool1d(const framework::NodeAttr &attrs, framework::CINNSchedule pool1d_schedule([](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of pool1d schedule is empty! Please check.\n"; CINNValuePack arg_pack = args[0]; - CHECK_EQ(arg_pack.size(), 3UL); - Expr A [[maybe_unused]] = arg_pack[0]; - *ret = arg_pack; + CHECK(arg_pack.size() == 2UL || arg_pack.size() == 3UL); + if (arg_pack.size() == 3UL) { + poly::StageMap stages = arg_pack[2]; + Expr input_pad = arg_pack[0]; + CHECK(input_pad.as_tensor()); + stages[input_pad.as_tensor_ref()]->ComputeInline(); + *ret = CINNValuePack{{arg_pack[1], CINNValue(stages)}}; + } else { + *ret = arg_pack; + } }); auto strategy = std::make_shared(); @@ -562,9 +556,8 @@ std::vector> InferShapeForPool1d(const std::vector output_shape0 = inputs_shape[0]; std::vector output_shape1 = inputs_shape[0]; - CHECK_EQ(output_shape0.size(), 3U); + CHECK_EQ(output_shape1.size(), 3U); int width_axis = -1; if (data_format == "NCW") { width_axis = 2; @@ -574,9 +567,7 @@ std::vector> InferShapeForPool1d(const std::vector> InferShapeForPool1d(const std::vector> res{output_shape0, output_shape1}; + std::vector> res{output_shape1}; return res; } @@ -643,9 +634,9 @@ std::shared_ptr StrategyForPool2d(const framework::NodeAttr &attrs, UniqName("T_Pool2d_out")); auto stages = CreateStages(out); - CHECK_EQ(out.size(), 2U) << "The size of pe::Pool2d's output should be 2."; + CHECK(out.size() == 1U || out.size() == 2U) << "The size of pe::Pool2d's output should be 1 or 2."; CHECK(!out_type.empty()) << "Output type of Pool2d is empty! Please check.\n"; - out[1]->InitReduction(stages, ir::Zero(out_type[0])); + out.back()->InitReduction(stages, ir::Zero(out_type[0])); std::vector res; for (auto &t : out) { res.push_back(CINNValue(Expr(t.get()))); @@ -657,9 +648,16 @@ std::shared_ptr StrategyForPool2d(const framework::NodeAttr &attrs, framework::CINNSchedule pool2d_schedule([](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of pool2d schedule is empty! 
Please check.\n"; CINNValuePack arg_pack = args[0]; - CHECK_EQ(arg_pack.size(), 3UL); - Expr A [[maybe_unused]] = arg_pack[0]; - *ret = arg_pack; + CHECK(arg_pack.size() == 2UL || arg_pack.size() == 3UL); + if (arg_pack.size() == 3UL) { + poly::StageMap stages = arg_pack[2]; + Expr input_pad = arg_pack[0]; + CHECK(input_pad.as_tensor()); + stages[input_pad.as_tensor_ref()]->ComputeInline(); + *ret = CINNValuePack{{arg_pack[1], CINNValue(stages)}}; + } else { + *ret = arg_pack; + } }); auto strategy = std::make_shared(); @@ -697,7 +695,6 @@ std::vector> InferShapeForPool2d(const std::vector output_shape0 = inputs_shape[0]; std::vector output_shape1 = inputs_shape[0]; CHECK_EQ(inputs_shape[0].size(), 4U) << "input_shape size for pool2d should be 4.\n"; int height_axis = -1; @@ -716,11 +713,7 @@ std::vector> InferShapeForPool2d(const std::vector> InferShapeForPool2d(const std::vector> res{output_shape0, output_shape1}; + std::vector> res{output_shape1}; return res; } @@ -794,9 +787,9 @@ std::shared_ptr StrategyForPool3d(const framework::NodeAttr &attrs, UniqName("T_Pool3d_out")); auto stages = CreateStages(out); - CHECK_EQ(out.size(), 2U) << "The size of pe::Pool3d's output should be 2."; + CHECK(out.size() == 1U || out.size() == 2U) << "The size of pe::Pool3d's output should be 1 or 2."; CHECK(!out_type.empty()) << "Output type of Pool3d is empty! Please check.\n"; - out[1]->InitReduction(stages, ir::Zero(out_type[0])); + out.back()->InitReduction(stages, ir::Zero(out_type[0])); std::vector res; for (auto &t : out) { res.push_back(CINNValue(Expr(t.get()))); @@ -808,9 +801,16 @@ std::shared_ptr StrategyForPool3d(const framework::NodeAttr &attrs, framework::CINNSchedule pool3d_schedule([](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of pool3d schedule is empty! Please check.\n"; CINNValuePack arg_pack = args[0]; - CHECK_EQ(arg_pack.size(), 3UL); - Expr A [[maybe_unused]] = arg_pack[0]; - *ret = arg_pack; + CHECK(arg_pack.size() == 2UL || arg_pack.size() == 3UL); + if (arg_pack.size() == 3UL) { + poly::StageMap stages = arg_pack[2]; + Expr input_pad = arg_pack[0]; + CHECK(input_pad.as_tensor()); + stages[input_pad.as_tensor_ref()]->ComputeInline(); + *ret = CINNValuePack{{arg_pack[1], CINNValue(stages)}}; + } else { + *ret = arg_pack; + } }); auto strategy = std::make_shared(); @@ -850,7 +850,6 @@ std::vector> InferShapeForPool3d(const std::vector output_shape0 = inputs_shape[0]; std::vector output_shape1 = inputs_shape[0]; CHECK_EQ(inputs_shape[0].size(), 6U) << "input_shape size for pool3d should be 6.\n"; int depth_axis = -1; @@ -868,13 +867,7 @@ std::vector> InferShapeForPool3d(const std::vector> InferShapeForPool3d(const std::vector> res{output_shape0, output_shape1}; + std::vector> res{output_shape1}; return res; } std::vector InferDtypeForPool(const std::vector &inputs_type, const framework::NodeAttr &attrs) { CHECK(!inputs_type.empty()) << "The input's type size is 0! 
Please check again."; - std::vector res{inputs_type[0], inputs_type[0]}; + std::vector res{inputs_type[0]}; return res; } @@ -1152,7 +1145,7 @@ CINN_REGISTER_HELPER(nn_ops) { CINN_REGISTER_OP(conv2d) .describe("Do a 2-D convolution with an NCHW/NHWC layout.") .set_num_inputs(2) // here we consider filter as another input - .set_num_outputs(3) + .set_num_outputs(1) .set_attr("CINNStrategy", cinn::hlir::op::StrategyForConv2d) .set_attr("infershape", std::function(cinn::hlir::op::InferShapeForConv2d)) .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForConv2d)) @@ -1161,7 +1154,7 @@ CINN_REGISTER_HELPER(nn_ops) { CINN_REGISTER_OP(depthwise_conv2d) .describe("Do a 2-D depthwise convolution with an NCHW/NHWC layout.") .set_num_inputs(2) // here we consider filter as another input - .set_num_outputs(2) + .set_num_outputs(1) .set_attr("CINNStrategy", cinn::hlir::op::StrategyForDepthwiseConv2d) .set_attr("infershape", std::function(cinn::hlir::op::InferShapeForDepthwiseConv2d)) .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForDepthwiseConv2d)) @@ -1179,7 +1172,7 @@ CINN_REGISTER_HELPER(nn_ops) { CINN_REGISTER_OP(pool1d) .describe("Do pooling on the width dimension of the input tensor.") .set_num_inputs(1) - .set_num_outputs(2) + .set_num_outputs(1) .set_attr("CINNStrategy", cinn::hlir::op::StrategyForPool1d) .set_attr("infershape", std::function(cinn::hlir::op::InferShapeForPool1d)) .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForPool)) @@ -1188,7 +1181,7 @@ CINN_REGISTER_HELPER(nn_ops) { CINN_REGISTER_OP(pool2d) .describe("Do pooling on the height and width dimension of the input tensor.") .set_num_inputs(1) - .set_num_outputs(2) + .set_num_outputs(1) .set_attr("CINNStrategy", cinn::hlir::op::StrategyForPool2d) .set_attr("infershape", std::function(cinn::hlir::op::InferShapeForPool2d)) .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForPool)) @@ -1197,7 +1190,7 @@ CINN_REGISTER_HELPER(nn_ops) { CINN_REGISTER_OP(pool3d) .describe("Do pooling on the depth, height and width dimension of the input tensor.") .set_num_inputs(1) - .set_num_outputs(2) + .set_num_outputs(1) .set_attr("CINNStrategy", cinn::hlir::op::StrategyForPool3d) .set_attr("infershape", std::function(cinn::hlir::op::InferShapeForPool3d)) .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForPool)) @@ -1230,14 +1223,5 @@ CINN_REGISTER_HELPER(nn_ops) { .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForSlice)) .set_support_level(4); - CINN_REGISTER_OP(depthwise_conv2d) - .describe("Do a 2-D depthwise convolution with an NCHW/NHWC layout.") - .set_num_inputs(2) // here we consider filter as another input - .set_num_outputs(2) - .set_attr("CINNStrategy", cinn::hlir::op::StrategyForDepthwiseConv2d) - .set_attr("infershape", std::function(cinn::hlir::op::InferShapeForDepthwiseConv2d)) - .set_attr("inferdtype", std::function(cinn::hlir::op::InferDtypeForDepthwiseConv2d)) - .set_support_level(4); - return true; } diff --git a/cinn/hlir/pe/nn.cc b/cinn/hlir/pe/nn.cc index f023c5ccf9b18..050de1e80be1b 100644 --- a/cinn/hlir/pe/nn.cc +++ b/cinn/hlir/pe/nn.cc @@ -45,37 +45,24 @@ std::vector Conv2d_NCHW(const ir::Tensor &input, int stride_w, int dilation_h, int dilation_w, - const std::vector> &output_shapes, const std::string &output_name) { CHECK_EQ(input->shape.size(), 4U) << "Input's dimension of Conv2d_NCHW op is not 4! Please check."; CHECK_EQ(weights->shape.size(), 4U) << "Weight's dimension of Conv2d_NCHW op is not 4! 
Please check."; std::vector output_shape; std::vector new_weights_shape; std::vector input_pad_shape; - if (output_shapes.size() == 3) { - // already computed by infer_shape - CHECK_EQ(output_shapes[0].size(), 4U) << "The size of output_shapes[0] of Conv2d op is not 4! Please check."; - CHECK_EQ(output_shapes[1].size(), 4U) << "The size of output_shapes[1] of Conv2d op is not 4! Please check."; - CHECK_EQ(output_shapes[2].size(), 4U) << "The size of output_shapes[2] of Conv2d op is not 4! Please check."; - output_shape = { - Expr(output_shapes[2][0]), Expr(output_shapes[2][1]), Expr(output_shapes[2][2]), Expr(output_shapes[2][3])}; - new_weights_shape = { - Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}; - input_pad_shape = { - Expr(output_shapes[0][0]), Expr(output_shapes[0][1]), Expr(output_shapes[0][2]), Expr(output_shapes[0][3])}; - } else { - output_shape = { - input->shape[0], // B - weights->shape[0], // O - Expr((input->shape[2] - ((weights->shape[2] - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + 1), // H - Expr((input->shape[3] - ((weights->shape[3] - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + 1) // W - }; - new_weights_shape = {weights->shape[0], - weights->shape[1], - dilation_h * (weights->shape[2] - 1) + 1, - dilation_w * (weights->shape[3] - 1) + 1}; - input_pad_shape = {input->shape[0], input->shape[1], input->shape[2] + 2 * pad_h, input->shape[3] + 2 * pad_w}; - } + output_shape = { + input->shape[0], // B + weights->shape[0], // O + Expr((input->shape[2] - ((weights->shape[2] - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + 1), // H + Expr((input->shape[3] - ((weights->shape[3] - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + 1) // W + }; + new_weights_shape = {weights->shape[0], + weights->shape[1], + dilation_h * (weights->shape[2] - 1) + 1, + dilation_w * (weights->shape[3] - 1) + 1}; + input_pad_shape = {input->shape[0], input->shape[1], input->shape[2] + 2 * pad_h, input->shape[3] + 2 * pad_w}; + auto input_pad = Compute( input_pad_shape, [=](Expr nn, Expr cc, Expr yy, Expr xx) { @@ -123,38 +110,25 @@ std::vector Conv2d_NHWC(const ir::Tensor &input, int stride_w, int dilation_h, int dilation_w, - const std::vector> &output_shapes, const std::string &output_name) { CHECK_EQ(input->shape.size(), 4U) << "Input's dimension of Conv2d_NHWC op is not 4! Please check."; CHECK_EQ(weights->shape.size(), 4U) << "Weight's dimension of Conv2d_NHWC op is not 4! Please check."; std::vector output_shape; std::vector new_weights_shape; std::vector input_pad_shape; - if (output_shapes.size() == 3) { - // already computed by infer_shape - CHECK_EQ(output_shapes[0].size(), 4U) << "The size of output_shapes[0] of Conv2d op is not 4! Please check."; - CHECK_EQ(output_shapes[1].size(), 4U) << "The size of output_shapes[1] of Conv2d op is not 4! Please check."; - CHECK_EQ(output_shapes[2].size(), 4U) << "The size of output_shapes[2] of Conv2d op is not 4! 
Please check."; - output_shape = { - Expr(output_shapes[2][0]), Expr(output_shapes[2][1]), Expr(output_shapes[2][2]), Expr(output_shapes[2][3])}; - new_weights_shape = { - Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}; - input_pad_shape = { - Expr(output_shapes[0][0]), Expr(output_shapes[0][1]), Expr(output_shapes[0][2]), Expr(output_shapes[0][3])}; - } else { - output_shape = { - input->shape[0], // B - Expr((input->shape[1] - ((weights->shape[2] - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + 1), // H - Expr((input->shape[2] - ((weights->shape[3] - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + 1), // W - weights->shape[0] // O - }; - new_weights_shape = {weights->shape[0], - weights->shape[1], - dilation_h * (weights->shape[2] - 1) + 1, - dilation_w * (weights->shape[3] - 1) + 1}; - input_pad_shape = {input->shape[0], input->shape[1] + 2 * pad_h, input->shape[2] + 2 * pad_w, input->shape[3]}; - } - auto input_pad = Compute( + + output_shape = { + input->shape[0], // B + Expr((input->shape[1] - ((weights->shape[2] - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + 1), // H + Expr((input->shape[2] - ((weights->shape[3] - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + 1), // W + weights->shape[0] // O + }; + new_weights_shape = {weights->shape[0], + weights->shape[1], + dilation_h * (weights->shape[2] - 1) + 1, + dilation_w * (weights->shape[3] - 1) + 1}; + input_pad_shape = {input->shape[0], input->shape[1] + 2 * pad_h, input->shape[2] + 2 * pad_w, input->shape[3]}; + auto input_pad = Compute( input_pad_shape, [=](Expr nn, Expr yy, Expr xx, Expr cc) { auto cond = @@ -200,7 +174,6 @@ std::vector Depthwise_Conv2d_NCHW(const Tensor &input, int pad_w, int stride_h, int stride_w, - const std::vector> &output_shapes, const std::string output_name) { CHECK_EQ(input->shape.size(), 4U) << "Input's dimension of Depthwise_Conv2d_NCHW is not 4! Please check.\n"; CHECK_EQ(weight->shape.size(), 4U) << "Weight's dimension of Depthwise_Conv2d_NCHW is not 4! Please check.\n"; @@ -208,20 +181,13 @@ std::vector Depthwise_Conv2d_NCHW(const Tensor &input, Expr in_w = input->shape[3]; Expr c_m = weight->shape[1]; // channel_multiplier std::vector output_shape; - if (output_shapes.size() == 2) { - // already computed by infer_shape - CHECK_EQ(output_shapes[1].size(), 4U) - << "The size of output_shapes[1] of Depthwise_Conv2d op is not 4! Please check."; - output_shape = { - Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}; - } else { - output_shape = { - input->shape[0], // B - weight->shape[1] * input->shape[1], // O - (input->shape[2] - weight->shape[2] + 2 * pad_h) / stride_h + 1, // H - (input->shape[3] - weight->shape[3] + 2 * pad_w) / stride_w + 1 // W - }; - } + + output_shape = { + input->shape[0], // B + weight->shape[1] * input->shape[1], // O + (input->shape[2] - weight->shape[2] + 2 * pad_h) / stride_h + 1, // H + (input->shape[3] - weight->shape[3] + 2 * pad_w) / stride_w + 1 // W + }; auto input_pad = (pad_h == 0 && pad_w == 0) ? Identity(input) : Pad(input, {Expr(0), Expr(0), Expr(pad_h), Expr(pad_w)}); @@ -245,7 +211,6 @@ std::vector Depthwise_Conv2d_NHWC(const Tensor &input, int pad_w, int stride_h, int stride_w, - const std::vector> &output_shapes, const std::string output_name) { CHECK_EQ(input->shape.size(), 4U) << "Input's dimension of Depthwise_Conv2d_NCHW is not 4! 
Please check.\n"; CHECK_EQ(weight->shape.size(), 4U) << "Weight's dimension of Depthwise_Conv2d_NCHW is not 4! Please check.\n"; @@ -253,20 +218,13 @@ std::vector Depthwise_Conv2d_NHWC(const Tensor &input, Expr in_w = input->shape[2]; Expr c_m = weight->shape[1]; // channel_multiplier std::vector output_shape; - if (output_shapes.size() == 2) { - // already computed by infer_shape - CHECK_EQ(output_shapes[1].size(), 4U) - << "The size of output_shapes[1] of Depthwise_Conv2d op is not 4! Please check."; - output_shape = { - Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}; - } else { - output_shape = { - input->shape[0], // B - (input->shape[1] - weight->shape[2] + 2 * pad_h) / stride_h + 1, // H - (input->shape[2] - weight->shape[3] + 2 * pad_w) / stride_w + 1, // W - weight->shape[1] * input->shape[3] // O - }; - } + + output_shape = { + input->shape[0], // B + (input->shape[1] - weight->shape[2] + 2 * pad_h) / stride_h + 1, // H + (input->shape[2] - weight->shape[3] + 2 * pad_w) / stride_w + 1, // W + weight->shape[1] * input->shape[3] // O + }; auto input_pad = (pad_h == 0 && pad_w == 0) ? Identity(input) : Pad(input, {Expr(0), Expr(pad_h), Expr(pad_w), Expr(0)}); @@ -541,7 +499,7 @@ std::vector PoolImpl(const Tensor &tensor, if (pool_type == "max") { Expr min_value = ir::min_value(tensor->type()); // Pad the input tensor with the pad_value of type's minimum value - temp = do_pad ? Pad(tensor, pad_before, pad_after, min_value, UniqName("pad_temp")) : Identity(tensor); + temp = do_pad ? Pad(tensor, pad_before, pad_after, min_value, UniqName("pad_temp")) : tensor; res = Compute( out_shape, [=](const std::vector &output) { @@ -559,7 +517,7 @@ std::vector PoolImpl(const Tensor &tensor, daxis); } else if (pool_type == "avg") { // Pad the input tensor with pad_value zero - temp = do_pad ? Pad(tensor, pad_before, pad_after, 0, UniqName("pad_temp")) : Identity(tensor); + temp = do_pad ? 
Pad(tensor, pad_before, pad_after, 0, UniqName("pad_temp")) : tensor; res = Compute( out_shape, [=](const std::vector &output) { @@ -599,7 +557,11 @@ std::vector PoolImpl(const Tensor &tensor, } else { LOG(ERROR) << "Unrecognized pool_type: " << pool_type; } - return {temp, res}; + if (do_pad) { + return {temp, res}; + } else { + return {res}; + } } std::vector Pool1d(const Tensor &tensor, diff --git a/cinn/hlir/pe/nn.h b/cinn/hlir/pe/nn.h index 4a03505ab50c9..ee8aaaf3657b9 100644 --- a/cinn/hlir/pe/nn.h +++ b/cinn/hlir/pe/nn.h @@ -97,7 +97,6 @@ std::vector Conv2d_NCHW(const ir::Tensor &input, int stride_w, int dilation_h, int dilation_w, - const std::vector> &output_shapes, const std::string &output_name = UniqName("T_Conv2d_NCHW_out")); /** @@ -124,7 +123,6 @@ std::vector Conv2d_NHWC(const ir::Tensor &input, int stride_w, int dilation_h, int dilation_w, - const std::vector> &output_shapes, const std::string &output_name = UniqName("T_Conv2d_NHWC_out")); /** @@ -147,7 +145,6 @@ std::vector Depthwise_Conv2d_NCHW(const ir::Tensor &input, int pad_w, int stride_h, int stride_w, - const std::vector> &output_shapes, const std::string output_name = UniqName("T_depthwise_conv2d_nchw")); /** @@ -170,7 +167,6 @@ std::vector Depthwise_Conv2d_NHWC(const ir::Tensor &input, int pad_w, int stride_h, int stride_w, - const std::vector> &output_shapes, const std::string output_name = UniqName("T_depthwise_conv2d_nhwc")); ir::Tensor BatchNorm_NCHW(const ir::Tensor &input, diff --git a/cinn/lang/lower_impl.cc b/cinn/lang/lower_impl.cc index fbdf51d7a1aad..2ea457259e4ce 100644 --- a/cinn/lang/lower_impl.cc +++ b/cinn/lang/lower_impl.cc @@ -400,6 +400,14 @@ ir::LoweredFunc LowerImpl::operator()() { if (arg->is_placeholder_node()) continue; if (arg->buffer.defined()) continue; if (arg->body().As() && arg->body().type().is_void()) continue; // extern call + if (tensor_map.find(arg->name) == tensor_map.end()) { + LOG(INFO) << "Didn't find arg tensor " << arg->name << "in tensor_map.\n" + << "The function is " << fn_name_ << "\nAnd all the arg tensors are:\n"; + for (auto& i : tensor_args_) { + LOG(INFO) << i->name; + } + LOG(FATAL) << "Fatal Error!"; + } Reference(&arg)->buffer = tensor_map.at(arg->name)->buffer; } } @@ -421,7 +429,9 @@ ir::LoweredFunc LowerImpl::operator()() { auto func = ir::_LoweredFunc_::Make(fn_name_, func_args, func_body, temp_buffers); // some necessary modification. + LOG(INFO) << "Before optim::ComputeInlineExpand(&func->body, stages_); in function " << fn_name_; optim::ComputeInlineExpand(&func->body, stages_); + LOG(INFO) << "After optim::ComputeInlineExpand(&func->body, stages_); in function " << fn_name_; Target target = cuda_axis_info_.valid() ? 
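Note: the guard added in LowerImpl::operator() exists because tensor_map.at on a missing argument tensor would otherwise abort with an opaque std::out_of_range; the new code first checks membership and logs every candidate tensor name before failing. A generic standalone illustration of that pattern (names illustrative, plain exception instead of glog):

#include <map>
#include <stdexcept>
#include <string>

// Illustrative only: fail with a descriptive message instead of letting
// std::map::at throw an unexplained std::out_of_range.
const std::string& LookupBuffer(const std::map<std::string, std::string>& tensor_map,
                                const std::string& arg_name) {
  auto it = tensor_map.find(arg_name);
  if (it == tensor_map.end()) {
    throw std::runtime_error("Didn't find arg tensor '" + arg_name + "' in tensor_map.");
  }
  return it->second;
}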
common::DefaultNVGPUTarget() : common::DefaultHostTarget(); auto res = optim::Optimize(func, target, FLAGS_cinn_runtime_display_debug_info); diff --git a/cinn/optim/compute_inline_expand.cc b/cinn/optim/compute_inline_expand.cc index f4c6e934f12a7..3c5c04b7eedd3 100644 --- a/cinn/optim/compute_inline_expand.cc +++ b/cinn/optim/compute_inline_expand.cc @@ -16,7 +16,11 @@ struct TensorInlineExpandMutator : public ir::IRMutator<> { TensorInlineExpandMutator(const std::string &tensor_name) : tensor_name(tensor_name) {} - void operator()(Expr *expr) { ir::IRMutator<>::Visit(expr, expr); } + void operator()(Expr *expr) { + LOG(INFO) << "void operator()(Expr *expr) Begin"; + ir::IRMutator<>::Visit(expr, expr); + LOG(INFO) << "void operator()(Expr *expr) End"; + } void Visit(const ir::Load *op, Expr *expr) override { auto *node = expr->As(); diff --git a/cinn/pybind/framework.cc b/cinn/pybind/framework.cc index b938c190ec967..20af6aa50d0d3 100644 --- a/cinn/pybind/framework.cc +++ b/cinn/pybind/framework.cc @@ -35,19 +35,24 @@ void BindFramework(pybind11::module *m) { auto impl = OpStrategy::SelectImpl(self[op_ptr](attrs, inputs, out_types, output_shapes, target)); std::vector temp_inputs; std::vector res; - for (auto tensor : inputs) { + for (auto &tensor : inputs) { res.push_back(tensor); temp_inputs.push_back(common::CINNValue(tensor)); } - auto stages = CreateStages(inputs); - temp_inputs.push_back(common::CINNValue(stages)); common::CINNValuePack C = impl->fcompute(common::CINNValuePack{temp_inputs}); - C = impl->fschedule(C); - for (int i = 0; i < C.get()->size() - 1; i++) { + poly::StageMap stages = C.back(); + // make sure all the tensors in the stages before schedule launch. + for (int i = 0; i < C->size() - 1; i++) { + ir::Expr temp = C[i]; + stages->InsertLazily(temp.as_tensor_ref()); + } + C = impl->fschedule(C); + for (int i = 0; i < C->size() - 1; i++) { ir::Expr temp = C[i]; res.push_back(temp.as_tensor_ref()); } - return res; + auto func = Lower(key, stages, res); + return func; }); py::class_(*m, "NodeAttr") diff --git a/python/tests/conv2d_utils.py b/python/tests/conv2d_utils.py index 02c9f880ae382..dfab9ba545663 100644 --- a/python/tests/conv2d_utils.py +++ b/python/tests/conv2d_utils.py @@ -72,26 +72,8 @@ def conv2d_native(inputs_data, input_shape, filter_size, attrs, is_depthwise): print("output's shape is:", output.shape) res_shape = output.shape[1:] - pad_shape = list(input_shape) - dilation_shape = list(filter_size_new) - assert len(padding) == 2 - assert len(pad_shape) == 4 - assert len(dilation_shape) == 4 - if data_format == "NCHW": - h_index = 2 - w_index = 3 - else: - h_index = 1 - w_index = 2 - - pad_shape[h_index] += 2 * padding[0] - pad_shape[w_index] += 2 * padding[1] - dilation_shape[2] = (filter_size_new[2] - 1) * dilation[0] + 1 - dilation_shape[3] = (filter_size_new[3] - 1) * dilation[1] + 1 - print("pad's shape is:", pad_shape) - print("dilation's shape is:", dilation_shape) if is_depthwise: - return output, [pad_shape, res_shape] + return output, [res_shape] else: - return output, [pad_shape, dilation_shape, res_shape] + return output, [res_shape] diff --git a/python/tests/pool_utils.py b/python/tests/pool_utils.py index 05c09c0c6dd9f..3a78195a999db 100644 --- a/python/tests/pool_utils.py +++ b/python/tests/pool_utils.py @@ -51,14 +51,8 @@ def pool2d(np_data, attrs, dtype="float32"): else: pt, pl, pb, pr = padding_size - out_shape0 = list(in_shape) - out_shape0[height_axis] = in_shape[height_axis] + pt + pb - out_shape0[width_axis] = in_shape[width_axis] + pl 
+ pr - out_shape = list(in_shape) if ceil_mode: - out_shape0[height_axis] += s_h - 1 - out_shape0[width_axis] += s_w - 1 out_shape[height_axis] = int( math.ceil(float(in_shape[height_axis] - k_h + pt + pb) / s_h) + 1) out_shape[width_axis] = int( @@ -144,7 +138,7 @@ def pool2d(np_data, attrs, dtype="float32"): raise ValueError("pool type {} is not supported".format(pool_type)) ret_np = np.maximum(ret_np, fill_value) - return ret_np, [out_shape0, out_shape] + return ret_np, [out_shape] def pool3d(np_data, attrs, dtype="float32"): @@ -196,16 +190,8 @@ def pool3d(np_data, attrs, dtype="float32"): else: pf, pt, pl, pk, pb, pr = padding_size - out_shape0 = list(in_shape) - out_shape0[depth_axis] = in_shape[depth_axis] + pf + pk - out_shape0[height_axis] = in_shape[height_axis] + pt + pb - out_shape0[width_axis] = in_shape[width_axis] + pl + pr - out_shape = list(in_shape) if ceil_mode: - out_shape0[depth_axis] += s_d - 1 - out_shape0[height_axis] += s_h - 1 - out_shape0[width_axis] += s_w - 1 out_shape[depth_axis] = int( math.ceil(float(in_shape[depth_axis] - k_d + pf + pk) / s_d) + 1) out_shape[height_axis] = int( @@ -302,7 +288,7 @@ def pool3d(np_data, attrs, dtype="float32"): raise ValueError("pool type {} is not supported".format(pool_type)) ret_np = np.maximum(ret_np, fill_value) - return ret_np, [out_shape0, out_shape] + return ret_np, [out_shape] def pool1d(np_data, attrs, dtype="float32"): @@ -350,12 +336,8 @@ def pool1d(np_data, attrs, dtype="float32"): else: pl, pr = padding_size - out_shape0 = list(in_shape) - out_shape0[width_axis] = in_shape[width_axis] + pl + pr - out_shape = list(in_shape) if ceil_mode: - out_shape0[width_axis] += s_w - 1 out_shape[width_axis] = int( math.ceil(float(in_shape[width_axis] - k_w + pl + pr) / s_w) + 1) else: @@ -422,4 +404,4 @@ def pool1d(np_data, attrs, dtype="float32"): raise ValueError("pool type {} is not supported".format(pool_type)) ret_np = np.maximum(ret_np, fill_value) - return ret_np, [out_shape0, out_shape] + return ret_np, [out_shape] diff --git a/python/tests/test_utils.py b/python/tests/test_utils.py index 8b088805b4651..45ddc2e86fca9 100644 --- a/python/tests/test_utils.py +++ b/python/tests/test_utils.py @@ -100,10 +100,8 @@ def to_test_op(self, input_shapes, output_shapes, op_name, attrs): def __codegen(self, op_name, inputs, output_shapes, attrs): types = [common.Float(32)] strategy_map = framework.Operator.get_op_attrs("CINNStrategy") - res = strategy_map.apply_strategy(op_name, attrs, inputs, types, - output_shapes, self.target) - stages = create_stages(res) - func = lang.lower(op_name, stages, res) + func = strategy_map.apply_strategy(op_name, attrs, inputs, types, + output_shapes, self.target) logging.warning('func:\n\n%s\n', func) builder = lang.Module.Builder(op_name, self.target) builder.add_function(func)
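Note: with the padded intermediate shape dropped, the Python references in pool_utils.py (and InferShapeForPool1d/2d/3d) keep only the pooled output extent. A standalone C++ sketch of the per-axis formula they use, ceil_mode included (helper name is illustrative):

#include <cmath>

// Illustrative only: output extent of pooling along one axis, matching the
// reference math in pool_utils.py (pt/pb are the two paddings on that axis,
// k the kernel size, s the stride).
int PooledExtent(int in, int k, int pt, int pb, int s, bool ceil_mode) {
  double span = static_cast<double>(in - k + pt + pb) / s;
  return static_cast<int>((ceil_mode ? std::ceil(span) : std::floor(span)) + 1);
}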