diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index f37f8f0d6a1e8..ccb5e1e5320d5 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -32,9 +32,6 @@
 #ifdef PADDLE_WITH_NVTX
 #include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h"
 #endif
-#ifdef PADDLE_WITH_LITE
-#include "paddle/fluid/operators/lite/lite_engine_op.h"
-#endif
 
 namespace paddle {
 namespace framework {
@@ -334,38 +331,7 @@ void NaiveExecutor::ResetTrtOps(int num) {
 #endif
 }
 
-void NaiveExecutor::CloneLiteEngine(int num, void *stream) {
-#ifdef PADDLE_WITH_LITE
-  for (auto &op : ops_) {
-    if (op->Type() == "lite_engine") {
-      operators::LiteEngineOp *lite_op =
-          dynamic_cast<operators::LiteEngineOp *>(op.get());
-      PADDLE_ENFORCE_NOT_NULL(
-          lite_op,
-          phi::errors::InvalidArgument(
-              "lite_op(type: lite_engine) should be created."));
-      std::string engine_key = lite_op->Attr<std::string>("engine_key");
-      std::string new_engine_key = engine_key + "_" + std::to_string(num);
-      PADDLE_ENFORCE(
-          paddle::inference::Singleton<inference::lite::EngineManager>::Global()
-              .Has(engine_key),
-          phi::errors::InvalidArgument(
-              "lite_engine(key: %s) should be created.", engine_key));
-      auto *lite_engine =
-          paddle::inference::Singleton<inference::lite::EngineManager>::Global()
-              .Get(engine_key);
-      auto new_lite_engine = lite_engine->Clone();
-#ifdef LITE_SUBGRAPH_WITH_XPU
-      new_lite_engine->SetStream(TARGET(kXPU), stream);
-#endif
-      paddle::inference::Singleton<inference::lite::EngineManager>::Global()
-          .Set(new_engine_key, new_lite_engine);
-      lite_op->SetAttr("engine_key", new_engine_key);
-      lite_op->SetEngine(new_lite_engine.get());
-    }
-  }
-#endif
-}
+void NaiveExecutor::CloneLiteEngine(int num, void *stream) {}
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 280f24bdd6fa6..2b342add94906 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -19,7 +19,6 @@ add_subdirectory(controlflow)
 add_subdirectory(detection)
 add_subdirectory(elementwise)
 add_subdirectory(fused)
-add_subdirectory(metrics)
 add_subdirectory(optimizers)
 add_subdirectory(reduce_ops)
 add_subdirectory(sequence_ops)
@@ -49,10 +48,6 @@ if (WITH_DLNNE)
   add_subdirectory(dlnne)
 endif()
 
-if (WITH_LITE)
-  add_subdirectory(lite)
-endif()
-
 if(WITH_CINN)
   add_subdirectory(cinn)
 endif()
diff --git a/paddle/fluid/operators/ctc_align_op.cc b/paddle/fluid/operators/ctc_align_op.cc
deleted file mode 100644
index a40ba84610293..0000000000000
--- a/paddle/fluid/operators/ctc_align_op.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/ - -#include "paddle/fluid/operators/ctc_align_op.h" - -namespace paddle { -namespace operators { - -class CTCAlignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "ctc_align"); - OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "ctc_align"); - - auto input_dims = ctx->GetInputDim("Input"); - - // TODO(wanghaoshuang): it is tricky to set the wrong dimension here. - ctx->SetOutputDim("Output", input_dims); - if (ctx->HasInput("InputLength")) { - ctx->SetOutputDim("OutputLength", {input_dims[0], 1}); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"), - ctx.device_context().GetPlace()); - } -}; - -class CTCAlignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Input", - "2-D Tensor or LodTensor with shape " - "[Lp, 1], where Lp is the sum of all input sequences' length."); - AddInput("InputLength", - "2-D Tensor with shape [batch_size, 1], " - " When Input is padding mode, InputLength is length of every " - "sequence in Input.") - .AsDispensable(); - AddOutput("Output", "(Tensor, default: Tensor), The align result."); - AddOutput("OutputLength", - "2-D Tensor with shape [batch_size, 1], " - "When Input is padding mode, OutputLength is length of every " - "sequence in Output.") - .AsDispensable(); - AddAttr("blank", - "(int, default: 0), the blank label set in Connectionist " - "Temporal Classification (CTC) op.") - .SetDefault(0); - AddAttr("merge_repeated", - "(bool, default: true), whether to " - "merge repeated elements between two blanks. ") - .SetDefault(true); - // add attr padding number for tensor input - AddAttr("padding_value", - "(int, default: 0), padding number " - "use to padding tensor. ") - .SetDefault(0); - AddComment(R"DOC( -CTCAlign op is used to merge repeated elements between two blanks -and then delete all blanks in sequence. - -Given: - Input.data = [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, - 6, 0, 0, 7, 7, 7, 0] - Input.dims = {18, 1} - Input.LoD = [[0, 11, 18]] - -And: - blank = 0 - merge_repeated = True - -Then: - Output.data = [1, 2, 4, 4, 5, 6, - 6, 7] - Output.dims = {8, 1} - Output.LoD = [[0, 6, 8]] -or Given: - Input.data = [[0, 1, 2, 2, 0, 4], - [0, 4, 5, 0, 6, 0], - [0, 7, 7, 7, 0, 0]] - InputLength.data = [[6], - [5], - [4]], - Input.dims = {3, 6}, - Input.Lod = [] -And: - blank = 0 - merge_repeated = True - padding_value = 0 - -Then: - Output.data = [[1, 2, 4, 0, 0, 0], - [4, 5, 6, 0, 0, 0], - [7, 0, 0, 0, 0, 0]], - OutputLength.data = [[3], - [3], - [1]], - Output.dims = {3, 6}, - Output.Lod = [] -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - ctc_align, - ops::CTCAlignOp, - ops::CTCAlignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - ctc_align, CPU, ALL_LAYOUT, ops::CTCAlignKernel, int, int64_t) {} diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu deleted file mode 100644 index 76466ed12ab88..0000000000000 --- a/paddle/fluid/operators/ctc_align_op.cu +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include - -#include "paddle/fluid/operators/ctc_align_op.h" - -namespace paddle { -namespace operators { - -template -__global__ void MergeAndDelCudaKernel(const int64_t num_token, - const T* tokens, - const size_t num_seq, - size_t* lod0, - const int blank, - const int merge_repeated, - size_t* out_lod0, - T* output) { - int output_idx = 0; - out_lod0[0] = 0; - - for (int i = 0; i < num_seq; ++i) { - T pre_token = -1; - for (int j = lod0[i]; j < lod0[i + 1]; ++j) { - if (tokens[j] != blank && !(merge_repeated && tokens[j] == pre_token)) { - output[output_idx] = tokens[j]; - ++output_idx; - } - pre_token = tokens[j]; - } - out_lod0[i + 1] = output_idx; - } -} - -template -__global__ void PaddingMergeAndDelCudaKernel(const int64_t num_token, - const T* tokens, - const T* tokens_length, - const int blank, - const int merge_repeated, - const int padding_value, - const int64_t batch_size, - T* output, - T* output_length) { - int ind = blockIdx.x * blockDim.x + threadIdx.x; - if (ind >= batch_size) return; - int output_idx = ind * num_token; - T prev_token = -1; - for (int i = ind * num_token; i < ind * num_token + tokens_length[ind]; i++) { - if ((unsigned)tokens[i] != blank && - !(merge_repeated && tokens[i] == prev_token)) { - output[output_idx] = tokens[i]; - ++output_idx; - } - prev_token = tokens[i]; - } - output_length[ind] = output_idx - ind * num_token; - for (int i = output_idx; i < ind * num_token + num_token; i++) { - output[i] = padding_value; - } -} - -template -class CTCAlignOpCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - phi::errors::InvalidArgument( - "CTCAlign operator CUDA kernel must use CUDAPlace " - "rather than CPUPlace.")); - auto* input = ctx.Input("Input"); - auto* output = ctx.Output("Output"); - const int blank = ctx.Attr("blank"); - const int merge_repeated = - static_cast(ctx.Attr("merge_repeated")); - const T* tokens = input->data(); - auto stream = ctx.cuda_device_context().stream(); - - // tensor input which has no lod - if (input->lod().empty()) { - const int padding_value = ctx.Attr("padding_value"); - auto input_dims = input->dims(); - T* output_data = output->mutable_data({input_dims[0], input_dims[1]}, - ctx.GetPlace()); - auto* input_length = ctx.Input("InputLength"); - const T* input_length_data = input_length->data(); - auto* output_length = ctx.Output("OutputLength"); - T* output_length_data = - output_length->mutable_data({input_dims[0], 1}, ctx.GetPlace()); - PaddingMergeAndDelCudaKernel - <<<32, (input_dims[0] + 32 - 1) / 32, 0, stream>>>( - input_dims[1], - tokens, - input_length_data, - blank, - merge_repeated, - padding_value, - input_dims[0], - output_data, - output_length_data); - } else { - const size_t level = 0; - auto input_lod = framework::ToAbsOffset(input->lod()); - - const int64_t num_tokens = input->dims()[0]; - const size_t 
num_seq = input_lod[level].size() - 1; - - // prepare a lod to record lod information while merging elements - thrust::device_vector dev_out_lod0(input_lod[level].size()); - size_t* dev_out_lod0_ptr = thrust::raw_pointer_cast(dev_out_lod0.data()); - - // merge elements and delete blank - T* output_data = output->mutable_data({num_tokens, 1}, ctx.GetPlace()); - - phi::MixVector mixv_input_lod(&input_lod[level]); - MergeAndDelCudaKernel - <<<1, 1, 0, stream>>>(num_tokens, - tokens, - num_seq, - mixv_input_lod.CUDAMutableData(ctx.GetPlace()), - blank, - merge_repeated, - dev_out_lod0_ptr, - output_data); - mixv_input_lod.CopyToCPU(); - - // set output lod - std::vector host_out_lod0(dev_out_lod0.begin(), - dev_out_lod0.end()); - framework::LoD out_lod; - out_lod.push_back(host_out_lod0); - output->set_lod(out_lod); - - // resize output dims - output->Resize({static_cast(host_out_lod0.back()), 1}); - - if (host_out_lod0.back() == 0) { - output->Resize({1, 1}); - output->mutable_data(ctx.GetPlace()); - phi::funcs::SetConstant set_constant; - set_constant( - ctx.template device_context(), output, -1); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL( - ctc_align, GPU, ALL_LAYOUT, ops::CTCAlignOpCUDAKernel, int, int64_t) {} diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h deleted file mode 100644 index 9ebfa7196ecc5..0000000000000 --- a/paddle/fluid/operators/ctc_align_op.h +++ /dev/null @@ -1,119 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -class CTCAlignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* output = ctx.Output("Output"); - size_t blank = static_cast(ctx.Attr("blank")); - bool merge_repeated = ctx.Attr("merge_repeated"); - T* output_data = output->mutable_data(ctx.GetPlace()); - auto input_dims = common::vectorize(input->dims()); - const T* input_data = input->data(); - - // support tensor input, no lod information - if (input->lod().empty()) { - size_t padding_value = - static_cast(ctx.Attr("padding_value")); - auto* input_length = ctx.Input("InputLength"); - const T* input_length_data = input_length->data(); - - auto* output_length = ctx.Output("OutputLength"); - T* output_length_data = output_length->mutable_data(ctx.GetPlace()); - - for (size_t batch_id = 0; batch_id < (unsigned)input_dims[0]; - batch_id++) { - T prev_token = -1; - size_t output_idx = 0; - for (size_t i = 0; i < (unsigned)input_length_data[batch_id]; i++) { - size_t input_ind = batch_id * input_dims[1] + i; - if ((unsigned)input_data[input_ind] != blank && - !(merge_repeated && input_data[input_ind] == prev_token)) { - output_data[batch_id * input_dims[1] + output_idx] = - input_data[input_ind]; - ++output_idx; - } - prev_token = input_data[input_ind]; - } - output_length_data[batch_id] = output_idx; - for (size_t j = output_idx; j < (unsigned)input_dims[1]; j++) - output_data[batch_id * input_dims[1] + j] = padding_value; - } - } else { - const size_t level = 0; - auto input_lod = framework::ToAbsOffset(input->lod()); - - // check input dims and lod - PADDLE_ENFORCE_EQ( - input_dims[0], - static_cast(input_lod[level].back()), - phi::errors::InvalidArgument( - "The first dimension %d of CTCAlign operator Input(Input) should " - "be equal to " - "the sum of all sequences' lengths %d.", - input_dims[0], - static_cast(input_lod[level].back()))); - - const size_t num_sequences = input_lod[level].size() - 1; - - // merge repeated tokens and delete blank - size_t output_idx = 0; - std::vector output_lod0(1, 0); - for (size_t seq_idx = 0; seq_idx < num_sequences; ++seq_idx) { - T prev_token = -1; - for (size_t i = input_lod[level][seq_idx]; - i < input_lod[level][seq_idx + 1]; - ++i) { - if ((unsigned)input_data[i] != blank && - !(merge_repeated && input_data[i] == prev_token)) { - output_data[output_idx] = input_data[i]; - ++output_idx; - } - prev_token = input_data[i]; - } - output_lod0.push_back(output_idx); - } - - // set output lod - framework::LoD output_lod; - output_lod.push_back(output_lod0); - output->set_lod(output_lod); - // resize output dims - output->Resize({static_cast(output_lod0.back()), 1}); - // for empty sequence - if (output_lod0.back() == 0) { - output->Resize({1, 1}); - output_data = output->mutable_data(ctx.GetPlace()); - output_data[0] = -1; - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/dequeue_op.cc b/paddle/fluid/operators/dequeue_op.cc deleted file mode 100644 index 8fcc0fbfb47da..0000000000000 --- a/paddle/fluid/operators/dequeue_op.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" -using LoDTensorBlockingQueueHolder = - paddle::operators::reader::LoDTensorBlockingQueueHolder; - -namespace paddle { -namespace operators { - -class DequeueOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - DequeueOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override { - const std::string& queue_name = Attr("queue_name"); - auto* queue_holder_var = scope.FindVar(queue_name); - PADDLE_ENFORCE_NOT_NULL( - queue_holder_var, - phi::errors::NotFound( - "No LoDTensorBlockingQueueHolder variable with name %s found.", - queue_name)); - auto* queue_holder = - queue_holder_var->template GetMutable(); - auto& out_names = Outputs("Out"); - PADDLE_ENFORCE_GT(out_names.size(), - 0, - phi::errors::InvalidArgument( - "The output for Op(dequeue) must be set.")); - for (const auto& out_name : out_names) { - auto out_var = scope.FindVar(out_name); - PADDLE_ENFORCE_NOT_NULL( - out_var, - phi::errors::NotFound("No variable with name %s found", out_name)); - auto* out_tensor = out_var->GetMutable(); - PADDLE_ENFORCE_NOT_NULL( - out_tensor, - phi::errors::InvalidArgument( - "Variable with name %s has not been initialized.", out_name)); - - paddle::framework::LoDTensorArray lod_tensor_vec; - bool success = false; - lod_tensor_vec = queue_holder->GetQueue()->Pop(&success); - PADDLE_ENFORCE_EQ(lod_tensor_vec.size(), - 1, - phi::errors::InvalidArgument( - "Expected to pop only one element per Pop call for " - "Op(dequeue), but poped %d element.", - lod_tensor_vec.size())); - for (auto& lod_tensor : lod_tensor_vec) { - paddle::framework::TensorCopySync(lod_tensor, dev_place, out_tensor); - } - } - } -}; - -class DequeueOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddAttr("queue_name", - "Name of the `LoDTensorBlockingQueueHolder` variable"); - AddOutput("Out", "A list of `lod_tensor` to dequeue and assigned.") - .AsDuplicable(); - AddComment(R"DOC( - Dequeue operator. 
- )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = ::paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(dequeue, ops::DequeueOp, ops::DequeueOpMaker); diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 84b5ab20144e3..9c8914d14a647 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -36,15 +36,10 @@ detection_library(anchor_generator_op SRCS anchor_generator_op.cc anchor_generator_op.cu) detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc polygon_box_transform_op.cu) -detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) -detection_library(generate_proposal_labels_op SRCS - generate_proposal_labels_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) -detection_library(retinanet_detection_output_op SRCS - retinanet_detection_output_op.cc) if(WITH_GPU OR WITH_ROCM) if(WITH_GPU) @@ -67,8 +62,3 @@ endif() #Export local libraries to parent # set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE) - -cc_library(mask_util SRCS mask_util.cc) - -detection_library(generate_mask_labels_op SRCS generate_mask_labels_op.cc DEPS - mask_util) diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc deleted file mode 100644 index 5ee843d72387b..0000000000000 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ /dev/null @@ -1,547 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include -#include -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/detection/mask_util.h" -#include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -const int kBoxDim = 4; - -template -void AppendMask(phi::DenseTensor* out, - int64_t offset, - phi::DenseTensor* to_add) { - auto* out_data = out->data(); - auto* to_add_data = to_add->data(); - memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); -} - -class GenerateMaskLabelsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("ImInfo"), - true, - phi::errors::InvalidArgument("Input(ImInfo) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtClasses"), - true, - phi::errors::InvalidArgument("Input(GtClasses) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("IsCrowd"), - true, - phi::errors::InvalidArgument("Input(IsCrowd) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtSegms"), - true, - phi::errors::InvalidArgument("Input(GtSegms) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Rois"), - true, - phi::errors::InvalidArgument("Input(Rois) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("LabelsInt32"), - true, - phi::errors::InvalidArgument("Input(LabelsInt32) shouldn't be null.")); - - PADDLE_ENFORCE_EQ( - ctx->HasOutput("MaskRois"), - true, - phi::errors::InvalidArgument( - "Output(MaskRois) of GenerateMaskLabelsOp should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("RoiHasMaskInt32"), - true, - phi::errors::InvalidArgument( - "Output(RoiHasMaskInt32) of GenerateMaskLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("MaskInt32"), - true, - phi::errors::InvalidArgument( - "Output(MaskInt32) of GenerateMaskLabelsOp should not be null")); - - auto im_info_dims = ctx->GetInputDim("ImInfo"); - auto gt_segms_dims = ctx->GetInputDim("GtSegms"); - PADDLE_ENFORCE_EQ( - im_info_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input(ImInfo) must be 2.")); - PADDLE_ENFORCE_EQ( - gt_segms_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input(GtSegms) must be 2.")); - PADDLE_ENFORCE_EQ(gt_segms_dims[1], - 2, - phi::errors::InvalidArgument( - "The second dim of Input(GtSegms) must be 2.")); - int num_classes = ctx->Attrs().Get("num_classes"); - int resolution = ctx->Attrs().Get("resolution"); - - ctx->SetOutputDim("MaskRois", {-1, 4}); - ctx->SetOutputDim("RoiHasMaskInt32", {-1, 1}); - ctx->SetOutputDim("MaskInt32", {-1, num_classes * resolution * resolution}); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("MaskRois", ctx->GetLoDLevel("Rois")); - ctx->SetLoDLevel("RoiHasMaskInt32", ctx->GetLoDLevel("Rois")); - ctx->SetLoDLevel("MaskInt32", ctx->GetLoDLevel("Rois")); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Rois"); - return phi::KernelKey(data_type, platform::CPUPlace()); - } -}; - -/* - * Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2) - * to encode class specific mask targets. 
- */ -template -static inline void ExpandMaskTarget(const phi::CPUContext& ctx, - const phi::DenseTensor& masks, - const phi::DenseTensor& mask_class_labels, - const int resolution, - const int num_classes, - phi::DenseTensor* mask_targets) { - const uint8_t* masks_data = masks.data(); - int64_t num_mask = masks.dims()[0]; - const int* mask_class_labels_data = mask_class_labels.data(); - const int M = resolution * resolution; - const int mask_dim = M * num_classes; - - int* mask_targets_data = - mask_targets->mutable_data({num_mask, mask_dim}, ctx.GetPlace()); - phi::funcs::set_constant(ctx, mask_targets, static_cast(-1)); - for (int64_t mask_id = 0; mask_id < num_mask; ++mask_id) { - int cls = mask_class_labels_data[mask_id]; - int start = M * cls; - if (cls > 0) { - for (int i = 0; i < M; ++i) { - mask_targets_data[mask_id * mask_dim + start + i] = - static_cast(masks_data[mask_id * M + i]); - } - } - } -} - -template -std::vector SampleMaskForOneImage( - const phi::CPUContext& ctx, - const phi::DenseTensor& im_info, - const phi::DenseTensor& gt_classes, - const phi::DenseTensor& is_crowd, - const phi::DenseTensor& gt_segms, - const phi::DenseTensor& rois, - const phi::DenseTensor& label_int32, - const int num_classes, - const int resolution, - const framework::LoD& segm_length) { - // Prepare the mask targets by associating one gt mask to each training roi - // that has a fg (non-bg) class label. - const int64_t gt_size = static_cast(gt_classes.dims()[0]); - const int64_t roi_size = static_cast(rois.dims()[0]); - const int* gt_classes_data = gt_classes.data(); - const int* is_crowd_data = is_crowd.data(); - const int* label_int32_data = label_int32.data(); - PADDLE_ENFORCE_EQ(roi_size, - label_int32.dims()[0], - phi::errors::InvalidArgument( - "The first dim of label [%d] is the different from " - "roi_size [%d], they should be same.", - label_int32.dims()[0], - roi_size)); - - std::vector mask_gt_inds, fg_inds; - std::vector>> gt_polys; - - auto polys_num = segm_length[1]; - auto segm_lod_offset = framework::ConvertToOffsetBasedLoD(segm_length); - auto lod1 = segm_lod_offset[1]; - auto lod2 = segm_lod_offset[2]; - const T* polys_data = gt_segms.data(); - for (int64_t i = 0; i < gt_size; ++i) { - if ((gt_classes_data[i] > 0) && (is_crowd_data[i] == 0)) { - mask_gt_inds.emplace_back(i); - - // slice fg segmentation polys - int poly_num = static_cast(polys_num[i]); - std::vector> polys; - int s_idx = static_cast(lod1[i]); - for (int j = 0; j < poly_num; ++j) { - int s = static_cast(lod2[s_idx + j]); - int e = static_cast(lod2[s_idx + j + 1]); - PADDLE_ENFORCE_NE(s, - e, - phi::errors::InvalidArgument( - "The start point and the end point in the poly " - "segment [%d] should not be same, but received " - "the start point [%d] and the end point [%d].", - i, - s, - e)); - std::vector plts(polys_data + s * 2, polys_data + e * 2); - polys.push_back(plts); - } - gt_polys.push_back(polys); - } - } - for (int64_t i = 0; i < roi_size; ++i) { - if (label_int32_data[i] > 0) { - fg_inds.emplace_back(i); - } - } - int gt_num = static_cast(mask_gt_inds.size()); - int fg_num = static_cast(fg_inds.size()); - - phi::DenseTensor boxes_from_polys; - boxes_from_polys.mutable_data({gt_num, 4}, platform::CPUPlace()); - Poly2Boxes(gt_polys, boxes_from_polys.data()); - - std::vector roi_has_mask = - std::vector(fg_inds.begin(), fg_inds.end()); - phi::DenseTensor mask_class_labels; - phi::DenseTensor masks; - phi::DenseTensor rois_fg; - - auto im_scale = im_info.data()[2]; - if (fg_num > 0) { - // Class labels 
for the foreground rois - mask_class_labels.mutable_data({fg_num, 1}, ctx.GetPlace()); - Gather(label_int32_data, - 1, - fg_inds.data(), - static_cast(fg_inds.size()), - mask_class_labels.data()); - - uint8_t* masks_data = masks.mutable_data( - {fg_num, resolution * resolution}, ctx.GetPlace()); - - // Find overlap between all foreground rois and the bounding boxes - // enclosing each segmentation - T* rois_fg_data = rois_fg.mutable_data({fg_num, 4}, ctx.GetPlace()); - Gather( - rois.data(), 4, fg_inds.data(), fg_inds.size(), rois_fg.data()); - - for (int k = 0; k < rois_fg.numel(); ++k) { - rois_fg_data[k] = rois_fg_data[k] / im_scale; - } - - phi::DenseTensor overlaps_bbfg_bbpolys; - overlaps_bbfg_bbpolys.mutable_data({fg_num, gt_num}, ctx.GetPlace()); - BboxOverlaps(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys); - - // Map from each fg rois to the index of the mask with highest overlap - // (measured by bbox overlap) - T* overlaps_bbfg_bbpolys_data = overlaps_bbfg_bbpolys.data(); - std::vector fg_masks_inds; - for (int64_t i = 0; i < fg_num; ++i) { - const T* v = overlaps_bbfg_bbpolys_data + i * gt_num; - T max_overlap = std::numeric_limits::min(); - int id = 0; - for (int64_t j = 0; j < gt_num; ++j) { - if (v[j] > max_overlap) { - max_overlap = v[j]; - id = static_cast(j); - } - } - fg_masks_inds.push_back(id); - } - - // add fg targets - for (int64_t i = 0; i < fg_num; ++i) { - int fg_polys_ind = fg_masks_inds[i]; - T* roi_fg = rois_fg_data + i * 4; - uint8_t* mask = masks_data + i * resolution * resolution; - Polys2MaskWrtBox(gt_polys[fg_polys_ind], roi_fg, resolution, mask); - } - } else { - // The network cannot handle empty blobs, so we must provide a mask - // We simply take the first bg roi, given it an all -1's mask (ignore - // label), and label it with class zero (bg). 
- int bg_num = 1; - T* rois_fg_data = rois_fg.mutable_data({bg_num, 4}, ctx.GetPlace()); - const T* rois_data = rois.data(); - std::vector bg_inds; - for (int64_t i = 0; i < roi_size; ++i) { - if (label_int32_data[i] == 0) { - bg_inds.emplace_back(i); - rois_fg_data[0] = rois_data[0] / im_scale; - rois_fg_data[1] = rois_data[1] / im_scale; - rois_fg_data[2] = rois_data[2] / im_scale; - rois_fg_data[3] = rois_data[3] / im_scale; - break; - } - } - masks.mutable_data({bg_num, resolution * resolution}, - ctx.GetPlace()); - phi::funcs::set_constant(ctx, &masks, static_cast(-1)); - int* mask_class_labels_data = - mask_class_labels.mutable_data({bg_num, 1}, ctx.GetPlace()); - mask_class_labels_data[0] = 0; - roi_has_mask = std::vector(bg_inds.begin(), bg_inds.end()); - } - - phi::DenseTensor masks_expand; - ExpandMaskTarget( - ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand); - - T* rois_fg_data = rois_fg.data(); - for (int k = 0; k < rois_fg.numel(); ++k) { - rois_fg_data[k] = rois_fg_data[k] * im_scale; - } - - phi::DenseTensor roi_has_mask_t; - int roi_has_mask_size = static_cast(roi_has_mask.size()); - int* roi_has_mask_data = - roi_has_mask_t.mutable_data({roi_has_mask_size, 1}, ctx.GetPlace()); - std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data); - - std::vector res; - res.emplace_back(rois_fg); - res.emplace_back(roi_has_mask_t); - res.emplace_back(masks_expand); - return res; -} - -template -class GenerateMaskLabelsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* im_info = ctx.Input("ImInfo"); - auto* gt_classes = ctx.Input("GtClasses"); - auto* is_crowd = ctx.Input("IsCrowd"); - auto* gt_segms = ctx.Input("GtSegms"); - auto* rois = ctx.Input("Rois"); - auto* label_int32 = ctx.Input("LabelsInt32"); - - auto* mask_rois = ctx.Output("MaskRois"); - auto* roi_has_mask_int32 = ctx.Output("RoiHasMaskInt32"); - auto* mask_int32 = ctx.Output("MaskInt32"); - - int num_classes = ctx.Attr("num_classes"); - int resolution = ctx.Attr("resolution"); - - PADDLE_ENFORCE_EQ( - gt_classes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp gt_classes needs 1 level of LoD")); - PADDLE_ENFORCE_EQ( - is_crowd->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp is_crowd needs 1 level of LoD")); - PADDLE_ENFORCE_EQ(rois->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp rois needs 1 level of LoD")); - PADDLE_ENFORCE_EQ( - label_int32->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp label_int32 needs 1 level of LoD")); - - PADDLE_ENFORCE_EQ( - gt_segms->lod().size(), - 3UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp gt_segms needs 3 level of LoD")); - - int64_t n = static_cast(gt_classes->lod().back().size() - 1); - PADDLE_ENFORCE_EQ( - gt_segms->lod()[0].size() - 1, - n, - phi::errors::InvalidArgument( - "Batchsize of Input(gt_segms) and Input(gt_classes) should be " - "same, but received gt_segms[%d], gt_classes[%d].", - gt_segms->lod()[0].size() - 1, - n)); - - int mask_dim = num_classes * resolution * resolution; - int roi_num = static_cast(rois->lod().back()[n]); - mask_rois->mutable_data({roi_num, kBoxDim}, ctx.GetPlace()); - roi_has_mask_int32->mutable_data({roi_num, 1}, ctx.GetPlace()); - mask_int32->mutable_data({roi_num, mask_dim}, ctx.GetPlace()); - - framework::LoD lod; - std::vector lod0(1, 0); - - int64_t num_mask = 0; - auto& dev_ctx = ctx.device_context(); 
- - auto gt_classes_lod = gt_classes->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - auto rois_lod = rois->lod().back(); - auto label_int32_lod = label_int32->lod().back(); - auto gt_segms_lod = gt_segms->lod(); - - for (int i = 0; i < n; ++i) { - if (rois_lod[i] == rois_lod[i + 1]) { - lod0.emplace_back(num_mask); - continue; - } - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - phi::DenseTensor gt_classes_slice = - gt_classes->Slice(static_cast(gt_classes_lod[i]), - static_cast(gt_classes_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor label_int32_slice = - label_int32->Slice(static_cast(label_int32_lod[i]), - static_cast(label_int32_lod[i + 1])); - phi::DenseTensor rois_slice = - rois->Slice(static_cast(rois_lod[i]), - static_cast(rois_lod[i + 1])); - - auto sub_lod_and_offset = - framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0); - auto lod_length = sub_lod_and_offset.first; - size_t s = sub_lod_and_offset.second.first; - size_t e = sub_lod_and_offset.second.second; - phi::DenseTensor gt_segms_slice = - gt_segms->Slice(static_cast(s), static_cast(e)); - - std::vector tensor_output = - SampleMaskForOneImage(dev_ctx, - im_info_slice, - gt_classes_slice, - is_crowd_slice, - gt_segms_slice, - rois_slice, - label_int32_slice, - num_classes, - resolution, - lod_length); - - phi::DenseTensor sampled_mask_rois = tensor_output[0]; - phi::DenseTensor sampled_roi_has_mask_int32 = tensor_output[1]; - phi::DenseTensor sampled_mask_int32 = tensor_output[2]; - - AppendMask(mask_rois, kBoxDim * num_mask, &sampled_mask_rois); - AppendMask( - roi_has_mask_int32, num_mask, &sampled_roi_has_mask_int32); - AppendMask(mask_int32, mask_dim * num_mask, &sampled_mask_int32); - - num_mask += sampled_mask_rois.dims()[0]; - lod0.emplace_back(num_mask); - } - - lod.emplace_back(lod0); - mask_rois->set_lod(lod); - roi_has_mask_int32->set_lod(lod); - mask_int32->set_lod(lod); - mask_rois->Resize({num_mask, kBoxDim}); - roi_has_mask_int32->Resize({num_mask, 1}); - mask_int32->Resize({num_mask, mask_dim}); - } -}; - -class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("ImInfo", - "(Tensor), This input is a 2D Tensor with shape [B, 3]. " - "B is the number of input images, " - "each element consists of im_height, im_width, im_scale."); - AddInput("GtClasses", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with " - "shape [M, 1]. " - "M is the number of groundtruth, " - "each element is a class label of groundtruth."); - AddInput( - "IsCrowd", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[M, 1]. " - "M is the number of groundtruth, " - "each element is a flag indicates whether a groundtruth is crowd."); - AddInput( - "GtSegms", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[S, 2], it's LoD " - "level is 3. The LoD[0] represents the gt objects number of each " - "instance. LoD[1] represents the segmentation counts of each objects. " - "LoD[2] represents the polygons number of each segmentation. S the " - "total number of polygons coordinate points. Each element is (x, y) " - "coordinate points."); - AddInput( - "Rois", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[R, 4]. 
" - "R is the number of rois which is the output of " - "generate_proposal_labels, " - "each element is a bounding box with (xmin, ymin, xmax, ymax) format."); - AddInput("LabelsInt32", - "(phi::DenseTensor), This intput is a 2D phi::DenseTensor with " - "shape [R, 1], " - "each element represents a class label of a roi"); - AddOutput( - "MaskRois", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4]. " - "P is the number of mask, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddOutput("RoiHasMaskInt32", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with " - "shape [P, 1], " - "each element represents the output mask rois index with regard " - "to input rois"); - AddOutput("MaskInt32", - "(phi::DenseTensor), This output is a 4D phi::DenseTensor with " - "shape [P, Q], " - "Q equal to num_classes * resolution * resolution"); - - AddAttr("num_classes", "Class number."); - AddAttr("resolution", "Resolution of mask."); - - AddComment(R"DOC( -This operator can be, for given the RoIs and corresponding labels, -to sample foreground RoIs. This mask branch also has -a :math: `K \\times M^{2}` dimensional output targets for each foreground -RoI, which encodes K binary masks of resolution M x M, one for each of the -K classes. This mask targets are used to compute loss of mask branch. - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - generate_mask_labels, - ops::GenerateMaskLabelsOp, - ops::GenerateMaskLabelsOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(generate_mask_labels, - CPU, - ALL_LAYOUT, - ops::GenerateMaskLabelsKernel, - float) {} diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc deleted file mode 100644 index ad37aa2ae682f..0000000000000 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ /dev/null @@ -1,837 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/phi/kernels/funcs/gather.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -const int kBoxDim = 4; - -template -void AppendRois(phi::DenseTensor* out, - int64_t offset, - phi::DenseTensor* to_add) { - auto* out_data = out->data(); - auto* to_add_data = to_add->data(); - memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); -} - -// Filter the ground-truth in RoIs and the RoIs with non-positive area. 
-// The ground-truth has max overlap with itself so the max_overlap is 1 -// and the corresponding RoI will be removed. -template -void FilterRoIs(const platform::DeviceContext& ctx, - const phi::DenseTensor& rpn_rois, - const phi::DenseTensor& max_overlap, - phi::DenseTensor* keep) { - const T* rpn_rois_dt = rpn_rois.data(); - const T* max_overlap_dt = max_overlap.data(); - int rois_num = static_cast(max_overlap.numel()); - keep->Resize({rois_num}); - int* keep_data = keep->mutable_data(ctx.GetPlace()); - int keep_len = 0; - for (int i = 0; i < rois_num; ++i) { - if ((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) > 0 && - (rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) > 0 && - max_overlap_dt[i] < 1.) { - keep_data[keep_len++] = i; - } - } - keep->Resize({keep_len}); -} - -class GenerateProposalLabelsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("RpnRois"), - true, - phi::errors::NotFound("Input(RpnRois) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtClasses"), - true, - phi::errors::NotFound("Input(GtClasses) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("IsCrowd"), - true, - phi::errors::NotFound("Input(IsCrowd) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtBoxes"), - true, - phi::errors::NotFound("Input(GtBoxes) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("ImInfo"), - true, - phi::errors::NotFound("Input(ImInfo) shouldn't be null.")); - - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Rois"), - true, - phi::errors::NotFound( - "Output(Rois) of GenerateProposalLabelsOp should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("LabelsInt32"), - true, - phi::errors::NotFound("Output(LabelsInt32) of " - "GenerateProposalLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("BboxTargets"), - true, - phi::errors::NotFound("Output(BboxTargets) of " - "GenerateProposalLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("BboxInsideWeights"), - true, - phi::errors::NotFound( - "Output(BboxInsideWeights) of GenerateProposalLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("BboxOutsideWeights"), - true, - phi::errors::NotFound( - "Output(BboxOutsideWeights) of GenerateProposalLabelsOp " - "should not be null")); - - auto rpn_rois_dims = ctx->GetInputDim("RpnRois"); - auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - - PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(RpnRois) must be 2. " - "But received dimensions size=[%d], dimensions=[%s].", - rpn_rois_dims.size(), - rpn_rois_dims)); - PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(GtBoxes) must be 2. " - "But received dimensions size=[%d], dimensions=[%s].", - gt_boxes_dims.size(), - gt_boxes_dims)); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(ImInfo) must be 2. 
But " - "received dimensions size=[%d], dimensions=[%s].", - im_info_dims.size(), - im_info_dims)); - - int class_nums = ctx->Attrs().Get("class_nums"); - bool is_cascade_rcnn = ctx->Attrs().Get("is_cascade_rcnn"); - if (is_cascade_rcnn) { - PADDLE_ENFORCE_EQ( - ctx->HasInput("MaxOverlap"), - true, - phi::errors::NotFound( - "Input(MaxOverlap) of GenerateProposalLabelsOp " - "should not be null when is_cascade_rcnn is True.")); - } - - ctx->SetOutputDim("Rois", {-1, 4}); - ctx->SetOutputDim("LabelsInt32", {-1, 1}); - ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums}); - ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums}); - ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums}); - ctx->SetOutputDim("MaxOverlapWithGT", {-1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "RpnRois"); - return phi::KernelKey(data_type, platform::CPUPlace()); - } -}; - -template -void Concat(const phi::CPUContext& context, - const phi::DenseTensor& in_tensor_a, - const phi::DenseTensor& in_tensor_b, - phi::DenseTensor* out_tensor) { - int axis = 0; - std::vector inputs; - inputs.emplace_back(in_tensor_a); - inputs.emplace_back(in_tensor_b); - math::ConcatFunctor concat_functor; - concat_functor(context, inputs, axis, out_tensor); -} - -template -std::vector> SampleFgBgGt(const phi::CPUContext& context, - phi::DenseTensor* iou, - const phi::DenseTensor& is_crowd, - const int batch_size_per_im, - const float fg_fraction, - const float fg_thresh, - const float bg_thresh_hi, - const float bg_thresh_lo, - std::minstd_rand engine, - const bool use_random, - const bool is_cascade_rcnn, - const phi::DenseTensor& rpn_rois) { - std::vector fg_inds; - std::vector bg_inds; - std::vector mapped_gt_inds; - int64_t gt_num = is_crowd.numel(); - const int* crowd_data = is_crowd.data(); - T* proposal_to_gt_overlaps = iou->data(); - int64_t row = iou->dims()[0]; - int64_t col = iou->dims()[1]; - float epsilon = 0.00001; - // Follow the Faster RCNN's implementation - for (int64_t i = 0; i < row; ++i) { - const T* v = proposal_to_gt_overlaps + i * col; - - T max_overlap = *std::max_element(v, v + col); - if ((i < gt_num) && (crowd_data[i])) { - max_overlap = -1.0; - } - if (max_overlap >= fg_thresh) { - // fg mapped gt label index - for (int64_t j = 0; j < col; ++j) { - T val = proposal_to_gt_overlaps[i * col + j]; - auto diff = std::abs(max_overlap - val); - if (diff < epsilon) { - fg_inds.emplace_back(i); - mapped_gt_inds.emplace_back(j); - break; - } - } - } else if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) { - bg_inds.emplace_back(i); - } else { - continue; - } - } - - std::vector> res; - if (is_cascade_rcnn) { - res.emplace_back(fg_inds); - res.emplace_back(bg_inds); - res.emplace_back(mapped_gt_inds); - } else { - // Reservoir Sampling - // sampling fg - std::uniform_real_distribution uniform(0, 1); - int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction); // NOLINT - int fg_rois_this_image = static_cast(fg_inds.size()); - int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image); - if (use_random) { - const int64_t fg_size = static_cast(fg_inds.size()); - if (fg_size > fg_rois_per_this_image) { - for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) { - int rng_ind = std::floor(uniform(engine) * i); // NOLINT - if (rng_ind < fg_rois_per_this_image) { - std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i); - 
std::iter_swap(mapped_gt_inds.begin() + rng_ind, - mapped_gt_inds.begin() + i); - } - } - } - } - std::vector new_fg_inds(fg_inds.begin(), - fg_inds.begin() + fg_rois_per_this_image); - std::vector new_gt_inds( - mapped_gt_inds.begin(), - mapped_gt_inds.begin() + fg_rois_per_this_image); - // sampling bg - int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image; - int bg_rois_this_image = static_cast(bg_inds.size()); - int bg_rois_per_this_image = - std::min(bg_rois_per_image, bg_rois_this_image); - if (use_random) { - const int64_t bg_size = static_cast(bg_inds.size()); - if (bg_size > bg_rois_per_this_image) { - for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) { - int rng_ind = std::floor(uniform(engine) * i); // NOLINT - if (rng_ind < fg_rois_per_this_image) - std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i); - } - } - } - std::vector new_bg_inds(bg_inds.begin(), - bg_inds.begin() + bg_rois_per_this_image); - // - res.emplace_back(new_fg_inds); - res.emplace_back(new_bg_inds); - res.emplace_back(new_gt_inds); - } - - return res; -} - -template -void GatherBoxesLabels(const phi::CPUContext& context, - const phi::DenseTensor& boxes, - const phi::DenseTensor& max_overlap, - const phi::DenseTensor& gt_boxes, - const phi::DenseTensor& gt_classes, - const std::vector& fg_inds, - const std::vector& bg_inds, - const std::vector& gt_inds, - phi::DenseTensor* sampled_boxes, - phi::DenseTensor* sampled_labels, - phi::DenseTensor* sampled_gts, - phi::DenseTensor* sampled_max_overlap) { - int fg_num = static_cast(fg_inds.size()); - int bg_num = static_cast(bg_inds.size()); - phi::DenseTensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; - int* fg_inds_data = fg_inds_t.mutable_data({fg_num}, context.GetPlace()); - int* bg_inds_data = bg_inds_t.mutable_data({bg_num}, context.GetPlace()); - int* gt_box_inds_data = - gt_box_inds_t.mutable_data({fg_num}, context.GetPlace()); - int* gt_label_inds_data = - gt_label_inds_t.mutable_data({fg_num}, context.GetPlace()); - std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data); - std::copy(bg_inds.begin(), bg_inds.end(), bg_inds_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data); - - phi::DenseTensor fg_boxes, bg_boxes, fg_labels, bg_labels; - fg_boxes.mutable_data({fg_num, kBoxDim}, context.GetPlace()); - phi::funcs::CPUGather(context, boxes, fg_inds_t, &fg_boxes); - bg_boxes.mutable_data({bg_num, kBoxDim}, context.GetPlace()); - phi::funcs::CPUGather(context, boxes, bg_inds_t, &bg_boxes); - Concat(context, fg_boxes, bg_boxes, sampled_boxes); - phi::funcs::CPUGather(context, gt_boxes, gt_box_inds_t, sampled_gts); - fg_labels.mutable_data({fg_num}, context.GetPlace()); - phi::funcs::CPUGather(context, gt_classes, gt_label_inds_t, &fg_labels); - bg_labels.mutable_data({bg_num}, context.GetPlace()); - phi::funcs::set_constant(context, &bg_labels, static_cast(0)); - Concat(context, fg_labels, bg_labels, sampled_labels); - - phi::DenseTensor fg_max_overlap, bg_max_overlap; - fg_max_overlap.mutable_data({fg_num}, context.GetPlace()); - phi::funcs::CPUGather(context, max_overlap, fg_inds_t, &fg_max_overlap); - bg_max_overlap.mutable_data({bg_num}, context.GetPlace()); - phi::funcs::CPUGather(context, max_overlap, bg_inds_t, &bg_max_overlap); - Concat(context, fg_max_overlap, bg_max_overlap, sampled_max_overlap); -} - -template -std::vector SampleRoisForOneImage( - const phi::CPUContext& context, - const phi::DenseTensor& rpn_rois_in, - const 
phi::DenseTensor& gt_classes, - const phi::DenseTensor& is_crowd, - const phi::DenseTensor& gt_boxes, - const phi::DenseTensor& im_info, - const int batch_size_per_im, - const float fg_fraction, - const float fg_thresh, - const float bg_thresh_hi, - const float bg_thresh_lo, - const std::vector& bbox_reg_weights, - const int class_nums, - std::minstd_rand engine, - bool use_random, - bool is_cascade_rcnn, - bool is_cls_agnostic, - const phi::DenseTensor& max_overlap) { - // 1.1 map to original image - auto im_scale = im_info.data()[2]; - phi::DenseTensor rpn_rois; - rpn_rois.mutable_data(rpn_rois_in.dims(), context.GetPlace()); - const T* rpn_rois_in_dt = rpn_rois_in.data(); - T* rpn_rois_dt = rpn_rois.data(); - - for (int i = 0; i < rpn_rois.numel(); ++i) { - rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale; - } - - int proposals_num = 1; - - if (is_cascade_rcnn) { - phi::DenseTensor keep; - FilterRoIs(context, rpn_rois, max_overlap, &keep); - phi::DenseTensor roi_filter; - // phi::DenseTensor box_filter; - if (keep.numel() == 0) { - phi::funcs::SetConstant set_zero; - roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - set_zero(context, &roi_filter, static_cast(0)); - } else { - proposals_num = static_cast(keep.numel()); - roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - phi::funcs::CPUGather(context, rpn_rois, keep, &roi_filter); - } - T* roi_filter_dt = roi_filter.data(); - memcpy(rpn_rois_dt, roi_filter_dt, roi_filter.numel() * sizeof(T)); - rpn_rois.Resize(roi_filter.dims()); - } else { - proposals_num = static_cast(rpn_rois.dims()[0]); - } - // 1.2 compute overlaps - proposals_num += static_cast(gt_boxes.dims()[0]); - - phi::DenseTensor proposal_to_gt_overlaps; - proposal_to_gt_overlaps.mutable_data({proposals_num, gt_boxes.dims()[0]}, - context.GetPlace()); - - phi::DenseTensor boxes; - boxes.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - Concat(context, gt_boxes, rpn_rois, &boxes); - BboxOverlaps(boxes, gt_boxes, &proposal_to_gt_overlaps); - - phi::DenseTensor proposal_with_max_overlap; - proposal_with_max_overlap.mutable_data({proposals_num}, - context.GetPlace()); - - MaxIoU(proposal_to_gt_overlaps, &proposal_with_max_overlap); - - // Generate proposal index - std::vector> fg_bg_gt = - SampleFgBgGt(context, - &proposal_to_gt_overlaps, - is_crowd, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - engine, - use_random, - is_cascade_rcnn, - boxes); - std::vector fg_inds = fg_bg_gt[0]; - std::vector bg_inds = fg_bg_gt[1]; - std::vector mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels - - // Gather boxes and labels - phi::DenseTensor sampled_boxes, sampled_labels, sampled_gts, - sampled_max_overlap; - int fg_num = static_cast(fg_inds.size()); - int bg_num = static_cast(bg_inds.size()); - int boxes_num = fg_num + bg_num; - framework::DDim bbox_dim({boxes_num, kBoxDim}); - sampled_boxes.mutable_data(bbox_dim, context.GetPlace()); - sampled_labels.mutable_data({boxes_num}, context.GetPlace()); - sampled_gts.mutable_data({fg_num, kBoxDim}, context.GetPlace()); - sampled_max_overlap.mutable_data({boxes_num}, context.GetPlace()); - GatherBoxesLabels(context, - boxes, - proposal_with_max_overlap, - gt_boxes, - gt_classes, - fg_inds, - bg_inds, - mapped_gt_inds, - &sampled_boxes, - &sampled_labels, - &sampled_gts, - &sampled_max_overlap); - - // Compute targets - phi::DenseTensor bbox_targets_single; - bbox_targets_single.mutable_data(bbox_dim, context.GetPlace()); - BoxToDelta(fg_num, - sampled_boxes, 
- sampled_gts, - bbox_reg_weights.data(), - false, - &bbox_targets_single); - - // Scale rois - phi::DenseTensor sampled_rois; - sampled_rois.mutable_data(sampled_boxes.dims(), context.GetPlace()); - auto sampled_rois_et = framework::EigenTensor::From(sampled_rois); - auto sampled_boxes_et = framework::EigenTensor::From(sampled_boxes); - sampled_rois_et = sampled_boxes_et * im_scale; - - // Expand box targets - phi::DenseTensor bbox_targets, bbox_inside_weights, bbox_outside_weights; - framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums}); - bbox_targets.mutable_data(bbox_expand_dim, context.GetPlace()); - bbox_inside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); - bbox_outside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); - phi::funcs::set_constant(context, &bbox_targets, static_cast(0.0)); - phi::funcs::set_constant(context, &bbox_inside_weights, static_cast(0.0)); - phi::funcs::set_constant(context, &bbox_outside_weights, static_cast(0.0)); - - auto* bbox_targets_single_data = bbox_targets_single.data(); - auto* sampled_labels_data = sampled_labels.data(); - auto* bbox_targets_data = bbox_targets.data(); - auto* bbox_inside_weights_data = bbox_inside_weights.data(); - auto* bbox_outside_weights_data = bbox_outside_weights.data(); - int width = kBoxDim * class_nums; - for (int64_t i = 0; i < boxes_num; ++i) { - int label = sampled_labels_data[i]; - if (label > 0) { - if (is_cls_agnostic) { - label = 1; - } - int dst_idx = static_cast(i * width + kBoxDim * label); - int src_idx = static_cast(kBoxDim * i); - bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx]; - bbox_targets_data[dst_idx + 1] = bbox_targets_single_data[src_idx + 1]; - bbox_targets_data[dst_idx + 2] = bbox_targets_single_data[src_idx + 2]; - bbox_targets_data[dst_idx + 3] = bbox_targets_single_data[src_idx + 3]; - bbox_inside_weights_data[dst_idx] = 1; - bbox_inside_weights_data[dst_idx + 1] = 1; - bbox_inside_weights_data[dst_idx + 2] = 1; - bbox_inside_weights_data[dst_idx + 3] = 1; - bbox_outside_weights_data[dst_idx] = 1; - bbox_outside_weights_data[dst_idx + 1] = 1; - bbox_outside_weights_data[dst_idx + 2] = 1; - bbox_outside_weights_data[dst_idx + 3] = 1; - } - } - std::vector res; - res.emplace_back(sampled_rois); - res.emplace_back(sampled_labels); - res.emplace_back(bbox_targets); - res.emplace_back(bbox_inside_weights); - res.emplace_back(bbox_outside_weights); - res.emplace_back(sampled_max_overlap); - return res; -} - -template -class GenerateProposalLabelsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* rpn_rois = context.Input("RpnRois"); - auto* gt_classes = context.Input("GtClasses"); - auto* is_crowd = context.Input("IsCrowd"); - auto* gt_boxes = context.Input("GtBoxes"); - auto* im_info = context.Input("ImInfo"); - - auto* rois = context.Output("Rois"); - auto* labels_int32 = context.Output("LabelsInt32"); - auto* bbox_targets = context.Output("BboxTargets"); - auto* bbox_inside_weights = - context.Output("BboxInsideWeights"); - auto* bbox_outside_weights = - context.Output("BboxOutsideWeights"); - auto* max_overlap_with_gt = - context.Output("MaxOverlapWithGT"); - - int batch_size_per_im = context.Attr("batch_size_per_im"); - float fg_fraction = context.Attr("fg_fraction"); - float fg_thresh = context.Attr("fg_thresh"); - float bg_thresh_hi = context.Attr("bg_thresh_hi"); - float bg_thresh_lo = context.Attr("bg_thresh_lo"); - std::vector bbox_reg_weights = - 
context.Attr>("bbox_reg_weights"); - int class_nums = context.Attr("class_nums"); - bool use_random = context.Attr("use_random"); - bool is_cascade_rcnn = context.Attr("is_cascade_rcnn"); - bool is_cls_agnostic = context.Attr("is_cls_agnostic"); - PADDLE_ENFORCE_EQ( - rpn_rois->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - rpn_rois->lod().size(), - rpn_rois->lod())); - PADDLE_ENFORCE_EQ( - gt_classes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp gt_classes needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - gt_classes->lod().size(), - gt_classes->lod())); - PADDLE_ENFORCE_EQ( - is_crowd->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp is_crowd needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - is_crowd->lod().size(), - is_crowd->lod())); - PADDLE_ENFORCE_EQ( - gt_boxes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - gt_boxes->lod().size(), - gt_boxes->lod())); - int64_t n = static_cast(rpn_rois->lod().back().size() - 1); - int64_t rois_num = rpn_rois->dims()[0]; - int64_t gts_num = gt_boxes->dims()[0]; - int64_t init_num = - is_cascade_rcnn ? rois_num + gts_num : n * batch_size_per_im; - - rois->mutable_data({init_num, kBoxDim}, context.GetPlace()); - labels_int32->mutable_data({init_num, 1}, context.GetPlace()); - bbox_targets->mutable_data({init_num, kBoxDim * class_nums}, - context.GetPlace()); - bbox_inside_weights->mutable_data({init_num, kBoxDim * class_nums}, - context.GetPlace()); - bbox_outside_weights->mutable_data({init_num, kBoxDim * class_nums}, - context.GetPlace()); - max_overlap_with_gt->Resize({init_num}); - max_overlap_with_gt->mutable_data(context.GetPlace()); - - std::random_device rnd; - std::minstd_rand engine; - int seed = static_cast(rnd()); - engine.seed(seed); - - framework::LoD lod; - std::vector lod0(1, 0); - - int64_t num_rois = 0; - auto& dev_ctx = context.device_context(); - - auto rpn_rois_lod = rpn_rois->lod().back(); - auto gt_classes_lod = gt_classes->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - auto gt_boxes_lod = gt_boxes->lod().back(); - for (int i = 0; i < n; ++i) { - if (rpn_rois_lod[i] == rpn_rois_lod[i + 1]) { - lod0.emplace_back(num_rois); - continue; - } - phi::DenseTensor rpn_rois_slice = - rpn_rois->Slice(static_cast(rpn_rois_lod[i]), - static_cast(rpn_rois_lod[i + 1])); - phi::DenseTensor gt_classes_slice = - gt_classes->Slice(static_cast(gt_classes_lod[i]), - static_cast(gt_classes_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor gt_boxes_slice = - gt_boxes->Slice(static_cast(gt_boxes_lod[i]), - static_cast(gt_boxes_lod[i + 1])); - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - phi::DenseTensor max_overlap_slice; - if (is_cascade_rcnn) { - auto* max_overlap = context.Input("MaxOverlap"); - max_overlap_slice = - max_overlap->Slice(static_cast(rpn_rois_lod[i]), - static_cast(rpn_rois_lod[i + 1])); - } else { - max_overlap_slice.mutable_data({rpn_rois_slice.dims()[0]}, - context.GetPlace()); - } - std::vector tensor_output = - SampleRoisForOneImage(dev_ctx, - rpn_rois_slice, - gt_classes_slice, - is_crowd_slice, - gt_boxes_slice, - im_info_slice, - 
batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - engine, - use_random, - is_cascade_rcnn, - is_cls_agnostic, - max_overlap_slice); - phi::DenseTensor sampled_rois = tensor_output[0]; - phi::DenseTensor sampled_labels_int32 = tensor_output[1]; - phi::DenseTensor sampled_bbox_targets = tensor_output[2]; - phi::DenseTensor sampled_bbox_inside_weights = tensor_output[3]; - phi::DenseTensor sampled_bbox_outside_weights = tensor_output[4]; - phi::DenseTensor sampled_max_overlap = tensor_output[5]; - - AppendRois(rois, kBoxDim * num_rois, &sampled_rois); - AppendRois(labels_int32, num_rois, &sampled_labels_int32); - int64_t offset = kBoxDim * num_rois * class_nums; - AppendRois(bbox_targets, offset, &sampled_bbox_targets); - AppendRois(bbox_inside_weights, offset, &sampled_bbox_inside_weights); - AppendRois( - bbox_outside_weights, offset, &sampled_bbox_outside_weights); - AppendRois(max_overlap_with_gt, num_rois, &sampled_max_overlap); - - num_rois += sampled_rois.dims()[0]; - lod0.emplace_back(num_rois); - } - - lod.emplace_back(lod0); - rois->set_lod(lod); - labels_int32->set_lod(lod); - bbox_targets->set_lod(lod); - bbox_inside_weights->set_lod(lod); - bbox_outside_weights->set_lod(lod); - rois->Resize({num_rois, kBoxDim}); - labels_int32->Resize({num_rois, 1}); - bbox_targets->Resize({num_rois, kBoxDim * class_nums}); - bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums}); - bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums}); - max_overlap_with_gt->Resize({num_rois}); - max_overlap_with_gt->set_lod(lod); - } -}; - -class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "RpnRois", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[N, 4]. " - "N is the number of the GenerateProposalOp's output, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddInput("GtClasses", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with " - "shape [M, 1]. " - "M is the number of groundtruth, " - "each element is a class label of groundtruth."); - AddInput( - "IsCrowd", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[M, 1]. " - "M is the number of groundtruth, " - "each element is a flag indicates whether a groundtruth is crowd."); - AddInput( - "GtBoxes", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[M, 4]. " - "M is the number of groundtruth, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddInput("ImInfo", - "(Tensor), This input is a 2D Tensor with shape [B, 3]. " - "B is the number of input images, " - "each element consists of im_height, im_width, im_scale."); - AddInput("MaxOverlap", - "(phi::DenseTensor), This input is a 1D phi::DenseTensor with " - "shape [N]." - "N is the number of Input(RpnRois), " - "each element is the maximum overlap between " - "the proposal RoI and ground-truth.") - .AsDispensable(); - - AddOutput( - "Rois", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4]. 
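// [Editor's sketch, not part of the original op] After sampling, AppendRois
// memcpy-appends each image's rows at a running offset while lod0 records the
// cumulative row count, so the outputs carry a per-image LoD; the final
// Resize calls trim the conservatively preallocated buffers to num_rois.
// Minimal illustration of the append-at-offset pattern (AppendRows is a
// hypothetical stand-in):
#include <cstring>
#include <vector>
static void AppendRows(std::vector<float>* out, size_t row_offset,
                       const std::vector<float>& src, size_t width) {
  // out is row-major with `width` floats per row and must already be large
  // enough, mirroring the preallocation in the kernel above.
  std::memcpy(out->data() + row_offset * width, src.data(),
              src.size() * sizeof(float));
}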
" - "P usuall equal to batch_size_per_im * batch_size, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddOutput("LabelsInt32", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with " - "shape [P, 1], " - "each element represents a class label of a roi"); - AddOutput("BboxTargets", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with " - "shape [P, 4 * " - "class_nums], " - "each element represents a box label of a roi"); - AddOutput( - "BboxInsideWeights", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4 * " - "class_nums], " - "each element indicates whether a box should contribute to loss."); - AddOutput( - "BboxOutsideWeights", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4 * " - "class_nums], " - "each element indicates whether a box should contribute to loss."); - AddOutput("MaxOverlapWithGT", - "(phi::DenseTensor), This output is a 1D phi::DenseTensor with " - "shape [P], " - "each element indicates the maxoverlap " - "between output RoIs and ground-truth. " - "The output RoIs may include ground-truth " - "and the output maxoverlap may contain 1."); - - AddAttr("batch_size_per_im", "Batch size of rois per images."); - AddAttr("fg_fraction", - "Foreground fraction in total batch_size_per_im."); - AddAttr( - "fg_thresh", - "Overlap threshold which is used to chose foreground sample."); - AddAttr("bg_thresh_hi", - "Overlap threshold upper bound which is used to chose " - "background sample."); - AddAttr("bg_thresh_lo", - "Overlap threshold lower bound which is used to chose " - "background sample."); - AddAttr>("bbox_reg_weights", "Box regression weights."); - AddAttr("class_nums", "Class number."); - AddAttr( - "use_random", - "Use random sampling to choose foreground and background boxes.") - .SetDefault(true); - AddAttr("is_cascade_rcnn", - "cascade rcnn sampling policy changed from stage 2.") - .SetDefault(false); - AddAttr( - "is_cls_agnostic", - "the box regress will only include fg and bg locations if set true ") - .SetDefault(false); - - AddComment(R"DOC( -This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, -to sample foreground boxes and background boxes, and compute loss target. - -RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes -were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction, -If an instance with a groundtruth overlap greater than fg_thresh, then it was considered as a foreground sample. -If an instance with a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi, -then it was considered as a background sample. -After all foreground and background boxes are chosen (so called Rois), -then we apply random sampling to make sure -the number of foreground boxes is no more than batch_size_per_im * fg_fraction. - -For each box in Rois, we assign the classification (class label) and regression targets (box label) to it. -Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss. 
- )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - generate_proposal_labels, - ops::GenerateProposalLabelsOp, - ops::GenerateProposalLabelsOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(generate_proposal_labels, - CPU, - ALL_LAYOUT, - ops::GenerateProposalLabelsKernel, - float, - double) {} - -REGISTER_OP_VERSION(generate_proposal_labels) - .AddCheckpoint( - R"ROC( - Upgrade of output [MaxOverlapWithGT])ROC", - paddle::framework::compatible::OpVersionDesc().NewOutput( - "MaxOverlapWithGT", - "The maxoverlap between output RoIs and ground-truth.")) - .AddCheckpoint( - R"ROC( - Upgrade generate_proposal_labels add a new input [MaxOverlap])ROC", - paddle::framework::compatible::OpVersionDesc().NewInput( - "MaxOverlap", "MaxOverlap is dispensable.")); diff --git a/paddle/fluid/operators/detection/mask_util.cc b/paddle/fluid/operators/detection/mask_util.cc deleted file mode 100644 index 5b4dc92f4f6af..0000000000000 --- a/paddle/fluid/operators/detection/mask_util.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/mask_util.h" - -#include -#include - -#include "paddle/fluid/memory/memory.h" - -namespace paddle { -namespace operators { - -uint32_t UMax(uint32_t a, uint32_t b) { return (a > b) ? a : b; } - -static inline int Compare(const void* a, const void* b) { - uint32_t c = *(reinterpret_cast(a)); - uint32_t d = *(reinterpret_cast(b)); - return c > d ? 1 : c < d ? -1 : 0; -} - -void Decode(const uint32_t* cnts, int m, uint8_t* mask) { - uint8_t v = 0; - for (int j = 0; j < m; j++) { - for (uint32_t k = 0; k < cnts[j]; k++) { - *(mask++) = v; - } - v = !v; - } -} - -typedef uint32_t uint; -void Poly2Mask(const float* xy, int k, int h, int w, uint8_t* mask) { - int j = 0, m = 0; - double scale = 5; - int *x = nullptr, *y = nullptr, *u = nullptr, *v = nullptr; - uint *a = nullptr, *b = nullptr; - platform::CPUPlace cpu; - auto xptr = memory::Alloc(cpu, sizeof(int) * (k + 1) * 2); - x = reinterpret_cast(xptr->ptr()); - y = x + (k + 1); - - for (j = 0; j < k; j++) - x[j] = static_cast(std::lround(scale * xy[j * 2 + 0])); - x[k] = x[0]; - for (j = 0; j < k; j++) - y[j] = static_cast(std::lround(scale * xy[j * 2 + 1])); - y[k] = y[0]; - for (j = 0; j < k; j++) { - m += static_cast(UMax(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1])) + 1); - } - auto vptr = memory::Alloc(cpu, sizeof(int) * m * 2); - u = reinterpret_cast(vptr->ptr()); - v = u + m; - m = 0; - for (j = 0; j < k; j++) { - int xs = x[j], xe = x[j + 1], ys = y[j], ye = y[j + 1], dx = 0, dy = 0, - t = 0, d = 0; - int flip = 0; - double s = NAN; - dx = abs(xe - xs); - dy = abs(ys - ye); - flip = (dx >= dy && xs > xe) || (dx < dy && ys > ye); - if (flip) { - t = xs; - xs = xe; - xe = t; - t = ys; - ys = ye; - ye = t; - } - if (dx >= dy) { - s = dx == 0 ? 
0 : static_cast(ye - ys) / dx; - for (d = 0; d <= dx; d++) { - t = flip ? dx - d : d; - u[m] = t + xs; - v[m] = static_cast(std::lround(ys + s * t)); - m++; - } - } else { - s = dy == 0 ? 0 : static_cast(xe - xs) / dy; - for (d = 0; d <= dy; d++) { - t = flip ? dy - d : d; - v[m] = t + ys; - u[m] = static_cast(std::lround(xs + s * t)); - m++; - } - } - } - /* get points along y-boundary and downsample */ - k = m; - m = 0; - double xd = NAN, yd = NAN; - auto xyptr = memory::Alloc(cpu, sizeof(int) * k * 2); - x = reinterpret_cast(xyptr->ptr()); - y = x + k; - for (j = 1; j < k; j++) { - if (u[j] != u[j - 1]) { - xd = static_cast(u[j] < u[j - 1] ? u[j] : u[j] - 1); - xd = (xd + .5) / scale - .5; - if (floor(xd) != xd || xd < 0 || xd > w - 1) continue; - yd = static_cast(v[j] < v[j - 1] ? v[j] : v[j - 1]); - yd = (yd + .5) / scale - .5; - if (yd < 0) - yd = 0; - else if (yd > h) - yd = h; - yd = ceil(yd); - x[m] = static_cast(xd); - y[m] = static_cast(yd); - m++; - } - } - /* compute rle encoding given y-boundary points */ - k = m; - auto aptr = memory::Alloc(cpu, sizeof(uint) * (k + 1)); - a = reinterpret_cast(aptr->ptr()); - for (j = 0; j < k; j++) a[j] = static_cast(x[j] * h + y[j]); - a[k++] = static_cast(h * w); - - qsort(a, k, sizeof(uint), Compare); - uint p = 0; - for (j = 0; j < k; j++) { - uint t = a[j]; - a[j] -= p; - p = t; - } - auto bptr = memory::Alloc(cpu, sizeof(uint32_t) * k); - b = reinterpret_cast(bptr->ptr()); - j = m = 0; - b[m++] = a[j++]; - while (j < k) { - if (a[j] > 0) { - b[m++] = a[j++]; - } else { - j++; - if (j < k) b[m - 1] += a[j++]; - } - } - - // convert to mask - auto mskptr = memory::Alloc(cpu, sizeof(uint8_t) * h * w); - uint8_t* msk = reinterpret_cast(mskptr->ptr()); - Decode(b, m, msk); - - for (int ii = 0; ii < h; ++ii) { - for (int jj = 0; jj < w; ++jj) { - mask[ii * w + jj] = msk[jj * h + ii]; - } - } -} - -void Poly2Boxes(const std::vector>>& polys, - float* boxes) { - // lists - for (size_t i = 0; i < polys.size(); ++i) { - float x0 = std::numeric_limits::max(); - float x1 = std::numeric_limits::min(); - float y0 = std::numeric_limits::max(); - float y1 = std::numeric_limits::min(); - // each list may have more than one polys - for (const auto& item : polys[i]) { - for (size_t k = 0; k < item.size() / 2; ++k) { - x0 = std::min(x0, item[2 * k]); - x1 = std::max(x1, item[2 * k]); - y0 = std::min(y0, item[2 * k + 1]); - y1 = std::max(y1, item[2 * k + 1]); - } - } - boxes[i * 4] = x0; - boxes[i * 4 + 1] = y0; - boxes[i * 4 + 2] = x1; - boxes[i * 4 + 3] = y1; - } -} - -void Polys2MaskWrtBox(const std::vector>& polygons, - const float* box, - int M, - uint8_t* mask) { - float w = box[2] - box[0]; - float h = box[3] - box[1]; - w = std::max(w, static_cast(1.)); - h = std::max(h, static_cast(1.)); - - // short-circuit for case "polygons.size() == 1" - if (polygons.size() == 1UL) { - int k = static_cast(polygons[0].size() / 2); - std::vector p; - for (int j = 0; j < k; ++j) { - float pw = (polygons[0][2 * j] - box[0]) * M / w; // NOLINT - float ph = (polygons[0][2 * j + 1] - box[1]) * M / h; // NOLINT - p.push_back(pw); - p.push_back(ph); - } - Poly2Mask(p.data(), k, M, M, mask); - - return; - } - - uint8_t* msk = reinterpret_cast( - malloc(M * M * polygons.size() * sizeof(uint8_t))); // NOLINT - - for (size_t i = 0; i < polygons.size(); ++i) { - int k = static_cast(polygons[i].size() / 2); - std::vector p; - for (int j = 0; j < k; ++j) { - float pw = (polygons[i][2 * j] - box[0]) * M / w; // NOLINT - float ph = (polygons[i][2 * j + 1] - box[1]) * M / h; // 
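// [Editor's sketch, not part of the original file] Poly2Boxes() above reduces
// each list of polygons to one tight axis-aligned box by taking the min/max
// over all x and y vertices. For a single triangle (hypothetical numbers):
#include <algorithm>
#include <cstdio>
#include <vector>
int main() {
  // One instance with one polygon, stored as (x, y) pairs.
  std::vector<float> poly = {1.f, 2.f, 5.f, 2.f, 3.f, 7.f};
  float x0 = poly[0], y0 = poly[1], x1 = poly[0], y1 = poly[1];
  for (size_t k = 0; k < poly.size() / 2; ++k) {
    x0 = std::min(x0, poly[2 * k]);     x1 = std::max(x1, poly[2 * k]);
    y0 = std::min(y0, poly[2 * k + 1]); y1 = std::max(y1, poly[2 * k + 1]);
  }
  std::printf("[%g, %g, %g, %g]\n", x0, y0, x1, y1);  // [1, 2, 5, 7]
  return 0;
}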
NOLINT - p.push_back(pw); - p.push_back(ph); - } - uint8_t* msk_i = msk + i * M * M; - Poly2Mask(p.data(), k, M, M, msk_i); - } - - for (size_t i = 0; i < polygons.size(); ++i) { - uint8_t* msk_i = msk + i * M * M; - for (int j = 0; j < M * M; ++j) { - if (i == 0) { - mask[j] = msk_i[j]; - } else { - mask[j] = (mask[j] + msk_i[j]) > 0 ? 1 : 0; - } - } - } - free(msk); // NOLINT -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/detection/mask_util.h b/paddle/fluid/operators/detection/mask_util.h deleted file mode 100644 index 587a9c53794de..0000000000000 --- a/paddle/fluid/operators/detection/mask_util.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include - -#include "paddle/utils/test_macros.h" - -namespace paddle { -namespace operators { - -TEST_API void Poly2Mask(const float* ploy, int k, int h, int w, uint8_t* mask); - -TEST_API void Poly2Boxes( - const std::vector>>& polys, float* boxes); - -TEST_API void Polys2MaskWrtBox(const std::vector>& polygons, - const float* box, - int M, - uint8_t* mask); -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc deleted file mode 100644 index f43c7ec644a76..0000000000000 --- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc +++ /dev/null @@ -1,676 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_GE( - ctx->Inputs("BBoxes").size(), - 1UL, - phi::errors::InvalidArgument("The length of Input(BBoxes) should " - "be greater than 0, but received " - "BBoxes length is:%d.", - ctx->Inputs("BBoxes").size())); - PADDLE_ENFORCE_GE( - ctx->Inputs("Scores").size(), - 1UL, - phi::errors::InvalidArgument("The length of Input(Scores) should " - "be greater than 0, but received " - "Scores length is:%d.", - ctx->Inputs("Scores").size())); - PADDLE_ENFORCE_GE( - ctx->Inputs("Anchors").size(), - 1UL, - phi::errors::InvalidArgument("The length of Input(Anchors) should " - "be greater than 0, but received " - "Anchors length is:%d.", - ctx->Inputs("Anchors").size())); - PADDLE_ENFORCE_EQ( - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Scores").size(), - phi::errors::InvalidArgument( - "Input(BBoxes) and Input(Scores) should have the same length, but " - "received BBoxes length is:%d, Scores length is:%d.", - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Scores").size())); - PADDLE_ENFORCE_EQ( - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Anchors").size(), - phi::errors::InvalidArgument( - "Input(BBoxes) and Input(Anchors) should have the same length, but " - "received BBoxes length is:%d, Anchors length is:%d.", - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Anchors").size())); - OP_INOUT_CHECK(ctx->HasInput("ImInfo"), - "Input", - "ImInfo", - "retinanet_detection_output"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "Out", "retinanet_detection_output"); - - auto bboxes_dims = ctx->GetInputsDim("BBoxes"); - auto scores_dims = ctx->GetInputsDim("Scores"); - auto anchors_dims = ctx->GetInputsDim("Anchors"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - - const size_t b_n = bboxes_dims.size(); - PADDLE_ENFORCE_GT( - b_n, - 0, - phi::errors::InvalidArgument("The number of Variables in Input(BBoxes) " - "should be greater than 0, " - "but received number is:%d.", - b_n)); - const size_t s_n = scores_dims.size(); - PADDLE_ENFORCE_GT( - s_n, - 0, - phi::errors::InvalidArgument("The number of Variables in Input(Scores) " - "should be greater than 0, " - "but received number is:%d.", - s_n)); - const size_t a_n = anchors_dims.size(); - PADDLE_ENFORCE_GT(a_n, - 0, - phi::errors::InvalidArgument( - "The number of Variables in Input(Anchors) " - "should be greater than 0, " - "but received number is:%d.", - a_n)); - auto bbox_dims = bboxes_dims[0]; - auto score_dims = scores_dims[0]; - auto anchor_dims = anchors_dims[0]; - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - score_dims.size(), - 3, - phi::errors::InvalidArgument( - "The rank of each Variable in Input(Scores) must be 3, " - "but received rank is:%d.", - score_dims.size())); - PADDLE_ENFORCE_EQ( - bbox_dims.size(), - 3, - phi::errors::InvalidArgument( - "The rank of each Variable in Input(BBoxes) must be 3, " - "but received rank is:%d.", - bbox_dims.size())); - PADDLE_ENFORCE_EQ( - anchor_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of each Variable in Input(Anchors) must be 2, " - "but received rank is:%d.", - anchor_dims.size())); - PADDLE_ENFORCE_EQ( - bbox_dims[2], - 4, - phi::errors::InvalidArgument( - "The last dimension of each Variable in Input(BBoxes) must be 4 " - 
"representing the layout of coordinate [xmin, ymin, xmax, ymax], " - "but received dimension is:%d.", - bbox_dims[2])); - PADDLE_ENFORCE_EQ(bbox_dims[1], - score_dims[1], - phi::errors::InvalidArgument( - "The 2nd dimension of Variables in Input(BBoxes) " - "and Input(Scores) " - "must be same, which represents the number of the " - "predicted boxes, " - "but received BBoxes 2nd dimension is:%d, Scores " - "2nd dimension is:%d.", - bbox_dims[1], - score_dims[1])); - PADDLE_ENFORCE_EQ( - anchor_dims[0], - bbox_dims[1], - phi::errors::InvalidArgument( - "The 1st dimension of each Variables in Input(Anchors) must be " - "equal " - "to the 2nd dimension of corresponding Variables in " - "Input(BBoxes), " - "which represents the number of the predicted boxes, but " - "received " - "Anchors 1st dimension is:%d, BBoxes 2nd dimension is:%d.", - anchor_dims[0], - bbox_dims[1])); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(ImInfo) must be 2, but " - "received ImInfo rank is:%d.", - im_info_dims.size())); - } - // Here the box_dims[0] is not the real dimension of output. - // It will be rewritten in the computing kernel. - ctx->SetOutputDim("Out", {bbox_dims[1], bbox_dims[2] + 2}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto input_data_type = - OperatorWithKernel::IndicateVarDataType(ctx, "Scores"); - return phi::KernelKey(input_data_type, - platform::CPUPlace()); // ctx.GetPlace()); - } -}; - -template -bool SortScorePairDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -template -bool SortScoreTwoPairDescend(const std::pair>& pair1, - const std::pair>& pair2) { - return pair1.first > pair2.first; -} - -template -static inline void GetMaxScoreIndex( - const std::vector& scores, - const T threshold, - int top_k, - std::vector>* sorted_indices) { - for (size_t i = 0; i < scores.size(); ++i) { - if (scores[i] > threshold) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), - sorted_indices->end(), - SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -template -static inline T BBoxArea(const std::vector& box, const bool normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. xmax < xmin or ymax < ymin), return 0. - return static_cast(0.); - } else { - const T w = box[2] - box[0]; - const T h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. - return (w + 1) * (h + 1); - } - } -} - -template -static inline T JaccardOverlap(const std::vector& box1, - const std::vector& box2, - const bool normalized) { - if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || - box2[3] < box1[1]) { - return static_cast(0.); - } else { - const T inter_xmin = std::max(box1[0], box2[0]); - const T inter_ymin = std::max(box1[1], box2[1]); - const T inter_xmax = std::min(box1[2], box2[2]); - const T inter_ymax = std::min(box1[3], box2[3]); - T norm = normalized ? static_cast(0.) 
: static_cast(1.); - T inter_w = inter_xmax - inter_xmin + norm; - T inter_h = inter_ymax - inter_ymin + norm; - const T inter_area = inter_w * inter_h; - const T bbox1_area = BBoxArea(box1, normalized); - const T bbox2_area = BBoxArea(box2, normalized); - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - -template -class RetinanetDetectionOutputKernel : public framework::OpKernel { - public: - void NMSFast(const std::vector>& cls_dets, - const T nms_threshold, - const T eta, - std::vector* selected_indices) const { - int64_t num_boxes = cls_dets.size(); - std::vector> sorted_indices; - for (int64_t i = 0; i < num_boxes; ++i) { - sorted_indices.push_back(std::make_pair(cls_dets[i][4], i)); - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices.begin(), - sorted_indices.end(), - SortScorePairDescend); - selected_indices->clear(); - T adaptive_threshold = nms_threshold; - - while (!sorted_indices.empty()) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (const auto kept_idx : *selected_indices) { - if (keep) { - T overlap = T(0.); - overlap = JaccardOverlap(cls_dets[idx], cls_dets[kept_idx], false); - keep = overlap <= adaptive_threshold; - } else { - break; - } - } - if (keep) { - selected_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } - } - - void DeltaScoreToPrediction( - const std::vector& bboxes_data, - const std::vector& anchors_data, - T im_height, - T im_width, - T im_scale, - int class_num, - const std::vector>& sorted_indices, - std::map>>* preds) const { - im_height = static_cast(round(im_height / im_scale)); - im_width = static_cast(round(im_width / im_scale)); - T zero(0); - int i = 0; - for (const auto& it : sorted_indices) { - T score = it.first; - int idx = it.second; - int a = idx / class_num; - int c = idx % class_num; - - int box_offset = a * 4; - T anchor_box_width = - anchors_data[box_offset + 2] - anchors_data[box_offset] + 1; - T anchor_box_height = - anchors_data[box_offset + 3] - anchors_data[box_offset + 1] + 1; - T anchor_box_center_x = anchors_data[box_offset] + anchor_box_width / 2; - T anchor_box_center_y = - anchors_data[box_offset + 1] + anchor_box_height / 2; - T target_box_center_x = 0, target_box_center_y = 0; - T target_box_width = 0, target_box_height = 0; - target_box_center_x = - bboxes_data[box_offset] * anchor_box_width + anchor_box_center_x; - target_box_center_y = - bboxes_data[box_offset + 1] * anchor_box_height + anchor_box_center_y; - target_box_width = - std::exp(bboxes_data[box_offset + 2]) * anchor_box_width; - target_box_height = - std::exp(bboxes_data[box_offset + 3]) * anchor_box_height; - T pred_box_xmin = target_box_center_x - target_box_width / 2; - T pred_box_ymin = target_box_center_y - target_box_height / 2; - T pred_box_xmax = target_box_center_x + target_box_width / 2 - 1; - T pred_box_ymax = target_box_center_y + target_box_height / 2 - 1; - pred_box_xmin = pred_box_xmin / im_scale; - pred_box_ymin = pred_box_ymin / im_scale; - pred_box_xmax = pred_box_xmax / im_scale; - pred_box_ymax = pred_box_ymax / im_scale; - - pred_box_xmin = std::max(std::min(pred_box_xmin, im_width - 1), zero); - pred_box_ymin = std::max(std::min(pred_box_ymin, im_height - 1), zero); - pred_box_xmax = std::max(std::min(pred_box_xmax, im_width - 1), zero); - pred_box_ymax = std::max(std::min(pred_box_ymax, im_height - 1), zero); - - 
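// [Editor's sketch, not part of the original op] DeltaScoreToPrediction()
// decodes a (tx, ty, tw, th) delta against its anchor: the center moves by
// tx * anchor_w (ty * anchor_h), the size scales by exp(tw) (exp(th)), and
// the corners are then divided by im_scale and clipped to the image, as the
// statements above show. The core arithmetic in isolation (DecodeBox is a
// hypothetical helper):
#include <cmath>
static void DecodeBox(const float anchor[4],  // xmin, ymin, xmax, ymax
                      const float delta[4],   // tx, ty, tw, th
                      float out[4]) {         // decoded xmin, ymin, xmax, ymax
  const float aw = anchor[2] - anchor[0] + 1.0f;  // +1 pixel convention
  const float ah = anchor[3] - anchor[1] + 1.0f;
  const float acx = anchor[0] + aw / 2.0f;
  const float acy = anchor[1] + ah / 2.0f;
  const float cx = delta[0] * aw + acx;
  const float cy = delta[1] * ah + acy;
  const float w = std::exp(delta[2]) * aw;
  const float h = std::exp(delta[3]) * ah;
  out[0] = cx - w / 2.0f;
  out[1] = cy - h / 2.0f;
  out[2] = cx + w / 2.0f - 1.0f;
  out[3] = cy + h / 2.0f - 1.0f;
}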
std::vector one_pred; - one_pred.push_back(pred_box_xmin); - one_pred.push_back(pred_box_ymin); - one_pred.push_back(pred_box_xmax); - one_pred.push_back(pred_box_ymax); - one_pred.push_back(score); - (*preds)[c].push_back(one_pred); - i++; - } - } - - void MultiClassNMS(const std::map>>& preds, - int class_num, - const int keep_top_k, - const T nms_threshold, - const T nms_eta, - std::vector>* nmsed_out, - int* num_nmsed_out) const { - std::map> indices; - int num_det = 0; - for (int c = 0; c < class_num; ++c) { - if (static_cast(preds.count(c))) { - const std::vector> cls_dets = preds.at(c); - NMSFast(cls_dets, nms_threshold, nms_eta, &(indices[c])); - num_det += static_cast(indices[c].size()); - } - } - - std::vector>> score_index_pairs; - for (const auto& it : indices) { - int label = it.first; - const std::vector& label_indices = it.second; - for (auto idx : label_indices) { - score_index_pairs.push_back(std::make_pair(preds.at(label)[idx][4], - std::make_pair(label, idx))); - } - } - // Keep top k results per image. - std::stable_sort(score_index_pairs.begin(), - score_index_pairs.end(), - SortScoreTwoPairDescend); - if (num_det > keep_top_k) { - score_index_pairs.resize(keep_top_k); - } - - // Store the new indices. - std::map> new_indices; - for (const auto& it : score_index_pairs) { - int label = it.second.first; - int idx = it.second.second; - std::vector one_pred; - one_pred.push_back(label); - one_pred.push_back(preds.at(label)[idx][4]); - one_pred.push_back(preds.at(label)[idx][0]); - one_pred.push_back(preds.at(label)[idx][1]); - one_pred.push_back(preds.at(label)[idx][2]); - one_pred.push_back(preds.at(label)[idx][3]); - nmsed_out->push_back(one_pred); - } - - *num_nmsed_out = (num_det > keep_top_k ? keep_top_k : num_det); - } - - void RetinanetDetectionOutput(const framework::ExecutionContext& ctx, - const std::vector& scores, - const std::vector& bboxes, - const std::vector& anchors, - const phi::DenseTensor& im_info, - std::vector>* nmsed_out, - int* num_nmsed_out) const { - int64_t nms_top_k = ctx.Attr("nms_top_k"); - int64_t keep_top_k = ctx.Attr("keep_top_k"); - T nms_threshold = static_cast(ctx.Attr("nms_threshold")); - T nms_eta = static_cast(ctx.Attr("nms_eta")); - T score_threshold = static_cast(ctx.Attr("score_threshold")); - - int64_t class_num = scores[0].dims()[1]; - std::map>> preds; - for (size_t l = 0; l < scores.size(); ++l) { - // Fetch per level score - phi::DenseTensor scores_per_level = scores[l]; - // Fetch per level bbox - phi::DenseTensor bboxes_per_level = bboxes[l]; - // Fetch per level anchor - phi::DenseTensor anchors_per_level = anchors[l]; - - int64_t scores_num = scores_per_level.numel(); - int64_t bboxes_num = bboxes_per_level.numel(); - std::vector scores_data(scores_num); - std::vector bboxes_data(bboxes_num); - std::vector anchors_data(bboxes_num); - std::copy_n(scores_per_level.data(), scores_num, scores_data.begin()); - std::copy_n(bboxes_per_level.data(), bboxes_num, bboxes_data.begin()); - std::copy_n( - anchors_per_level.data(), bboxes_num, anchors_data.begin()); - std::vector> sorted_indices; - - // For the highest level, we take the threshold 0.0 - T threshold = (l < (scores.size() - 1) ? 
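// [Editor's sketch, not part of the original op] NMSFast() above is greedy
// NMS with an adaptive threshold: candidates are visited in descending score
// order, a box is kept only if its IoU with every already-kept box stays at
// or below the current threshold, and when nms_eta < 1 the threshold decays
// (threshold *= eta) once it exceeds 0.5; MultiClassNMS() then merges the
// per-class survivors and keeps the keep_top_k best overall. Skeleton of the
// greedy loop (GreedyNms and iou are illustrative, not Paddle APIs):
#include <vector>
static std::vector<int> GreedyNms(int num_boxes, float nms_threshold,
                                  float eta, float (*iou)(int, int)) {
  // Assume indices 0..num_boxes-1 are already sorted by descending score.
  std::vector<int> kept;
  float adaptive = nms_threshold;
  for (int i = 0; i < num_boxes; ++i) {
    bool keep = true;
    for (int j : kept) {
      if (iou(i, j) > adaptive) { keep = false; break; }
    }
    if (keep) {
      kept.push_back(i);
      if (eta < 1.0f && adaptive > 0.5f) adaptive *= eta;
    }
  }
  return kept;
}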
score_threshold : 0.0);
-      GetMaxScoreIndex(scores_data, threshold, nms_top_k, &sorted_indices);
-      auto* im_info_data = im_info.data<T>();
-      auto im_height = im_info_data[0];
-      auto im_width = im_info_data[1];
-      auto im_scale = im_info_data[2];
-      DeltaScoreToPrediction(bboxes_data,
-                             anchors_data,
-                             im_height,
-                             im_width,
-                             im_scale,
-                             class_num,
-                             sorted_indices,
-                             &preds);
-    }
-
-    MultiClassNMS(preds,
-                  class_num,
-                  keep_top_k,
-                  nms_threshold,
-                  nms_eta,
-                  nmsed_out,
-                  num_nmsed_out);
-  }
-
-  void MultiClassOutput(const platform::DeviceContext& ctx,
-                        const std::vector<std::vector<T>>& nmsed_out,
-                        phi::DenseTensor* outs) const {
-    auto* odata = outs->data<T>();
-    int count = 0;
-    int64_t out_dim = 6;
-    for (size_t i = 0; i < nmsed_out.size(); ++i) {
-      odata[count * out_dim] = nmsed_out[i][0] + 1;  // label
-      odata[count * out_dim + 1] = nmsed_out[i][1];  // score
-      odata[count * out_dim + 2] = nmsed_out[i][2];  // xmin
-      odata[count * out_dim + 3] = nmsed_out[i][3];  // ymin
-      odata[count * out_dim + 4] = nmsed_out[i][4];  // xmax
-      odata[count * out_dim + 5] = nmsed_out[i][5];  // ymax
-      count++;
-    }
-  }
-
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto boxes = ctx.MultiInput<phi::DenseTensor>("BBoxes");
-    auto scores = ctx.MultiInput<phi::DenseTensor>("Scores");
-    auto anchors = ctx.MultiInput<phi::DenseTensor>("Anchors");
-    auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
-    auto* outs = ctx.Output<phi::DenseTensor>("Out");
-
-    std::vector<phi::DenseTensor> boxes_list(boxes.size());
-    std::vector<phi::DenseTensor> scores_list(scores.size());
-    std::vector<phi::DenseTensor> anchors_list(anchors.size());
-    for (size_t j = 0; j < boxes_list.size(); ++j) {
-      boxes_list[j] = *boxes[j];
-      scores_list[j] = *scores[j];
-      anchors_list[j] = *anchors[j];
-    }
-    auto score_dims = scores_list[0].dims();
-    int64_t batch_size = score_dims[0];
-    auto box_dims = boxes_list[0].dims();
-    int64_t box_dim = box_dims[2];
-    int64_t out_dim = box_dim + 2;
-
-    auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
-
-    std::vector<std::vector<std::vector<T>>> all_nmsed_out;
-    std::vector<size_t> batch_starts = {0};
-    for (int i = 0; i < batch_size; ++i) {
-      int num_nmsed_out = 0;
-      std::vector<phi::DenseTensor> box_per_batch_list(boxes_list.size());
-      std::vector<phi::DenseTensor> score_per_batch_list(scores_list.size());
-      for (size_t j = 0; j < boxes_list.size(); ++j) {
-        const auto& score_dims = scores_list[j].dims();
-        score_per_batch_list[j] = scores_list[j].Slice(i, i + 1);
-        score_per_batch_list[j].Resize({score_dims[1], score_dims[2]});
-        box_per_batch_list[j] = boxes_list[j].Slice(i, i + 1);
-        box_per_batch_list[j].Resize({score_dims[1], box_dim});
-      }
-      phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
-
-      std::vector<std::vector<T>> nmsed_out;
-      RetinanetDetectionOutput(ctx,
-                               score_per_batch_list,
-                               box_per_batch_list,
-                               anchors_list,
-                               im_info_slice,
-                               &nmsed_out,
-                               &num_nmsed_out);
-      all_nmsed_out.push_back(nmsed_out);
-      batch_starts.push_back(batch_starts.back() + num_nmsed_out);
-    }
-
-    int num_kept = static_cast<int>(batch_starts.back());
-    if (num_kept == 0) {
-      outs->Resize({0, out_dim});
-    } else {
-      outs->mutable_data<T>({num_kept, out_dim}, ctx.GetPlace());
-      for (int i = 0; i < batch_size; ++i) {
-        int64_t s = static_cast<int64_t>(batch_starts[i]);
-        int64_t e = static_cast<int64_t>(batch_starts[i + 1]);
-        if (e > s) {
-          phi::DenseTensor out = outs->Slice(s, e);
-          MultiClassOutput(dev_ctx, all_nmsed_out[i], &out);
-        }
-      }
-    }
-
-    framework::LoD lod;
-    lod.emplace_back(batch_starts);
-
-    outs->set_lod(lod);
-  }
-};
-
-class RetinanetDetectionOutputOpMaker
-    : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("BBoxes",
-             "(List) A list of tensors from multiple FPN levels. "
Each " - "element is a 3-D phi::DenseTensor with shape [N, Mi, 4] " - "represents the " - "predicted locations of Mi bounding boxes, N is the batch size. " - "Mi is the number of bounding boxes from i-th FPN level. Each " - "bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax].") - .AsDuplicable(); - AddInput("Scores", - "(List) A list of tensors from multiple FPN levels. Each " - "element is a 3-D phi::DenseTensor with shape [N, Mi, C] " - "represents the " - "predicted confidence from its FPN level. N is the batch size, " - "C is the class number (excluding background), Mi is the number " - "of bounding boxes from i-th FPN level. For each bounding box, " - "there are total C scores.") - .AsDuplicable(); - AddInput( - "Anchors", - "(List) A list of tensors from multiple FPN levels. Each" - "element is a 2-D phi::DenseTensor with shape [Mi, 4] represents the " - "locations of Mi anchor boxes from i-th FPN level. Each " - "bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax].") - .AsDuplicable(); - AddInput("ImInfo", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] " - "represents the " - "image information. N is the batch size, each image information " - "includes height, width and scale."); - AddAttr("score_threshold", - "(float) " - "Threshold to filter out bounding boxes with a confidence " - "score."); - AddAttr("nms_top_k", - "(int64_t) " - "Maximum number of detections per FPN layer to be kept " - "according to the confidence before NMS."); - AddAttr("nms_threshold", - "(float) " - "The threshold to be used in NMS."); - AddAttr("nms_eta", - "(float) " - "The parameter for adaptive NMS."); - AddAttr( - "keep_top_k", - "(int64_t) " - "Number of total bounding boxes to be kept per image after NMS " - "step."); - AddOutput("Out", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] " - "represents the " - "detections. Each row has 6 values: " - "[label, confidence, xmin, ymin, xmax, ymax]" - "No is the total number of detections in this mini-batch." - "For each instance, " - "the offsets in first dimension are called LoD, the number of " - "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " - "no detected bbox."); - AddComment(R"DOC( -This operator is to decode boxes and scores from each FPN layer and do -multi-class non maximum suppression (NMS) on merged predictions. - -Top-scoring predictions per FPN layer are decoded with the anchor -information. This operator greedily selects a subset of detection bounding -boxes from each FPN layer that have high scores larger than score_threshold, -if providing this threshold, then selects the largest nms_top_k confidences -scores per FPN layer, if nms_top_k is larger than -1. -The decoding schema is described below: - -ox = (pw * pxv * tx * + px) - tw / 2 - -oy = (ph * pyv * ty * + py) - th / 2 - -ow = exp(pwv * tw) * pw + tw / 2 - -oh = exp(phv * th) * ph + th / 2 - -where `tx`, `ty`, `tw`, `th` denote the predicted box's center coordinates, width -and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the -anchor's center coordinates, width and height. `pxv`, `pyv`, `pwv`, -`phv` denote the variance of the anchor box and `ox`, `oy`, `ow`, `oh` denote the -decoded coordinates, width and height. - -Then the top decoded prediction from all levels are merged followed by NMS. 
-In the NMS step, this operator prunes away boxes that have a high IoU
-(intersection over union) with already selected boxes, using adaptive
-threshold NMS based on the nms_threshold and nms_eta parameters.
-After the NMS step, at most keep_top_k bounding boxes in total are kept
-per image if keep_top_k is larger than -1.
-This operator supports multi-class and batched inputs. It applies NMS
-independently for each class. The output is a 2-D LoDTensor; for each
-image, the offsets in the first dimension of the phi::DenseTensor are called
-LoD, and the number of offsets is N + 1, where N is the batch size. If
-LoD[i + 1] - LoD[i] == 0, there is no detected bounding box for that image.
-If no boxes are detected for any image, all the elements in LoD are set to 0,
-and the output tensor is empty (None).
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    retinanet_detection_output,
-    ops::RetinanetDetectionOutputOp,
-    ops::RetinanetDetectionOutputOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-PD_REGISTER_STRUCT_KERNEL(retinanet_detection_output,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::RetinanetDetectionOutputKernel,
-                          float,
-                          double) {}
diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
deleted file mode 100644
index d3c315b7bdfc5..0000000000000
--- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc
+++ /dev/null
@@ -1,1262 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -using EigenMatrix = framework::EigenMatrix; - -class RpnTargetAssignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Anchor"), "Input", "Anchor", "rpn_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("GtBoxes"), "Input", "GtBoxes", "rpn_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("IsCrowd"), "Input", "IsCrowd", "rpn_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("ImInfo"), "Input", "ImInfo", "rpn_target_assign"); - - OP_INOUT_CHECK(ctx->HasOutput("LocationIndex"), - "Output", - "LocationIndex", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("ScoreIndex"), - "Output", - "ScoreIndex", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetLabel"), - "Output", - "TargetLabel", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetBBox"), - "Output", - "TargetBBox", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("BBoxInsideWeight"), - "Output", - "BBoxInsideWeight", - "rpn_target_assign"); - - auto anchor_dims = ctx->GetInputDim("Anchor"); - auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - PADDLE_ENFORCE_EQ(anchor_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(Anchor) must be 2. But " - "received dimensions size=[%d], dimensions=[%s].", - anchor_dims.size(), - anchor_dims)); - PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(GtBoxes) must be 2. " - "But received dimensions size=[%d], dimensions=[%s].", - gt_boxes_dims.size(), - gt_boxes_dims)); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(ImInfo) must be 2. 
But " - "received dimensions size=[%d], dimensions=[%s].", - im_info_dims.size(), - im_info_dims)); - - ctx->SetOutputDim("LocationIndex", {-1}); - ctx->SetOutputDim("ScoreIndex", {-1}); - ctx->SetOutputDim("TargetLabel", {-1, 1}); - ctx->SetOutputDim("TargetBBox", {-1, 4}); - ctx->SetOutputDim("BBoxInsideWeight", {-1, 4}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Anchor"), - platform::CPUPlace()); - } -}; - -template -void AppendRpns(phi::DenseTensor* out, - int64_t offset, - phi::DenseTensor* to_add) { - auto* out_data = out->data(); - auto* to_add_data = to_add->data(); - memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); -} - -template -std::vector FilterStraddleAnchor( - const phi::CPUContext& context, - const phi::DenseTensor* anchor, - const float rpn_straddle_thresh, - T im_height, - T im_width) { - std::vector inds_inside; - int anchor_num = static_cast(anchor->dims()[0]); - auto* anchor_data = anchor->data(); - if (rpn_straddle_thresh >= 0) { - int index = 0; - for (int i = 0; i < anchor_num; ++i) { - index = i * 4; - if ((anchor_data[index + 0] >= -rpn_straddle_thresh) && - (anchor_data[index + 1] >= -rpn_straddle_thresh) && - (anchor_data[index + 2] < im_width + rpn_straddle_thresh) && - (anchor_data[index + 3] < im_height + rpn_straddle_thresh)) { - inds_inside.emplace_back(i); - } - } - } else { - for (int i = 0; i < anchor_num; ++i) { - inds_inside.emplace_back(i); - } - } - int inside_num = static_cast(inds_inside.size()); - phi::DenseTensor inds_inside_t; - int* inds_inside_data = - inds_inside_t.mutable_data({inside_num}, context.GetPlace()); - std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data); - phi::DenseTensor inside_anchor_t; - T* inside_anchor_data = - inside_anchor_t.mutable_data({inside_num, 4}, context.GetPlace()); - Gather( - anchor->data(), 4, inds_inside_data, inside_num, inside_anchor_data); - std::vector res; - res.emplace_back(inds_inside_t); - res.emplace_back(inside_anchor_t); - return res; -} - -template -phi::DenseTensor FilterCrowdGt(const phi::CPUContext& context, - phi::DenseTensor* gt_boxes, - phi::DenseTensor* is_crowd) { - int gt_num = static_cast(gt_boxes->dims()[0]); - std::vector not_crowd_inds; - auto* is_crowd_data = is_crowd->data(); - for (int i = 0; i < gt_num; ++i) { - if (is_crowd_data[i] == 0) { - not_crowd_inds.emplace_back(i); - } - } - int ncrowd_num = static_cast(not_crowd_inds.size()); - phi::DenseTensor ncrowd_gt_boxes; - T* ncrowd_gt_boxes_data = - ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); - Gather(gt_boxes->data(), - 4, - not_crowd_inds.data(), - ncrowd_num, - ncrowd_gt_boxes_data); - return ncrowd_gt_boxes; -} - -void ReservoirSampling(const int num, - std::vector* inds, - std::minstd_rand engine, - bool use_random) { - std::uniform_real_distribution uniform(0, 1); - int len = static_cast(inds->size()); - if (len > num) { - if (use_random) { - for (int i = num; i < len; ++i) { - int rng_ind = std::floor(uniform(engine) * i); // NOLINT - if (rng_ind < num) - std::iter_swap(inds->begin() + rng_ind, inds->begin() + i); - } - } - inds->resize(num); - } -} - -template -void ScoreAssign(const T* anchor_by_gt_overlap_data, - const phi::DenseTensor& anchor_to_gt_max, - const phi::DenseTensor& gt_to_anchor_max, - const int rpn_batch_size_per_im, - const float rpn_fg_fraction, - const float rpn_positive_overlap, - const float 
rpn_negative_overlap, - std::vector* fg_inds, - std::vector* bg_inds, - std::vector* tgt_lbl, - std::vector* fg_fake, - std::vector* bbox_inside_weight, - std::minstd_rand engine, - bool use_random) { - float epsilon = 0.00001; - int anchor_num = static_cast(anchor_to_gt_max.dims()[0]); - int gt_num = static_cast(gt_to_anchor_max.dims()[0]); - std::vector target_label(anchor_num, -1); - std::vector fg_inds_fake; - std::vector bg_inds_fake; - const T* anchor_to_gt_max_data = anchor_to_gt_max.data(); - const T* gt_to_anchor_max_data = gt_to_anchor_max.data(); - // TODO(buxingyuan): Match with Detectron now - // but it seems here is a bug in two directions assignment - // in which the later one may overwrites the former one. - for (int64_t i = 0; i < anchor_num; ++i) { - bool is_anchors_with_max_overlap = false; - for (int64_t j = 0; j < gt_num; ++j) { - T value = anchor_by_gt_overlap_data[i * gt_num + j]; - T diff = std::abs(value - gt_to_anchor_max_data[j]); - if (diff < epsilon) { - is_anchors_with_max_overlap = true; - break; - } - } - bool is_anchor_great_than_thresh = - (anchor_to_gt_max_data[i] >= rpn_positive_overlap); - if (is_anchors_with_max_overlap || is_anchor_great_than_thresh) { - fg_inds_fake.push_back(i); // NOLINT - } - } - - // Reservoir Sampling - int fg_num = 0; - if (rpn_fg_fraction > 0 && rpn_batch_size_per_im > 0) { - fg_num = - static_cast(rpn_fg_fraction * rpn_batch_size_per_im); // NOLINT - ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random); - } else { - fg_num = static_cast(fg_inds_fake.size()); - } - int fg_fake_num = static_cast(fg_inds_fake.size()); - for (int64_t i = 0; i < fg_fake_num; ++i) { - target_label[fg_inds_fake[i]] = 1; - } - - for (int64_t i = 0; i < anchor_num; ++i) { - if (anchor_to_gt_max_data[i] < rpn_negative_overlap) { - bg_inds_fake.push_back(i); // NOLINT - } - } - int bg_num = 0; - if (rpn_fg_fraction > 0 && rpn_batch_size_per_im > 0) { - bg_num = rpn_batch_size_per_im - fg_fake_num; - ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random); - bg_num = static_cast(bg_inds_fake.size()); - } else { - bg_num = static_cast(bg_inds_fake.size()); - } - - int fake_num = 0; - for (int64_t i = 0; i < bg_num; ++i) { - // fg fake found - if (target_label[bg_inds_fake[i]] == 1) { - fake_num++; - fg_fake->emplace_back(fg_inds_fake[0]); - for (int j = 0; j < 4; ++j) { - bbox_inside_weight->emplace_back(T(0.)); - } - } - target_label[bg_inds_fake[i]] = 0; - } - - for (int64_t i = 0; i < (fg_fake_num - fake_num) * 4; ++i) { - bbox_inside_weight->emplace_back(T(1.)); - } - - for (int64_t i = 0; i < anchor_num; ++i) { - if (target_label[i] == 1) { - fg_inds->emplace_back(i); - fg_fake->emplace_back(i); - } - if (target_label[i] == 0) bg_inds->emplace_back(i); - } - fg_num = static_cast(fg_inds->size()); - bg_num = static_cast(bg_inds->size()); - - tgt_lbl->resize(fg_num + bg_num, 0); - std::vector fg_lbl(fg_num, 1); - std::vector bg_lbl(bg_num, 0); - std::copy(fg_lbl.begin(), fg_lbl.end(), tgt_lbl->data()); - std::copy(bg_lbl.begin(), bg_lbl.end(), tgt_lbl->data() + fg_num); -} - -template -std::vector SampleRpnFgBgGt( - const phi::CPUContext& ctx, - const phi::DenseTensor& anchor_by_gt_overlap, - const int rpn_batch_size_per_im, - const float rpn_positive_overlap, - const float rpn_negative_overlap, - const float rpn_fg_fraction, - std::minstd_rand engine, - bool use_random) { - auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); - int anchor_num = static_cast(anchor_by_gt_overlap.dims()[0]); - int gt_num = 
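// [Editor's sketch, not part of the original op] ScoreAssign() above labels
// an anchor positive if it either (a) attains the per-gt maximum overlap
// (compared within an epsilon) or (b) has a max IoU over all gt boxes of at
// least rpn_positive_overlap; it becomes a background candidate when that max
// IoU falls below rpn_negative_overlap. The predicate in isolation
// (LabelAnchor is a hypothetical helper):
static int LabelAnchor(float max_iou_over_gts, bool is_argmax_for_some_gt,
                       float pos_overlap, float neg_overlap) {
  if (is_argmax_for_some_gt || max_iou_over_gts >= pos_overlap) return 1;
  if (max_iou_over_gts < neg_overlap) return 0;
  return -1;  // neither positive nor negative: ignored by the loss
}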
static_cast(anchor_by_gt_overlap.dims()[1]); - - std::vector fg_inds; - std::vector bg_inds; - std::vector gt_inds; - std::vector tgt_lbl; - std::vector fg_fake; - std::vector bbox_inside_weight; - // Calculate the max IoU between anchors and gt boxes - // Map from anchor to gt box that has highest overlap - auto place = ctx.GetPlace(); - phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; - anchor_to_gt_max.mutable_data({anchor_num}, place); - int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); - gt_to_anchor_max.mutable_data({gt_num}, place); - - auto anchor_by_gt_overlap_et = - framework::EigenMatrix::From(anchor_by_gt_overlap); - auto anchor_to_gt_max_et = - framework::EigenVector::Flatten(anchor_to_gt_max); - auto gt_to_anchor_max_et = - framework::EigenVector::Flatten(gt_to_anchor_max); - auto anchor_to_gt_argmax_et = - framework::EigenVector::Flatten(anchor_to_gt_argmax); - anchor_to_gt_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(1)); - anchor_to_gt_argmax_et = - anchor_by_gt_overlap_et.argmax(1).template cast(); - gt_to_anchor_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(0)); - - // Follow the Faster RCNN's implementation - ScoreAssign(anchor_by_gt_overlap_data, - anchor_to_gt_max, - gt_to_anchor_max, - rpn_batch_size_per_im, - rpn_fg_fraction, - rpn_positive_overlap, - rpn_negative_overlap, - &fg_inds, - &bg_inds, - &tgt_lbl, - &fg_fake, - &bbox_inside_weight, - engine, - use_random); - - int fg_num = static_cast(fg_inds.size()); - int bg_num = static_cast(bg_inds.size()); - int fg_fake_num = static_cast(fg_fake.size()); - gt_inds.reserve(fg_fake_num); - for (int i = 0; i < fg_fake_num; ++i) { - gt_inds.emplace_back(argmax[fg_fake[i]]); - } - phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, - bbox_inside_weight_t; - int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); - int* score_index_data = - score_index_t.mutable_data({fg_num + bg_num}, place); - int* tgt_lbl_data = tgt_lbl_t.mutable_data({fg_num + bg_num}, place); - int* gt_inds_data = gt_inds_t.mutable_data({fg_fake_num}, place); - T* bbox_inside_weight_data = - bbox_inside_weight_t.mutable_data({fg_fake_num, 4}, place); - std::copy(fg_fake.begin(), fg_fake.end(), loc_index_data); - std::copy(fg_inds.begin(), fg_inds.end(), score_index_data); - std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num); - std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data); - std::copy(bbox_inside_weight.begin(), - bbox_inside_weight.end(), - bbox_inside_weight_data); - std::vector loc_score_tgtlbl_gt; - loc_score_tgtlbl_gt.emplace_back(loc_index_t); - loc_score_tgtlbl_gt.emplace_back(score_index_t); - loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); - loc_score_tgtlbl_gt.emplace_back(gt_inds_t); - loc_score_tgtlbl_gt.emplace_back(bbox_inside_weight_t); - - return loc_score_tgtlbl_gt; -} - -template -class RpnTargetAssignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 - auto* gt_boxes = context.Input("GtBoxes"); - auto* is_crowd = context.Input("IsCrowd"); - auto* im_info = context.Input("ImInfo"); - - auto* loc_index = context.Output("LocationIndex"); - auto* score_index = context.Output("ScoreIndex"); - auto* tgt_bbox = context.Output("TargetBBox"); - auto* tgt_lbl = context.Output("TargetLabel"); - auto* bbox_inside_weight = - 
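// [Editor's sketch, not part of the original op] SampleRpnFgBgGt() above uses
// Eigen reductions over the [anchor_num, gt_num] overlap matrix: a per-anchor
// max and argmax (which gt each anchor matches best) and a per-gt max (each
// gt's best anchor overlap). Plain-loop equivalent, assuming non-negative
// overlaps (OverlapReductions is a hypothetical helper):
#include <vector>
static void OverlapReductions(const std::vector<float>& m,  // anchor-major
                              int anchor_num, int gt_num,
                              std::vector<float>* a2g_max,
                              std::vector<int>* a2g_argmax,
                              std::vector<float>* g2a_max) {
  a2g_max->assign(anchor_num, 0.0f);
  a2g_argmax->assign(anchor_num, 0);
  g2a_max->assign(gt_num, 0.0f);
  for (int i = 0; i < anchor_num; ++i) {
    for (int j = 0; j < gt_num; ++j) {
      const float v = m[i * gt_num + j];
      if (v > (*a2g_max)[i]) { (*a2g_max)[i] = v; (*a2g_argmax)[i] = j; }
      if (v > (*g2a_max)[j]) (*g2a_max)[j] = v;
    }
  }
}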
context.Output("BBoxInsideWeight"); - - PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "RpnTargetAssignOp gt_boxes needs 1 level of LoD. " - "But received level of LoD is [%d], LoD is [%s].", - gt_boxes->lod().size(), - gt_boxes->lod())); - PADDLE_ENFORCE_EQ(is_crowd->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "RpnTargetAssignOp is_crowd needs 1 level of LoD. " - "But received level of LoD is [%d], LoD is [%s].", - is_crowd->lod().size(), - is_crowd->lod())); - int64_t anchor_num = static_cast(anchor->dims()[0]); - int64_t batch_num = static_cast(gt_boxes->lod().back().size() - 1); - - int rpn_batch_size_per_im = context.Attr("rpn_batch_size_per_im"); - float rpn_straddle_thresh = context.Attr("rpn_straddle_thresh"); - float rpn_positive_overlap = context.Attr("rpn_positive_overlap"); - float rpn_negative_overlap = context.Attr("rpn_negative_overlap"); - float rpn_fg_fraction = context.Attr("rpn_fg_fraction"); - bool use_random = context.Attr("use_random"); - - int64_t max_num = batch_num * rpn_batch_size_per_im; - auto place = context.GetPlace(); - - loc_index->mutable_data({max_num}, place); - score_index->mutable_data({max_num}, place); - tgt_bbox->mutable_data({max_num, 4}, place); - tgt_lbl->mutable_data({max_num, 1}, place); - bbox_inside_weight->mutable_data({max_num, 4}, place); - auto& dev_ctx = context.device_context(); - - std::random_device rnd; - std::minstd_rand engine; - int seed = static_cast(rnd()); - engine.seed(seed); - - framework::LoD lod_loc, loc_score; - std::vector lod0_loc(1, 0); - std::vector lod0_score(1, 0); - - int total_loc_num = 0; - int total_score_num = 0; - auto gt_boxes_lod = gt_boxes->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - for (int i = 0; i < batch_num; ++i) { - phi::DenseTensor gt_boxes_slice = - gt_boxes->Slice(static_cast(gt_boxes_lod[i]), - static_cast(gt_boxes_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - auto* im_info_data = im_info_slice.data(); - auto im_height = im_info_data[0]; - auto im_width = im_info_data[1]; - auto im_scale = im_info_data[2]; - - // Filter straddle anchor - std::vector filter_output = FilterStraddleAnchor( - dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width); - phi::DenseTensor inds_inside = filter_output[0]; - phi::DenseTensor inside_anchor = filter_output[1]; - - // Filter crowd gt - phi::DenseTensor ncrowd_gt_boxes = - FilterCrowdGt(dev_ctx, >_boxes_slice, &is_crowd_slice); - auto ncrowd_gt_boxes_et = - framework::EigenTensor::From(ncrowd_gt_boxes); - ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; - - phi::DenseTensor anchor_by_gt_overlap; - anchor_by_gt_overlap.mutable_data( - {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); - BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); - - auto loc_score_tgtlbl_gt = SampleRpnFgBgGt(dev_ctx, - anchor_by_gt_overlap, - rpn_batch_size_per_im, - rpn_positive_overlap, - rpn_negative_overlap, - rpn_fg_fraction, - engine, - use_random); - - phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0]; - phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1]; - phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; - phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3]; - phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; - - int loc_num = static_cast(sampled_loc_index.dims()[0]); - int 
score_num = static_cast(sampled_score_index.dims()[0]); - // unmap to all anchor - phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap; - sampled_loc_index_unmap.mutable_data({loc_num}, place); - sampled_score_index_unmap.mutable_data({score_num}, place); - Gather(inds_inside.data(), - 1, - sampled_loc_index.data(), - loc_num, - sampled_loc_index_unmap.data()); - Gather(inds_inside.data(), - 1, - sampled_score_index.data(), - score_num, - sampled_score_index_unmap.data()); - - // get target bbox deltas - phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox; - auto* sampled_anchor_data = - sampled_anchor.mutable_data({loc_num, 4}, place); - auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); - Gather(anchor->data(), - 4, - sampled_loc_index_unmap.data(), - loc_num, - sampled_anchor_data); - Gather(ncrowd_gt_boxes.data(), - 4, - sampled_gt_index.data(), - loc_num, - sampled_gt_data); - sampled_tgt_bbox.mutable_data({loc_num, 4}, place); - BoxToDelta(loc_num, - sampled_anchor, - sampled_gt, - nullptr, - false, - &sampled_tgt_bbox); - - // Add anchor offset - int anchor_offset = static_cast(i * anchor_num); - auto sampled_loc_index_unmap_et = - framework::EigenTensor::From(sampled_loc_index_unmap); - sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset; - auto sampled_score_index_unmap_et = - framework::EigenTensor::From(sampled_score_index_unmap); - sampled_score_index_unmap_et = - sampled_score_index_unmap_et + anchor_offset; - AppendRpns(loc_index, total_loc_num, &sampled_loc_index_unmap); - AppendRpns(score_index, total_score_num, &sampled_score_index_unmap); - AppendRpns(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox); - AppendRpns(tgt_lbl, total_score_num, &sampled_tgtlbl); - AppendRpns( - bbox_inside_weight, total_loc_num * 4, &sampled_bbox_inside_weight); - total_loc_num += loc_num; - - total_score_num += score_num; - lod0_loc.emplace_back(total_loc_num); - lod0_score.emplace_back(total_score_num); - } - - PADDLE_ENFORCE_LE( - total_loc_num, - max_num, - phi::errors::InvalidArgument( - "The number of sampled bboxes should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "bboxes is :%d.", - max_num, - total_loc_num)); - PADDLE_ENFORCE_LE( - total_score_num, - max_num, - phi::errors::InvalidArgument( - "The number of sampled scores should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "scores is :%d.", - max_num, - total_score_num)); - - lod_loc.emplace_back(lod0_loc); - loc_score.emplace_back(lod0_score); - loc_index->set_lod(lod_loc); - score_index->set_lod(loc_score); - tgt_bbox->set_lod(lod_loc); - tgt_lbl->set_lod(loc_score); - bbox_inside_weight->set_lod(lod_loc); - loc_index->Resize({total_loc_num}); - score_index->Resize({total_score_num}); - tgt_bbox->Resize({total_loc_num, 4}); - tgt_lbl->Resize({total_score_num, 1}); - bbox_inside_weight->Resize({total_loc_num, 4}); - } -}; - -class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Anchor", - "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."); - AddInput("GtBoxes", - "(phi::DenseTensor) input ground-truth bbox with shape [K, 4]."); - AddInput("IsCrowd", - "(phi::DenseTensor) input which indicates ground-truth is crowd."); - AddInput("ImInfo", - "(phi::DenseTensor) input image information with shape [N, 3]. 
" - "N is the batch size, each image information includes height, " - "width and scale."); - AddAttr("rpn_batch_size_per_im", - "Total number of RPN examples per image.") - .SetDefault(256); - AddAttr( - "rpn_straddle_thresh", - "Remove RPN anchors that go outside the image by straddle_thresh " - "pixels, " - "Set to -1 or a large value, e.g. 100000, to disable pruning anchors."); - AddAttr( - "rpn_positive_overlap", - "Minimum overlap required between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a positive example.") - .SetDefault(0.7); - AddAttr( - "rpn_negative_overlap", - "Maximum overlap allowed between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a negative examples.") - .SetDefault(0.3); - AddAttr( - "rpn_fg_fraction", - "Target fraction of RoI minibatch that " - "is labeled foreground (i.e. class > 0), 0-th class is background.") - .SetDefault(0.25); - AddAttr("use_random", - "A flag indicating whether to use a ReservoirSampling. " - "NOTE: DO NOT set this flag to false in training. " - "Setting this flag to false is only useful in unittest.") - .SetDefault(true); - AddOutput( - "LocationIndex", - "(Tensor), The indexes of foreground anchors in all RPN anchors, the " - "shape of the LocationIndex is [F], F depends on the value of input " - "tensor and attributes."); - AddOutput( - "ScoreIndex", - "(Tensor), The indexes of foreground and background anchors in all " - "RPN anchors(The rest anchors are ignored). The shape of the " - "ScoreIndex is [F + B], F and B are sampled foreground and background " - " number."); - AddOutput("TargetBBox", - "(Tensor), The target bbox deltas with shape " - "[F, 4], F is the sampled foreground number."); - AddOutput( - "TargetLabel", - "(Tensor), The target labels of each anchor with shape " - "[F + B, 1], F and B are sampled foreground and background number."); - AddOutput("BBoxInsideWeight", - "(Tensor), The bbox inside weight with shape " - "[F, 4], F is the sampled foreground number."); - AddComment(R"DOC( -This operator can be, for a given set of ground truth bboxes and the -anchors, to assign classification and regression targets to each prediction. -The ScoreIndex and LocationIndex will be generated according to the anchor-groundtruth IOU. -The rest anchors would not contibute to the RPN training loss - -ScoreIndex is composed of foreground anchor indexes(positive labels) and -background anchor indexes(negative labels). LocationIndex is exactly same -as the foreground anchor indexes since we can not assign regression target to -the background anchors. - -The classification targets(TargetLabel) is a binary class label (of being -an object or not). Following the paper of Faster-RCNN, the positive labels -are two kinds of anchors: (i) the anchor/anchors with the highest IoU -overlap with a ground-truth box, or (ii) an anchor that has an IoU overlap -higher than rpn_positive_overlap(0.7) with any ground-truth box. Note that -a single ground-truth box may assign positive labels to multiple anchors. -A non-positive anchor is when its IoU ratio is lower than rpn_negative_overlap -(0.3) for all ground-truth boxes. Anchors that are neither positive nor -negative do not contribute to the training objective. 
- -)DOC"); - } -}; - -class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Anchor", - "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."); - AddInput("GtBoxes", - "(phi::DenseTensor) input ground-truth bbox with shape [K, 4]."); - AddInput("GtLabels", - "(phi::DenseTensor) input ground-truth label with shape [K, 1]."); - AddInput("IsCrowd", - "(phi::DenseTensor) input which indicates ground-truth is crowd."); - AddInput("ImInfo", - "(phi::DenseTensor) input image information with shape [N, 3]. " - "N is the batch size, each image information includes height, " - "width and scale."); - AddAttr( - "positive_overlap", - "Minimum overlap required between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a positive example.") - .SetDefault(0.5); - AddAttr( - "negative_overlap", - "Maximum overlap allowed between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a negative examples.") - .SetDefault(0.4); - AddOutput( - "LocationIndex", - "(Tensor), The indexes of foreground anchors in all anchors, the " - "shape of the LocationIndex is [F], F depends on the value of input " - "tensor and attributes."); - AddOutput( - "ScoreIndex", - "(Tensor), The indexes of foreground and background anchors in all " - "RPN anchors(The rest anchors are ignored). The shape of the " - "ScoreIndex is [F + B], F and B are foreground and background " - " number."); - AddOutput("TargetBBox", - "(Tensor), The target bbox deltas with shape " - "[F, 4], F is the foreground number."); - AddOutput("TargetLabel", - "(Tensor), The target labels of each anchor with shape " - "[F + B, 1], F and B are foreground and background number."); - AddOutput("BBoxInsideWeight", - "(Tensor), The bbox inside weight with shape " - "[F, 4], F is the foreground number."); - AddOutput("ForegroundNumber", - "(Tensor), The foreground number. " - "[1, 1]."); - AddComment(R"DOC( - This layer can be, for given the Intersection-over-Union (IoU) overlap - between anchors and ground truth boxes, to assign classification and - regression targets to each anchor, these target labels are used for - train retinanet. - - Every anchor is assigned with a length C one-hot vector of - classification targets, and a 4-vector of box regression targets, - where C is the class number. The assignment rules are as followed: - - 1. Anchors are assigned to ground-truth boxes when: (i) it has the highest - IoU overlap with a ground-truth box, or (ii) it has an IoU overlap higher - than positive_overlap(0.5) with any ground-truth box. - - 2. Anchors are assigned to background when its IoU ratio is lower than - negative_overlap (0.4) for all ground-truth boxes. - - When an anchor is assigned with a ground-truth box which is the i-th category, - the i-th entry in its C vector of targets is set to 1 and all other entries - are set to 0. When an anchor is assigned with background, all entries are set - to 0. Anchors that are not assigned do not contribute to the training - objective. The regression targets are the encoded ground-truth boxes - associated with the assigned anchors. 
- -)DOC"); - } -}; - -class RetinanetTargetAssignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Anchor"), "Input", "Anchor", "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasInput("GtBoxes"), - "Input", - "GtBoxes", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasInput("GtLabels"), - "Input", - "GtLabels", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasInput("IsCrowd"), - "Input", - "IsCrowd", - "retinanet_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("ImInfo"), "Input", "ImInfo", "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("LocationIndex"), - "Output", - "LocationIndex", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("ScoreIndex"), - "Output", - "ScoreIndex", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetLabel"), - "Output", - "TargetLabel", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetBBox"), - "Output", - "TargetBBox", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("BBoxInsideWeight"), - "Output", - "BBoxInsideWeight", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("ForegroundNumber"), - "Output", - "ForegroundNumber", - "retinanet_target_assign"); - - auto anchor_dims = ctx->GetInputDim("Anchor"); - auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto gt_labels_dims = ctx->GetInputDim("GtLabels"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - - PADDLE_ENFORCE_EQ( - anchor_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(Anchor) should be 2, but received Anchor " - "rank is :%d, Anchor shape is:[%s].", - anchor_dims.size(), - anchor_dims)); - PADDLE_ENFORCE_EQ( - gt_boxes_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(GtBoxes) should be 2, but received GtBoxes " - "rank is :%d, GtBoxes shape is:[%s].", - gt_boxes_dims.size(), - gt_boxes_dims)); - PADDLE_ENFORCE_EQ( - gt_labels_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(GtLabels) should be 2, but received GtLabels " - "rank is :%d, GtLabels shape is:[%s].", - gt_labels_dims.size(), - gt_labels_dims)); - PADDLE_ENFORCE_EQ( - im_info_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(ImInfo) should be 2, but received ImInfo " - "rank is :%d, ImInfo shape is:[%s].", - im_info_dims.size(), - im_info_dims)); - - ctx->SetOutputDim("LocationIndex", {gt_labels_dims[0]}); - ctx->SetOutputDim("ScoreIndex", {gt_labels_dims[0]}); - ctx->SetOutputDim("TargetBBox", {gt_labels_dims[0], 4}); - ctx->SetOutputDim("TargetLabel", {gt_labels_dims[0], 1}); - ctx->SetOutputDim("BBoxInsideWeight", {gt_labels_dims[0], 4}); - ctx->SetOutputDim("ForegroundNumber", {gt_labels_dims[0], 1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Anchor"), - platform::CPUPlace()); - } -}; - -template -std::vector FilterCrowdGtBoxLabel( - const phi::CPUContext& context, - phi::DenseTensor* gt_boxes, - phi::DenseTensor* gt_labels, - phi::DenseTensor* is_crowd) { - int gt_num = static_cast(gt_boxes->dims()[0]); - std::vector not_crowd_inds; - auto* is_crowd_data = is_crowd->data(); - for (int i = 0; i < gt_num; ++i) { - if (is_crowd_data[i] == 0) { - not_crowd_inds.emplace_back(i); - } - } - int ncrowd_num = 
static_cast(not_crowd_inds.size()); - phi::DenseTensor ncrowd_gt_boxes, ncrowd_gt_labels; - T* ncrowd_gt_boxes_data = - ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); - int* ncrowd_gt_labels_data = - ncrowd_gt_labels.mutable_data({ncrowd_num, 1}, context.GetPlace()); - Gather(gt_boxes->data(), - 4, - not_crowd_inds.data(), - ncrowd_num, - ncrowd_gt_boxes_data); - Gather(gt_labels->data(), - 1, - not_crowd_inds.data(), - ncrowd_num, - ncrowd_gt_labels_data); - std::vector res; - res.emplace_back(ncrowd_gt_boxes); - res.emplace_back(ncrowd_gt_labels); - return res; -} - -template -std::vector GetAllFgBgGt( - const phi::CPUContext& ctx, - const phi::DenseTensor& anchor_by_gt_overlap, - const phi::DenseTensor& ncrowd_gt_labels, - const float positive_overlap, - const float negative_overlap, - std::minstd_rand engine) { - auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); - int anchor_num = static_cast(anchor_by_gt_overlap.dims()[0]); - int gt_num = static_cast(anchor_by_gt_overlap.dims()[1]); - - std::vector fg_inds; - std::vector bg_inds; - std::vector gt_inds; - std::vector tgt_lbl; - std::vector fg_fake; - std::vector bbox_inside_weight; - // Calculate the max IoU between anchors and gt boxes - // Map from anchor to gt box that has highest overlap - auto place = ctx.GetPlace(); - phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; - anchor_to_gt_max.mutable_data({anchor_num}, place); - int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); - gt_to_anchor_max.mutable_data({gt_num}, place); - - auto anchor_by_gt_overlap_et = - framework::EigenMatrix::From(anchor_by_gt_overlap); - auto anchor_to_gt_max_et = - framework::EigenVector::Flatten(anchor_to_gt_max); - auto gt_to_anchor_max_et = - framework::EigenVector::Flatten(gt_to_anchor_max); - auto anchor_to_gt_argmax_et = - framework::EigenVector::Flatten(anchor_to_gt_argmax); - anchor_to_gt_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(1)); - anchor_to_gt_argmax_et = - anchor_by_gt_overlap_et.argmax(1).template cast(); - gt_to_anchor_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(0)); - - ScoreAssign(anchor_by_gt_overlap_data, - anchor_to_gt_max, - gt_to_anchor_max, - -1, - -1, - positive_overlap, - negative_overlap, - &fg_inds, - &bg_inds, - &tgt_lbl, - &fg_fake, - &bbox_inside_weight, - engine, - false); - const int* gt_labels_data = ncrowd_gt_labels.data(); - int64_t fg_num = static_cast(fg_inds.size()); - for (int64_t i = 0; i < fg_num; ++i) { - int gt_idx = argmax[fg_inds[i]]; - tgt_lbl[i] = gt_labels_data[gt_idx]; - } - - int bg_num = static_cast(bg_inds.size()); - int fg_fake_num = static_cast(fg_fake.size()); - gt_inds.reserve(fg_fake_num); - for (int i = 0; i < fg_fake_num; ++i) { - gt_inds.emplace_back(argmax[fg_fake[i]]); - } - - phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, - bbox_inside_weight_t; - phi::DenseTensor fg_num_t; - int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); - int* score_index_data = - score_index_t.mutable_data({fg_num + bg_num}, place); - int* tgt_lbl_data = tgt_lbl_t.mutable_data({fg_num + bg_num}, place); - int* gt_inds_data = gt_inds_t.mutable_data({fg_fake_num}, place); - int* fg_num_data = fg_num_t.mutable_data({1}, place); - T* bbox_inside_weight_data = - bbox_inside_weight_t.mutable_data({fg_fake_num, 4}, place); - std::copy(fg_fake.begin(), fg_fake.end(), loc_index_data); - std::copy(fg_inds.begin(), fg_inds.end(), score_index_data); - std::copy(bg_inds.begin(), 
bg_inds.end(), score_index_data + fg_num); - std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data); - std::copy(bbox_inside_weight.begin(), - bbox_inside_weight.end(), - bbox_inside_weight_data); - fg_num_data[0] = static_cast(fg_fake.size()) + 1; - std::vector loc_score_tgtlbl_gt; - loc_score_tgtlbl_gt.emplace_back(loc_index_t); - loc_score_tgtlbl_gt.emplace_back(score_index_t); - loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); - loc_score_tgtlbl_gt.emplace_back(gt_inds_t); - loc_score_tgtlbl_gt.emplace_back(bbox_inside_weight_t); - loc_score_tgtlbl_gt.emplace_back(fg_num_t); - - return loc_score_tgtlbl_gt; -} - -template -class RetinanetTargetAssignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 - auto* gt_boxes = context.Input("GtBoxes"); - auto* gt_labels = context.Input("GtLabels"); - auto* is_crowd = context.Input("IsCrowd"); - auto* im_info = context.Input("ImInfo"); - - auto* loc_index = context.Output("LocationIndex"); - auto* score_index = context.Output("ScoreIndex"); - auto* tgt_bbox = context.Output("TargetBBox"); - auto* tgt_lbl = context.Output("TargetLabel"); - auto* bbox_inside_weight = - context.Output("BBoxInsideWeight"); - auto* fg_num = context.Output("ForegroundNumber"); - - PADDLE_ENFORCE_EQ( - gt_boxes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "The LoD level of Input(GtBoxes) should be 1, but received GtBoxes " - "LoD level is :%d.", - gt_boxes->lod().size())); - PADDLE_ENFORCE_EQ( - gt_labels->lod().size(), - 1UL, - phi::errors::InvalidArgument("The LoD level of Input(GtLabels) " - "should be 1, but received GtLabels " - "LoD level is :%d.", - gt_labels->lod().size())); - PADDLE_ENFORCE_EQ( - is_crowd->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "The LoD level of Input(IsCrowd) should be 1, but received IsCrowd " - "LoD level is :%d.", - is_crowd->lod().size())); - - int64_t anchor_num = static_cast(anchor->dims()[0]); - int64_t batch_num = static_cast(gt_boxes->lod().back().size() - 1); - - float positive_overlap = context.Attr("positive_overlap"); - float negative_overlap = context.Attr("negative_overlap"); - - int64_t max_num = batch_num * anchor_num; - auto place = context.GetPlace(); - - loc_index->mutable_data({max_num}, place); - score_index->mutable_data({max_num}, place); - tgt_bbox->mutable_data({max_num, 4}, place); - tgt_lbl->mutable_data({max_num, 1}, place); - bbox_inside_weight->mutable_data({max_num, 4}, place); - fg_num->mutable_data({batch_num, 1}, place); - auto& dev_ctx = context.device_context(); - - std::random_device rnd; - std::minstd_rand engine; - int seed = static_cast(rnd()); - engine.seed(seed); - - framework::LoD lod_loc, loc_score, lod_fg; - std::vector lod0_loc(1, 0); - std::vector lod0_score(1, 0); - std::vector lod0_fg(1, 0); - - int total_loc_num = 0; - int total_score_num = 0; - int total_fg_num = 0; - auto gt_boxes_lod = gt_boxes->lod().back(); - auto gt_labels_lod = gt_labels->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - for (int i = 0; i < batch_num; ++i) { - phi::DenseTensor gt_boxes_slice = - gt_boxes->Slice(static_cast(gt_boxes_lod[i]), - static_cast(gt_boxes_lod[i + 1])); - phi::DenseTensor gt_labels_slice = - gt_labels->Slice(static_cast(gt_labels_lod[i]), - static_cast(gt_labels_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - 
static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - auto* im_info_data = im_info_slice.data(); - auto im_height = im_info_data[0]; - auto im_width = im_info_data[1]; - auto im_scale = im_info_data[2]; - - // Filter straddle anchor - std::vector filter_output = - FilterStraddleAnchor(dev_ctx, anchor, -1, im_height, im_width); - phi::DenseTensor inds_inside = filter_output[0]; - phi::DenseTensor inside_anchor = filter_output[1]; - - // Filter crowd gt - std::vector ncrowd_output = FilterCrowdGtBoxLabel( - dev_ctx, >_boxes_slice, >_labels_slice, &is_crowd_slice); - phi::DenseTensor ncrowd_gt_boxes = ncrowd_output[0]; - phi::DenseTensor ncrowd_gt_labels = ncrowd_output[1]; - - auto ncrowd_gt_boxes_et = - framework::EigenTensor::From(ncrowd_gt_boxes); - ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; - - phi::DenseTensor anchor_by_gt_overlap; - anchor_by_gt_overlap.mutable_data( - {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); - BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); - - auto loc_score_tgtlbl_gt = GetAllFgBgGt(dev_ctx, - anchor_by_gt_overlap, - ncrowd_gt_labels, - positive_overlap, - negative_overlap, - engine); - - phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0]; - phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1]; - phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; - phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3]; - phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; - phi::DenseTensor sampled_fg_num = loc_score_tgtlbl_gt[5]; - - int loc_num = static_cast(sampled_loc_index.dims()[0]); - int score_num = static_cast(sampled_score_index.dims()[0]); - // unmap to all anchor - phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap; - sampled_loc_index_unmap.mutable_data({loc_num}, place); - sampled_score_index_unmap.mutable_data({score_num}, place); - Gather(inds_inside.data(), - 1, - sampled_loc_index.data(), - loc_num, - sampled_loc_index_unmap.data()); - Gather(inds_inside.data(), - 1, - sampled_score_index.data(), - score_num, - sampled_score_index_unmap.data()); - - // get target bbox deltas - phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox; - auto* sampled_anchor_data = - sampled_anchor.mutable_data({loc_num, 4}, place); - auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); - Gather(anchor->data(), - 4, - sampled_loc_index_unmap.data(), - loc_num, - sampled_anchor_data); - Gather(ncrowd_gt_boxes.data(), - 4, - sampled_gt_index.data(), - loc_num, - sampled_gt_data); - sampled_tgt_bbox.mutable_data({loc_num, 4}, place); - BoxToDelta(loc_num, - sampled_anchor, - sampled_gt, - nullptr, - false, - &sampled_tgt_bbox); - - // Add anchor offset - int anchor_offset = static_cast(i * anchor_num); - auto sampled_loc_index_unmap_et = - framework::EigenTensor::From(sampled_loc_index_unmap); - sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset; - auto sampled_score_index_unmap_et = - framework::EigenTensor::From(sampled_score_index_unmap); - sampled_score_index_unmap_et = - sampled_score_index_unmap_et + anchor_offset; - AppendRpns(loc_index, total_loc_num, &sampled_loc_index_unmap); - AppendRpns(score_index, total_score_num, &sampled_score_index_unmap); - AppendRpns(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox); - AppendRpns(tgt_lbl, total_score_num, &sampled_tgtlbl); - AppendRpns( - bbox_inside_weight, total_loc_num * 4, &sampled_bbox_inside_weight); - AppendRpns(fg_num, 
total_fg_num, &sampled_fg_num); - - total_loc_num += loc_num; - total_score_num += score_num; - total_fg_num += 1; - lod0_loc.emplace_back(total_loc_num); - lod0_score.emplace_back(total_score_num); - lod0_fg.emplace_back(total_fg_num); - } - - PADDLE_ENFORCE_LE( - total_loc_num, - max_num, - phi::errors::InvalidArgument( - "The number of sampled bboxes should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "bboxes is :%d.", - max_num, - total_loc_num)); - PADDLE_ENFORCE_LE( - total_score_num, - max_num, - phi::errors::InvalidArgument( - "The number of sampled scores should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "scores is :%d.", - max_num, - total_score_num)); - PADDLE_ENFORCE_LE( - total_fg_num, - batch_num, - phi::errors::InvalidArgument( - "The number of foreground numbers should not be greater than the " - "batch size(%d), but the number of foreground numbers is :%d.", - batch_num, - total_fg_num)); - - lod_loc.emplace_back(lod0_loc); - loc_score.emplace_back(lod0_score); - lod_fg.emplace_back(lod0_fg); - loc_index->set_lod(lod_loc); - score_index->set_lod(loc_score); - tgt_bbox->set_lod(lod_loc); - tgt_lbl->set_lod(loc_score); - bbox_inside_weight->set_lod(lod_loc); - fg_num->set_lod(lod_fg); - loc_index->Resize({total_loc_num}); - score_index->Resize({total_score_num}); - tgt_bbox->Resize({total_loc_num, 4}); - tgt_lbl->Resize({total_score_num, 1}); - bbox_inside_weight->Resize({total_loc_num, 4}); - fg_num->Resize({total_fg_num, 1}); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - rpn_target_assign, - ops::RpnTargetAssignOp, - ops::RpnTargetAssignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(rpn_target_assign, - CPU, - ALL_LAYOUT, - ops::RpnTargetAssignKernel, - float, - double) {} -REGISTER_OPERATOR( - retinanet_target_assign, - ops::RetinanetTargetAssignOp, - ops::RetinanetTargetAssignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(retinanet_target_assign, - CPU, - ALL_LAYOUT, - ops::RetinanetTargetAssignKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc deleted file mode 100644 index cee37d49eb69b..0000000000000 --- a/paddle/fluid/operators/detection_map_op.cc +++ /dev/null @@ -1,229 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/detection_map_op.h" - -#include - -namespace paddle { -namespace operators { - -class DetectionMAPOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("DetectRes"), "Input", "DetectRes", "DetectionMAP"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "DetectionMAP"); - OP_INOUT_CHECK(ctx->HasOutput("AccumPosCount"), - "Output", - "AccumPosCount", - "DetectionMAP"); - OP_INOUT_CHECK(ctx->HasOutput("AccumTruePos"), - "Output", - "AccumTruePos", - "DetectionMAP"); - OP_INOUT_CHECK(ctx->HasOutput("AccumFalsePos"), - "Output", - "AccumFalsePos", - "DetectionMAP"); - OP_INOUT_CHECK(ctx->HasOutput("MAP"), "Output", "MAP", "DetectionMAP"); - - auto det_dims = ctx->GetInputDim("DetectRes"); - PADDLE_ENFORCE_EQ( - det_dims.size(), - 2UL, - phi::errors::InvalidArgument( - "Input(DetectRes) ndim must be 2, the shape is [N, 6]," - "but received the ndim is %d", - det_dims.size())); - PADDLE_ENFORCE_EQ( - det_dims[1], - 6UL, - phi::errors::InvalidArgument( - "The shape is of Input(DetectRes) [N, 6], but received" - " shape is [N, %d]", - det_dims[1])); - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ(label_dims.size(), - 2, - phi::errors::InvalidArgument( - "The ndim of Input(Label) must be 2, but received %d", - label_dims.size())); - if (ctx->IsRuntime() || label_dims[1] > 0) { - PADDLE_ENFORCE_EQ( - (label_dims[1] == 6 || label_dims[1] == 5), - true, - phi::errors::InvalidArgument( - "The shape of Input(Label) is [N, 6] or [N, 5], but received " - "[N, %d]", - label_dims[1])); - } - - if (ctx->HasInput("PosCount")) { - PADDLE_ENFORCE( - ctx->HasInput("TruePos"), - phi::errors::InvalidArgument( - "Input(TruePos) of DetectionMAPOp should not be null when " - "Input(PosCount) is not null.")); - PADDLE_ENFORCE( - ctx->HasInput("FalsePos"), - phi::errors::InvalidArgument( - "Input(FalsePos) of DetectionMAPOp should not be null when " - "Input(PosCount) is not null.")); - } - - ctx->SetOutputDim("MAP", common::make_ddim({1})); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "DetectRes"), - platform::CPUPlace()); - } -}; - -class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("DetectRes", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [M, 6] " - "represents the " - "detections. Each row has 6 values: " - "[label, confidence, xmin, ymin, xmax, ymax], M is the total " - "number of detect results in this mini-batch. For each instance, " - "the offsets in first dimension are called LoD, the number of " - "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " - "no detected data."); - AddInput("Label", - "(phi::DenseTensor) A 2-D phi::DenseTensor represents the" - "Labeled ground-truth data. Each row has 6 values: " - "[label, xmin, ymin, xmax, ymax, is_difficult] or 5 values: " - "[label, xmin, ymin, xmax, ymax], where N is the total " - "number of ground-truth data in this mini-batch. 
For each " - "instance, the offsets in first dimension are called LoD, " - "the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, " - "means there is no ground-truth data."); - AddInput("HasState", - "(Tensor) A tensor with shape [1], 0 means ignoring input " - "states, which including PosCount, TruePos, FalsePos.") - .AsDispensable(); - AddInput("PosCount", - "(Tensor) A tensor with shape [Ncls, 1], store the " - "input positive example count of each class, Ncls is the count of " - "input classification. " - "This input is used to pass the AccumPosCount generated by the " - "previous mini-batch when the multi mini-batches cumulative " - "calculation carried out. " - "When the input(PosCount) is empty, the cumulative " - "calculation is not carried out, and only the results of the " - "current mini-batch are calculated.") - .AsDispensable(); - AddInput("TruePos", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [Ntp, 2], " - "store the " - "input true positive example of each class." - "This input is used to pass the AccumTruePos generated by the " - "previous mini-batch when the multi mini-batches cumulative " - "calculation carried out. ") - .AsDispensable(); - AddInput("FalsePos", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [Nfp, 2], " - "store the " - "input false positive example of each class." - "This input is used to pass the AccumFalsePos generated by the " - "previous mini-batch when the multi mini-batches cumulative " - "calculation carried out. ") - .AsDispensable(); - AddOutput("AccumPosCount", - "(Tensor) A tensor with shape [Ncls, 1], store the " - "positive example count of each class. It combines the input " - "input(PosCount) and the positive example count computed from " - "input(Detection) and input(Label)."); - AddOutput( - "AccumTruePos", - "(phi::DenseTensor) A phi::DenseTensor with shape [Ntp', 2], store the " - "true positive example of each class. It combines the " - "input(TruePos) and the true positive examples computed from " - "input(Detection) and input(Label)."); - AddOutput( - "AccumFalsePos", - "(phi::DenseTensor) A phi::DenseTensor with shape [Nfp', 2], store the " - "false positive example of each class. It combines the " - "input(FalsePos) and the false positive examples computed from " - "input(Detection) and input(Label)."); - AddOutput("MAP", - "(Tensor) A tensor with shape [1], store the mAP evaluate " - "result of the detection."); - AddAttr("class_num", - "(int) " - "The class number."); - AddAttr( - "background_label", - "(int, default: 0) " - "The index of background label, the background label will be ignored. " - "If set to -1, then all categories will be considered.") - .SetDefault(0); - AddAttr( - "overlap_threshold", - "(float) " - "The lower bound jaccard overlap threshold of detection output and " - "ground-truth data.") - .SetDefault(.5f); - AddAttr("evaluate_difficult", - "(bool, default true) " - "Switch to control whether the difficult data is evaluated.") - .SetDefault(true); - AddAttr("ap_type", - "(string, default 'integral') " - "The AP algorithm type, 'integral' or '11point'.") - .SetDefault("integral") - .InEnum({"integral", "11point"}) - .AddCustomChecker([](const std::string& ap_type) { - PADDLE_ENFORCE_NE( - GetAPType(ap_type), - APType::kNone, - phi::errors::InvalidArgument( - "The ap_type should be 'integral' or '11point.")); - }); - AddComment(R"DOC( -Detection mAP evaluate operator. -The general steps are as follows. 
First, the true positives and
-false positives are computed from the detection results and the labels;
-then the mAP value is calculated.
-Both the '11point' and 'integral' mAP algorithms are supported. More
-information can be found in the following articles:
-https://sanchom.wordpress.com/tag/average-precision/
-https://arxiv.org/abs/1512.02325
-
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    detection_map,
-    ops::DetectionMAPOp,
-    ops::DetectionMAPOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-
-PD_REGISTER_STRUCT_KERNEL(
-    detection_map, CPU, ALL_LAYOUT, ops::DetectionMAPOpKernel, float, double) {}
diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h
deleted file mode 100644
index 24fea9c431c63..0000000000000
--- a/paddle/fluid/operators/detection_map_op.h
+++ /dev/null
@@ -1,518 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <algorithm>
-#include <map>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-enum APType { kNone = 0, kIntegral, k11point };
-
-APType GetAPType(std::string str) {
-  if (str == "integral") {
-    return APType::kIntegral;
-  } else if (str == "11point") {
-    return APType::k11point;
-  } else {
-    return APType::kNone;
-  }
-}
-
-template <typename T>
-inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
-                                 const std::pair<float, T>& pair2) {
-  return pair1.first > pair2.first;
-}
-
-template <typename T>
-inline void GetAccumulation(std::vector<std::pair<T, int>> in_pairs,
-                            std::vector<int>* accu_vec) {
-  std::stable_sort(in_pairs.begin(), in_pairs.end(), SortScorePairDescend<int>);
-  accu_vec->clear();
-  size_t sum = 0;
-  for (size_t i = 0; i < in_pairs.size(); ++i) {
-    auto count = in_pairs[i].second;
-    sum += count;
-    accu_vec->push_back(sum);
-  }
-}
-
-template <typename T, typename DeviceContext>
-class DetectionMAPOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in_detect = ctx.Input<phi::DenseTensor>("DetectRes");
-    auto* in_label = ctx.Input<phi::DenseTensor>("Label");
-    auto* out_map = ctx.Output<phi::DenseTensor>("MAP");
-
-    auto* in_pos_count = ctx.Input<phi::DenseTensor>("PosCount");
-    auto* in_true_pos = ctx.Input<phi::DenseTensor>("TruePos");
-    auto* in_false_pos = ctx.Input<phi::DenseTensor>("FalsePos");
-
-    auto* out_pos_count = ctx.Output<phi::DenseTensor>("AccumPosCount");
-    auto* out_true_pos = ctx.Output<phi::DenseTensor>("AccumTruePos");
-    auto* out_false_pos = ctx.Output<phi::DenseTensor>("AccumFalsePos");
-
-    float overlap_threshold = ctx.Attr<float>("overlap_threshold");
-    bool evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
-    auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
-    int class_num = ctx.Attr<int>("class_num");
-
-    auto& label_lod = in_label->lod();
-    auto& detect_lod = in_detect->lod();
-    PADDLE_ENFORCE_EQ(
-        label_lod.size(),
-        1UL,
-        phi::errors::InvalidArgument("Only support LoDTensor with lod_level "
-                                     "of 1 in label, but received %d.",
-                                     label_lod.size()));
-    
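
// Hypothetical usage of GetAccumulation defined above: it sorts the
// (score, flag) pairs by descending score and returns running flag sums,
// i.e. the cumulative true-positive (or false-positive) count at each rank:
//
//   std::vector<std::pair<float, int>> tps = {{0.9f, 1}, {0.7f, 0}, {0.8f, 1}};
//   std::vector<int> tp_sum;
//   GetAccumulation<float>(tps, &tp_sum);
//   // Sorted by score: 0.9, 0.8, 0.7 with flags 1, 1, 0 -> tp_sum == {1, 2, 2}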
PADDLE_ENFORCE_EQ(label_lod[0].size(), - detect_lod[0].size(), - phi::errors::InvalidArgument( - "The batch_size of input(Label) and input(Detection) " - "must be the same, but received %d:%d", - label_lod[0].size(), - detect_lod[0].size())); - - std::vector>> gt_boxes; - std::vector>>> detect_boxes; - - GetBoxes(*in_label, *in_detect, >_boxes, detect_boxes); - - std::map label_pos_count; - std::map>> true_pos; - std::map>> false_pos; - - auto* has_state = ctx.Input("HasState"); - int state = 0; - if (has_state) { - state = has_state->data()[0]; - } - - if (in_pos_count != nullptr && state) { - GetInputPos(*in_pos_count, - *in_true_pos, - *in_false_pos, - &label_pos_count, - &true_pos, - &false_pos, - class_num); - } - - CalcTrueAndFalsePositive(gt_boxes, - detect_boxes, - evaluate_difficult, - overlap_threshold, - &label_pos_count, - &true_pos, - &false_pos); - - int background_label = ctx.Attr("background_label"); - T map = CalcMAP( - ap_type, label_pos_count, true_pos, false_pos, background_label); - - GetOutputPos(ctx, - label_pos_count, - true_pos, - false_pos, - out_pos_count, - out_true_pos, - out_false_pos, - class_num); - - T* map_data = out_map->mutable_data(ctx.GetPlace()); - map_data[0] = map; - } - - protected: - struct Box { - Box(T xmin, T ymin, T xmax, T ymax) - : xmin(xmin), ymin(ymin), xmax(xmax), ymax(ymax), is_difficult(false) {} - - T xmin, ymin, xmax, ymax; - bool is_difficult; - }; - - inline T JaccardOverlap(const Box& box1, const Box& box2) const { - if (box2.xmin > box1.xmax || box2.xmax < box1.xmin || - box2.ymin > box1.ymax || box2.ymax < box1.ymin) { - return 0.0; - } else { - T inter_xmin = std::max(box1.xmin, box2.xmin); - T inter_ymin = std::max(box1.ymin, box2.ymin); - T inter_xmax = std::min(box1.xmax, box2.xmax); - T inter_ymax = std::min(box1.ymax, box2.ymax); - - T inter_width = inter_xmax - inter_xmin; - T inter_height = inter_ymax - inter_ymin; - T inter_area = inter_width * inter_height; - - T bbox_area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin); - T bbox_area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin); - - return inter_area / (bbox_area1 + bbox_area2 - inter_area); - } - } - - inline void ClipBBox(const Box& bbox, Box* clipped_bbox) const { - T one = static_cast(1.0); - T zero = static_cast(0.0); - clipped_bbox->xmin = std::max(std::min(bbox.xmin, one), zero); - clipped_bbox->ymin = std::max(std::min(bbox.ymin, one), zero); - clipped_bbox->xmax = std::max(std::min(bbox.xmax, one), zero); - clipped_bbox->ymax = std::max(std::min(bbox.ymax, one), zero); - } - - void GetBoxes(const phi::DenseTensor& input_label, - const phi::DenseTensor& input_detect, - std::vector>>* gt_boxes, - std::vector>>>& - detect_boxes) const { - auto labels = framework::EigenTensor::From(input_label); - auto detect = framework::EigenTensor::From(input_detect); - - auto& label_lod = input_label.lod(); - auto& detect_lod = input_detect.lod(); - - int batch_size = label_lod[0].size() - 1; - auto& label_index = label_lod[0]; - - for (int n = 0; n < batch_size; ++n) { - std::map> boxes; - for (size_t i = label_index[n]; i < label_index[n + 1]; ++i) { - int label = labels(i, 0); - if (input_label.dims()[1] == 6) { - Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5)); - auto is_difficult = labels(i, 1); - if (std::abs(is_difficult - 0.0) < 1e-6) - box.is_difficult = false; - else - box.is_difficult = true; - boxes[label].push_back(box); - } else { - PADDLE_ENFORCE_EQ( - input_label.dims()[1], - 5, - phi::errors::InvalidArgument( - "The input label 
width" - " must be 5, but received %d, please check your input data", - input_label.dims()[1])); - Box box(labels(i, 1), labels(i, 2), labels(i, 3), labels(i, 4)); - boxes[label].push_back(box); - } - } - gt_boxes->push_back(boxes); - } - - auto detect_index = detect_lod[0]; - for (int n = 0; n < batch_size; ++n) { - std::map>> boxes; - for (size_t i = detect_index[n]; i < detect_index[n + 1]; ++i) { - Box box(detect(i, 2), detect(i, 3), detect(i, 4), detect(i, 5)); - int label = detect(i, 0); - auto score = detect(i, 1); - boxes[label].push_back(std::make_pair(score, box)); - } - detect_boxes.push_back(boxes); - } - } - - void GetOutputPos( - const framework::ExecutionContext& ctx, - const std::map& label_pos_count, - const std::map>>& true_pos, - const std::map>>& false_pos, - phi::DenseTensor* output_pos_count, - phi::DenseTensor* output_true_pos, - phi::DenseTensor* output_false_pos, - const int class_num) const { - int true_pos_count = 0; - int false_pos_count = 0; - for (auto it = true_pos.begin(); it != true_pos.end(); ++it) { - auto tp = it->second; - true_pos_count += tp.size(); - } - for (auto it = false_pos.begin(); it != false_pos.end(); ++it) { - auto fp = it->second; - false_pos_count += fp.size(); - } - - int* pos_count_data = output_pos_count->mutable_data( - common::make_ddim({class_num, 1}), ctx.GetPlace()); - - T* true_pos_data = output_true_pos->mutable_data( - common::make_ddim({true_pos_count, 2}), ctx.GetPlace()); - T* false_pos_data = output_false_pos->mutable_data( - common::make_ddim({false_pos_count, 2}), ctx.GetPlace()); - true_pos_count = 0; - false_pos_count = 0; - std::vector true_pos_starts = {0}; - std::vector false_pos_starts = {0}; - for (int i = 0; i < class_num; ++i) { - auto it_count = label_pos_count.find(i); - pos_count_data[i] = 0; - if (it_count != label_pos_count.end()) { - pos_count_data[i] = it_count->second; - } - auto it_true_pos = true_pos.find(i); - if (it_true_pos != true_pos.end()) { - const std::vector>& true_pos_vec = - it_true_pos->second; - for (const std::pair& tp : true_pos_vec) { - true_pos_data[true_pos_count * 2] = tp.first; - true_pos_data[true_pos_count * 2 + 1] = static_cast(tp.second); - true_pos_count++; - } - } - true_pos_starts.push_back(true_pos_count); - - auto it_false_pos = false_pos.find(i); - if (it_false_pos != false_pos.end()) { - const std::vector>& false_pos_vec = - it_false_pos->second; - for (const std::pair& fp : false_pos_vec) { - false_pos_data[false_pos_count * 2] = fp.first; - false_pos_data[false_pos_count * 2 + 1] = static_cast(fp.second); - false_pos_count++; - } - } - false_pos_starts.push_back(false_pos_count); - } - - framework::LoD true_pos_lod; - true_pos_lod.emplace_back(true_pos_starts); - framework::LoD false_pos_lod; - false_pos_lod.emplace_back(false_pos_starts); - - output_true_pos->set_lod(true_pos_lod); - output_false_pos->set_lod(false_pos_lod); - } - - void GetInputPos(const phi::DenseTensor& input_pos_count, - const phi::DenseTensor& input_true_pos, - const phi::DenseTensor& input_false_pos, - std::map* label_pos_count, - std::map>>* true_pos, - std::map>>* false_pos, - const int class_num) const { - const int* pos_count_data = input_pos_count.data(); - for (int i = 0; i < class_num; ++i) { - (*label_pos_count)[i] = pos_count_data[i]; - } - - auto SetData = [](const phi::DenseTensor& pos_tensor, - std::map>>& pos) { - const T* pos_data = pos_tensor.data(); - auto& pos_data_lod = pos_tensor.lod()[0]; - for (size_t i = 0; i < pos_data_lod.size() - 1; ++i) { - for (size_t j = 
pos_data_lod[i]; j < pos_data_lod[i + 1]; ++j) { - T score = pos_data[j * 2]; - int flag = pos_data[j * 2 + 1]; - pos[i].push_back(std::make_pair(score, flag)); - } - } - }; - - SetData(input_true_pos, *true_pos); - SetData(input_false_pos, *false_pos); - return; - } - - void CalcTrueAndFalsePositive( - const std::vector>>& gt_boxes, - const std::vector>>>& - detect_boxes, - bool evaluate_difficult, - float overlap_threshold, - std::map* label_pos_count, - std::map>>* true_pos, - std::map>>* false_pos) const { - int batch_size = gt_boxes.size(); - for (int n = 0; n < batch_size; ++n) { - auto& image_gt_boxes = gt_boxes[n]; - for (auto& image_gt_box : image_gt_boxes) { - size_t count = 0; - auto& labeled_bboxes = image_gt_box.second; - if (evaluate_difficult) { - count = labeled_bboxes.size(); - } else { - for (auto& box : labeled_bboxes) { - if (!box.is_difficult) { - ++count; - } - } - } - if (count == 0) { - continue; - } - int label = image_gt_box.first; - if (label_pos_count->find(label) == label_pos_count->end()) { - (*label_pos_count)[label] = count; - } else { - (*label_pos_count)[label] += count; - } - } - } - - for (size_t n = 0; n < detect_boxes.size(); ++n) { - auto image_gt_boxes = gt_boxes[n]; - auto detections = detect_boxes[n]; - - if (image_gt_boxes.size() == 0) { - for (auto it = detections.begin(); it != detections.end(); ++it) { - auto pred_boxes = it->second; - int label = it->first; - for (size_t i = 0; i < pred_boxes.size(); ++i) { - auto score = pred_boxes[i].first; - (*true_pos)[label].push_back(std::make_pair(score, 0)); - (*false_pos)[label].push_back(std::make_pair(score, 1)); - } - } - continue; - } - - for (auto it = detections.begin(); it != detections.end(); ++it) { - int label = it->first; - auto pred_boxes = it->second; - if (image_gt_boxes.find(label) == image_gt_boxes.end()) { - for (size_t i = 0; i < pred_boxes.size(); ++i) { - auto score = pred_boxes[i].first; - (*true_pos)[label].push_back(std::make_pair(score, 0)); - (*false_pos)[label].push_back(std::make_pair(score, 1)); - } - continue; - } - - auto matched_bboxes = image_gt_boxes.find(label)->second; - std::vector visited(matched_bboxes.size(), false); - // Sort detections in descend order based on scores - std::sort( - pred_boxes.begin(), pred_boxes.end(), SortScorePairDescend); - for (size_t i = 0; i < pred_boxes.size(); ++i) { - T max_overlap = -1.0; - size_t max_idx = 0; - auto score = pred_boxes[i].first; - for (size_t j = 0; j < matched_bboxes.size(); ++j) { - Box& pred_box = pred_boxes[i].second; - ClipBBox(pred_box, &pred_box); - T overlap = JaccardOverlap(pred_box, matched_bboxes[j]); - if (overlap > max_overlap) { - max_overlap = overlap; - max_idx = j; - } - } - if (max_overlap > overlap_threshold) { - bool match_evaluate_difficult = - evaluate_difficult || - (!evaluate_difficult && !matched_bboxes[max_idx].is_difficult); - if (match_evaluate_difficult) { - if (!visited[max_idx]) { - (*true_pos)[label].push_back(std::make_pair(score, 1)); - (*false_pos)[label].push_back(std::make_pair(score, 0)); - visited[max_idx] = true; - } else { - (*true_pos)[label].push_back(std::make_pair(score, 0)); - (*false_pos)[label].push_back(std::make_pair(score, 1)); - } - } - } else { - (*true_pos)[label].push_back(std::make_pair(score, 0)); - (*false_pos)[label].push_back(std::make_pair(score, 1)); - } - } - } - } - } - - T CalcMAP(APType ap_type, - const std::map& label_pos_count, - const std::map>>& true_pos, - const std::map>>& false_pos, - const int background_label) const { - T mAP = 0.0; - int 
count = 0;
-    for (auto it = label_pos_count.begin(); it != label_pos_count.end();
-         ++it) {
-      int label = it->first;
-      int label_num_pos = it->second;
-      if (label_num_pos == background_label) {
-        continue;
-      }
-      if (true_pos.find(label) == true_pos.end()) {
-        count++;
-        continue;
-      }
-      auto label_true_pos = true_pos.find(label)->second;
-      auto label_false_pos = false_pos.find(label)->second;
-      // Compute average precision.
-      std::vector<int> tp_sum;
-      GetAccumulation<T>(label_true_pos, &tp_sum);
-      std::vector<int> fp_sum;
-      GetAccumulation<T>(label_false_pos, &fp_sum);
-      std::vector<float> precision, recall;
-      size_t num = tp_sum.size();
-      // Compute precision and recall at each rank.
-      for (size_t i = 0; i < num; ++i) {
-        precision.push_back(static_cast<float>(tp_sum[i]) /
-                            static_cast<float>(tp_sum[i] + fp_sum[i]));
-        recall.push_back(static_cast<float>(tp_sum[i]) / label_num_pos);
-      }
-      // VOC2007 style: 11-point interpolated average precision.
-      if (ap_type == APType::k11point) {
-        std::vector<float> max_precisions(11, 0.0);
-        int start_idx = num - 1;
-        for (int j = 10; j >= 0; --j)
-          for (int i = start_idx; i >= 0; --i) {
-            if (recall[i] < j / 10.) {
-              start_idx = i;
-              if (j > 0) max_precisions[j - 1] = max_precisions[j];
-              break;
-            } else {
-              if (max_precisions[j] < precision[i])
-                max_precisions[j] = precision[i];
-            }
-          }
-        for (int j = 10; j >= 0; --j) mAP += max_precisions[j] / 11;
-        ++count;
-      } else if (ap_type == APType::kIntegral) {
-        // Natural integral of the precision-recall curve.
-        float average_precisions = 0.;
-        float prev_recall = 0.;
-        for (size_t i = 0; i < num; ++i) {
-          if (fabs(recall[i] - prev_recall) > 1e-6)
-            average_precisions += precision[i] * fabs(recall[i] - prev_recall);
-          prev_recall = recall[i];
-        }
-        mAP += average_precisions;
-        ++count;
-      } else {
-        PADDLE_THROW(phi::errors::Unimplemented(
-            "Unknown ap_type %s. Now only 'integral' and '11point' are "
-            "supported.",
-            ap_type));
-      }
-    }
-    if (count != 0) mAP /= count;
-    return mAP;
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/enqueue_op.cc b/paddle/fluid/operators/enqueue_op.cc
deleted file mode 100644
index 225a2e067e190..0000000000000
--- a/paddle/fluid/operators/enqueue_op.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
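
// A self-contained sketch of the 'integral' AP rule used by CalcMAP in the
// deleted detection_map_op.h above: accumulate precision * delta(recall)
// over the ranked detections. The precision/recall values here are
// illustrative numbers, not taken from the operator.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Cumulative precision/recall at each rank (derived from tp_sum/fp_sum).
  std::vector<float> precision = {1.0f, 1.0f, 0.67f};
  std::vector<float> recall = {0.5f, 1.0f, 1.0f};
  float ap = 0.0f, prev_recall = 0.0f;
  for (size_t i = 0; i < precision.size(); ++i) {
    if (std::fabs(recall[i] - prev_recall) > 1e-6f)
      ap += precision[i] * std::fabs(recall[i] - prev_recall);
    prev_recall = recall[i];
  }
  std::printf("integral AP = %.2f\n", ap);  // 1.0*0.5 + 1.0*0.5 = 1.00
  return 0;
}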
- -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" - -namespace paddle { -namespace framework { -class OpDesc; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -using LoDTensorBlockingQueueHolder = - paddle::operators::reader::LoDTensorBlockingQueueHolder; - -namespace paddle { -namespace operators { - -class EnqueueOp : public framework::OperatorBase { - public: - EnqueueOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override { - const std::string& queue_name = Attr("queue_name"); - auto* queue_holder_var = scope.FindVar(queue_name); - PADDLE_ENFORCE_NOT_NULL( - queue_holder_var, - phi::errors::NotFound( - "No LoDTensorBlockingQueueHolder variable with name %s found.", - queue_name)); - const std::string& var_name = Input("X"); - auto* in_var = scope.FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL( - in_var, - phi::errors::NotFound("No variable with name %s found.", var_name)); - auto* in_tensor = in_var->GetMutable(); - auto* queue_holder = - queue_holder_var->template GetMutable(); - - paddle::framework::LoDTensorArray lod_tensor_vec; - lod_tensor_vec.emplace_back(*in_tensor); - queue_holder->GetQueue()->Push(lod_tensor_vec); - } -}; - -class EnqueueOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "`lod_tensor` to enqueue"); - AddAttr("queue_name", - "Name of the `LoDTensorBlockingQueueHolder` variable"); - AddComment(R"DOC( - Enqueue operator. 
- )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = ::paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(enqueue, ops::EnqueueOp, ops::EnqueueOpMaker); diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 713ad1931ce23..517761bdba8b5 100755 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -16,16 +16,12 @@ register_operators( fused_feedforward_op fused_multi_transformer_op fused_multi_transformer_int8_op - resnet_unit_op fused_gemm_epilogue_op - fused_gate_attention_op - resnet_basic_block_op) + fused_gate_attention_op) op_library(fusion_lstm_op) if(WITH_XPU) - op_library(resnet_basic_block_op) - op_library(resnet_unit_op) op_library(fused_gemm_epilogue_op) op_library(fused_attention_op) op_library(fused_feedforward_op) @@ -60,10 +56,6 @@ if(WITH_GPU OR WITH_ROCM) op_library(fused_multi_transformer_op) op_library(fused_multi_transformer_int8_op) endif() - # resnet_unit needs cudnn 8.0 above - if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000)) - op_library(resnet_unit_op) - endif() if(CUDA_VERSION GREATER_EQUAL 11.6) op_library(fused_gemm_epilogue_op) diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op.cc b/paddle/fluid/operators/fused/resnet_basic_block_op.cc deleted file mode 100644 index 37315367189fa..0000000000000 --- a/paddle/fluid/operators/fused/resnet_basic_block_op.cc +++ /dev/null @@ -1,566 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/common/ddim.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class ResNetBasicBlockOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const { - // Check input - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Filter1"), "Input", "Filter1", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Scale1"), "Input", "Scale1", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Bias1"), "Input", "Bias1", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Mean1"), "Input", "Mean1", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Var1"), "Input", "Var1", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Filter2"), "Input", "Filter2", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Scale2"), "Input", "Scale2", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Bias2"), "Input", "Bias2", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Mean2"), "Input", "Mean2", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Var2"), "Input", "Var2", "ResNetBasicBlockOp"); - - bool has_shortcut = ctx->Attrs().Get("has_shortcut"); - if (has_shortcut) { - OP_INOUT_CHECK( - ctx->HasInput("Filter3"), "Input", "Filter3", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Scale3"), "Input", "Scale3", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Bias3"), "Input", "Bias3", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Mean3"), "Input", "Mean3", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasInput("Var3"), "Input", "Var3", "ResNetBasicBlockOp"); - } - - // Check output - OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Conv1"), "Output", "Conv1", "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedMean1"), - "Output", - "SavedMean1", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedInvstd1"), - "Output", - "SavedInvstd1", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Mean1Out"), "Output", "Mean1Out", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Var1Out"), "Output", "Var1Out", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Conv2"), "Output", "Conv2", "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedMean2"), - "Output", - "SavedMean2", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedInvstd2"), - "Output", - "SavedInvstd2", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Mean2Out"), "Output", "Mean2Out", "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Var2Out"), "Output", "Var2Out", "ResNetBasicBlockOp"); - if (has_shortcut) { - OP_INOUT_CHECK( - ctx->HasOutput("Conv3"), "Output", "Conv3", "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedMean3"), - "Output", - "SavedMean3", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedInvstd3"), - "Output", - "SavedInvstd3", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK(ctx->HasOutput("Mean3Out"), - "Output", - "Mean3Out", - "ResNetBasicBlockOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Var3Out"), "Output", "Var3Out", "ResNetBasicBlockOp"); - } - - // make sure Mean/RunningMean and Var/RunningVar share memory - PADDLE_ENFORCE_EQ(ctx->Inputs("Mean1")[0], - ctx->Outputs("Mean1Out")[0], - phi::errors::InvalidArgument( - "Mean1 and Mean1Out should share 
the same memory")); - PADDLE_ENFORCE_EQ(ctx->Inputs("Var1")[0], - ctx->Outputs("Var1Out")[0], - phi::errors::InvalidArgument( - "Var1 and Var1Out should share the same memory")); - PADDLE_ENFORCE_EQ(ctx->Inputs("Mean2")[0], - ctx->Outputs("Mean2Out")[0], - phi::errors::InvalidArgument( - "Mean2 and Mean2Out should share the same memory")); - PADDLE_ENFORCE_EQ(ctx->Inputs("Var2")[0], - ctx->Outputs("Var2Out")[0], - phi::errors::InvalidArgument( - "Var2 and Var2Out should share the same memory")); - - if (has_shortcut) { - PADDLE_ENFORCE_EQ(ctx->Inputs("Mean3")[0], - ctx->Outputs("Mean3Out")[0], - phi::errors::InvalidArgument( - "Mean3 and Mean3Out should share the same memory")); - PADDLE_ENFORCE_EQ(ctx->Inputs("Var3")[0], - ctx->Outputs("Var3Out")[0], - phi::errors::InvalidArgument( - "Var3 and Var3Out should share the same memory")); - } - - // Check dims of inputs - auto data_format = ctx->Attrs().Get("data_format"); - PADDLE_ENFORCE_EQ( - data_format, - "NCHW", - phi::errors::InvalidArgument("The data format must equal to NCHW. " - "But received: the data format " - "= [%s]", - data_format)); - int stride1 = ctx->Attrs().Get("stride1"); - int stride2 = ctx->Attrs().Get("stride2"); - int padding1 = ctx->Attrs().Get("padding1"); - int padding2 = ctx->Attrs().Get("padding2"); - - const auto x1_dims = ctx->GetInputDim("X"); - const auto w1_dims = ctx->GetInputDim("Filter1"); - const auto bn1_param_dims = ctx->GetInputDim("Scale1"); - PADDLE_ENFORCE_EQ( - x1_dims.size(), - 4, - phi::errors::InvalidArgument("The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x1_dims, - x1_dims.size())); - - // Calculate the dims of output1 - int batch = x1_dims[0]; - int output1_channel = w1_dims[0]; - int filter1_size = w1_dims[2]; - int out1_h = (x1_dims[2] + padding1 * 2 - filter1_size) / stride1 + 1; - int out1_w = (x1_dims[3] + padding1 * 2 - filter1_size) / stride1 + 1; - std::vector out1_shape = {batch, output1_channel, out1_h, out1_w}; - - const auto w2_dims = ctx->GetInputDim("Filter2"); - const auto bn2_param_dims = ctx->GetInputDim("Scale2"); - int output2_channel = w2_dims[0]; - int filter2_size = w2_dims[2]; - int out2_h = (out1_h + padding2 * 2 - filter2_size) / stride2 + 1; - int out2_w = (out1_w + padding2 * 2 - filter2_size) / stride2 + 1; - std::vector out2_shape = {batch, output2_channel, out2_h, out2_w}; - - auto y_dims = common::make_ddim(out2_shape); - auto conv1_dims = common::make_ddim(out1_shape); - ctx->SetOutputDim("Y", y_dims); - ctx->SetOutputDim("Conv1", conv1_dims); - ctx->SetOutputDim("SavedMean1", bn1_param_dims); - ctx->SetOutputDim("SavedInvstd1", bn1_param_dims); - ctx->SetOutputDim("Mean1Out", bn1_param_dims); - ctx->SetOutputDim("Var1Out", bn1_param_dims); - ctx->SetOutputDim("Conv2", y_dims); - ctx->SetOutputDim("Conv2Input", conv1_dims); - ctx->SetOutputDim("SavedMean2", bn2_param_dims); - ctx->SetOutputDim("SavedInvstd2", bn2_param_dims); - ctx->SetOutputDim("Mean2Out", bn2_param_dims); - ctx->SetOutputDim("Var2Out", bn2_param_dims); - if (has_shortcut) { - ctx->SetOutputDim("Conv3", y_dims); - ctx->SetOutputDim("SavedMean3", bn2_param_dims); - ctx->SetOutputDim("SavedInvstd3", bn2_param_dims); - ctx->SetOutputDim("Mean3Out", bn2_param_dims); - ctx->SetOutputDim("Var3Out", bn2_param_dims); - } - - bool find_max = ctx->Attrs().Get("find_conv_input_max"); - if (find_max) { - auto max_dims = common::make_ddim({6}); - ctx->SetOutputDim("MaxInput1", max_dims); - 
ctx->SetOutputDim("MaxFilter1", max_dims); - ctx->SetOutputDim("MaxInput2", max_dims); - ctx->SetOutputDim("MaxFilter2", max_dims); - if (has_shortcut) { - ctx->SetOutputDim("MaxInput3", max_dims); - ctx->SetOutputDim("MaxFilter3", max_dims); - } - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { - auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - - // By default, the type of the scale, bias, mean, - // and var tensors should be float when input tensor's dtype is float16. - auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale1")->dtype()), - phi::errors::InvalidArgument("Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias1")->dtype()), - phi::errors::InvalidArgument("Bias input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale2")->dtype()), - phi::errors::InvalidArgument("Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias2")->dtype()), - phi::errors::InvalidArgument("Bias input should be of float type")); - - return phi::KernelKey(input_data_type, ctx.GetPlace()); - } -}; - -class ResNetBasicBlockOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() { - // has_shortcut = True: else: - // X X - // / / - // | | | | - // CONV1 | CONV1 | - // | | | | - // BN1 | BN1 | - // | | | | - // RELU1 | RELU1 | - // | | | | - // CONV2 CONV3 CONV2 | - // | | | | - // BN2 BN3 BN2 | - // \ / \ / - // ADD ADD - // | | - // RELU RELU - // | | - // Y Y - AddInput("X", "Input tensor of conv 1"); - AddInput("Filter1", "Filter tensor of conv 1"); - AddInput("Scale1", "Scale tensor of bn 1"); - AddInput("Bias1", "Bias tensor of bn 1"); - AddInput("Mean1", "Mean tensor of bn 1"); - AddInput("Var1", "Variance tensor of bn 1"); - AddInput("Filter2", "Filter tensor of conv 2"); - AddInput("Scale2", "Scale tensor of bn 2"); - AddInput("Bias2", "Bias tensor of bn 2"); - AddInput("Mean2", "Mean tensor of bn 2"); - AddInput("Var2", "Variance tensor of bn 2"); - AddInput("Filter3", "Filter tensor of conv 3").AsDispensable(); - AddInput("Scale3", "Scale tensor of bn 3").AsDispensable(); - AddInput("Bias3", "Bias tensor of bn 3").AsDispensable(); - AddInput("Mean3", "Mean tensor of bn 3").AsDispensable(); - AddInput("Var3", "Variance tensor of bn 3").AsDispensable(); - AddOutput("Y", "The result of ssd resnet unit"); - AddOutput("Conv1", "The result of conv 1"); - AddOutput("SavedMean1", "Mean of input 1 after conv 1"); - AddOutput("SavedInvstd1", "Invstd of input 1 after conv 1"); - AddOutput("Mean1Out", "Shared memory with Mean1"); - AddOutput("Var1Out", "Shared memory with Var1"); - AddOutput("Conv2", "The result of conv 2"); - AddOutput("Conv2Input", "Conv2 input data"); - AddOutput("SavedMean2", "Mean of input 2 after conv 2"); - AddOutput("SavedInvstd2", "Invstd of input 2 after conv 2"); - AddOutput("Mean2Out", "Shared memory with Mean2"); - AddOutput("Var2Out", "Shared memory with Var2"); - AddOutput("Conv3", "The result of conv 3").AsDispensable(); - AddOutput("SavedMean3", "Mean of input 3 after conv 3").AsDispensable(); - AddOutput("SavedInvstd3", "Invstd of input 3 after conv 3").AsDispensable(); - AddOutput("Mean3Out", "Shared memory with Mean3").AsDispensable(); - AddOutput("Var3Out", 
"Shared memory with Var3").AsDispensable(); - AddOutput("MaxInput1", "The max value of conv1 input tensor") - .AsDispensable(); - AddOutput("MaxFilter1", "The max value of conv1 filter tensor") - .AsDispensable(); - AddOutput("MaxInput2", "The max value of conv2 input tensor") - .AsDispensable(); - AddOutput("MaxFilter2", "The max value of conv2 filter tensor") - .AsDispensable(); - AddOutput("MaxInput3", "The max value of conv3 input tensor") - .AsDispensable(); - AddOutput("MaxFilter3", "The max value of conv3 filter tensor") - .AsDispensable(); - AddAttr("stride1", "Stride of conv1").SetDefault(1); - AddAttr("stride2", "Stride of conv2").SetDefault(1); - AddAttr("stride3", "Stride of conv3").SetDefault(1); - AddAttr("padding1", "Padding of conv1").SetDefault(0); - AddAttr("padding2", "Padding of conv2").SetDefault(0); - AddAttr("padding3", "Padding of conv3").SetDefault(0); - AddAttr("dilation1", "Dilation of conv1").SetDefault(1); - AddAttr("dilation2", "Dilation of conv2").SetDefault(1); - AddAttr("dilation3", "Dilation of conv3").SetDefault(1); - AddAttr("group", "Group of all the 3 conv").SetDefault(1); - AddAttr("momentum", "Momentum of all the 3 bn").SetDefault(0.9); - AddAttr("epsilon", "Epsilon of all the 3 bn").SetDefault(1e-5); - AddAttr("data_format", "").SetDefault("NCHW"); - AddAttr("has_shortcut", "").SetDefault(false); - AddAttr("use_global_stats", "").SetDefault(false); - AddAttr("is_test", - "(bool, default false) Set to true for inference only, false " - "for training. Some layers may run faster when this is true.") - .SetDefault(false); - AddAttr( - "trainable_statistics", - "(bool, default false) Whether to calculate mean and variance " - "in test mode. If setting true in test mode, mean and variace " - "will be calculated by current batch statistics.") - .SetDefault(false); - AddAttr("act_type", "The activation type to be fused.") - .SetDefault("relu"); - AddAttr("find_conv_input_max", - "(bool, default true) Whether to calculate max value of conv " - "input tensor.") - .SetDefault(true); - AddComment(R"DOC( -Fusion op of the basic unit of ssd resnet block. 
-** This is only used for XPU; if there are problems, contact zhangyikun02@baidu.com **
-)DOC");
-  }
-};
-
-template <typename T>
-class ResNetBasicBlockGradOpMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("resnet_basic_block_grad");
-    op->SetInput("X", this->Input("X"));
-    op->SetInput("Filter1", this->Input("Filter1"));
-    op->SetInput("Conv1", this->Output("Conv1"));
-    op->SetInput("Scale1", this->Input("Scale1"));
-    op->SetInput("Bias1", this->Input("Bias1"));
-    op->SetInput("SavedMean1", this->Output("SavedMean1"));
-    op->SetInput("SavedInvstd1", this->Output("SavedInvstd1"));
-    op->SetInput("Filter2", this->Input("Filter2"));
-    op->SetInput("Conv2", this->Output("Conv2"));
-    op->SetInput("Conv2Input", this->Output("Conv2Input"));
-    op->SetInput("Scale2", this->Input("Scale2"));
-    op->SetInput("Bias2", this->Input("Bias2"));
-    op->SetInput("SavedMean2", this->Output("SavedMean2"));
-    op->SetInput("SavedInvstd2", this->Output("SavedInvstd2"));
-    op->SetInput("Filter3", this->Input("Filter3"));
-    op->SetInput("Conv3", this->Output("Conv3"));
-    op->SetInput("Scale3", this->Input("Scale3"));
-    op->SetInput("Bias3", this->Input("Bias3"));
-    op->SetInput("SavedMean3", this->Output("SavedMean3"));
-    op->SetInput("SavedInvstd3", this->Output("SavedInvstd3"));
-    op->SetInput("MaxInput1", this->Output("MaxInput1"));
-    op->SetInput("MaxFilter1", this->Output("MaxFilter1"));
-    op->SetInput("MaxInput2", this->Output("MaxInput2"));
-    op->SetInput("MaxFilter2", this->Output("MaxFilter2"));
-    op->SetInput("MaxInput3", this->Output("MaxInput3"));
-    op->SetInput("MaxFilter3", this->Output("MaxFilter3"));
-    op->SetInput("Y", this->Output("Y"));
-    op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
-
-    op->SetAttrMap(this->Attrs());
-
-    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
-    op->SetOutput(framework::GradVarName("Filter1"),
-                  this->InputGrad("Filter1"));
-    op->SetOutput(framework::GradVarName("Scale1"), this->InputGrad("Scale1"));
-    op->SetOutput(framework::GradVarName("Bias1"), this->InputGrad("Bias1"));
-    op->SetOutput(framework::GradVarName("Filter2"),
-                  this->InputGrad("Filter2"));
-    op->SetOutput(framework::GradVarName("Scale2"), this->InputGrad("Scale2"));
-    op->SetOutput(framework::GradVarName("Bias2"), this->InputGrad("Bias2"));
-    op->SetOutput(framework::GradVarName("Filter3"),
-                  this->InputGrad("Filter3"));
-    op->SetOutput(framework::GradVarName("Scale3"), this->InputGrad("Scale3"));
-    op->SetOutput(framework::GradVarName("Bias3"), this->InputGrad("Bias3"));
-  }
-};
-
-class ResNetBasicBlockOpInferVarType
-    : public framework::PassInDtypeAndVarTypeToOutput {
- protected:
-  std::unordered_map<std::string, std::string>& GetInputOutputWithSameType()
-      const override {
-    static std::unordered_map<std::string, std::string> m{{"X", /*->*/ "Y"}};
-    return m;
-  }
-};
-
-class ResNetBasicBlockGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const {
-    // check input
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ResNetBasicBlockGradOp");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Filter1"), "Input", "Filter1", "ResNetBasicBlockGradOp");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Conv1"), "Input", "Conv1", "ResNetBasicBlockGradOp");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Scale1"), "Input", "Scale1", "ResNetBasicBlockGradOp");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Bias1"), "Input", "Bias1",
"ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedMean1"), - "Input", - "SavedMean1", - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedInvstd1"), - "Input", - "SavedInvstd1", - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Filter2"), "Input", "Filter2", "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Conv2"), "Input", "Conv2", "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Scale2"), "Input", "Scale2", "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Bias2"), "Input", "Bias2", "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedMean2"), - "Input", - "SavedMean2", - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedInvstd2"), - "Input", - "SavedInvstd2", - "ResNetBasicBlockGradOp"); - bool has_shortcut = ctx->Attrs().Get("has_shortcut"); - if (has_shortcut) { - OP_INOUT_CHECK(ctx->HasInput("Filter3"), - "Input", - "Filter3", - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Scale3"), "Input", "Scale3", "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Bias3"), "Input", "Bias3", "ResNetBasicBlockGradOp"); - } - OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - framework::GradVarName("Y"), - "ResNetBasicBlockGradOp"); - - // check output - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Filter1")), - "Output", - framework::GradVarName("Filter1"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Scale1")), - "Output", - framework::GradVarName("Scale1"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias1")), - "Output", - framework::GradVarName("Bias1"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Filter2")), - "Output", - framework::GradVarName("Filter2"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Scale2")), - "Output", - framework::GradVarName("Scale2"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias2")), - "Output", - framework::GradVarName("Bias2"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "ResNetBasicBlockGradOp"); - if (has_shortcut) { - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Filter3")), - "Output", - framework::GradVarName("Filter3"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Scale3")), - "Output", - framework::GradVarName("Scale3"), - "ResNetBasicBlockGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias3")), - "Output", - framework::GradVarName("Bias3"), - "ResNetBasicBlockGradOp"); - } - - const auto x1_dims = ctx->GetInputDim("X"); - const auto filter1_x_dims = ctx->GetInputDim("Filter1"); - const auto param1_dims = ctx->GetInputDim("Scale1"); - const auto filter2_x_dims = ctx->GetInputDim("Filter2"); - const auto param2_dims = ctx->GetInputDim("Scale2"); - ctx->SetOutputDim(framework::GradVarName("X"), x1_dims); - ctx->SetOutputDim(framework::GradVarName("Filter1"), filter1_x_dims); - ctx->SetOutputDim(framework::GradVarName("Scale1"), param1_dims); - ctx->SetOutputDim(framework::GradVarName("Bias1"), param1_dims); - ctx->SetOutputDim(framework::GradVarName("Filter2"), filter2_x_dims); - ctx->SetOutputDim(framework::GradVarName("Scale2"), param2_dims); - 
ctx->SetOutputDim(framework::GradVarName("Bias2"), param2_dims); - if (has_shortcut) { - const auto filter_z_dims = ctx->GetInputDim("Filter3"); - ctx->SetOutputDim(framework::GradVarName("Filter3"), filter_z_dims); - ctx->SetOutputDim(framework::GradVarName("Scale3"), param2_dims); - ctx->SetOutputDim(framework::GradVarName("Bias3"), param2_dims); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { - PADDLE_ENFORCE_NOT_NULL( - ctx.InputVar(framework::GradVarName("Y")), - phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); - - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(resnet_basic_block, - ops::ResNetBasicBlockOp, - ops::ResNetBasicBlockOpMaker, - ops::ResNetBasicBlockOpInferVarType, - ops::ResNetBasicBlockGradOpMaker, - ops::ResNetBasicBlockGradOpMaker); -REGISTER_OPERATOR(resnet_basic_block_grad, ops::ResNetBasicBlockGradOp); diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc deleted file mode 100644 index 50a3b3c46137d..0000000000000 --- a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ /dev/null @@ -1,1007 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifdef PADDLE_WITH_XPU -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/fluid/platform/device/xpu/xpu_header.h" - -namespace paddle { -namespace operators { - -class ResnetBasicBlockAttr { - public: - explicit ResnetBasicBlockAttr(const framework::ExecutionContext& ctx) { - padding1 = ctx.Attr("padding1"); - padding2 = ctx.Attr("padding2"); - padding3 = ctx.Attr("padding3"); - stride1 = ctx.Attr("stride1"); - stride2 = ctx.Attr("stride2"); - stride3 = ctx.Attr("stride3"); - dilation1 = ctx.Attr("dilation1"); - dilation2 = ctx.Attr("dilation2"); - dilation3 = ctx.Attr("dilation3"); - group = ctx.Attr("group"); - - eps = static_cast(ctx.Attr("epsilon")); - momentum = static_cast(ctx.Attr("momentum")); - has_shortcut = ctx.Attr("has_shortcut"); - find_max = ctx.Attr("find_conv_input_max"); - - const auto is_test = ctx.Attr("is_test"); - const auto use_global_stats = ctx.Attr("use_global_stats"); - const auto trainable_stats = ctx.Attr("trainable_statistics"); - bool test_mode = is_test && (!trainable_stats); - global_stats = test_mode || use_global_stats; - - // init shape - auto input1 = ctx.Input("X"); - auto filter1 = ctx.Input("Filter1"); - auto conv1_out = ctx.Output("Conv1"); - auto filter2 = ctx.Input("Filter2"); - auto conv2_out = ctx.Output("Conv2"); - conv1_input_shape = common::vectorize(input1->dims()); - conv1_output_shape = common::vectorize(conv1_out->dims()); - conv1_filter_shape = common::vectorize(filter1->dims()); - conv1_filter_numel = filter1->numel(); - conv1_input_numel = input1->numel(); - conv1_output_numel = conv1_out->numel(); - - conv2_input_shape = common::vectorize(conv1_out->dims()); - conv2_output_shape = common::vectorize(conv2_out->dims()); - conv2_filter_shape = common::vectorize(filter2->dims()); - conv2_filter_numel = filter2->numel(); - conv2_input_numel = conv1_out->numel(); - conv2_output_numel = conv2_out->numel(); - - if (has_shortcut) { - auto filter3 = ctx.Input("Filter3"); - auto conv3_out = ctx.Output("Conv3"); - conv3_input_shape = common::vectorize(input1->dims()); - conv3_output_shape = common::vectorize(conv3_out->dims()); - conv3_filter_shape = common::vectorize(filter3->dims()); - conv3_filter_numel = filter3->numel(); - conv3_input_numel = input1->numel(); - conv3_output_numel = conv3_out->numel(); - } - } - - int padding1; - int padding2; - int padding3; - int stride1; - int stride2; - int stride3; - int dilation1; - int dilation2; - int dilation3; - int group; - - double eps; - double momentum; - - bool has_shortcut; - bool find_max; - bool global_stats; - - std::vector conv1_input_shape; - std::vector conv1_output_shape; - std::vector conv1_filter_shape; - std::vector conv2_input_shape; - std::vector conv2_output_shape; - std::vector conv2_filter_shape; - std::vector conv3_input_shape; - std::vector conv3_output_shape; - std::vector conv3_filter_shape; - - int conv1_filter_numel; - int conv2_filter_numel; - int conv3_filter_numel; - int conv1_input_numel; - int conv2_input_numel; - int conv3_input_numel; - int conv1_output_numel; - int conv2_output_numel; - int conv3_output_numel; -}; - -class ResnetBasicBlockGradAttr { - public: - explicit ResnetBasicBlockGradAttr(const framework::ExecutionContext& ctx) { - padding1 = ctx.Attr("padding1"); - padding2 = ctx.Attr("padding2"); - padding3 = ctx.Attr("padding3"); - stride1 = ctx.Attr("stride1"); - stride2 = ctx.Attr("stride2"); - stride3 = ctx.Attr("stride3"); - dilation1 = ctx.Attr("dilation1"); - dilation2 = 
ctx.Attr("dilation2"); - dilation3 = ctx.Attr("dilation3"); - group = ctx.Attr("group"); - - has_shortcut = ctx.Attr("has_shortcut"); - find_max = ctx.Attr("find_conv_input_max"); - - // init shape - auto input1 = ctx.Input("X"); - auto filter1 = ctx.Input("Filter1"); - auto conv1_out = ctx.Input("Conv1"); - auto filter2 = ctx.Input("Filter2"); - auto conv2_out = ctx.Input("Conv2"); - conv1_input_shape = common::vectorize(input1->dims()); - conv1_output_shape = common::vectorize(conv1_out->dims()); - conv1_filter_shape = common::vectorize(filter1->dims()); - conv1_filter_numel = filter1->numel(); - conv1_input_numel = input1->numel(); - conv1_output_numel = conv1_out->numel(); - - conv2_input_shape = common::vectorize(conv1_out->dims()); - conv2_output_shape = common::vectorize(conv2_out->dims()); - conv2_filter_shape = common::vectorize(filter2->dims()); - conv2_filter_numel = filter2->numel(); - conv2_input_numel = conv1_out->numel(); - conv2_output_numel = conv2_out->numel(); - - if (has_shortcut) { - auto filter3 = ctx.Input("Filter3"); - auto conv3_out = ctx.Input("Conv3"); - conv3_input_shape = common::vectorize(input1->dims()); - conv3_output_shape = common::vectorize(conv3_out->dims()); - conv3_filter_shape = common::vectorize(filter3->dims()); - conv3_filter_numel = filter3->numel(); - conv3_input_numel = input1->numel(); - conv3_output_numel = conv3_out->numel(); - } - } - - int padding1; - int padding2; - int padding3; - int stride1; - int stride2; - int stride3; - int dilation1; - int dilation2; - int dilation3; - int group; - - bool has_shortcut; - bool find_max; - - std::vector conv1_input_shape; - std::vector conv1_output_shape; - std::vector conv1_filter_shape; - std::vector conv2_input_shape; - std::vector conv2_output_shape; - std::vector conv2_filter_shape; - std::vector conv3_input_shape; - std::vector conv3_output_shape; - std::vector conv3_filter_shape; - - int conv1_filter_numel; - int conv2_filter_numel; - int conv3_filter_numel; - int conv1_input_numel; - int conv2_input_numel; - int conv3_input_numel; - int conv1_output_numel; - int conv2_output_numel; - int conv3_output_numel; -}; - -template -static inline void xpu_conv2d(xpu::Context* ctx, - const T* input_data, - const T* filter_data, - T* output_data, - float* input_max_data, - float* filter_max_data, - const std::vector& input_shape, - const std::vector& filter_shape, - int padding, - int stride, - int dilation, - int group) { - std::vector ksize{filter_shape[2], filter_shape[3]}; - std::vector stride_vec{stride, stride}; - std::vector dilation_vec{dilation, dilation}; - std::vector padding_vec{padding, padding}; - int N = input_shape[0]; - int C = input_shape[1]; - int H = input_shape[2]; - int W = input_shape[3]; - - int r = xpu::conv2d(ctx, - input_data, - filter_data, - output_data, - N, - C, - H, - W, - filter_shape[0], - ksize, - stride_vec, - padding_vec, - dilation_vec, - group, - input_max_data, - filter_max_data, - nullptr, - true); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d"); -} - -template -static inline void xpu_conv2d_grad(xpu::Context* ctx, - const T* input_data, - const T* filter_data, - const T* output_grad_data, - T* input_grad_data, - T* filter_grad_data, - const float* input_max_data, - const float* filter_max_data, - const std::vector& input_shape, - const std::vector& filter_shape, - int padding, - int stride, - int dilation, - int group) { - std::vector ksize{filter_shape[2], filter_shape[3]}; - std::vector stride_vec{stride, stride}; - std::vector dilation_vec{dilation, dilation}; - 
std::vector padding_vec{padding, padding}; - int N = input_shape[0]; - int C = input_shape[1]; - int H = input_shape[2]; - int W = input_shape[3]; - - int r = xpu::conv2d_grad(ctx, - input_data, - filter_data, - output_grad_data, - input_grad_data, - filter_grad_data, - N, - C, - H, - W, - filter_shape[0], - ksize, - stride_vec, - padding_vec, - dilation_vec, - group, - input_max_data, - filter_max_data, - nullptr, - nullptr, - nullptr, - true); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_grad"); -} - -template -class ResNetBasicBlockXPUKernel : public framework::OpKernel { - public: - using XPUType = typename XPUTypeTrait::Type; - - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), - true, - phi::errors::PreconditionNotMet("It must use XPUPlace.")); - - // input - const phi::DenseTensor* x = ctx.Input("X"); - const phi::DenseTensor* filter1 = ctx.Input("Filter1"); - const phi::DenseTensor* scale1 = ctx.Input("Scale1"); - const phi::DenseTensor* bias1 = ctx.Input("Bias1"); - const phi::DenseTensor* filter2 = ctx.Input("Filter2"); - const phi::DenseTensor* scale2 = ctx.Input("Scale2"); - const phi::DenseTensor* bias2 = ctx.Input("Bias2"); - - // output - phi::DenseTensor* conv1_output = ctx.Output("Conv1"); - phi::DenseTensor* conv2_output = ctx.Output("Conv2"); - phi::DenseTensor* conv2_input = ctx.Output("Conv2Input"); - phi::DenseTensor* output = ctx.Output("Y"); - - auto place = ctx.GetPlace(); - auto x_data = reinterpret_cast(x->data()); - auto conv1_filter_data = - reinterpret_cast(filter1->data()); - auto conv2_filter_data = - reinterpret_cast(filter2->data()); - auto conv1_output_data = - reinterpret_cast(conv1_output->mutable_data(place)); - auto conv2_input_data = - reinterpret_cast(conv2_input->mutable_data(place)); - auto conv2_output_data = - reinterpret_cast(conv2_output->mutable_data(place)); - auto scale1_data = scale1->data(); - auto scale2_data = scale2->data(); - auto bias1_data = bias1->data(); - auto bias2_data = bias2->data(); - auto output_data = - reinterpret_cast(output->mutable_data(place)); - - float* conv1_input_max_data = nullptr; - float* conv1_filter_max_data = nullptr; - float* conv2_input_max_data = nullptr; - float* conv2_filter_max_data = nullptr; - float* conv3_input_max_data = nullptr; - float* conv3_filter_max_data = nullptr; - - ResnetBasicBlockAttr attr(ctx); - - // init find max - if (attr.find_max) { - phi::DenseTensor* max_input1 = ctx.Output("MaxInput1"); - phi::DenseTensor* max_filter1 = - ctx.Output("MaxFilter1"); - conv1_input_max_data = max_input1->mutable_data(place); - conv1_filter_max_data = max_filter1->mutable_data(place); - - phi::DenseTensor* max_input2 = ctx.Output("MaxInput2"); - phi::DenseTensor* max_filter2 = - ctx.Output("MaxFilter2"); - conv2_input_max_data = max_input2->mutable_data(place); - conv2_filter_max_data = max_filter2->mutable_data(place); - - if (attr.has_shortcut) { - phi::DenseTensor* max_input3 = - ctx.Output("MaxInput3"); - phi::DenseTensor* max_filter3 = - ctx.Output("MaxFilter3"); - conv3_input_max_data = max_input3->mutable_data(place); - conv3_filter_max_data = max_filter3->mutable_data(place); - } - } - - auto& dev_ctx = ctx.template device_context(); - xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); - int r = XPU_SUCCESS; - - // 1. 
short - const XPUType* z_out_data = nullptr; - if (attr.has_shortcut) { - phi::DenseTensor* conv3_out = ctx.Output("Conv3"); - const phi::DenseTensor* filter3 = ctx.Input("Filter3"); - auto conv3_filter_data = - reinterpret_cast(filter3->data()); - auto conv3_output_data = - reinterpret_cast(conv3_out->mutable_data(place)); - - XPUType* conv3_input_l3_data = nullptr; - XPUType* conv3_filter_l3_data = - RAII_GUARD.alloc_l3_or_gm(attr.conv3_filter_numel); - - if (attr.find_max) { - r = xpu::findmax_copy_fusion(dev_ctx.x_context(), - x_data, - conv3_input_max_data, - conv3_input_l3_data, - attr.conv3_input_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "findmax_copy_fusion"); - - r = xpu::findmax_copy_fusion(dev_ctx.x_context(), - conv3_filter_data, - conv3_filter_max_data, - conv3_filter_l3_data, - attr.conv3_filter_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "findmax_copy_fusion"); - } - - xpu_conv2d(dev_ctx.x_context(), - conv3_input_l3_data != nullptr ? conv3_input_l3_data : x_data, - conv3_filter_l3_data, - conv3_output_data, - conv3_input_max_data, - conv3_filter_max_data, - attr.conv3_input_shape, - attr.conv3_filter_shape, - attr.padding3, - attr.stride3, - attr.dilation3, - attr.group); - - // bn3 - const phi::DenseTensor* scale3 = ctx.Input("Scale3"); - const phi::DenseTensor* bias3 = ctx.Input("Bias3"); - auto bias3_data = bias3->data(); - auto scale3_data = scale3->data(); - - auto bn3_output_data = RAII_GUARD.alloc(attr.conv3_output_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(bn3_output_data); - - if (!attr.global_stats) { - phi::DenseTensor* saved_mean3 = - ctx.Output("SavedMean3"); - phi::DenseTensor* saved_invstd3 = - ctx.Output("SavedInvstd3"); - phi::DenseTensor* running_mean3 = - ctx.Output("Mean3Out"); - phi::DenseTensor* running_var3 = - ctx.Output("Var3Out"); - - auto saved_mean3_data = saved_mean3->mutable_data(place); - auto saved_invstd3_data = saved_invstd3->mutable_data(place); - auto running_mean3_data = running_mean3->mutable_data(place); - auto running_var3_data = running_var3->mutable_data(place); - - r = xpu::batch_norm_fusion(dev_ctx.x_context(), - conv3_output_data, - bn3_output_data, - attr.conv3_output_shape[0], - attr.conv3_output_shape[1], - attr.conv3_output_shape[3], - attr.conv3_output_shape[3], - attr.eps, - attr.momentum, - scale3_data, - bias3_data, - saved_mean3_data, - saved_invstd3_data, - running_mean3_data, - running_var3_data, - true, - nullptr, - xpu::Activation_t::LINEAR, - nullptr, - 0); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_fusion"); - } else { - const auto* mean3 = ctx.Input("Mean3"); - const auto* var3 = ctx.Input("Var3"); - const auto* mean3_data = mean3->data(); - const auto* variance3_data = var3->data(); - r = xpu::batch_norm_infer(dev_ctx.x_context(), - conv3_output_data, - bn3_output_data, - attr.conv3_output_shape[0], - attr.conv3_output_shape[1], - attr.conv3_output_shape[2], - attr.conv3_output_shape[3], - attr.eps, - scale3_data, - bias3_data, - mean3_data, - variance3_data, - true); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_infer"); - } - z_out_data = reinterpret_cast(bn3_output_data); - } else { - z_out_data = x_data; - } - - // 2. 
conv1 - XPUType* conv1_input_l3_data = nullptr; - XPUType* conv1_filter_l3_data = - RAII_GUARD.alloc_l3_or_gm(attr.conv1_filter_numel); - if (attr.find_max) { - r = xpu::findmax_copy_fusion(dev_ctx.x_context(), - x_data, - conv1_input_max_data, - conv1_input_l3_data, - attr.conv1_input_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "findmax_copy_fusion"); - - r = xpu::findmax_copy_fusion(dev_ctx.x_context(), - conv1_filter_data, - conv1_filter_max_data, - conv1_filter_l3_data, - attr.conv1_filter_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "findmax_copy_fusion"); - } - xpu_conv2d(dev_ctx.x_context(), - conv1_input_l3_data != nullptr ? conv1_input_l3_data : x_data, - conv1_filter_l3_data, - conv1_output_data, - conv1_input_max_data, - conv1_filter_max_data, - attr.conv1_input_shape, - attr.conv1_filter_shape, - attr.padding1, - attr.stride1, - attr.dilation1, - attr.group); - - // 3. bn1 + relu - if (!attr.global_stats) { - phi::DenseTensor* saved_mean1 = - ctx.Output("SavedMean1"); - phi::DenseTensor* saved_invstd1 = - ctx.Output("SavedInvstd1"); - phi::DenseTensor* running_mean1 = - ctx.Output("Mean1Out"); - phi::DenseTensor* running_var1 = ctx.Output("Var1Out"); - - auto saved_mean1_data = saved_mean1->mutable_data(place); - auto saved_invstd1_data = saved_invstd1->mutable_data(place); - auto running_mean1_data = running_mean1->mutable_data(place); - auto running_var1_data = running_var1->mutable_data(place); - - r = xpu::batch_norm_fusion(dev_ctx.x_context(), - conv1_output_data, - conv2_input_data, - attr.conv1_output_shape[0], - attr.conv1_output_shape[1], - attr.conv1_output_shape[2], - attr.conv1_output_shape[3], - attr.eps, - attr.momentum, - scale1_data, - bias1_data, - saved_mean1_data, - saved_invstd1_data, - running_mean1_data, - running_var1_data, - true, - nullptr, - xpu::Activation_t::RELU, - nullptr, - 0); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_fusion"); - } else { - // bn --> relu - auto bn1_output_data = RAII_GUARD.alloc(attr.conv1_output_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(bn1_output_data); - - const auto* mean1 = ctx.Input("Mean1"); - const auto* var1 = ctx.Input("Var1"); - const auto* mean_data = mean1->data(); - const auto* variance_data = var1->data(); - r = xpu::batch_norm_infer(dev_ctx.x_context(), - conv1_output_data, - bn1_output_data, - attr.conv1_output_shape[0], - attr.conv1_output_shape[1], - attr.conv1_output_shape[2], - attr.conv1_output_shape[3], - attr.eps, - scale1_data, - bias1_data, - mean_data, - variance_data, - true); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_infer"); - - r = xpu::relu(dev_ctx.x_context(), - bn1_output_data, - conv2_input_data, - attr.conv1_output_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu"); - } - - // 4. 
conv2 - XPUType* conv2_input_l3_data = nullptr; - XPUType* conv2_filter_l3_data = - RAII_GUARD.alloc_l3_or_gm(attr.conv2_filter_numel); - if (attr.find_max) { - phi::DenseTensor* max_input2 = ctx.Output("MaxInput2"); - phi::DenseTensor* max_filter2 = - ctx.Output("MaxFilter2"); - conv2_input_max_data = max_input2->mutable_data(place); - conv2_filter_max_data = max_filter2->mutable_data(place); - - r = xpu::findmax_copy_fusion(dev_ctx.x_context(), - conv2_input_data, - conv2_input_max_data, - conv2_input_l3_data, - attr.conv2_input_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "findmax_copy_fusion"); - - r = xpu::findmax_copy_fusion(dev_ctx.x_context(), - conv2_filter_data, - conv2_filter_max_data, - conv2_filter_l3_data, - attr.conv2_filter_numel); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "findmax_copy_fusion"); - } - xpu_conv2d( - dev_ctx.x_context(), - conv2_input_l3_data != nullptr ? conv2_input_l3_data : conv2_input_data, - conv2_filter_l3_data, - conv2_output_data, - conv2_input_max_data, - conv2_filter_max_data, - attr.conv2_input_shape, - attr.conv2_filter_shape, - attr.padding2, - attr.stride2, - attr.dilation2, - attr.group); - - // 5. bn2 - if (!attr.global_stats) { - phi::DenseTensor* saved_mean2 = - ctx.Output("SavedMean2"); - phi::DenseTensor* saved_var2 = - ctx.Output("SavedInvstd2"); - phi::DenseTensor* running_mean2 = - ctx.Output("Mean2Out"); - phi::DenseTensor* running_var2 = ctx.Output("Var2Out"); - - auto saved_mean2_data = saved_mean2->mutable_data(place); - auto saved_var2_data = saved_var2->mutable_data(place); - auto running_mean2_data = running_mean2->mutable_data(place); - auto running_var2_data = running_var2->mutable_data(place); - - r = xpu::batch_norm_fusion(dev_ctx.x_context(), - conv2_output_data, - output_data, - attr.conv2_output_shape[0], - attr.conv2_output_shape[1], - attr.conv2_output_shape[2], - attr.conv2_output_shape[3], - attr.eps, - attr.momentum, - scale2_data, - bias2_data, - saved_mean2_data, - saved_var2_data, - running_mean2_data, - running_var2_data, - true, - z_out_data, - xpu::Activation_t::RELU, - nullptr, - 0); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_fusion"); - } else { - auto bn2_out_data = RAII_GUARD.alloc(attr.conv2_output_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(bn2_out_data); - - const auto* mean2 = ctx.Input("Mean2"); - const auto* var2 = ctx.Input("Var2"); - const auto* mean_data = mean2->data(); - const auto* variance_data = var2->data(); - r = xpu::batch_norm_infer(dev_ctx.x_context(), - conv2_output_data, - bn2_out_data, - attr.conv2_output_shape[0], - attr.conv2_output_shape[1], - attr.conv2_output_shape[2], - attr.conv2_output_shape[3], - attr.eps, - scale2_data, - bias2_data, - mean_data, - variance_data, - true); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_infer"); - - r = xpu::add_activation_fusion(dev_ctx.x_context(), - bn2_out_data, - z_out_data, - output_data, - output->numel(), - nullptr, - nullptr, - nullptr, - xpu::Activation_t::RELU); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "add_activation_fusion"); - } - } -}; - -template -class ResNetBasicBlockGradXPUKernel : public framework::OpKernel { - public: - using XPUType = typename XPUTypeTrait::Type; - - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), - true, - phi::errors::PreconditionNotMet("It must use XPUPlace.")); - - const phi::DenseTensor* y_grad = - ctx.Input(framework::GradVarName("Y")); - const phi::DenseTensor* y = ctx.Input("Y"); - - const phi::DenseTensor* x = ctx.Input("X"); - const 
phi::DenseTensor* filter1 = ctx.Input("Filter1"); - const phi::DenseTensor* scale1 = ctx.Input("Scale1"); - const phi::DenseTensor* filter2 = ctx.Input("Filter2"); - const phi::DenseTensor* scale2 = ctx.Input("Scale2"); - const phi::DenseTensor* saved_mean1 = - ctx.Input("SavedMean1"); - const phi::DenseTensor* saved_invstd1 = - ctx.Input("SavedInvstd1"); - const phi::DenseTensor* saved_mean2 = - ctx.Input("SavedMean2"); - const phi::DenseTensor* saved_invstd2 = - ctx.Input("SavedInvstd2"); - const phi::DenseTensor* conv1_out = ctx.Input("Conv1"); - const phi::DenseTensor* conv2_out = ctx.Input("Conv2"); - const phi::DenseTensor* conv2_input = - ctx.Input("Conv2Input"); - - const phi::DenseTensor* filter3 = ctx.Input("Filter3"); - const phi::DenseTensor* conv3_out = ctx.Input("Conv3"); - const phi::DenseTensor* scale3 = ctx.Input("Scale3"); - const phi::DenseTensor* saved_mean3 = - ctx.Input("SavedMean3"); - const phi::DenseTensor* saved_invstd3 = - ctx.Input("SavedInvstd3"); - - const phi::DenseTensor* conv1_input_max = - ctx.Input("MaxInput1"); - const phi::DenseTensor* conv1_filter_max = - ctx.Input("MaxFilter1"); - const phi::DenseTensor* conv2_input_max = - ctx.Input("MaxInput2"); - const phi::DenseTensor* conv2_filter_max = - ctx.Input("MaxFilter2"); - const phi::DenseTensor* conv3_input_max = - ctx.Input("MaxInput3"); - const phi::DenseTensor* conv3_filter_max = - ctx.Input("MaxFilter3"); - - phi::DenseTensor* x_grad = - ctx.Output(framework::GradVarName("X")); - phi::DenseTensor* filter1_grad = - ctx.Output(framework::GradVarName("Filter1")); - phi::DenseTensor* scale1_grad = - ctx.Output(framework::GradVarName("Scale1")); - phi::DenseTensor* bias1_grad = - ctx.Output(framework::GradVarName("Bias1")); - phi::DenseTensor* filter2_grad = - ctx.Output(framework::GradVarName("Filter2")); - phi::DenseTensor* scale2_grad = - ctx.Output(framework::GradVarName("Scale2")); - phi::DenseTensor* bias2_grad = - ctx.Output(framework::GradVarName("Bias2")); - phi::DenseTensor* filter3_grad = - ctx.Output(framework::GradVarName("Filter3")); - phi::DenseTensor* scale3_grad = - ctx.Output(framework::GradVarName("Scale3")); - phi::DenseTensor* bias3_grad = - ctx.Output(framework::GradVarName("Bias3")); - - // attrs - ResnetBasicBlockGradAttr attr(ctx); - auto place = ctx.GetPlace(); - - const auto* y_grad_data = - reinterpret_cast(y_grad->data()); - const auto* y_data = reinterpret_cast(y->data()); - const auto* x_data = reinterpret_cast(x->data()); - const auto* conv1_output_data = - reinterpret_cast(conv1_out->data()); - const auto* conv1_filter_data = - reinterpret_cast(filter1->data()); - const auto* conv2_input_data = - reinterpret_cast(conv2_input->data()); - const auto* conv2_output_data = - reinterpret_cast(conv2_out->data()); - const auto* conv2_filter_data = - reinterpret_cast(filter2->data()); - - const auto* scale2_data = scale2->data(); - const auto* saved_mean2_data = saved_mean2->data(); - const auto* saved_invstd2_data = saved_invstd2->data(); - const auto* scale1_data = scale1->data(); - const auto* saved_mean1_data = saved_mean1->data(); - const auto* saved_invstd1_data = saved_invstd1->data(); - auto* scale2_grad_data = scale2_grad->mutable_data(place); - auto* bias2_grad_data = bias2_grad->mutable_data(place); - - const float* conv1_input_max_data = nullptr; - const float* conv1_filter_max_data = nullptr; - const float* conv2_input_max_data = nullptr; - const float* conv2_filter_max_data = nullptr; - const float* conv3_input_max_data = nullptr; - const float* conv3_filter_max_data 
= nullptr; - if (attr.find_max) { - conv1_input_max_data = - reinterpret_cast(conv1_input_max->data()); - conv1_filter_max_data = - reinterpret_cast(conv1_filter_max->data()); - conv2_input_max_data = - reinterpret_cast(conv2_input_max->data()); - conv2_filter_max_data = - reinterpret_cast(conv2_filter_max->data()); - if (attr.has_shortcut) { - conv3_input_max_data = - reinterpret_cast(conv3_input_max->data()); - conv3_filter_max_data = - reinterpret_cast(conv3_filter_max->data()); - } - } - - auto& dev_ctx = ctx.template device_context(); - xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); - int r = XPU_SUCCESS; - - // 0. bn2, bn2_fusion grad - auto conv2_output_grad_data = - RAII_GUARD.alloc(attr.conv2_output_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(conv2_output_grad_data); - - XPUType* z_output_grad_data = nullptr; - XPUType* z_grad_data = nullptr; - if (!attr.has_shortcut) { - z_output_grad_data = RAII_GUARD.alloc(attr.conv1_input_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(z_output_grad_data); - z_grad_data = z_output_grad_data; - } else { - z_output_grad_data = RAII_GUARD.alloc(attr.conv3_output_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(z_output_grad_data); - - z_grad_data = RAII_GUARD.alloc(attr.conv1_input_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(z_grad_data); - } - - r = xpu::batch_norm_grad_fusion(dev_ctx.x_context(), - conv2_output_data, - y_data, - y_grad_data, - conv2_output_grad_data, - attr.conv2_output_shape[0], - attr.conv2_output_shape[1], - attr.conv2_output_shape[2], - attr.conv2_output_shape[3], - scale2_data, - saved_mean2_data, - saved_invstd2_data, - scale2_grad_data, - bias2_grad_data, - true, - z_output_grad_data, - xpu::Activation_t::RELU, - nullptr, - 0); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_grad_fusion"); - - if (attr.has_shortcut) { - // bn3 grad - const auto* conv3_output_data = - reinterpret_cast(conv3_out->data()); - const auto* scale3_data = scale3->data(); - const auto* saved_mean3_data = saved_mean3->data(); - const auto* saved_invstd3_data = saved_invstd3->data(); - auto* scale3_grad_data = scale3_grad->mutable_data(place); - auto* bias3_grad_data = bias3_grad->mutable_data(place); - auto* conv3_output_grad_data = - RAII_GUARD.alloc(attr.conv3_output_numel); - - r = xpu::batch_norm_grad(dev_ctx.x_context(), - conv3_output_data, - z_output_grad_data, - conv3_output_grad_data, - attr.conv3_output_shape[0], - attr.conv3_output_shape[1], - attr.conv3_output_shape[2], - attr.conv3_output_shape[3], - scale3_data, - saved_mean3_data, - saved_invstd3_data, - scale3_grad_data, - bias3_grad_data, - true); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_grad"); - - // conv3 grad - auto* conv3_filter_grad_data = - reinterpret_cast(filter3_grad->mutable_data(place)); - auto* conv3_filter_data = - reinterpret_cast(filter3->data()); - xpu_conv2d_grad(dev_ctx.x_context(), - x_data, - conv3_filter_data, - conv3_output_grad_data, - z_grad_data, - conv3_filter_grad_data, - conv3_input_max_data, - conv3_filter_max_data, - attr.conv3_input_shape, - attr.conv3_filter_shape, - attr.padding3, - attr.stride3, - attr.dilation3, - attr.group); - } - - // 2. 
conv2_grad - auto* conv2_filter_grad_data = - reinterpret_cast(filter2_grad->mutable_data(place)); - auto* conv2_input_grad_data = - RAII_GUARD.alloc(attr.conv2_input_numel); - xpu_conv2d_grad(dev_ctx.x_context(), - conv2_input_data, - conv2_filter_data, - conv2_output_grad_data, - conv2_input_grad_data, - conv2_filter_grad_data, - conv2_input_max_data, - conv2_filter_max_data, - attr.conv2_input_shape, - attr.conv2_filter_shape, - attr.padding2, - attr.stride2, - attr.dilation2, - attr.group); - - // 3. b1 grad - auto* conv1_output_grad_data = - RAII_GUARD.alloc(attr.conv1_output_numel); - PADDLE_ENFORCE_XDNN_NOT_NULL(conv1_output_grad_data); - auto* scale1_grad_data = scale1_grad->mutable_data(ctx.GetPlace()); - auto* bias1_grad_data = bias1_grad->mutable_data(ctx.GetPlace()); - r = xpu::batch_norm_grad_fusion(dev_ctx.x_context(), - conv1_output_data, - conv2_input_data, - conv2_input_grad_data, - conv1_output_grad_data, - attr.conv1_output_shape[0], - attr.conv1_output_shape[1], - attr.conv1_output_shape[2], - attr.conv1_output_shape[3], - scale1_data, - saved_mean1_data, - saved_invstd1_data, - scale1_grad_data, - bias1_grad_data, - true, - nullptr, - xpu::Activation_t::RELU, - nullptr, - 0); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_grad_fusion"); - - // 4. conv1_grad - auto* x_grad_data = - reinterpret_cast(x_grad->mutable_data(place)); - auto* conv1_filter_grad_data = - reinterpret_cast(filter1_grad->mutable_data(place)); - xpu_conv2d_grad(dev_ctx.x_context(), - x_data, - conv1_filter_data, - conv1_output_grad_data, - x_grad_data, - conv1_filter_grad_data, - conv1_input_max_data, - conv1_filter_max_data, - attr.conv1_input_shape, - attr.conv1_filter_shape, - attr.padding1, - attr.stride1, - attr.dilation1, - attr.group); - - // add z_grad to x_grad - r = xpu::add( - dev_ctx.x_context(), x_grad_data, z_grad_data, x_grad_data, x->numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "add"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -PD_REGISTER_STRUCT_KERNEL(resnet_basic_block, - XPU, - ALL_LAYOUT, - ops::ResNetBasicBlockXPUKernel, - float) {} -PD_REGISTER_STRUCT_KERNEL(resnet_basic_block_grad, - XPU, - ALL_LAYOUT, - ops::ResNetBasicBlockGradXPUKernel, - float) {} -#endif diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc deleted file mode 100644 index d4e9b3f8e4525..0000000000000 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ /dev/null @@ -1,465 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/common/float16.h" - -namespace paddle { -namespace operators { - -// Shape of bitmask -static framework::DDim GetBitmaskDims(std::vector out_shape) { - int c = out_shape.back(); - int64_t nhw = std::accumulate(out_shape.begin(), - out_shape.end(), - 1, - std::multiplies()) / // NOLINT - c; - int32_t c_int32_elems = ((c + 63) & ~63) / 32; - int32_t nhw_int32_elems = static_cast(((nhw + 31) & ~31)); - std::vector bitmask_shape = {nhw_int32_elems, c_int32_elems, 1}; - return common::make_ddim(bitmask_shape); -} - -class ResNetUnitOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - // Check input - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ResNetUnitOp"); - OP_INOUT_CHECK( - ctx->HasInput("FilterX"), "Input", "FilterX", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("ScaleX"), "Input", "ScaleX", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("BiasX"), "Input", "BiasX", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("MeanX"), "Input", "MeanX", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("VarX"), "Input", "VarX", "ResNetUnitOp"); - - bool fuse_add = ctx->Attrs().Get("fuse_add"); - bool has_shortcut = ctx->Attrs().Get("has_shortcut"); - if (fuse_add || has_shortcut) { - OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", "ResNetUnitOp"); - } - if (has_shortcut) { - OP_INOUT_CHECK( - ctx->HasInput("FilterZ"), "Input", "FilterZ", "ResNetUnitOp"); - OP_INOUT_CHECK( - ctx->HasInput("ScaleZ"), "Input", "ScaleZ", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("BiasZ"), "Input", "BiasZ", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("MeanZ"), "Input", "MeanZ", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasInput("VarZ"), "Input", "VarZ", "ResNetUnitOp"); - } - - // Check output - OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "ResNetUnitOp"); - OP_INOUT_CHECK( - ctx->HasOutput("BitMask"), "Output", "BitMask", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasOutput("ConvX"), "Output", "ConvX", "ResNetUnitOp"); - OP_INOUT_CHECK( - ctx->HasOutput("SavedMeanX"), "Output", "SavedMeanX", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedInvstdX"), - "Output", - "SavedInvstdX", - "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasOutput("RunningMeanX"), - "Output", - "RunningMeanX", - "ResNetUnitOp"); - OP_INOUT_CHECK( - ctx->HasOutput("RunningVarX"), "Output", "RunningVarX", "ResNetUnitOp"); - if (has_shortcut) { - OP_INOUT_CHECK( - ctx->HasOutput("ConvZ"), "Output", "ConvZ", "ResNetUnitOp"); - OP_INOUT_CHECK( - ctx->HasOutput("SavedMeanZ"), "Output", "SavedMeanZ", "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasOutput("SavedInvstdZ"), - "Output", - "SavedInvstdZ", - "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasOutput("RunningMeanZ"), - "Output", - "RunningMeanZ", - "ResNetUnitOp"); - OP_INOUT_CHECK(ctx->HasOutput("RunningVarZ"), - "Output", - "RunningVarZ", - "ResNetUnitOp"); - } - - // make sure Mean/RunningMean and Var/RunningVar share memory - PADDLE_ENFORCE_EQ( - ctx->Inputs("MeanX")[0], - ctx->Outputs("RunningMeanX")[0], - phi::errors::InvalidArgument( - "MeanX and RunningMeanX should share the same memory")); - PADDLE_ENFORCE_EQ(ctx->Inputs("VarX")[0], - ctx->Outputs("RunningVarX")[0], - phi::errors::InvalidArgument( - "VarX and RunningVarX should share the same memory")); - if (has_shortcut) { - PADDLE_ENFORCE_EQ( - ctx->Inputs("MeanZ")[0], - ctx->Outputs("RunningMeanZ")[0], - 
phi::errors::InvalidArgument( - "MeanZ and RunningMeanZ should share the same memory")); - PADDLE_ENFORCE_EQ( - ctx->Inputs("VarZ")[0], - ctx->Outputs("RunningVarZ")[0], - phi::errors::InvalidArgument( - "VarZ and RunningVarZ should share the same memory")); - } - - // Check dims of inputs - const auto x_dims = ctx->GetInputDim("X"); - const auto w_dims = ctx->GetInputDim("FilterX"); - std::vector bn_param_shape = - common::vectorize(ctx->GetInputDim("ScaleX")); - if (1 == bn_param_shape.size()) { - bn_param_shape = {1, 1, 1, bn_param_shape[0]}; - } - framework::DDim bn_param_dims = common::make_ddim(bn_param_shape); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 4, - phi::errors::InvalidArgument("The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); - PADDLE_ENFORCE_EQ( - w_dims.size(), - 4, - phi::errors::InvalidArgument("The dimensions of filter " - "must equal to 4." - "But received: the shape of filter " - "= [%s], the dimension of filter = [%d] ", - w_dims, - w_dims.size())); - PADDLE_ENFORCE_EQ(bn_param_dims.size(), - 4, - phi::errors::InvalidArgument( - "The dimensions of bn param " - "must equal to 4." - "But received: the shape of bn param " - "= [%s], the dimension of bn param = [%d] ", - bn_param_dims, - bn_param_dims.size())); - auto data_format = ctx->Attrs().Get("data_format"); - bool is_nchw = (data_format == "NCHW"); - // Calculate the dims of outputs - int batch = x_dims[0]; - int output_channel = w_dims[0]; - int filter_size = w_dims[2]; - int stride = ctx->Attrs().Get("stride"); - int padding = ctx->Attrs().Get("padding"); - std::vector out_shape; - out_shape.push_back(batch); - if (is_nchw) { - int out_h = (x_dims[2] + padding * 2 - filter_size) / stride + 1; - int out_w = (x_dims[3] + padding * 2 - filter_size) / stride + 1; - out_shape.push_back(output_channel); - out_shape.push_back(out_h); - out_shape.push_back(out_w); - } else { - int out_h = (x_dims[1] + padding * 2 - filter_size) / stride + 1; - int out_w = (x_dims[2] + padding * 2 - filter_size) / stride + 1; - out_shape.push_back(out_h); - out_shape.push_back(out_w); - out_shape.push_back(output_channel); - } - - auto y_dims = common::make_ddim(out_shape); - auto bitmask_dims = GetBitmaskDims(out_shape); - // Set dims of outputs - ctx->SetOutputDim("Y", y_dims); - ctx->SetOutputDim("BitMask", bitmask_dims); - ctx->SetOutputDim("ConvX", y_dims); - ctx->SetOutputDim("SavedMeanX", bn_param_dims); - ctx->SetOutputDim("SavedInvstdX", bn_param_dims); - ctx->SetOutputDim("RunningMeanX", bn_param_dims); - ctx->SetOutputDim("RunningVarX", bn_param_dims); - if (has_shortcut) { - ctx->SetOutputDim("ConvZ", y_dims); - ctx->SetOutputDim("SavedMeanZ", bn_param_dims); - ctx->SetOutputDim("SavedInvstdZ", bn_param_dims); - ctx->SetOutputDim("RunningMeanZ", bn_param_dims); - ctx->SetOutputDim("RunningVarZ", bn_param_dims); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - // By default, the type of the scale, bias, mean, - // and var tensors should be float when input tensor's dtype is float16. 
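-    // (Annotation) Keeping BN scale/bias/mean/var in FP32 while the conv
-    // itself may run in FP16 is standard mixed-precision practice; the
-    // checks below enforce exactly that for ScaleX and BiasX.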
- auto bn_param_type = framework::proto::VarType::FP32; - - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType( - ctx.Input("ScaleX")->dtype()), - phi::errors::InvalidArgument("Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType( - ctx.Input("BiasX")->dtype()), - phi::errors::InvalidArgument("Bias input should be of float type")); - return phi::KernelKey(input_data_type, ctx.GetPlace()); - } -}; - -class ResNetUnitOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input 1 tensor"); - AddInput("FilterX", "Filter tensor of input 1"); - AddInput("ScaleX", "Scale tensor of input 1 used in batchnorm"); - AddInput("BiasX", "Bias tensor of input 1 used in batchnorm"); - AddInput("MeanX", "Mean tensor of input 1 used in batchnorm"); - AddInput("VarX", "Variance tensor of input 1 used in batchnorm"); - AddInput("Z", "The input 2 tensor").AsDispensable(); - AddInput("FilterZ", "Filter tensor of input 2").AsDispensable(); - AddInput("ScaleZ", "Scale tensor of input 2").AsDispensable(); - AddInput("BiasZ", "Bias tensor of input 2").AsDispensable(); - AddInput("MeanZ", "Mean tensor of input 2").AsDispensable(); - AddInput("VarZ", "Variance tensor of input 2").AsDispensable(); - AddOutput("Y", "The result of the resnet unit"); - AddOutput("BitMask", "The bitmask generated after relu"); - AddOutput("ConvX", "The output of input 1 after conv"); - AddOutput("SavedMeanX", "Mean of input 1 in the current batch"); - AddOutput("SavedInvstdX", "Invstd of input 1 in the current batch"); - AddOutput("RunningMeanX", "Shared memory with MeanX"); - AddOutput("RunningVarX", "Shared memory with VarX"); - AddOutput("ConvZ", "The output of input 2 after conv").AsDispensable(); - AddOutput("SavedMeanZ", "Mean of input 1 in the current batch") - .AsDispensable(); - AddOutput("SavedInvstdZ", "Invstd of input 1 in the current batch") - .AsDispensable(); - AddOutput("RunningMeanZ", "Shared memory with MeanZ").AsDispensable(); - AddOutput("RunningVarZ", "Shared memory with VarZ").AsDispensable(); - AddAttr("stride", "").SetDefault(1); - AddAttr("stride_z", "").SetDefault(1); - AddAttr("padding", "").SetDefault(0); - AddAttr("dilation", "").SetDefault(1); - AddAttr("group", "").SetDefault(1); - AddAttr("momentum", "").SetDefault(0.9); - AddAttr("epsilon", "").SetDefault(1e-5); - AddAttr("data_format", "").SetDefault("NHWC"); - AddAttr("fuse_add", "").SetDefault(false); - AddAttr("has_shortcut", "").SetDefault(false); - AddAttr("use_global_stats", "").SetDefault(false); - AddAttr("is_test", - "(bool, default false) Set to true for inference only, false " - "for training. Some layers may run faster when this is true.") - .SetDefault(false); - AddAttr("use_addto", "").SetDefault(false); - AddAttr("act_type", "The activation type to be fused.") - .SetDefault("relu"); - AddComment(R"DOC( -Fusion op of the basic unit of resnet block. - -The implementation is based on the latest fusion op interface in cuDNN v8.0. 
-For more details: -https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnFusedOps_t - -)DOC"); - } -}; - -class ResNetUnitGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - // check input - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("FilterX"), "Input", "FilterX", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("ConvX"), "Input", "ConvX", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("ScaleX"), "Input", "ScaleX", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("BiasX"), "Input", "BiasX", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("SavedMeanX"), "Input", "SavedMeanX", "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedInvstdX"), - "Input", - "SavedInvstdX", - "ResNetUnitGradOp"); - - bool fuse_add = ctx->Attrs().Get("fuse_add"); - bool has_shortcut = ctx->Attrs().Get("has_shortcut"); - if (fuse_add || has_shortcut) { - OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", "ResNetUnitGradOp"); - } - if (has_shortcut) { - OP_INOUT_CHECK( - ctx->HasInput("FilterZ"), "Input", "FilterZ", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("ConvZ"), "Input", "ConvZ", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("ScaleZ"), "Input", "ScaleZ", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("BiasZ"), "Input", "BiasZ", "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedMeanZ"), - "Input", - "SavedMeanZ", - "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasInput("SavedInvstdZ"), - "Input", - "SavedInvstdZ", - "ResNetUnitGradOp"); - } - OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "ResNetUnitGradOp"); - OP_INOUT_CHECK( - ctx->HasInput("BitMask"), "Input", "BitMask", "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - framework::GradVarName("Y"), - "ResNetUnitGradOp"); - - // check output - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("FilterX")), - "Output", - framework::GradVarName("FilterX"), - "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("ScaleX")), - "Output", - framework::GradVarName("ScaleX"), - "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("BiasX")), - "Output", - framework::GradVarName("BiasX"), - "ResNetUnitGradOp"); - if (fuse_add) { - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Z")), - "Output", - framework::GradVarName("Z"), - "ResNetUnitGradOp"); - } - if (has_shortcut) { - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("FilterZ")), - "Output", - framework::GradVarName("FilterZ"), - "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("ScaleZ")), - "Output", - framework::GradVarName("ScaleZ"), - "ResNetUnitGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("BiasZ")), - "Output", - framework::GradVarName("BiasZ"), - "ResNetUnitGradOp"); - } - const auto x_dims = ctx->GetInputDim("X"); - const auto filter_x_dims = ctx->GetInputDim("FilterX"); - const auto param_dims = ctx->GetInputDim("ScaleX"); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->SetOutputDim(framework::GradVarName("FilterX"), filter_x_dims); - ctx->SetOutputDim(framework::GradVarName("ScaleX"), param_dims); - 
ctx->SetOutputDim(framework::GradVarName("BiasX"), param_dims); - if (fuse_add || has_shortcut) { - const auto z_dims = ctx->GetInputDim("Z"); - ctx->SetOutputDim(framework::GradVarName("Z"), z_dims); - } - if (has_shortcut) { - const auto filter_z_dims = ctx->GetInputDim("FilterZ"); - ctx->SetOutputDim(framework::GradVarName("FilterZ"), filter_z_dims); - ctx->SetOutputDim(framework::GradVarName("ScaleZ"), param_dims); - ctx->SetOutputDim(framework::GradVarName("BiasZ"), param_dims); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL( - ctx.InputVar(framework::GradVarName("Y")), - phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); - - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -template -class ResNetUnitGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("resnet_unit_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("FilterX", this->Input("FilterX")); - op->SetInput("ConvX", this->Output("ConvX")); - op->SetInput("ScaleX", this->Input("ScaleX")); - op->SetInput("BiasX", this->Input("BiasX")); - op->SetInput("SavedMeanX", this->Output("SavedMeanX")); - op->SetInput("SavedInvstdX", this->Output("SavedInvstdX")); - op->SetInput("Z", this->Input("Z")); - op->SetInput("FilterZ", this->Input("FilterZ")); - op->SetInput("ConvZ", this->Output("ConvZ")); - op->SetInput("ScaleZ", this->Input("ScaleZ")); - op->SetInput("BiasZ", this->Input("BiasZ")); - op->SetInput("SavedMeanZ", this->Output("SavedMeanZ")); - op->SetInput("SavedInvstdZ", this->Output("SavedInvstdZ")); - op->SetInput("Y", this->Output("Y")); - op->SetInput("BitMask", this->Output("BitMask")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - - op->SetAttrMap(this->Attrs()); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetOutput(framework::GradVarName("FilterX"), - this->InputGrad("FilterX")); - op->SetOutput(framework::GradVarName("ScaleX"), this->InputGrad("ScaleX")); - op->SetOutput(framework::GradVarName("BiasX"), this->InputGrad("BiasX")); - op->SetOutput(framework::GradVarName("Z"), this->InputGrad("Z")); - op->SetOutput(framework::GradVarName("FilterZ"), - this->InputGrad("FilterZ")); - op->SetOutput(framework::GradVarName("ScaleZ"), this->InputGrad("ScaleZ")); - op->SetOutput(framework::GradVarName("BiasZ"), this->InputGrad("BiasZ")); - } -}; - -class ResNetUnitOpInferVarType - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map& GetInputOutputWithSameType() - const override { - static std::unordered_map m{{"X", /*->*/ "Y"}}; - return m; - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(resnet_unit, - ops::ResNetUnitOp, - ops::ResNetUnitOpMaker, - ops::ResNetUnitOpInferVarType, - ops::ResNetUnitGradOpMaker, - ops::ResNetUnitGradOpMaker); -REGISTER_OPERATOR(resnet_unit_grad, ops::ResNetUnitGradOp); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cu b/paddle/fluid/operators/fused/resnet_unit_op.cu deleted file mode 100644 index 6afe03a67ceab..0000000000000 --- a/paddle/fluid/operators/fused/resnet_unit_op.cu +++ /dev/null @@ -1,429 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h" -#include "paddle/fluid/operators/fused/cudnn_norm_conv.cu.h" -#include "paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h" -#include "paddle/phi/common/float16.h" - -namespace paddle { -namespace operators { - -template -class ResNetUnitKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - phi::errors::PreconditionNotMet("It must use CUDAPlace.")); - PADDLE_ENFORCE_EQ(platform::CudnnDataType::type, - CUDNN_DATA_HALF, - phi::errors::Unavailable( - "ResNetUnitOp only supports float16 for now.")); - - // input x - const phi::DenseTensor *input_x = ctx.Input("X"); - const phi::DenseTensor *filter_x = ctx.Input("FilterX"); - const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); - const phi::DenseTensor *bias_x = ctx.Input("BiasX"); - // norm conv - phi::DenseTensor *conv_out_x = ctx.Output("ConvX"); - // bn finalize - phi::DenseTensor *saved_mean_x = ctx.Output("SavedMeanX"); - phi::DenseTensor *saved_invstd_x = - ctx.Output("SavedInvstdX"); - phi::DenseTensor *running_mean_x = - ctx.Output("RunningMeanX"); - phi::DenseTensor *running_var_x = - ctx.Output("RunningVarX"); - // sbar - phi::DenseTensor *output = ctx.Output("Y"); - phi::DenseTensor *bitmask = ctx.Output("BitMask"); - // attrs - int padding = ctx.Attr("padding"); - int stride = ctx.Attr("stride"); - int stride_z = ctx.Attr("stride_z"); - int dilation = ctx.Attr("dilation"); - int group = ctx.Attr("group"); - double eps = static_cast(ctx.Attr("epsilon")); - double momentum = static_cast(ctx.Attr("momentum")); - bool has_shortcut = ctx.Attr("has_shortcut"); - bool fuse_add = ctx.Attr("fuse_add"); - bool use_global_stats = ctx.Attr("use_global_stats"); - bool is_test = ctx.Attr("is_test"); - bool is_train = !is_test && !use_global_stats; - std::string act_type = ctx.Attr("act_type"); - - auto input_x_shape = common::vectorize(input_x->dims()); - auto filter_x_shape = common::vectorize(filter_x->dims()); - // std::swap used to convert shape of filter from conv2d when kernel size is - // 1. - if (filter_x_shape[1] != filter_x_shape[2] && 1 == filter_x_shape[2]) { - std::swap(filter_x_shape[1], filter_x_shape[3]); - } - auto param_dims = scale_x->dims(); - auto param_shape = common::vectorize(scale_x->dims()); - if (1 == param_shape.size()) { - param_shape = {1, 1, 1, param_shape[0]}; - } - auto output_shape = common::vectorize(output->dims()); - auto bitmask_shape = common::vectorize(bitmask->dims()); - int output_channel = filter_x_shape[0]; - int64_t ele_count = std::accumulate(output_shape.begin(), - output_shape.end(), - 1, - std::multiplies()) / - output_channel; - - auto place = ctx.GetPlace(); - auto &dev_ctx = ctx.template device_context(); - - // 1. 
Conv - phi::DenseTensor sum_x; - phi::DenseTensor sum_of_squares_x; - sum_x.Resize(param_dims); - sum_of_squares_x.Resize(param_dims); - CudnnNormConvolution conv_x_op(dev_ctx, - input_x_shape, - filter_x_shape, - output_shape, - padding, - stride, - dilation, - group); - conv_x_op.Forward( - dev_ctx, *input_x, *filter_x, conv_out_x, &sum_x, &sum_of_squares_x); - - // 2. BN - phi::DenseTensor equiv_scale_x; - phi::DenseTensor equiv_bias_x; - equiv_scale_x.Resize(param_dims); - equiv_bias_x.Resize(param_dims); - CudnnBNStatsFinalize bn_x_op(dev_ctx, param_shape); - bn_x_op.Forward(dev_ctx, - sum_x, - sum_of_squares_x, - *scale_x, - *bias_x, - saved_mean_x, - saved_invstd_x, - running_mean_x, - running_var_x, - &equiv_scale_x, - &equiv_bias_x, - eps, - momentum, - ele_count, - is_train); - - // 3. scale + bias + add + relu - CudnnScaleBiasAddRelu sbar_op(dev_ctx, - act_type, - fuse_add, - has_shortcut, - output_shape, - param_shape, - bitmask_shape); - if (has_shortcut) { - // input z - const phi::DenseTensor *input_z = ctx.Input("Z"); - const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); - const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); - const phi::DenseTensor *bias_z = ctx.Input("BiasZ"); - // norm conv - phi::DenseTensor *conv_out_z = ctx.Output("ConvZ"); - // bn finalize - phi::DenseTensor *saved_mean_z = - ctx.Output("SavedMeanZ"); - phi::DenseTensor *saved_invstd_z = - ctx.Output("SavedInvstdZ"); - phi::DenseTensor *running_mean_z = - ctx.Output("RunningMeanZ"); - phi::DenseTensor *running_var_z = - ctx.Output("RunningVarZ"); - - auto input_z_shape = common::vectorize(input_z->dims()); - auto filter_z_shape = common::vectorize(filter_z->dims()); - - // 3.1 Conv for second input - phi::DenseTensor sum_z; - phi::DenseTensor sum_of_squares_z; - sum_z.Resize(param_dims); - sum_of_squares_z.Resize(param_dims); - CudnnNormConvolution conv_z_op(dev_ctx, - input_z_shape, - filter_z_shape, - output_shape, - padding, - stride_z, - dilation, - group); - conv_z_op.Forward( - dev_ctx, *input_z, *filter_z, conv_out_z, &sum_z, &sum_of_squares_z); - - // 3.2 BN for second input - phi::DenseTensor equiv_scale_z; - phi::DenseTensor equiv_bias_z; - equiv_scale_z.Resize(param_dims); - equiv_bias_z.Resize(param_dims); - CudnnBNStatsFinalize bn_z_op(dev_ctx, param_shape); - bn_z_op.Forward(dev_ctx, - sum_z, - sum_of_squares_z, - *scale_z, - *bias_z, - saved_mean_z, - saved_invstd_z, - running_mean_z, - running_var_z, - &equiv_scale_z, - &equiv_bias_z, - eps, - momentum, - ele_count, - is_train); - // 3.3 sbar - sbar_op.Forward(dev_ctx, - *conv_out_x, - equiv_scale_x, - equiv_bias_x, - conv_out_z, - &equiv_scale_z, - &equiv_bias_z, - output, - bitmask); - } else { - const phi::DenseTensor *input_z = - fuse_add ? 
ctx.Input("Z") : nullptr; - sbar_op.Forward(dev_ctx, - *conv_out_x, - equiv_scale_x, - equiv_bias_x, - input_z, - nullptr, - nullptr, - output, - bitmask); - } - } -}; - -template -class ResNetUnitGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - phi::errors::PreconditionNotMet("It must use CUDAPlace.")); - PADDLE_ENFORCE_EQ(platform::CudnnDataType::type, - CUDNN_DATA_HALF, - phi::errors::Unavailable( - "ResNetUnitOp only supports float16 for now.")); - - const phi::DenseTensor *y_grad = - ctx.Input(framework::GradVarName("Y")); - - const phi::DenseTensor *x = ctx.Input("X"); - const phi::DenseTensor *filter_x = ctx.Input("FilterX"); - const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); - const phi::DenseTensor *bias_x = ctx.Input("BiasX"); - const phi::DenseTensor *saved_mean_x = - ctx.Input("SavedMeanX"); - const phi::DenseTensor *saved_invstd_x = - ctx.Input("SavedInvstdX"); - - const phi::DenseTensor *conv_out_x = ctx.Input("ConvX"); - const phi::DenseTensor *output = ctx.Input("Y"); - const phi::DenseTensor *bitmask = ctx.Input("BitMask"); - - phi::DenseTensor *x_grad = - ctx.Output(framework::GradVarName("X")); - phi::DenseTensor *filter_x_grad = - ctx.Output(framework::GradVarName("FilterX")); - phi::DenseTensor *scale_x_grad = - ctx.Output(framework::GradVarName("ScaleX")); - phi::DenseTensor *bias_x_grad = - ctx.Output(framework::GradVarName("BiasX")); - - int padding = ctx.Attr("padding"); - int stride = ctx.Attr("stride"); - int stride_z = ctx.Attr("stride_z"); - int dilation = ctx.Attr("dilation"); - int group = ctx.Attr("group"); - double eps = static_cast(ctx.Attr("epsilon")); - double momentum = static_cast(ctx.Attr("momentum")); - bool has_shortcut = ctx.Attr("has_shortcut"); - bool fuse_add = ctx.Attr("fuse_add"); - bool use_global_stats = ctx.Attr("use_global_stats"); - std::string act_type = ctx.Attr("act_type"); - - auto x_shape = common::vectorize(x->dims()); - auto filter_x_shape = common::vectorize(filter_x->dims()); - auto param_shape = common::vectorize(scale_x->dims()); - auto output_shape = common::vectorize(output->dims()); - auto bitmask_shape = common::vectorize(bitmask->dims()); - - auto place = ctx.GetPlace(); - auto &dev_ctx = ctx.template device_context(); - - // 1. 
Backward of BN (+ Add + Relu) for x, get conv_out_x_grad, - // scale_x_grad, bias_x_grad - phi::DenseTensor conv_out_x_grad; - conv_out_x_grad.Resize(conv_out_x->dims()); - CudnnScaleBiasAddRelu sbar_x_op(dev_ctx, - act_type, - fuse_add, - has_shortcut, - output_shape, - param_shape, - bitmask_shape); - if (has_shortcut) { - // X Z - // | | - // NormConv NormConv - // | | - // BNStatsFinalize BNStatsFinalize - // \ / - // ScaleBiasAddRelu - // | - // Y - const phi::DenseTensor *z = ctx.Input("Z"); - const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); - const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); - const phi::DenseTensor *bias_z = ctx.Input("BiasZ"); - const phi::DenseTensor *saved_mean_z = - ctx.Input("SavedMeanZ"); - const phi::DenseTensor *saved_invstd_z = - ctx.Input("SavedInvstdZ"); - const phi::DenseTensor *conv_out_z = ctx.Input("ConvZ"); - - phi::DenseTensor *z_grad = - ctx.Output(framework::GradVarName("Z")); - phi::DenseTensor *filter_z_grad = - ctx.Output(framework::GradVarName("FilterZ")); - phi::DenseTensor *scale_z_grad = - ctx.Output(framework::GradVarName("ScaleZ")); - phi::DenseTensor *bias_z_grad = - ctx.Output(framework::GradVarName("BiasZ")); - - // 1.1 Backward of BN + Add (+ Relu) for x, get conv_out_x_grad, - // scale_x_grad, bias_x_grad and z_grad_temp - phi::DenseTensor z_grad_temp; - z_grad_temp.Resize(conv_out_z->dims()); - sbar_x_op.Backward(dev_ctx, - *y_grad, - *conv_out_x, - *scale_x, - *bias_x, - *saved_mean_x, - *saved_invstd_x, - bitmask, - &conv_out_x_grad, - &z_grad_temp, - scale_x_grad, - bias_x_grad, - eps); - - // 1.2 bn backward for z, get conv_out_z_grad, dscale_z, dbias_z - phi::DenseTensor conv_out_z_grad; - conv_out_z_grad.Resize(conv_out_z->dims()); - CudnnScaleBiasAddRelu sbar_z_op( - dev_ctx, "", false, false, output_shape, param_shape, bitmask_shape); - sbar_z_op.Backward(dev_ctx, - z_grad_temp, - *conv_out_z, - *scale_z, - *bias_z, - *saved_mean_z, - *saved_invstd_z, - nullptr, - &conv_out_z_grad, - nullptr, - scale_z_grad, - bias_z_grad, - eps); - - // 1.3 Backward of Conv for z, get z_grad and filter_z_grad - auto z_shape = common::vectorize(z->dims()); - auto filter_z_shape = common::vectorize(filter_z->dims()); - CudnnNormConvolutionGrad conv_z_op(dev_ctx, - z_shape, - filter_z_shape, - output_shape, - padding, - stride_z, - dilation, - group); - conv_z_op.Backward( - dev_ctx, *z, *filter_z, conv_out_z_grad, z_grad, filter_z_grad); - } else { - // 1.1 Backward of BN (+ Add + Relu) for x, get conv_out_x_grad, - // scale_x_grad, bias_x_grad (and z_grad) - phi::DenseTensor *z_grad = - fuse_add ? ctx.Output(framework::GradVarName("Z")) - : nullptr; - sbar_x_op.Backward(dev_ctx, - *y_grad, - *conv_out_x, - *scale_x, - *bias_x, - *saved_mean_x, - *saved_invstd_x, - bitmask, - &conv_out_x_grad, - z_grad, - scale_x_grad, - bias_x_grad, - eps); - } - - // 2. 
Backward of Conv for x, get x_grad and filter_x_grad - bool use_addto = ctx.Attr("use_addto"); - CudnnNormConvolutionGrad conv_x_op(dev_ctx, - x_shape, - filter_x_shape, - output_shape, - padding, - stride, - dilation, - group); - conv_x_op.Backward(dev_ctx, - *x, - *filter_x, - conv_out_x_grad, - x_grad, - filter_x_grad, - use_addto); - } -}; - -} // namespace operators -} // namespace paddle - -#if CUDNN_VERSION >= 8000 -namespace ops = paddle::operators; -namespace plat = paddle::platform; -PD_REGISTER_STRUCT_KERNEL( - resnet_unit, GPU, ALL_LAYOUT, ops::ResNetUnitKernel, phi::dtype::float16) {} -PD_REGISTER_STRUCT_KERNEL(resnet_unit_grad, - GPU, - ALL_LAYOUT, - ops::ResNetUnitGradKernel, - phi::dtype::float16) {} -#endif diff --git a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc deleted file mode 100644 index f50d452d6c285..0000000000000 --- a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc +++ /dev/null @@ -1,373 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/phi/common/float16.h" - -namespace paddle { -namespace operators { - -template -class ResNetUnitXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), - true, - phi::errors::PreconditionNotMet("It must use XPUPlace.")); - - bool is_nchw = (ctx.Attr("data_format") == "NCHW"); - // input x - const phi::DenseTensor *input_x = ctx.Input("X"); - const phi::DenseTensor *filter_x = ctx.Input("FilterX"); - const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); - const phi::DenseTensor *bias_x = ctx.Input("BiasX"); - - // output x - phi::DenseTensor *conv_out_x = ctx.Output("ConvX"); - phi::DenseTensor *saved_mean_x = ctx.Output("SavedMeanX"); - phi::DenseTensor *saved_invstd_x = - ctx.Output("SavedInvstdX"); - phi::DenseTensor *running_mean_x = - ctx.Output("RunningMeanX"); - phi::DenseTensor *running_var_x = - ctx.Output("RunningVarX"); - - phi::DenseTensor *output = ctx.Output("Y"); - - // attrs - int padding = ctx.Attr("padding"); - int stride = ctx.Attr("stride"); - int stride_z = ctx.Attr("stride_z"); - int dilation = ctx.Attr("dilation"); - int group = ctx.Attr("group"); - float eps = ctx.Attr("epsilon"); - float momentum = ctx.Attr("momentum"); - bool has_shortcut = ctx.Attr("has_shortcut"); - bool fuse_add = ctx.Attr("fuse_add"); - bool use_global_stats = ctx.Attr("use_global_stats"); - bool is_test = ctx.Attr("is_test"); - bool is_train = !is_test && !use_global_stats; - std::string act_type = ctx.Attr("act_type"); - auto &dev_ctx = ctx.template device_context(); - - std::vector x_list = { - reinterpret_cast(input_x->data())}; - std::vector w_list = { - reinterpret_cast(filter_x->data())}; - 
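// Note on the packing convention used below: the XPU fused API consumes
// parallel per-branch vectors (x_list, w_list, conv_y_list, ksize_list,
// stride_list, scale_list, ...). Index 0 always holds the main X branch;
// when has_shortcut (or, for the input list, fuse_add) is set, the Z branch
// is appended as index 1, so every per-branch list must be pushed in the
// same order.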
std::vector conv_y_list = { - reinterpret_cast(conv_out_x->mutable_data(place))}; - - std::vector> x_shape_list = { - common::vectorize(input_x->dims())}; - - auto filter_x_shape = common::vectorize(filter_x->dims()); - std::vector ksize = {filter_x_shape[2], filter_x_shape[3]}; - if (!is_nchw) { - ksize[0] = filter_x_shape[1]; - ksize[1] = filter_x_shape[2]; - } - std::vector strides = {stride, stride}; - std::vector> ksize_list = {ksize}; - std::vector> stride_list = {strides}; - std::vector paddings = {padding, padding}; - std::vector dilations = {dilation, dilation}; - std::vector scale_list = {scale_x->data()}; - std::vector bias_list = {bias_x->data()}; - std::vector batch_mean_list = { - saved_mean_x->mutable_data(place)}; - std::vector batch_invstd_list = { - saved_invstd_x->mutable_data(place)}; - std::vector global_mean_list = { - running_mean_x->mutable_data(place)}; - std::vector global_var_list = { - running_var_x->mutable_data(place)}; - - std::vector x_maxlist = {nullptr}; - std::vector w_maxlist = {nullptr}; - if (has_shortcut) { - // input z - const phi::DenseTensor *input_z = ctx.Input("Z"); - const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); - const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); - const phi::DenseTensor *bias_z = ctx.Input("BiasZ"); - - phi::DenseTensor *conv_out_z = ctx.Output("ConvZ"); - phi::DenseTensor *saved_mean_z = - ctx.Output("SavedMeanZ"); - phi::DenseTensor *saved_invstd_z = - ctx.Output("SavedInvstdZ"); - phi::DenseTensor *running_mean_z = - ctx.Output("RunningMeanZ"); - phi::DenseTensor *running_var_z = - ctx.Output("RunningVarZ"); - - x_list.push_back(reinterpret_cast(input_z->data())); - w_list.push_back(reinterpret_cast(filter_z->data())); - conv_y_list.push_back( - reinterpret_cast(conv_out_z->mutable_data(place))); - - x_shape_list.push_back(common::vectorize(input_z->dims())); - - auto filter_z_shape = common::vectorize(filter_z->dims()); - std::vector ksize_z = {filter_z_shape[2], filter_z_shape[3]}; - if (!is_nchw) { - ksize_z[0] = filter_z_shape[1]; - ksize_z[1] = filter_z_shape[2]; - } - ksize_list.push_back(ksize_z); - stride_list.push_back({stride_z, stride_z}); - scale_list.push_back(scale_z->data()); - bias_list.push_back(bias_z->data()); - batch_mean_list.push_back(saved_mean_z->mutable_data(place)); - batch_invstd_list.push_back(saved_invstd_z->mutable_data(place)); - global_mean_list.push_back(running_mean_z->mutable_data(place)); - global_var_list.push_back(running_var_z->mutable_data(place)); - x_maxlist.push_back(nullptr); - w_maxlist.push_back(nullptr); - } else { - if (fuse_add) { - const phi::DenseTensor *input_z = ctx.Input("Z"); - auto input_z_shape = common::vectorize(input_z->dims()); - x_list.push_back(reinterpret_cast(input_z->data())); - x_shape_list.push_back(input_z_shape); - x_maxlist.push_back(nullptr); - } - } - int r = xpu::resnet_unit_fusion( - dev_ctx.x_context(), - x_list, - w_list, - conv_y_list, - reinterpret_cast(output->mutable_data(place)), - x_shape_list, - filter_x_shape[0], - ksize_list, - stride_list, - paddings, - dilations, - group, - eps, - momentum, - x_maxlist, - w_maxlist, - scale_list, - bias_list, - batch_mean_list, - batch_invstd_list, - global_mean_list, - global_var_list, - xpu::Activation_t::RELU, - is_nchw, - has_shortcut, - fuse_add, - is_train); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "resnet_unit_fusion"); - } -}; - -template -class ResNetUnitGradXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const 
framework::ExecutionContext &ctx) const override { - auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), - true, - phi::errors::PreconditionNotMet("It must use XPUPlace.")); - - bool is_nchw = (ctx.Attr("data_format") == "NCHW"); - const phi::DenseTensor *y_grad = - ctx.Input(framework::GradVarName("Y")); - const phi::DenseTensor *x = ctx.Input("X"); - const phi::DenseTensor *filter_x = ctx.Input("FilterX"); - const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); - const phi::DenseTensor *saved_mean_x = - ctx.Input("SavedMeanX"); - const phi::DenseTensor *saved_invstd_x = - ctx.Input("SavedInvstdX"); - const phi::DenseTensor *conv_out_x = ctx.Input("ConvX"); - const phi::DenseTensor *output = ctx.Input("Y"); - - phi::DenseTensor *x_grad = - ctx.Output(framework::GradVarName("X")); - phi::DenseTensor *filter_x_grad = - ctx.Output(framework::GradVarName("FilterX")); - phi::DenseTensor *scale_x_grad = - ctx.Output(framework::GradVarName("ScaleX")); - phi::DenseTensor *bias_x_grad = - ctx.Output(framework::GradVarName("BiasX")); - - int padding = ctx.Attr("padding"); - int stride = ctx.Attr("stride"); - int stride_z = ctx.Attr("stride_z"); - int dilation = ctx.Attr("dilation"); - int group = ctx.Attr("group"); - float eps = ctx.Attr("epsilon"); - bool has_shortcut = ctx.Attr("has_shortcut"); - bool fuse_add = ctx.Attr("fuse_add"); - std::string act_type = ctx.Attr("act_type"); - - auto &dev_ctx = ctx.template device_context(); - - std::vector x_list = { - reinterpret_cast(x->data())}; - std::vector w_list = { - reinterpret_cast(filter_x->data())}; - std::vector conv_y_list = { - reinterpret_cast(conv_out_x->data())}; - std::vector dx_list = { - reinterpret_cast(x_grad->mutable_data(place))}; - std::vector dw_list = { - reinterpret_cast(filter_x_grad->mutable_data(place))}; - - std::vector> x_shape_list = { - common::vectorize(x->dims())}; - - auto filter_x_shape = common::vectorize(filter_x->dims()); - std::vector x_ksize = {filter_x_shape[2], filter_x_shape[3]}; - if (!is_nchw) { - x_ksize[0] = filter_x_shape[1]; - x_ksize[1] = filter_x_shape[2]; - } - std::vector> ksize_list = {x_ksize}; - std::vector> stride_list = {{stride, stride}}; - std::vector paddings = {padding, padding}; - std::vector dilations = {dilation, dilation}; - - std::vector x_maxlist = {nullptr}; - std::vector w_maxlist = {nullptr}; - - std::vector scale_list = {scale_x->data()}; - std::vector batch_mean_list = {saved_mean_x->data()}; - std::vector batch_invstd_list = { - saved_invstd_x->data()}; - std::vector dscale_list = { - scale_x_grad->mutable_data(place)}; - std::vector dbias_list = {bias_x_grad->mutable_data(place)}; - - if (has_shortcut) { - // X Z - // | | - // NormConv NormConv - // | | - // BNStatsFinalize BNStatsFinalize - // \ / - // ScaleBiasAddRelu - // | - // Y - const phi::DenseTensor *z = ctx.Input("Z"); - const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); - const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); - const phi::DenseTensor *saved_mean_z = - ctx.Input("SavedMeanZ"); - const phi::DenseTensor *saved_invstd_z = - ctx.Input("SavedInvstdZ"); - const phi::DenseTensor *conv_out_z = ctx.Input("ConvZ"); - - phi::DenseTensor *z_grad = - ctx.Output(framework::GradVarName("Z")); - phi::DenseTensor *filter_z_grad = - ctx.Output(framework::GradVarName("FilterZ")); - phi::DenseTensor *scale_z_grad = - ctx.Output(framework::GradVarName("ScaleZ")); - phi::DenseTensor *bias_z_grad = - ctx.Output(framework::GradVarName("BiasZ")); - 
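// The gradient kernel mirrors the forward packing: dx_list/dw_list receive
// the Z-branch gradients at index 1 below, while the plain fuse_add case
// (identity shortcut, no conv) only appends z_grad to dx_list and leaves
// dw_list untouched.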
x_list.push_back(reinterpret_cast(z->data())); - w_list.push_back(reinterpret_cast(filter_z->data())); - conv_y_list.push_back( - reinterpret_cast(conv_out_z->data())); - dx_list.push_back( - reinterpret_cast(z_grad->mutable_data(place))); - dw_list.push_back( - reinterpret_cast(filter_z_grad->mutable_data(place))); - x_shape_list.push_back(common::vectorize(z->dims())); - - auto filter_z_shape = common::vectorize(filter_z->dims()); - std::vector ksize_z = {filter_z_shape[2], filter_z_shape[3]}; - if (!is_nchw) { - ksize_z[0] = filter_z_shape[1]; - ksize_z[1] = filter_z_shape[2]; - } - ksize_list.push_back(ksize_z); - stride_list.push_back({stride_z, stride_z}); - x_maxlist.push_back(nullptr); - w_maxlist.push_back(nullptr); - - scale_list.push_back(scale_z->data()); - batch_mean_list.push_back(saved_mean_z->data()); - batch_invstd_list.push_back(saved_invstd_z->data()); - dscale_list.push_back(scale_z_grad->mutable_data(place)); - dbias_list.push_back(bias_z_grad->mutable_data(place)); - } else { - if (fuse_add) { - auto z_grad = ctx.Output(framework::GradVarName("Z")); - dx_list.push_back( - reinterpret_cast(z_grad->mutable_data(place))); - } - } - - int r = xpu::resnet_unit_grad_fusion( - dev_ctx.x_context(), - x_list, - w_list, - reinterpret_cast(y_grad->data()), - reinterpret_cast(output->data()), - conv_y_list, - dx_list, - dw_list, - x_shape_list, - filter_x_shape[0], - ksize_list, - stride_list, - paddings, - dilations, - group, - x_maxlist, - w_maxlist, - scale_list, - batch_mean_list, - batch_invstd_list, - dscale_list, - dbias_list, - xpu::Activation_t::RELU, - eps, - is_nchw, - has_shortcut, - fuse_add); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "resnet_unit_grad_fusion"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -PD_REGISTER_STRUCT_KERNEL(resnet_unit, - XPU, - ALL_LAYOUT, - ops::ResNetUnitXPUKernel, - phi::dtype::float16, - float) {} -PD_REGISTER_STRUCT_KERNEL(resnet_unit_grad, - XPU, - ALL_LAYOUT, - ops::ResNetUnitGradXPUKernel, - phi::dtype::float16, - float) {} diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc deleted file mode 100644 index a27863819fedd..0000000000000 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/linear_chain_crf_op.h" - -#include - -namespace paddle { -namespace operators { - -class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Emission", - "(phi::DenseTensor). When a phi::DenseTensor " - "input,A 2-D phi::DenseTensor" - " with shape [N x D], where N is the size of the " - "mini-batch and D is the total tag number. The unscaled emission " - "weight matrix for the linear chain CRF. 
When a Tensor input," - "A Tensor with shape [N x S x D], where N is batch number," - "S is max length of sequences, D is the total tag number." - "A phi::DenseTensor with type float32, float64."); - AddInput("Transition", - "(Tensor, default Tensor) A 2-D Tensor with shape " - "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " - "operator. See more details in the operator's comments."); - AddInput("Label", - "(phi::DenseTensor), when a phi::DenseTensor input, " - "[N x 1], where N is the total element number in a mini-batch. " - "when a Tensor input, [N x S], where N is batch number. " - "S is max length of sequences. The ground truth." - "A phi::DenseTensor with int64."); - AddInput("Length", - "(Tensor, default Tensor) A Tensor with shape " - "[M x 1], where M is the sequence number in a mini-batch." - "A Tensor with type int64.") - .AsDispensable(); - AddOutput( - "Alpha", - "(Tensor, default Tensor), the same shape with Emission. " - "The forward vectors for the entire batch. Denote it as $\alpha$. " - "$\alpha$ is a memo table used to calculate the normalization " - "factor in CRF. $\alpha[k, v]$ stores the unnormalized " - "probabilities of all possible unfinished sequences of tags that end " - "at position $k$ with tag $v$. For each $k$, " - "$\alpha[k, v]$ is a vector of length $D$ with a component for " - "each tag value $v$. This vector is called a forward vector and " - "will also be used in backward computations.") - .AsIntermediate(); - AddOutput( - "EmissionExps", - "(Tensor, default Tensor), the same shape with Emission. " - "The exponentials of Input(Emission). This is an intermediate " - "computational result in forward computation, and will be reused in " - "backward computation." - "A phi::DenseTensor with type float32, float64.") - .AsIntermediate(); - AddOutput( - "TransitionExps", - "(Tensor, default Tensor) A 2-D Tensor with shape " - "[(D + 2) x D]. The exponentials of Input(Transition). This is an " - "intermediate computational result in forward computation, and " - "will be reused in backward computation." - "A phi::DenseTensor with type float32, float64.") - .AsIntermediate(); - AddOutput( - "LogLikelihood", - "(Tensor, default Tensor) The logarithm of the conditional " - "likelihood of each training sample in a mini-batch. This is a 2-D " - "tensor with shape [S x 1], where S is the sequence number in a " - "mini-batch. Note: S is equal to the sequence number in a mini-batch. " - "A Tensor with type float32, float64."); - AddComment(R"DOC( -Conditional Random Field defines an undirected probabilistic graph with nodes -denoting random variables and edges denoting dependencies between these -variables. CRF learns the conditional probability $P(Y|X)$, where -$X = (x_1, x_2, ... , x_n)$ are structured inputs and -$Y = (y_1, y_2, ... , y_n)$ are labels for the inputs. - -Linear chain CRF is a special case of CRF that is useful for sequence labeling -task. Sequence labeling tasks do not assume a lot of conditional -independences among inputs. The only constraint they impose is that the input -and output must be linear sequences. Thus, the graph of such a CRF is a simple -chain or a line, which results in the linear chain CRF. - -This operator implements the Forward-Backward algorithm for the linear chain -CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and -http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. - -Equation: - -1. Denote Input(Emission) to this operator as $x$ here. -2. 
The first D values of Input(Transition) to this operator are for starting -weights, denoted as $a$ here. -3. The next D values of Input(Transition) of this operator are for ending -weights, denoted as $b$ here. -4. The remaining values of Input(Transition) are for transition weights, -denoted as $w$ here. -5. Denote Input(Label) as $s$ here. - -The probability of a sequence $s$ of length $L$ is defined as: -$$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} - + \sum_{l=1}^L x_{s_l} - + \sum_{l=2}^L w_{s_{l-1},s_l})$$ - -where $Z$ is a normalization value so that the sum of $P(s)$ over -all possible sequences is 1, and $x$ is the emission feature weight -to the linear chain CRF. - -Finally, the linear chain CRF operator outputs the logarithm of the conditional -likelihood of each training sample in a mini-batch. - -NOTE: - -1. The feature function for a CRF is made up of the emission features and the -transition features. The emission feature weights are NOT computed in -this operator. They MUST be computed first before this operator is called. - -2. Because this operator performs global normalization over all possible -sequences internally, it expects UNSCALED emission feature weights. -Please do not call this op with the emission feature being output of any -nonlinear activation. - -3. The 2nd dimension of Input(Emission) MUST be equal to the tag number. - -)DOC"); - } -}; - -class LinearChainCRFOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Emission"), "Input", "Emission", "LinearChainCRF"); - OP_INOUT_CHECK( - ctx->HasInput("Transition"), "Input", "Transition", "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "LinearChainCRF"); - - OP_INOUT_CHECK( - ctx->HasOutput("Alpha"), "Output", "Alpha", "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasOutput("EmissionExps"), - "Output", - "EmissionExps", - "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasOutput("TransitionExps"), - "Output", - "TransitionExps", - "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasOutput("LogLikelihood"), - "Output", - "LogLikelihood", - "LinearChainCRF"); - - auto transition_dims = ctx->GetInputDim("Transition"); - PADDLE_ENFORCE_EQ(transition_dims.size(), - 2UL, - phi::errors::InvalidArgument( - "The Input(Transition) should be a 2-D tensor. But " - "received: input rank %u, input shape [%s].", - transition_dims.size(), - transition_dims)); - bool check = true; - if ((!ctx->IsRuntime()) && - (transition_dims[0] <= 0 || transition_dims[1] <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ( - transition_dims[0] - 2, - transition_dims[1], - phi::errors::InvalidArgument( - "An invalid dimension for the Input(Transition), which should " - "be a 2-D tensor with shape [(D + 2) x D]. But received: input " - "rank %u, " - "input shape [%s].", - transition_dims.size(), - transition_dims)); - } - auto emission_dims = ctx->GetInputDim("Emission"); - if (ctx->HasInput("Length")) { - PADDLE_ENFORCE_EQ(emission_dims.size(), - 3, - phi::errors::InvalidArgument( - "The Input(Emission) should be a 3-D tensor. 
But " - "received: input rank %u, input shape [%s].", - emission_dims.size(), - emission_dims)); - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ( - (label_dims.size() == 3UL && label_dims[2] == 1) || - (label_dims.size() == 2UL), - true, - phi::errors::InvalidArgument( - "The Input(Label) should be a 3-D tensor with last dimension " - "fixed to 1 or a 2-D tensor in padding mode. But received: input " - "rank %u, input shape [%s].", - label_dims.size(), - label_dims)); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(emission_dims[0], - label_dims[0], - phi::errors::InvalidArgument( - "The batch size of Input(Emission) " - "and Input(Label) should be the same. But " - "received Input(Emission): " - "rank %u, shape [%s]; received Input(Label): " - "rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - label_dims.size(), - label_dims)); - PADDLE_ENFORCE_EQ(emission_dims[1], - label_dims[1], - phi::errors::InvalidArgument( - "The max length of Input(Emission) " - "and Input(Label) should be the same. But " - "received Input(Emission): " - "rank %u, shape [%s]; received Input(Label): " - "rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - label_dims.size(), - label_dims)); - } - } else { - PADDLE_ENFORCE_EQ( - emission_dims.size(), - 2, - phi::errors::InvalidArgument( - "The Input(Emission) should be a 2-D tensor. But received: " - "input rank %u, input shape [%s].", - emission_dims.size(), - emission_dims)); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(emission_dims[1], - transition_dims[1], - phi::errors::InvalidArgument( - "The 2nd dimension of the Input(Emission) and " - "the Input(Transition) " - "should be equal to the tag number. But received " - "Input(Emission): rank " - "%u, shape [%s]; received Input(Transition): " - "rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - transition_dims.size(), - transition_dims)); - } - - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ( - label_dims.size(), - 2, - phi::errors::InvalidArgument( - "The Input(Label) should be a 2-D tensor with the 2nd " - "dimensions fixed to 1. But received: input rank %u, " - "input shape [%s].", - label_dims.size(), - label_dims)); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - emission_dims[0], - label_dims[0], - phi::errors::InvalidArgument( - "The first dimension of Input(Emission) and Input(Label) " - "should be the same. But received Input(Emission): rank %u, " - "shape " - "[%s]; received Input(Label): rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - label_dims.size(), - label_dims)); - } - } - ctx->SetOutputDim("Alpha", emission_dims); - ctx->SetOutputDim("EmissionExps", emission_dims); - ctx->SetOutputDim("TransitionExps", transition_dims); - // TODO(caoying) This is tricky. The 1st dimension of Output(LogLikelihood) - // is the sequence number in a mini-batch. The dimension set here should be - // resized to its correct size in the function Compute. Fix this once we can - // get LoD information in the InferShape interface. - ctx->SetOutputDim("LogLikelihood", {emission_dims[0], 1}); - } - - protected: - // Explicitly set that the data type of computation kernel of linear_chain_crf - // is determined by its input "Emission". 
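// Note: the kernel place is pinned to platform::CPUPlace() below on purpose;
// as the kernel implementation states, the computation only runs on the CPU,
// and both the forward and the grad kernels are registered for CPU only.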
- phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Emission"), - platform::CPUPlace()); - } -}; - -class LinearChainCRFGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("EmissionExps"), - "Input", - "EmissionExps", - "LinearChainCRFGrad"); - OP_INOUT_CHECK(ctx->HasInput("TransitionExps"), - "Input", - "TransitionExps", - "LinearChainCRFGrad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("LogLikelihood")), - "Input", - framework::GradVarName("LogLikelihood"), - "LinearChainCRFGrad"); - - auto transition_exps_dims = ctx->GetInputDim("TransitionExps"); - auto emission_exps_dims = ctx->GetInputDim("EmissionExps"); - if (ctx->HasOutput(framework::GradVarName("Emission"))) { - ctx->SetOutputDim(framework::GradVarName("Emission"), emission_exps_dims); - if (ctx->HasInput("Length") == false) { - ctx->ShareLoD("Emission", framework::GradVarName("Emission")); - } - } - - if (ctx->HasOutput(framework::GradVarName("Transition"))) { - ctx->SetOutputDim(framework::GradVarName("Transition"), - transition_exps_dims); - ctx->ShareLoD("Transition", framework::GradVarName("Transition")); - } - } - - protected: - // Explicitly set that the data type of output of the linear_chain_crf_grad - // operator is determined by its input: gradients of LogLikelihood. - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("LogLikelihood")), - platform::CPUPlace()); - } -}; - -template -class LinearChainCRFGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("linear_chain_crf_grad"); - op->SetAttrMap(this->Attrs()); - op->SetInput("Emission", this->Input("Emission")); - op->SetInput("Transition", this->Input("Transition")); - op->SetInput("Label", this->Input("Label")); - op->SetInput("Alpha", this->Output("Alpha")); - op->SetInput("EmissionExps", this->Output("EmissionExps")); - op->SetInput("TransitionExps", this->Output("TransitionExps")); - if (this->HasInput("Length")) { - op->SetInput("Length", this->Input("Length")); - } - op->SetInput(framework::GradVarName("LogLikelihood"), - this->OutputGrad("LogLikelihood")); - - op->SetOutput(framework::GradVarName("Emission"), - this->InputGrad("Emission")); - op->SetOutput(framework::GradVarName("Transition"), - this->InputGrad("Transition")); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(LinearChainCRFGradNoNeedBufferVarsInferer, - "Transition", - "Emission"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(linear_chain_crf, - ops::LinearChainCRFOp, - ops::LinearChainCRFOpMaker, - ops::LinearChainCRFGradMaker, - ops::LinearChainCRFGradMaker); -REGISTER_OPERATOR(linear_chain_crf_grad, - ops::LinearChainCRFGradOp, - ops::LinearChainCRFGradNoNeedBufferVarsInferer); - -PD_REGISTER_STRUCT_KERNEL(linear_chain_crf, - CPU, - ALL_LAYOUT, - ops::LinearChainCRFOpKernel, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(linear_chain_crf_grad, - CPU, - ALL_LAYOUT, - ops::LinearChainCRFGradOpKernel, - float, - double) {} diff --git 
a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h deleted file mode 100644 index 01ed8463701e7..0000000000000 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ /dev/null @@ -1,457 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -static inline T NormalizeL1(T* x, size_t len) { - T sum = 0.; - for (size_t i = 0; i < len; ++i) sum += x[i]; - // (This comment is from the old LinearChainCRFLayer.) - // Right now, we just bet that sum won't be zero. If this really happens, we - // will figure out what should be done then. - PADDLE_ENFORCE_GT( - sum, - 0., - phi::errors::InvalidArgument( - "The unnormalized probabilities of all possible unfinished " - "sequences must be greater than 0.")); - T s = 1. / sum; - for (size_t i = 0; i < len; ++i) x[i] *= s; - return sum; -} - -template -struct ScalarMul { - explicit ScalarMul(const T& scalar) : scalar(scalar) {} - T operator()(const T& val) const { return val * scalar; } - - T scalar; -}; - -using framework::LoD; - -template -class LinearChainCRFOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* emission_weights = - ctx.Input("Emission"); - const phi::DenseTensor* transition_weights = - ctx.Input("Transition"); - - phi::DenseTensor* emission_exps = - ctx.Output("EmissionExps"); - phi::DenseTensor* transition_exps = - ctx.Output("TransitionExps"); - phi::DenseTensor* alpha = ctx.Output("Alpha"); - phi::DenseTensor* ll = ctx.Output("LogLikelihood"); - - // Because the computation codes only runs on CPU, here the memory for all - // the outputs is FIXED to be allocated on the CPU memory. - emission_exps->mutable_data(platform::CPUPlace()); - alpha->mutable_data(platform::CPUPlace()); - transition_exps->mutable_data(platform::CPUPlace()); - auto emission_dims = emission_weights->dims(); - - const phi::DenseTensor* label = ctx.Input("Label"); - phi::DenseTensor emission_weights_tmp = *emission_weights; - phi::DenseTensor label_tmp = *label; - phi::DenseTensor emission_exps_tmp = *emission_exps; - phi::DenseTensor alpha_tmp = *alpha; - int64_t seq_num = 0; - int64_t batch_size; - int64_t tag_num; - const int64_t* length_data = nullptr; - framework::LoD in_lod; - if (ctx.HasInput("Length")) { - const phi::DenseTensor* label_length = - ctx.Input("Length"); - length_data = label_length->data(); - seq_num = label_length->numel(); - PADDLE_ENFORCE_EQ( - seq_num, - emission_dims[0], - phi::errors::InvalidArgument( - "the size of Input(length) must be equal to " - "emission_dims[0]. 
But input_size = %d, emission_dims[0] = %d.", - seq_num, - emission_dims[0])); - auto label_dims = label->dims(); - PADDLE_ENFORCE_EQ( - seq_num, - label_dims[0], - phi::errors::InvalidArgument( - "the size of Input(length) must be equal to " - "label_dims[0]. But input_size = %d, label_dims[0] = %d.", - seq_num, - label_dims[0])); - - batch_size = emission_dims[0] * emission_dims[1]; - tag_num = emission_dims[2]; - emission_weights_tmp.Resize({batch_size, tag_num}); - label_tmp.Resize({batch_size, 1}); - alpha_tmp.Resize({batch_size, tag_num}); - emission_exps_tmp.Resize({batch_size, tag_num}); - phi::funcs::set_constant( - ctx.device_context(), emission_exps, static_cast(0.0)); - phi::funcs::set_constant( - ctx.device_context(), alpha, static_cast(0.0)); - } else { - in_lod = ctx.Input("Label")->lod(); - PADDLE_ENFORCE_NE( - in_lod.size(), - 0, - phi::errors::InvalidArgument("Input(Label) must be a sequence.")); - seq_num = in_lod[0].size() - 1; - batch_size = emission_dims[0]; - tag_num = emission_dims[1]; - } - - // Resize the output tensor to its correct dimension. - ll->Resize({seq_num, 1}); - ll->mutable_data(platform::CPUPlace()); - // Now, all the inputs and outputs should be on the CPU memory. - phi::DenseTensor emission_row_max; - emission_row_max.mutable_data( - common::make_ddim({static_cast(batch_size), 1}), - platform::CPUPlace()); - auto& place = - *ctx.template device_context().eigen_device(); - auto x = framework::EigenMatrix::From(emission_weights_tmp); - auto x_row_max = framework::EigenMatrix::From(emission_row_max); - x_row_max.device(place) = - x.maximum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(static_cast(batch_size), 1)); - auto x_exps = framework::EigenMatrix::From(emission_exps_tmp); - x_exps.device(place) = - (x - x_row_max.broadcast(Eigen::DSizes(1, tag_num))).exp(); - auto w = framework::EigenMatrix::From(*transition_weights); - auto w_exps = framework::EigenMatrix::From(*transition_exps); - w_exps.device(place) = w.exp(); - T* log_likelihood = ll->data(); - for (int64_t i = 0; i < seq_num; ++i) { - int64_t start_pos = 0; - int64_t end_pos = 0; - if (ctx.HasInput("Length")) { - start_pos = i * emission_dims[1]; - end_pos = start_pos + length_data[i]; - } else { - start_pos = static_cast(in_lod[0][i]); - end_pos = static_cast(in_lod[0][i + 1]); - } - if (end_pos == start_pos) { - // If an empty input sequence is given, pad 0 for its cost. 
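// ForwardOneSequence (below) evaluates log Z with the forward recursion.
// Because each alpha row is L1-renormalized by NormalizeL1 and the emissions
// are shifted by their per-row max, the accumulated terms amount to
//   log Z = sum_k (x_row_max[k] + log c_k)
//           + log( sum_i alpha[L-1][i] * w_exps[tag_num + i] )
// where c_k is the normalizer returned by NormalizeL1 at step k.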
- log_likelihood[i] = 0.; - continue; - } - const phi::DenseTensor one_seq = - emission_weights_tmp.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_row_max = - emission_row_max.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_exps = - emission_exps_tmp.Slice(start_pos, end_pos); - const phi::DenseTensor one_seq_label = - label_tmp.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_alpha = alpha_tmp.Slice(start_pos, end_pos); - log_likelihood[i] = ForwardOneSequence(one_seq, - one_seq_row_max, - one_seq_exps, - *transition_weights, - *transition_exps, - one_seq_label, - &one_seq_alpha); - } - }; - - private: - T ForwardOneSequence(const phi::DenseTensor& emission, - const phi::DenseTensor& emission_row_max, - const phi::DenseTensor& emission_exps, - const phi::DenseTensor& trans_weights, - const phi::DenseTensor& trans_weight_exps, - const phi::DenseTensor& label, - phi::DenseTensor* alpha) const { - const T* x = emission.data(); - const T* x_row_max = emission_row_max.data(); - const T* x_exps = emission_exps.data(); - const T* w = trans_weights.data(); - const T* w_exps = trans_weight_exps.data(); - T* alpha_value = alpha->data(); - - auto x_dims = emission.dims(); - const size_t seq_length = x_dims[0]; - const size_t tag_num = x_dims[1]; - // The 1st row of w are transition weights for start mask. - // The 2nd row of w are transition weights for end mask. - // Transition weights between other tags begin from the 3rd row of w. - const size_t state_trans_base_idx = 2; - - for (size_t i = 0; i < tag_num; ++i) { - alpha_value[i] = w_exps[i] * x_exps[i]; - } - T ll = -x_row_max[0] - std::log(NormalizeL1(alpha_value, tag_num)); - - for (size_t k = 1; k < seq_length; ++k) { - for (size_t i = 0; i < tag_num; ++i) { - T sum = 0.; - for (size_t j = 0; j < tag_num; ++j) { - sum += alpha_value[(k - 1) * tag_num + j] * // (*) - w_exps[(j + state_trans_base_idx) * tag_num + i]; - } - alpha_value[k * tag_num + i] = x_exps[k * tag_num + i] * sum; - } - // NormalizeL1 is to avoid underflow or overflow at (*). - ll -= x_row_max[k] + - std::log(NormalizeL1(alpha_value + k * tag_num, tag_num)); - } - T sum = 0.; - for (size_t i = 0; i < tag_num; ++i) { - sum += alpha_value[(seq_length - 1) * tag_num + i] * w_exps[tag_num + i]; - } - ll -= std::log(sum); - // Now ll is equal to -log(Z). - - const int64_t* lbl = label.data(); - PADDLE_ENFORCE_LT( - static_cast(*std::max_element(lbl, lbl + seq_length)), - tag_num, - phi::errors::InvalidArgument( - "An invalid tag label that excesses the largest tag number.")); - - // Calculate the nominator part, which depends on the label sequence. 
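// In the notation of the operator DOC, the terms added below form the
// unnormalized score of the labeled path s:
//   score(s) = a_{s_1} + b_{s_L} + sum_{l=1..L} x_{l,s_l}
//              + sum_{l=2..L} w_{s_{l-1},s_l}
// so that, combined with ll = -log(Z) from above, the value returned is
// -(score(s) - log Z), i.e. the negative log-likelihood of the sequence.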
- ll += w[lbl[0]] /*start transition*/ + x[lbl[0]] + - w[tag_num + lbl[seq_length - 1]] /*end transition*/; - for (size_t k = 1; k < seq_length; ++k) { - ll += x[k * tag_num + lbl[k]] + - w[(lbl[k - 1] + state_trans_base_idx) * tag_num + lbl[k]]; - } - return -ll; - } -}; - -template -class LinearChainCRFGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* label = ctx.Input("Label"); - const phi::DenseTensor* emission_exps = - ctx.Input("EmissionExps"); - const phi::DenseTensor* transition_exps = - ctx.Input("TransitionExps"); - const phi::DenseTensor* alpha = ctx.Input("Alpha"); - const T* ll_grad = - ctx.Input(framework::GradVarName("LogLikelihood")) - ->data(); - phi::DenseTensor* emission_grad = - ctx.Output(framework::GradVarName("Emission")); - auto* emission_grad_data = - emission_grad->mutable_data(platform::CPUPlace()); - memset(emission_grad_data, 0, emission_grad->numel() * sizeof(T)); - phi::DenseTensor alpha_tmp = *alpha; - phi::DenseTensor label_tmp = *label; - phi::DenseTensor emission_exps_tmp = *emission_exps; - phi::DenseTensor emission_grad_tmp = *emission_grad; - // getting seq_num using padding or not - int64_t seq_num = 0; - framework::LoD in_lod; - const int64_t* length_data = nullptr; - if (ctx.HasInput("Length")) { - const phi::DenseTensor* label_length = - ctx.Input("Length"); - length_data = label_length->data(); - seq_num = label_length->numel(); - auto emission_dims = emission_grad->dims(); - auto label_dims = label->dims(); - emission_grad_tmp.Resize( - {emission_dims[0] * emission_dims[1], emission_dims[2]}); - label_tmp.Resize({label_dims[0] * label_dims[1], 1}); - alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); - emission_exps_tmp.Resize( - {emission_dims[0] * emission_dims[1], emission_dims[2]}); - } else { - in_lod = ctx.Input("Label")->lod(); - PADDLE_ENFORCE_NE( - in_lod.size(), - 0, - phi::errors::InvalidArgument("Input(Label) must be a sequence.")); - seq_num = static_cast(in_lod[0].size() - 1); - } - - phi::DenseTensor* transition_grad = - ctx.Output(framework::GradVarName("Transition")); - - // TODO(caoying) Fix this constraint. When the Input(Emission) is from the - // data reader operator, it can have no gradients. - if (transition_grad) { - transition_grad->mutable_data(platform::CPUPlace()); - phi::funcs::set_constant( - ctx.device_context(), transition_grad, static_cast(0.)); - } - // Now, all the inputs and outputs should be on the CPU memory. - auto emission_dims = emission_exps->dims(); - // Beta is the memo table used in dynamic programming to calculate the - // backward vectors. For a backward vector i (the i-th row of beta), it - // captures the unnormalized probabilities of partial sequences starting - // at position i. 
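// With forward vectors alpha and backward vectors beta, the per-position
// tag marginal used by the gradient below is
//   P(y_k = i | X) = alpha[k][i] * beta[k][i] / sum_j (alpha[k][j] * beta[k][j])
// which is exactly prob / row_sum in the Eigen expression further down; the
// emission gradient then follows the expected-minus-observed pattern,
// (marginal - onehot(label)) scaled by the incoming LogLikelihood gradient.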
- phi::DenseTensor beta; - beta.mutable_data(emission_dims, platform::CPUPlace()); - if (ctx.HasInput("Length")) { - beta.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); - } - - for (int64_t i = 0; i < seq_num; ++i) { - int64_t start_pos = 0; - int64_t end_pos = 0; - if (ctx.HasInput("Length")) { - start_pos = i * emission_dims[1]; - end_pos = start_pos + length_data[i]; - } else { - start_pos = static_cast(in_lod[0][i]); - end_pos = static_cast(in_lod[0][i + 1]); - } - - if (end_pos == start_pos) { - continue; - } - const phi::DenseTensor one_seq_emission_exps = - emission_exps_tmp.Slice(start_pos, end_pos); - const phi::DenseTensor one_seq_label = - label_tmp.Slice(start_pos, end_pos); - const phi::DenseTensor one_seq_alpha = - alpha_tmp.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_beta = beta.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_emission_grad = - emission_grad_tmp.Slice(start_pos, end_pos); - BackwardOneSequence(ctx.template device_context(), - ll_grad[i], - one_seq_emission_exps, - *transition_exps, - one_seq_alpha, - one_seq_label, - &one_seq_beta, - transition_grad, - &one_seq_emission_grad); - } - }; - - private: - void BackwardOneSequence(const phi::CPUContext& ctx, - const T ll_grad, - const phi::DenseTensor& emission_exps, - const phi::DenseTensor& transition_exps, - const phi::DenseTensor& alpha, - const phi::DenseTensor& label, - phi::DenseTensor* beta, - phi::DenseTensor* transition_grad, - phi::DenseTensor* emission_grad) const { - const T* w_exps = transition_exps.data(); - const T* x_exps = emission_exps.data(); - const int64_t* label_value = label.data(); - T* beta_value = beta->data(); - auto x_dims = emission_exps.dims(); - const size_t seq_length = x_dims[0]; - const size_t tag_num = x_dims[1]; - const size_t state_trans_base_idx = 2; - - // Calculate the backward vectors: beta. - // First, calculate the initial state. - for (size_t i = 0; i < tag_num; ++i) { - beta_value[(seq_length - 1) * tag_num + i] = w_exps[tag_num + i]; - } - NormalizeL1(beta_value + (seq_length - 1) * tag_num, tag_num); - for (int k = static_cast(seq_length) - 2; k >= 0; --k) { - for (size_t i = 0; i < tag_num; ++i) { - T sum = 0.; - for (size_t j = 0; j < tag_num; ++j) { - sum += w_exps[(i + state_trans_base_idx) * tag_num + j] * // (**) - x_exps[(k + 1) * tag_num + j] * - beta_value[(k + 1) * tag_num + j]; - } - beta_value[k * tag_num + i] = sum; - } - // NormalizeL1 is to avoid underflow or overflow at (**). - NormalizeL1(beta_value + k * tag_num, tag_num); - } - - auto x_grad_mat = framework::EigenMatrix::From(*emission_grad); - auto alpha_mat = framework::EigenMatrix::From(alpha); - auto beta_mat = framework::EigenMatrix::From(*beta); - - auto* place = ctx.eigen_device(); - auto prob = alpha_mat * beta_mat; - auto row_sum = prob.sum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(seq_length, 1)) - .broadcast(Eigen::DSizes(1, tag_num)); - x_grad_mat.device(*place) = - (prob / row_sum).unaryExpr(ScalarMul(ll_grad)); - - for (size_t k = 0; k < seq_length; ++k) { - x_grad_mat(k, label_value[k]) -= static_cast(ll_grad); - } - - if (transition_grad) { - T* trans_grad = transition_grad->data(); - for (size_t k = 0; k < tag_num; ++k) { - // Do not multiply by the output gradient here, because x_grad_mat has - // already done this. 
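-      // Rows 0 and 1 of the transition matrix hold the start- and
-      // end-transition weights, so their gradients are read off the first
-      // and last rows of x_grad_mat respectively.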
- trans_grad[k] += x_grad_mat(/*from start state*/ 0, k); - trans_grad[tag_num + k] += - x_grad_mat(/*to end state*/ seq_length - 1, k); - } - - auto x_exps_mat = framework::EigenMatrix::From(emission_exps); - - // TODO(caoying): Fix this to avoid using this local variable if we can - // profile the training process. - phi::DenseTensor tmp; - tmp.mutable_data(beta->dims(), platform::CPUPlace()); - auto tmp_mat = framework::EigenMatrix::From(tmp); - auto prob = beta_mat * x_exps_mat; - auto row_sum = prob.sum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(seq_length, 1)) - .broadcast(Eigen::DSizes(1, tag_num)); - tmp_mat.device(*place) = prob / row_sum; - - for (size_t k = 1; k < seq_length; ++k) { - T sum = 0.; - for (size_t i = 0; i < tag_num; ++i) { - for (size_t j = 0; j < tag_num; ++j) { - sum += w_exps[(i + state_trans_base_idx) * tag_num + j] * // (**) - alpha_mat(k - 1, i) * tmp_mat(k, j); - } - } - sum = 1. / sum; - for (size_t i = 0; i < tag_num; ++i) { - for (size_t j = 0; j < tag_num; ++j) { - trans_grad[(i + state_trans_base_idx) * tag_num + j] += - sum * w_exps[(i + state_trans_base_idx) * tag_num + j] * - alpha_mat(k - 1, i) * tmp_mat(k, j) * ll_grad; - } - } - trans_grad[(label_value[k - 1] + state_trans_base_idx) * tag_num + - label_value[k]] -= static_cast(ll_grad); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/lite/CMakeLists.txt b/paddle/fluid/operators/lite/CMakeLists.txt deleted file mode 100644 index ca3b62648378b..0000000000000 --- a/paddle/fluid/operators/lite/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -op_library(lite_engine_op DEPS lite_engine lite_tensor_utils) diff --git a/paddle/fluid/operators/lite/lite_engine_op.cc b/paddle/fluid/operators/lite/lite_engine_op.cc deleted file mode 100644 index 0ec1c55f7abee..0000000000000 --- a/paddle/fluid/operators/lite/lite_engine_op.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/lite/lite_engine_op.h" - -#include -#include - -namespace paddle { - -namespace operators { - -class LiteEngineOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Xs", "A list of inputs.").AsDuplicable(); - AddOutput("Ys", "A list of outputs.").AsDuplicable(); - AddAttr( - "engine_key", - "The engine_key here is used to distinguish different Lite Engines"); - AddComment("Lite engine operator."); - } -}; - -class LiteInferVarType : public framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override {} -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(lite_engine, ops::LiteEngineOp, ops::LiteEngineOpMaker); diff --git a/paddle/fluid/operators/lite/lite_engine_op.h b/paddle/fluid/operators/lite/lite_engine_op.h deleted file mode 100644 index 756fec24d9874..0000000000000 --- a/paddle/fluid/operators/lite/lite_engine_op.h +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/inference/lite/engine.h" -#include "paddle/fluid/inference/lite/tensor_utils.h" -#include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" - -namespace paddle { -namespace operators { - -class LiteEngineOp : public framework::OperatorBase { - private: - std::vector in_names_; - std::vector out_names_; - paddle::lite_api::PaddlePredictor *engine_; - framework::proto::VarType::Type precision_; - bool use_gpu_; - bool zero_copy_; - - public: - LiteEngineOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) { - in_names_ = Inputs("Xs"); - out_names_ = Outputs("Ys"); - engine_ = - inference::Singleton::Global().Get( - Attr("engine_key")); - if (Attr("enable_int8")) { - precision_ = framework::proto::VarType_Type_INT8; - } else { - precision_ = framework::proto::VarType_Type_FP32; - } - use_gpu_ = Attr("use_gpu"); - zero_copy_ = Attr("zero_copy"); - } - - void SetEngine(paddle::lite_api::PaddlePredictor *engine) { - engine_ = engine; - } - - protected: - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - Execute(scope, dev_place); - } - - void Execute(const framework::Scope &scope, - const platform::Place &dev_place) const { - const platform::DeviceContext *ctx = - platform::DeviceContextPool::Instance().Get(dev_place); - for (size_t i = 0; i < in_names_.size(); i++) { - phi::DenseTensor src_t = - 
inference::analysis::GetFromScope(scope, - in_names_[i]); - paddle::lite_api::Tensor dst_t = *(engine_->GetInput(i)); - VLOG(3) << "== fluid -> lite (" << in_names_[i] << " -> " - << engine_->GetInputNames()[i] << ")"; - inference::lite::utils::TensorCopy(&dst_t, &src_t, *ctx, zero_copy_); - } - VLOG(3) << "lite engine run"; - engine_->Run(); - VLOG(3) << "lite engine run done"; - for (size_t i = 0; i < out_names_.size(); i++) { - paddle::lite_api::Tensor src_t = *(engine_->GetOutput(i)); - phi::DenseTensor *dst_t = - &inference::analysis::GetFromScope(scope, - out_names_[i]); - VLOG(3) << "== lite -> fluid (" << out_names_[i] << " -> " - << engine_->GetOutputNames()[i] << ")"; - inference::lite::utils::TensorCopy(dst_t, &src_t, *ctx, zero_copy_); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/metrics/CMakeLists.txt b/paddle/fluid/operators/metrics/CMakeLists.txt deleted file mode 100644 index b968dbf288ee2..0000000000000 --- a/paddle/fluid/operators/metrics/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include(operators) -if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/metrics. - include(unity_build_rule.cmake) -endif() -register_operators() diff --git a/paddle/fluid/operators/metrics/precision_recall_op.cc b/paddle/fluid/operators/metrics/precision_recall_op.cc deleted file mode 100644 index 95a66cb2edd1d..0000000000000 --- a/paddle/fluid/operators/metrics/precision_recall_op.cc +++ /dev/null @@ -1,250 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/metrics/precision_recall_op.h" - -namespace paddle { -namespace operators { - -class PrecisionRecallOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("MaxProbs"), - true, - phi::errors::NotFound( - "PrecisionRecallOp Input(MaxProbs) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Indices"), - true, - phi::errors::NotFound( - "PrecisionRecallOp Input(Indices) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Labels"), - true, - phi::errors::NotFound( - "PrecisionRecallOp Input(Labels) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("BatchMetrics"), - true, - phi::errors::NotFound( - "PrecisionRecallOp Output(BatchMetrics) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("AccumMetrics"), - true, - phi::errors::NotFound( - "PrecisionRecallOp Output(AccumMetrics) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("AccumStatesInfo"), - true, - phi::errors::NotFound( - "PrecisionRecallOp Output(AccumStatesInfo) should not be null.")); - - int64_t cls_num = - static_cast(ctx->Attrs().Get("class_number")); - auto max_probs_dims = ctx->GetInputDim("MaxProbs"); - auto labels_dims = ctx->GetInputDim("Labels"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(max_probs_dims[1], - 1, - phi::errors::InvalidArgument( - "Each instance of PrecisionRecallOp " - "Input(MaxProbs) contains one max probability, " - "the shape of Input(MaxProbs) should be " - "[batch_size, 1], the 2nd dimension of " - "Input(MaxProbs) should be 1. But the 2nd " - "dimension we received is %d", - max_probs_dims[1])); - PADDLE_ENFORCE_EQ( - ctx->GetInputDim("Indices"), - max_probs_dims, - phi::errors::InvalidArgument( - "The shape of PrecisionRecallOp Input(Indices) should be same " - "with " - "max_probs_dims. But received the shape of Input(Indices) is " - "[%d, %d], max_probs_dims is [%d, %d]", - ctx->GetInputDim("Indices")[0], - ctx->GetInputDim("Indices")[1], - max_probs_dims[0], - max_probs_dims[1])); - PADDLE_ENFORCE_EQ( - max_probs_dims[0], - labels_dims[0], - phi::errors::InvalidArgument( - "The 1st dimension of PrecisionRecallOp Input(MaxProbs) and " - "Input(Labels) both should be batch_size" - "But the 1st dimension we received max_probs_dims[0] = %d, " - "labels_dims[0] = %d", - max_probs_dims[0], - labels_dims[0])); - PADDLE_ENFORCE_EQ(labels_dims[1], - 1, - phi::errors::InvalidArgument( - "The 2nd dimension of PrecisionRecallOp " - "Input(Labels) contains instance label and " - "the shape should be equal to 1. But the 2nd " - "dimension we received is %d", - labels_dims[1])); - } - if (ctx->HasInput("Weights")) { - auto weights_dims = ctx->GetInputDim("Weights"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - weights_dims, - common::make_ddim({max_probs_dims[0], 1}), - phi::errors::InvalidArgument( - "The shape of PrecisionRecallOp Input(Weights) should be " - "[batch_size, 1]. But the shape we received is [%d, %d]", - weights_dims[0], - weights_dims[1])); - } - } - if (ctx->HasInput("StatesInfo")) { - auto states_dims = ctx->GetInputDim("StatesInfo"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - states_dims, - common::make_ddim({cls_num, 4}), - phi::errors::InvalidArgument( - "The shape of PrecisionRecallOp Input(StatesInfo) should be " - "[class_number, 4]. 
But the shape we received is [%d, %d]", - states_dims[0], - states_dims[1])); - } - } - - // Layouts of BatchMetrics and AccumMetrics both are: - // [ - // macro average precision, macro average recall, macro average F1 score, - // micro average precision, micro average recall, micro average F1 score - // ] - ctx->SetOutputDim("BatchMetrics", {6}); - ctx->SetOutputDim("AccumMetrics", {6}); - // Shape of AccumStatesInfo is [class_number, 4] - // The layout of each row is: - // [ TP, FP, TN, FN ] - ctx->SetOutputDim("AccumStatesInfo", {cls_num, 4}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "MaxProbs"), - ctx.GetPlace()); - } -}; - -class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("MaxProbs", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. Each row contains the max probability " - "of an instance which computed by the previous top_k (k=1) " - "operator."); - AddInput("Indices", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. Each row contains the corresponding " - "index which computed by the previous top_k (k=1) operator."); - AddInput("Labels", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. Each element is a label and the " - "value should be in [0, class_number - 1]."); - AddInput("Weights", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. This input is optional. If provided, " - "weight of instance would be considered when computing metrics.") - .AsDispensable(); - AddInput("StatesInfo", - "(Tensor, default Tensor) A 2-D tensor with shape D x 4, " - "where D is the number of classes. This input is optional. If " - "provided, current state will be accumulated to this state and " - "the accumulation state will be the output state.") - .AsDispensable(); - AddOutput("BatchMetrics", - "(Tensor, default Tensor) A 1-D tensor with shape {6}. " - "This output tensor contains metrics for current batch data. " - "The layout is [macro average precision, macro average recall, " - "macro f1 score, micro average precision, micro average recall, " - "micro f1 score]."); - AddOutput("AccumMetrics", - "(Tensor, default Tensor) A 1-D tensor with shape {6}. " - "This output tensor contains metrics for accumulated data. " - "The layout is [macro average precision, macro average recall, " - "macro f1 score, micro average precision, micro average recall, " - "micro f1 score]."); - AddOutput("AccumStatesInfo", - "(Tensor, default Tensor) A 2-D tensor with shape D x 4, " - "where D is equal to class number. This output tensor contains " - "accumulated state variables used to compute metrics. The layout " - "for each class is [true positives, false positives, " - "true negatives, false negatives]."); - AddAttr("class_number", "(int) Number of classes to be evaluated."); - AddComment(R"DOC( -Precision Recall Operator. - -When given Input(Indices) and Input(Labels), this operator can be used -to compute various metrics including: -1. macro average precision -2. macro average recall -3. macro f1 score -4. micro average precision -5. micro average recall -6. micro f1 score - -To compute the above metrics, we need to do statistics for true positives, -false positives and false negatives. 
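-For each class c, the per-class values are
-  precision_c = TP_c / (TP_c + FP_c)
-  recall_c    = TP_c / (TP_c + FN_c)
-  F1_c        = 2 * precision_c * recall_c / (precision_c + recall_c)
-The macro metrics average these values over all classes, while the micro
-metrics apply the same formulas to the TP/FP/FN counts summed over all
-classes.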
Here the count of true negatives is not
-strictly needed for the metrics above, but it is cheap to maintain and may be
-useful, so the operator reports it as well.
-
-We define state as a 2-D tensor with shape [class_number, 4]. Each row of a
-state contains the statistic variables for the corresponding class. The
-layout of each row is: TP(true positives), FP(false positives),
-TN(true negatives), FN(false negatives). If Input(Weights) is provided,
-TP, FP, TN, FN will be calculated from the given weights instead of the
-instance counts.
-
-This operator also supports computing metrics across batches. To achieve
-this, Input(StatesInfo) should be provided. The statistics of the current
-batch will be combined with Input(StatesInfo), and Output(AccumStatesInfo)
-holds the accumulated state.
-
-Output(BatchMetrics) holds the metrics of the current batch data, while
-Output(AccumMetrics) holds the metrics of the accumulated data.
-
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    precision_recall,
-    ops::PrecisionRecallOp,
-    ops::PrecisionRecallOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-PD_REGISTER_STRUCT_KERNEL(precision_recall,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::PrecisionRecallKernel,
-                          float,
-                          double) {}
diff --git a/paddle/fluid/operators/metrics/precision_recall_op.h b/paddle/fluid/operators/metrics/precision_recall_op.h
deleted file mode 100644
index 8a276d2fa5a32..0000000000000
--- a/paddle/fluid/operators/metrics/precision_recall_op.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T,
-          int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
-
-enum StateVariable { TP = 0, FP, TN, FN };
-
-template <typename T, typename DeviceContext>
-class PrecisionRecallKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in0 = ctx.Input<phi::DenseTensor>("Indices");
-    auto* in1 = ctx.Input<phi::DenseTensor>("Labels");
-    auto* in2 = ctx.Input<phi::DenseTensor>("Weights");
-    auto* in3 = ctx.Input<phi::DenseTensor>("StatesInfo");
-    auto* out0 = ctx.Output<phi::DenseTensor>("BatchMetrics");
-    auto* out1 = ctx.Output<phi::DenseTensor>("AccumMetrics");
-    auto* out2 = ctx.Output<phi::DenseTensor>("AccumStatesInfo");
-
-    const int* ids_data = in0->data<int>();
-    const int* labels_data = in1->data<int>();
-    size_t cls_num = static_cast<size_t>(ctx.Attr<int>("class_number"));
-    const T* weights_data = in2 ? in2->data<T>() : nullptr;
-    const T* states_data = in3 ?
in3->data() : nullptr; - double* batch_metrics_data = out0->mutable_data(ctx.GetPlace()); - double* accum_metrics_data = out1->mutable_data(ctx.GetPlace()); - out2->mutable_data(ctx.GetPlace()); - auto accum_states = EigenMatrix::From(*out2); - accum_states.setZero(); - T* accum_states_data = out2->data(); - - size_t sample_num = in0->dims()[0]; - size_t state_var_num = 4; // TP FP TN FN - - // get states info for current batch - for (size_t i = 0; i < sample_num; ++i) { - size_t idx = ids_data[i]; - size_t label = labels_data[i]; - - PADDLE_ENFORCE_GE( - idx, - 0, - phi::errors::InvalidArgument( - "Class index of each instance should be " - "greater than or equal to 0, But the index we received is %d", - idx)); - PADDLE_ENFORCE_LT(idx, - cls_num, - phi::errors::InvalidArgument( - "Class index of each instance should be less than " - "cls_num = %d, But the index we received is %d", - cls_num, - idx)); - - PADDLE_ENFORCE_GE(label, - 0, - phi::errors::InvalidArgument( - "Label of each instance should be greater than or " - "equal to 0, But the label we received is %d", - label)); - PADDLE_ENFORCE_LT(label, - cls_num, - phi::errors::InvalidArgument( - "Label of each instance should be less than " - "cls_num = %d, But the label we received is %d", - cls_num, - label)); - - T w = weights_data ? weights_data[i] : 1.0; - if (idx == label) { - accum_states_data[idx * state_var_num + TP] += w; - for (size_t j = 0; j < cls_num; ++j) { - accum_states_data[j * state_var_num + TN] += w; - } - accum_states_data[idx * state_var_num + TN] -= w; - } else { - accum_states_data[label * state_var_num + FN] += w; - accum_states_data[idx * state_var_num + FP] += w; - for (size_t j = 0; j < cls_num; ++j) { - accum_states_data[j * state_var_num + TN] += w; - } - accum_states_data[idx * state_var_num + TN] -= w; - accum_states_data[label * state_var_num + TN] -= w; - } - } - - ComputeMetrics( - accum_states_data, batch_metrics_data, state_var_num, cls_num); - - if (states_data) { - for (size_t i = 0; i < cls_num; ++i) { - for (size_t j = 0; j < state_var_num; ++j) { - size_t idx = i * state_var_num + j; - accum_states_data[idx] += states_data[idx]; - } - } - } - - ComputeMetrics( - accum_states_data, accum_metrics_data, state_var_num, cls_num); - } - - // expose to be reused - static inline T CalcPrecision(T tp_count, T fp_count) { - if (tp_count > 0.0 || fp_count > 0.0) { - return tp_count / (tp_count + fp_count); - } - return 1.0; - } - - static inline T CalcRecall(T tp_count, T fn_count) { - if (tp_count > 0.0 || fn_count > 0.0) { - return tp_count / (tp_count + fn_count); - } - return 1.0; - } - - static inline T CalcF1Score(T precision, T recall) { - if (precision > 0.0 || recall > 0.0) { - return 2 * precision * recall / (precision + recall); - } - return 0.0; - } - - protected: - void ComputeMetrics(const T* states_data, - double* metrics_data, - size_t state_var_num, - size_t cls_num) const { - T total_tp_count = 0; - T total_fp_count = 0; - T total_fn_count = 0; - T macro_avg_precision = 0.0; - T macro_avg_recall = 0.0; - - for (size_t i = 0; i < cls_num; ++i) { - T tp_count = states_data[i * state_var_num + TP]; - T fp_count = states_data[i * state_var_num + FP]; - T fn_count = states_data[i * state_var_num + FN]; - total_tp_count += tp_count; - total_fp_count += fp_count; - total_fn_count += fn_count; - macro_avg_precision += CalcPrecision(tp_count, fp_count); - macro_avg_recall += CalcRecall(tp_count, fn_count); - } - macro_avg_precision /= cls_num; - macro_avg_recall /= cls_num; - T macro_f1_score = 
CalcF1Score(macro_avg_precision, macro_avg_recall); - - T micro_avg_precision = CalcPrecision(total_tp_count, total_fp_count); - T micro_avg_recall = CalcRecall(total_tp_count, total_fn_count); - T micro_f1_score = CalcF1Score(micro_avg_precision, micro_avg_recall); - - // fill metrics data - metrics_data[0] = macro_avg_precision; - metrics_data[1] = macro_avg_recall; - metrics_data[2] = macro_f1_score; - metrics_data[3] = micro_avg_precision; - metrics_data[4] = micro_avg_recall; - metrics_data[5] = micro_f1_score; - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/metrics/unity_build_rule.cmake b/paddle/fluid/operators/metrics/unity_build_rule.cmake deleted file mode 100644 index dee8680cc93d3..0000000000000 --- a/paddle/fluid/operators/metrics/unity_build_rule.cmake +++ /dev/null @@ -1,7 +0,0 @@ -# This file records the Unity Build compilation rules. -# The source files in a `register_unity_group` called are compiled in a unity -# file. -# Generally, the combination rules in this file do not need to be modified. -# If there are some redefined error in compiling with the source file which -# in combination rule, you can remove the source file from the following rules. -register_unity_group(cc precision_recall_op.cc) diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc deleted file mode 100644 index 1726a8f818ec1..0000000000000 --- a/paddle/fluid/operators/minus_op.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/minus_op.h" - -#include -#include -#include -#include - -namespace paddle { -namespace operators { - -class MinusOp : public framework::OperatorWithKernel { - public: - MinusOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - phi::errors::NotFound("Input(X) of MinusOp is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Y"), - true, - phi::errors::NotFound("Input(Y) of MinusOp is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Out"), - true, - phi::errors::NotFound("Output(Out) of MinusOp is not found.")); - - auto x_dims = ctx->GetInputDim("X"); - auto y_dims = ctx->GetInputDim("Y"); - - if (ctx->IsRuntime() || - (common::product(x_dims) > 0 && common::product(y_dims) > 0)) { - PADDLE_ENFORCE_EQ( - x_dims, - y_dims, - phi::errors::InvalidArgument( - "Minus operator must take two tensor with same dim, but received " - "input X dim is:[%s], Y dim is:[%s]", - x_dims, - y_dims)); - } - ctx->SetOutputDim("Out", x_dims); - ctx->ShareLoD("X", /*->*/ "Out"); - } -}; - -class MinusOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The left tensor of minus operator."); - AddInput("Y", "The right tensor of minus operator."); - AddOutput("Out", "The output tensor of minus operator."); - - AddComment(R"DOC( -Minus Operator. - -Equation: - - $Out = X - Y$ - -Both the input `X` and `Y` can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input `X`. 
- -)DOC"); - } -}; - -class MinusGradDescMaker : public framework::GradOpDescMakerBase { - public: - using framework::GradOpDescMakerBase::GradOpDescMakerBase; - - std::vector> operator()() const override { - std::vector> ops; - auto x_g = this->InputGrad("X"); - if (!x_g.empty()) { - auto *x_g_op = new framework::OpDesc(); - x_g_op->SetType("scale"); - x_g_op->SetInput("X", this->OutputGrad("Out")); - x_g_op->SetOutput("Out", x_g); - x_g_op->SetAttr("scale", 1.0f); - ops.emplace_back(x_g_op); - } - - auto y_g = this->InputGrad("Y"); - if (!y_g.empty()) { - auto *y_g_op = new framework::OpDesc(); - y_g_op->SetType("scale"); - y_g_op->SetInput("X", this->OutputGrad("Out")); - y_g_op->SetOutput("Out", y_g); - y_g_op->SetAttr("scale", -1.0f); - ops.emplace_back(y_g_op); - } - - return ops; - } -}; - -class MinusGradMaker : public imperative::GradOpBaseMakerBase { - public: - using imperative::GradOpBaseMakerBase::GradOpBaseMakerBase; - - std::shared_ptr operator()() const override { - auto x_g = this->InputGrad("X"); - auto y_g = this->InputGrad("Y"); - - auto node = this->NewGradNode(); - - if (!x_g.empty()) { - imperative::TracedGradOp op(node); - op.SetType("scale"); - op.SetInput("X", this->OutputGrad("Out")); - op.SetOutput("Out", x_g); - op.SetAttr("scale", 1.0f); - op.SetDefaultAttrsMap(DefaultAttrsMap()); - } - - if (!y_g.empty()) { - imperative::TracedGradOp op(node); - op.SetType("scale"); - op.SetInput("X", this->OutputGrad("Out")); - op.SetOutput("Out", y_g); - op.SetAttr("scale", -1.0f); - op.SetDefaultAttrsMap(DefaultAttrsMap()); - } - - return node; - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(minus, - ops::MinusOp, - ops::MinusOpMaker, - ops::MinusGradDescMaker, - ops::MinusGradMaker); -PD_REGISTER_STRUCT_KERNEL(minus, CPU, ALL_LAYOUT, ops::MinusKernel, float) {} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_REGISTER_STRUCT_KERNEL(minus, GPU, ALL_LAYOUT, ops::MinusKernel, float) {} -#endif diff --git a/paddle/fluid/operators/minus_op.h b/paddle/fluid/operators/minus_op.h deleted file mode 100644 index 2f900a2b16bc2..0000000000000 --- a/paddle/fluid/operators/minus_op.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" - -namespace paddle { -namespace operators { - -template -class MinusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* left_tensor = context.Input("X"); - auto* right_tensor = context.Input("Y"); - auto* out_tensor = context.Output("Out"); - - out_tensor->mutable_data(context.GetPlace()); - auto& dev = - *context.template device_context().eigen_device(); - phi::funcs::EigenSub, T>::Eval( - dev, - framework::EigenVector::Flatten(*out_tensor), - framework::EigenVector::Flatten(*left_tensor), - framework::EigenVector::Flatten(*right_tensor)); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/nccl/CMakeLists.txt b/paddle/fluid/operators/nccl/CMakeLists.txt index 629b41b4b582b..e46aa52527c06 100644 --- a/paddle/fluid/operators/nccl/CMakeLists.txt +++ b/paddle/fluid/operators/nccl/CMakeLists.txt @@ -15,10 +15,3 @@ if(WITH_ROCM AND NOT WIN32) SRCS nccl_gpu_common.cc DEPS device_context operator) endif() - -if(WITH_GPU OR WITH_ROCM) - op_library(nccl_op DEPS nccl_common) - set(OPERATOR_DEPS - ${OPERATOR_DEPS} nccl_common - PARENT_SCOPE) -endif() diff --git a/paddle/fluid/operators/nccl/nccl_op.cc b/paddle/fluid/operators/nccl/nccl_op.cc deleted file mode 100644 index dd3fd52d3b24d..0000000000000 --- a/paddle/fluid/operators/nccl/nccl_op.cc +++ /dev/null @@ -1,254 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/nccl/nccl_gpu_common.h" - -namespace paddle { -namespace operators { - -static constexpr char kParallelScopes[] = "parallel_scopes"; // NOLINT - -// NCCLinitOp -class NCCLInitOp : public framework::OperatorBase { - public: - NCCLInitOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &place) const override { - PADDLE_ENFORCE_NOT_NULL( - scope.FindVar(Input(kParallelScopes)), - phi::errors::NotFound("Can not find variable '%s' in the scope.", - kParallelScopes)); - const auto &name = Output("Communicator"); - PADDLE_ENFORCE_NOT_NULL( - scope.FindVar(name), - phi::errors::NotFound("Output(%s) is needed for ncclInit operator.", - name)); - // A parallel do may not use all the gpus. For example, the batch size is 7 - // in the last batch while we have 8 gpu. 
In this case, parallel_do will
-    // create 7 parallel scopes, so ncclInitOp should create 7 GPU peers.
-    auto &parallel_scopes = scope.FindVar(Input(kParallelScopes))
-                                ->Get<std::vector<framework::Scope *>>();
-    std::vector<int> gpus(parallel_scopes.size());
-    for (int i = 0; i < static_cast<int>(parallel_scopes.size()); ++i) {
-      gpus[i] = i;
-    }
-    PADDLE_ENFORCE_EQ(!gpus.empty(),
-                      true,
-                      phi::errors::PreconditionNotMet(
-                          "The gpu list is empty; ncclInit needs at least "
-                          "one gpu."));
-
-    platform::Communicator *comm =
-        scope.FindVar(name)->GetMutable<platform::Communicator>();
-    comm->InitAll(gpus);
-  }
-};
-
-class NCCLInitOpVarTypeInference : public framework::VarTypeInference {
- public:
-  void operator()(framework::InferVarTypeContext *ctx) const override {
-    ctx->SetOutputType("Communicator", framework::proto::VarType::RAW);
-  }
-};
-
-class NCCLInitOpShapeInference : public framework::InferShapeBase {
- public:
-  void operator()(framework::InferShapeContext *ctx) const override {}
-};
-
-class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput(kParallelScopes, "The working place of parallel do.");
-    AddOutput("Communicator",
-              "Create Communicator for communicating between gpus");
-    AddComment(R"DOC(
-NCCLInit Operator.
-
-Create communicator.
-
-)DOC");
-  }
-};
-
-// AllReduceOp
-class NCCLAllReduceOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "NCCLAllReduce");
-    OP_INOUT_CHECK(ctx->HasInput("Communicator"),
-                   "Input",
-                   "Communicator",
-                   "NCCLAllReduce");
-
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLAllReduce");
-
-    std::string reduction = ctx->Attrs().Get<std::string>("reduction");
-    PADDLE_ENFORCE_EQ((reduction == "ncclSum" || reduction == "ncclProd" ||
-                       reduction == "ncclMin" || reduction == "ncclMax"),
-                      true,
-                      phi::errors::InvalidArgument("Invalid nccl reduction."));
-
-    auto x_dims = ctx->GetInputsDim("X");
-    ctx->SetOutputsDim("Out", x_dims);
-    ctx->ShareLoD("X", /*->*/ "Out");
-  }
-};
-
-// AllReduceOpMaker
-class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X", "The input of AllReduce op");
-    AddInput("Communicator", "Communicator for communicating between gpus");
-    AddOutput("Out", "The output of AllReduce op");
-    AddAttr<std::string>("reduction",
-                         "(string, default 'ncclSum') "
-                         "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.")
-        .SetDefault("ncclSum");
-    AddComment(R"DOC(
-NCCLAllReduce Operator.
-
-AllReduce the input tensors across the GPUs in the communicator; every GPU
-receives the same reduced result.
- -)DOC"); - } -}; - -// ReduceOp -class NCCLReduceOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "NCCLReduce"); - OP_INOUT_CHECK( - ctx->HasInput("Communicator"), "Input", "Communicator", "NCCLReduce"); - - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLReduce"); - - std::string reduction = ctx->Attrs().Get("reduction"); - PADDLE_ENFORCE_EQ((reduction == "ncclSum" || reduction == "ncclProd" || - reduction == "ncclMin" || reduction == "ncclMax"), - true, - phi::errors::InvalidArgument("invalid nccl reduction.")); - - auto x_dims = ctx->GetInputsDim("X"); - ctx->SetOutputsDim("Out", x_dims); - ctx->ShareLoD("X", /*->*/ "Out"); - } -}; - -// ReduceOp -class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input of Reduce op"); - AddInput("Communicator", "Communicator for communicating between gpus"); - AddOutput("Out", "The output of Reduce op"); - AddAttr("reduction", - "(string, default 'ncclSum') " - "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.") - .SetDefault("ncclSum"); - AddAttr("root", - "(int, default kInvalidGPUId) " - "Root gpu of the parameter. If not, " - "set(platform::kInvalidGPUId). Hashed by name.") - .SetDefault(platform::kInvalidGPUId); - AddComment(R"DOC( -NCCLReduce Operator. - -Reduce the tensors. - -)DOC"); - } -}; - -// BcastOp -class NCCLBcastOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "NCCLBcast"); - OP_INOUT_CHECK( - ctx->HasInput("Communicator"), "Input", "Communicator", "NCCLBcast"); - - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLBcast"); - - int root = ctx->Attrs().Get("root"); - PADDLE_ENFORCE_EQ(root != platform::kInvalidGPUId, - true, - phi::errors::InvalidArgument("Bcast root must be set.")); - - auto x_dims = ctx->GetInputsDim("X"); - ctx->SetOutputsDim("Out", x_dims); - ctx->ShareLoD("X", /*->*/ "Out"); - } -}; - -// BcastOp -class NCCLBcastOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input of BcastSend op"); - AddInput("Communicator", "Communicator for communicating between gpus"); - AddOutput("Out", "The output of Bcast"); - AddAttr("root", - "(int, default kInvalidGPUId) " - "Root gpu of the parameter. If not, " - "set(platform::kInvalidGPUId). Hashed by name.") - .SetDefault(platform::kInvalidGPUId); - AddComment(R"DOC( -NCCLBcast Operator. - -Bcast the tensors. 
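-The tensor on the root GPU (attribute root) is copied into the Out tensor of
-every other GPU in the communicator, mirroring what NCCLBcastKernel in
-nccl_op.cu.cc does with ncclBcast.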
-
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    ncclInit,
-    ops::NCCLInitOp,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
-    ops::NCCLInitOpMaker,
-    ops::NCCLInitOpVarTypeInference,
-    ops::NCCLInitOpShapeInference);
-
-REGISTER_OP_WITHOUT_GRADIENT(ncclAllReduce,
-                             ops::NCCLAllReduceOp,
-                             ops::NCCLAllReduceOpMaker);
-REGISTER_OP_WITHOUT_GRADIENT(ncclBcast,
-                             ops::NCCLBcastOp,
-                             ops::NCCLBcastOpMaker);
-REGISTER_OP_WITHOUT_GRADIENT(ncclReduce,
-                             ops::NCCLReduceOp,
-                             ops::NCCLReduceOpMaker);
diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc
deleted file mode 100644
index f1d6073a37231..0000000000000
--- a/paddle/fluid/operators/nccl/nccl_op.cu.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed
-to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <string>
-#include <unordered_map>
-
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
-
-namespace paddle {
-namespace operators {
-
-using platform::Communicator;
-
-template <typename T>
-class NCCLTypeWrapper;
-
-template <>
-class NCCLTypeWrapper<float> {
- public:
-  static const ncclDataType_t type = ncclFloat;
-};
-
-template <>
-class NCCLTypeWrapper<double> {
- public:
-  static const ncclDataType_t type = ncclDouble;
-};
-
-static ncclRedOp_t str_to_nccl_red_type(std::string reduction) {
-  static const std::unordered_map<std::string, ncclRedOp_t> str_to_type = {
-      {"ncclSum", ncclSum},
-      {"ncclMin", ncclMin},
-      {"ncclMax", ncclMax},
-      {"ncclProd", ncclProd},
-  };
-  auto it = str_to_type.find(reduction);
-  PADDLE_ENFORCE_EQ(it != str_to_type.end(),
-                    true,
-                    phi::errors::InvalidArgument(
-                        "Invalid nccl reduction. Must be ncclMin | ncclMax | "
-                        "ncclProd | ncclSum"));
-  return it->second;
-}
-
-template <typename T, typename DeviceContext>
-class NCCLAllReduceKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()),
-                      true,
-                      phi::errors::PreconditionNotMet(
-                          "This kernel only runs on GPU device."));
-    auto* x = ctx.Input<phi::DenseTensor>("X");
-    auto* out = ctx.Output<phi::DenseTensor>("Out");
-    auto* comm = ctx.Input<Communicator>("Communicator");
-    std::string reduction = ctx.Attr<std::string>("reduction");
-
-    auto reduction_op_ = str_to_nccl_red_type(reduction);
-
-    // device id
-    int gpu_id = ctx.GetPlace().GetDeviceId();
-    int idx = comm->GetCommId(gpu_id);
-    VLOG(3) << "gpu : " << gpu_id << " invoke allreduce. send " << x->numel()
-            << " recv " << out->numel();
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::ncclAllReduce(x->data<T>(),
-                                         out->mutable_data<T>(ctx.GetPlace()),
-                                         out->numel(),
-                                         NCCLTypeWrapper<T>::type,
-                                         reduction_op_,
-                                         comm->comms().at(idx),
-                                         ctx.cuda_device_context().stream()));
-    VLOG(3) << "gpu : " << gpu_id << " finished allreduce.
send " << x->numel() << " recv " - << out->numel(); - } -}; - -template -class NCCLReduceKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - phi::errors::InvalidArgument("This kernel only runs on GPU device.")); - auto x = ctx.Input("X"); // x0, x1, x2 - auto out = ctx.Output("Out"); - auto* comm = ctx.Input("Communicator"); - int root = ctx.Attr("root"); - std::string reduction = ctx.Attr("reduction"); - - auto reduction_op_ = str_to_nccl_red_type(reduction); - - // device id - int gpu_id = ctx.GetPlace().GetDeviceId(); - int idx = comm->GetCommId(gpu_id); - T* recvbuffer = nullptr; - if (root == gpu_id) { - recvbuffer = out->mutable_data(ctx.GetPlace()); - } else { - out->Resize(common::make_ddim({0})); - } - VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel() - << " recv " << out->numel(); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::ncclReduce(x->data(), - recvbuffer, - x->numel(), - NCCLTypeWrapper::type, - reduction_op_, - root, - comm->comms().at(idx), - ctx.cuda_device_context().stream())); - VLOG(3) << "gpu : " << gpu_id << " finished reduce. send " << x->numel() - << " recv " << out->numel(); - } -}; - -template -class NCCLBcastKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - phi::errors::InvalidArgument("This kernel only runs on GPU device.")); - int root = ctx.Attr("root"); - auto* comm = ctx.Input("Communicator"); - // device id - int gpu_id = ctx.GetPlace().GetDeviceId(); - int idx = comm->GetCommId(gpu_id); - if (idx == root) { - auto* x = ctx.Input("X"); - VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel(); - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( - reinterpret_cast(const_cast(x->data())), - x->numel(), - NCCLTypeWrapper::type, - root, - comm->comms().at(idx), - ctx.cuda_device_context().stream())); - VLOG(3) << "gpu : " << gpu_id << " finished Bcast."; - } else { - auto* out = ctx.Output("Out"); - VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer " - << common::product(out->dims()); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::ncclBcast(out->mutable_data(ctx.GetPlace()), - out->numel(), - NCCLTypeWrapper::type, - root, - comm->comms().at(idx), - ctx.cuda_device_context().stream())); - VLOG(3) << "gpu : " << gpu_id << " finished Bcast. recv " << out->numel(); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - ncclAllReduce, GPU, ALL_LAYOUT, ops::NCCLAllReduceKernel, float) {} -PD_REGISTER_STRUCT_KERNEL( - ncclBcast, GPU, ALL_LAYOUT, ops::NCCLBcastKernel, float) {} -PD_REGISTER_STRUCT_KERNEL( - ncclReduce, GPU, ALL_LAYOUT, ops::NCCLReduceKernel, float) {} diff --git a/paddle/fluid/operators/rank_attention.cu.h b/paddle/fluid/operators/rank_attention.cu.h deleted file mode 100644 index 7077bd7a7aa4c..0000000000000 --- a/paddle/fluid/operators/rank_attention.cu.h +++ /dev/null @@ -1,218 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/common/dim.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -const int CUDA_NUM_THREADS = 1024; -static inline int GET_BLOCKS(const int N) { - return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; -} - -template -__global__ void expand_input_by_rank_kernel(const T* input, - int input_row, - int input_col, - T* output, - int output_row, - int output_col, - const int* rank_offset, - int rank_offset_row, - int rank_offset_col, - T* ins_rank, - int max_rank) { - CUDA_KERNEL_LOOP(idx, output_row * output_col) { - int output_col_idx = idx % output_col; - int output_row_idx = idx / output_col; - int k = output_col_idx / input_col; - - int faster = rank_offset[output_row_idx * rank_offset_col + 2 * k + 1] - 1; - if (output_col_idx == 0) { - ins_rank[output_row_idx] = rank_offset[output_row_idx * rank_offset_col]; - } - - if (rank_offset[output_row_idx * rank_offset_col] - 1 < 0 || faster < 0) { - continue; - } - - int rank_input_col_idx = output_col_idx % input_col; - int index = rank_offset[output_row_idx * rank_offset_col + 2 * k + 2]; - output[idx] = input[rank_input_col_idx + index * input_col]; - } -} - -template -void expand_rank_attention_input(gpuStream_t stream, - const T* input, - int input_row, - int input_col, - T* output, - int output_row, - int output_col, - const int* rank_offset, - int rank_offset_row, - int rank_offset_col, - T* ins_rank, - int max_rank) { - expand_input_by_rank_kernel<<>>(input, - input_row, - input_col, - output, - output_row, - output_col, - rank_offset, - rank_offset_row, - rank_offset_col, - ins_rank, - max_rank); -} - -template -__global__ void expand_rank_attention_param_kernel(const T* input, - int input_row, - int input_col, - const int* rank_offset, - int rank_offset_row, - int rank_offset_col, - const T* param, - int param_row, - int param_col, - T* output_param, - int output_param_row, - int output_param_col, - int max_rank) { - CUDA_KERNEL_LOOP(idx, output_param_row * output_param_col) { - int output_col_idx = idx % output_param_col; - int output_row_idx = idx / output_param_col; - - int block_matrix_row = max_rank * input_col; - int ins_idx = output_row_idx / block_matrix_row; - int start_offset = output_row_idx % block_matrix_row; - - int k = start_offset / input_col; - int k_offset = start_offset % input_col; - - int lower = rank_offset[ins_idx * rank_offset_col] - 1; - int faster = rank_offset[2 * k + 1 + rank_offset_col * ins_idx] - 1; - - if (lower < 0 || faster < 0) { - continue; - } - int start = lower * max_rank + faster; - int ori_idx = - start * param_col * input_col + k_offset * param_col + output_col_idx; - output_param[idx] = param[ori_idx]; - } -} - -template -void expand_rank_attention_param(gpuStream_t stream, - const T* input, - int input_row, - int input_col, - const int* rank_offset, - int rank_offset_row, - int rank_offset_col, - const T* param, - int param_row, - int param_col, - T* output_param, - int output_param_row, - int output_param_col, - int max_rank) { - expand_rank_attention_param_kernel<<>>(input, - input_row, - input_col, - 
rank_offset, - rank_offset_row, - rank_offset_col, - param, - param_row, - param_col, - output_param, - output_param_row, - output_param_col, - max_rank); -} - -template -__global__ void merge_param_gradient_kernel(T* expanded_grad, - int expanded_grad_row, - int expanded_grad_col, - T* param_grad, - int param_grad_row, - int param_grad_col, - const T* ins_rank, - int ins_num, - int max_rank, - int input_col) { - CUDA_KERNEL_LOOP(tid, param_grad_row * param_grad_col) { - int param_col_idx = tid % param_grad_col; - int param_row_idx = tid / param_grad_col; - - int block_matrix_row = max_rank * input_col; - int rank_idx = param_row_idx / block_matrix_row; - int rank_offset = param_row_idx % block_matrix_row; - - T tmp = 0; - for (int i = 0; i < ins_num; ++i) { - if (ins_rank[i] == rank_idx + 1) { - int row = i * block_matrix_row + rank_offset; - tmp += expanded_grad[row * expanded_grad_col + param_col_idx]; - } - } - param_grad[tid] = tmp; - } -} - -template -void merge_rank_attention_param_grad(gpuStream_t stream, - T* expanded_grad, - int expanded_grad_row, - int expanded_grad_col, - T* param_grad, - int param_grad_row, - int param_grad_col, - const T* ins_rank, - int ins_num, - int max_rank, - int input_col) { - merge_param_gradient_kernel<<>>(expanded_grad, - expanded_grad_row, - expanded_grad_col, - param_grad, - param_grad_row, - param_grad_col, - ins_rank, - ins_num, - max_rank, - input_col); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/rank_attention_op.cc b/paddle/fluid/operators/rank_attention_op.cc deleted file mode 100644 index aaef2782f5e21..0000000000000 --- a/paddle/fluid/operators/rank_attention_op.cc +++ /dev/null @@ -1,211 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/
-
-#include "paddle/fluid/operators/rank_attention_op.h"
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_version_registry.h"
-
-namespace paddle {
-namespace operators {
-
-class RankAttentionOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
-                      true,
-                      phi::errors::InvalidArgument(
-                          "Input(X) of RankAttentionOp should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("RankOffset"),
-        true,
-        phi::errors::InvalidArgument(
-            "Input(RankOffset) of RankAttentionOp should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("RankParam"),
-        true,
-        phi::errors::InvalidArgument(
-            "Input(RankParam) of RankAttentionOp should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("InsRank"),
-        true,
-        phi::errors::InvalidArgument(
-            "Output(InsRank) of RankAttentionOp should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("InputHelp"),
-        true,
-        phi::errors::InvalidArgument(
-            "Output(InputHelp) of RankAttentionOp should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("Out"),
-        true,
-        phi::errors::InvalidArgument(
-            "Output(Out) of RankAttentionOp should not be null."));
-    auto max_rank = ctx->Attrs().Get<int>("MaxRank");
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto ins_num = x_dims[0];
-    auto param_dims = ctx->GetInputDim("RankParam");
-    auto para_col = param_dims[1];
-    auto rank_offset_dims = ctx->GetInputDim("RankOffset");
-    auto x_fea_dim = x_dims[1];
-    auto block_matrix_row = max_rank * x_fea_dim;
-
-    PADDLE_ENFORCE_EQ(
-        (rank_offset_dims[1] - 1) / 2,
-        max_rank,
-        phi::errors::InvalidArgument("Input(RankOffset) has wrong columns; "
-                                     "expected the number of columns to be "
-                                     "%d, but got %d",
-                                     max_rank,
-                                     (rank_offset_dims[1] - 1) / 2));
-
-    ctx->SetOutputDim("Out", {ins_num, para_col});
-    ctx->SetOutputDim("InputHelp", {ins_num, block_matrix_row});
-    ctx->SetOutputDim("InsRank", {ins_num, 1});
-    ctx->ShareLoD("X", /*->*/ "Out");
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
-                          ctx.GetPlace());
-  }
-};
-
-class RankAttentionGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        phi::errors::InvalidArgument("Input(X) should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("RankParam"),
-        true,
-        phi::errors::InvalidArgument("Input(RankParam) should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("RankOffset"),
-        true,
-        phi::errors::InvalidArgument("Input(RankOffset) should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("InputHelp"),
-        true,
-        phi::errors::InvalidArgument("Input(InputHelp) should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("InsRank"),
-        true,
-        phi::errors::InvalidArgument("Input(InsRank) should not be null"));
-
-    ctx->SetOutputDim(framework::GradVarName("RankParam"),
-                      ctx->GetInputDim("RankParam"));
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(
-                              ctx, framework::GradVarName("Out")),
-                          ctx.GetPlace());
-  }
-};
-
-class RankAttentionOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
- void Make() override { - AddInput("X", "(Tensor) Input tensor of rank_attention_Op operator."); - AddInput("RankOffset", - "(Tensor) Input tensor of rank_attention_Op operator."); - AddInput("RankParam", - "(Tensor) Input tensor of rank_attention_Op operator."); - AddOutput("InputHelp", "Output tensor of rank_attention_Op operator.") - .AsDispensable(); - AddOutput("Out", "Output tensor of rank_attention_Op operator."); - AddOutput("InsRank", "Output tensor of rank_attention_Op operator.") - .AsDispensable(); - AddAttr("MaxRank", "(int, default 3) max rank of rank_attention_Op") - .SetDefault(3); - AddAttr("MaxSize", "(int, default 0) max rank of rank_attention_Op") - .SetDefault(0); - AddComment(R"DOC( -RankAttention Operator. -This Op can calculate rank attention between input and rank_param, -and rank_param gives the organization of data. Notice: It currently supports GPU device. -This Op exists in contrib, which means that it is not shown to the public. -)DOC"); - } -}; - -template -class RankAttentionGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("rank_attention_grad"); - - op->SetInput("X", this->Input("X")); - op->SetInput("RankOffset", this->Input("RankOffset")); - op->SetInput("RankParam", this->Input("RankParam")); - op->SetInput("InputHelp", this->Output("InputHelp")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("InsRank", this->Output("InsRank")); - - op->SetOutput(framework::GradVarName("RankParam"), - this->InputGrad("RankParam")); - op->SetAttrMap(this->Attrs()); - } -}; -DECLARE_NO_NEED_BUFFER_VARS_INFERER( - RankAttentionGradOpNoNeedBufferVarsInference, - "X", - "RankOffset", - "RankParam"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(rank_attention, - ops::RankAttentionOp, - ops::RankAttentionOpMaker, - ops::RankAttentionGradOpMaker, - ops::RankAttentionGradOpMaker); - -REGISTER_OPERATOR(rank_attention_grad, - ops::RankAttentionGradOp, - ops::RankAttentionGradOpNoNeedBufferVarsInference); - -PD_REGISTER_STRUCT_KERNEL( - rank_attention, CPU, ALL_LAYOUT, ops::RankAttentionKernel, float, double) {} - -REGISTER_OP_VERSION(rank_attention) - .AddCheckpoint( - R"ROC( - Upgrade rank_attention, add 1 outputs [InputHelp] and 1 attribute - [MaxSize]. - )ROC", - paddle::framework::compatible::OpVersionDesc() - .NewOutput("InputHelp", - "Output tensor of rank_attention_Op operator " - "in order to assist calculation in the reverse process.") - .NewAttr( - "MaxSize", - "Forward calculation to set the pre-applied video memory size", - 0)); diff --git a/paddle/fluid/operators/rank_attention_op.cu b/paddle/fluid/operators/rank_attention_op.cu deleted file mode 100644 index d73de790a527e..0000000000000 --- a/paddle/fluid/operators/rank_attention_op.cu +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/operators/rank_attention.cu.h" -#include "paddle/fluid/operators/rank_attention_op.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/kernels/funcs/blas/blas.h" - -namespace paddle { -namespace operators { - -template -class RankAttentionCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *X = ctx.Input("X"); - auto *rank_offset = ctx.Input("RankOffset"); - auto *param = ctx.Input("RankParam"); - auto *input_help = ctx.Output("InputHelp"); - auto *ins_rank = ctx.Output("InsRank"); - int max_rank = ctx.Attr("MaxRank"); - int64_t max_size = ctx.Attr("MaxSize"); - auto *Out = ctx.Output("Out"); - - // check dims - auto x_dims = X->dims(); - auto ins_num = x_dims[0]; - auto x_fea_dim = x_dims[1]; - auto para_dims = param->dims(); - auto para_row = para_dims[0]; - auto para_col = para_dims[1]; - auto rank_offset_dims = rank_offset->dims(); - PADDLE_ENFORCE_EQ( - rank_offset_dims[0], - ins_num, - phi::errors::InvalidArgument("Input(RankOffset) has wrong rows.")); - PADDLE_ENFORCE_EQ( - (rank_offset_dims[1] - 1) / 2, - max_rank, - phi::errors::InvalidArgument("Input(RankOffset) has wrong columns.")); - PADDLE_ENFORCE_EQ( - max_rank * max_rank * x_fea_dim, - para_row, - phi::errors::InvalidArgument("Input(RankParam) has wrong rows.")); - - int block_matrix_row = max_rank * x_fea_dim; - - auto &dev_ctx = ctx.template device_context(); - - int max_ins = std::max(ins_num, max_size); - - phi::DenseTensor param_help; - param_help = ctx.AllocateTmpTensor( - {max_ins * block_matrix_row, para_col}, dev_ctx); - param_help.mutable_data(ctx.GetPlace()); - - input_help->Resize({max_ins, block_matrix_row}); - ins_rank->Resize({max_ins, 1}); - input_help->mutable_data(ctx.GetPlace()); - ins_rank->mutable_data(ctx.GetPlace()); - Out->mutable_data(ctx.GetPlace()); - - // initialize - auto param_help_eigen = framework::EigenVector::Flatten(param_help); - auto input_help_eigen = framework::EigenVector::Flatten(*input_help); - auto ins_rank_eigen = framework::EigenVector::Flatten(*ins_rank); - auto out_eigen = framework::EigenVector::Flatten(*Out); - - auto &place = - *ctx.template device_context().eigen_device(); - - param_help_eigen.device(place) = - param_help_eigen.constant(static_cast(0)); - input_help_eigen.device(place) = - input_help_eigen.constant(static_cast(0)); - ins_rank_eigen.device(place) = ins_rank_eigen.constant(static_cast(-1)); - out_eigen.device(place) = out_eigen.constant(static_cast(0)); - - // get data ptr - T *input_help_data = input_help->data(); - T *param_help_data = param_help.data(); - T *ins_rank_data = ins_rank->data(); - T *out_data = Out->data(); - - expand_rank_attention_input(ctx.cuda_device_context().stream(), - X->data(), - ins_num, - x_fea_dim, - input_help_data, - ins_num, - block_matrix_row, - rank_offset->data(), - rank_offset_dims[0], - rank_offset_dims[1], - ins_rank_data, - max_rank); - - expand_rank_attention_param(ctx.cuda_device_context().stream(), - X->data(), - ins_num, - x_fea_dim, - rank_offset->data(), - rank_offset_dims[0], - rank_offset_dims[1], - param->data(), - para_row, - para_col, - param_help_data, - ins_num * block_matrix_row, - para_col, - max_rank); - - CBLAS_TRANSPOSE transA = CblasNoTrans; - CBLAS_TRANSPOSE 
transB = CblasNoTrans; - - T alpha = 1; - T beta = 0; - int64_t strideA = block_matrix_row; - int64_t strideB = block_matrix_row * para_col; - - auto blas = phi::funcs::GetBlas(dev_ctx); - blas.BatchedGEMM(transA, - transB, - 1, - para_col, - block_matrix_row, - alpha, - input_help_data, - param_help_data, - beta, - out_data, - ins_num, - strideA, - strideB); - } -}; - -template -class RankAttentionGradOpCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *X = ctx.Input("X"); // not use data - auto *rank_offset = - ctx.Input("RankOffset"); // not use data - auto *param = ctx.Input("RankParam"); // not use data - auto *input_help = ctx.Input("InputHelp"); - auto *ins_rank = ctx.Input("InsRank"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - int64_t max_size = ctx.Attr("MaxSize"); - - auto *drank_para = - ctx.Output(framework::GradVarName("RankParam")); - - // get dim - auto x_dims = X->dims(); - auto ins_num = x_dims[0]; - auto x_fea_dim = x_dims[1]; - auto para_dims = param->dims(); - auto para_row = para_dims[0]; - auto para_col = para_dims[1]; - auto rank_offset_dims = rank_offset->dims(); - auto max_rank = (rank_offset_dims[1] - 1) / 2; - int block_matrix_row = max_rank * x_fea_dim; - auto &dev_ctx = ctx.template device_context(); - auto &place = - *ctx.template device_context().eigen_device(); - - int max_ins = std::max(ins_num, max_size); - // initialize out grad - drank_para->mutable_data(ctx.GetPlace()); - auto drank_para_eigen = framework::EigenVector::Flatten(*drank_para); - drank_para_eigen.device(place) = - drank_para_eigen.constant(static_cast(0)); - - // copy data - phi::DenseTensor param_grad; - param_grad = ctx.AllocateTmpTensor( - {max_ins * block_matrix_row, para_col}, dev_ctx); - param_grad.mutable_data(ctx.GetPlace()); - // initialize - auto param_grad_eigen = framework::EigenVector::Flatten(param_grad); - param_grad_eigen.device(place) = - param_grad_eigen.constant(static_cast(0)); - // get data ptr - const T *input_help_data = input_help->data(); - const T *ins_rank_data = ins_rank->data(); - T *param_grad_data = param_grad.data(); - - auto blas = phi::funcs::GetBlas(dev_ctx); - T alpha = 1; - T beta = 0; - - // get param_grad - CBLAS_TRANSPOSE transA = CblasTrans; - CBLAS_TRANSPOSE transB = CblasNoTrans; - int64_t strideA = block_matrix_row; - int64_t strideB = para_col; - blas.BatchedGEMM(transA, - transB, - block_matrix_row, - para_col, - 1, - alpha, - input_help_data, - dout->data(), - beta, - param_grad_data, - ins_num, - strideA, - strideB); - // merge param_grad to get drank_para - merge_rank_attention_param_grad(ctx.cuda_device_context().stream(), - param_grad_data, - ins_num * block_matrix_row, - para_col, - drank_para->data(), - para_row, - para_col, - ins_rank_data, - ins_num, - max_rank, - x_fea_dim); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(rank_attention, - GPU, - ALL_LAYOUT, - ops::RankAttentionCUDAKernel, - float, - double) {} - -PD_REGISTER_STRUCT_KERNEL(rank_attention_grad, - GPU, - ALL_LAYOUT, - ops::RankAttentionGradOpCUDAKernel, - float, - double) {} diff --git a/paddle/fluid/operators/rank_attention_op.h b/paddle/fluid/operators/rank_attention_op.h deleted file mode 100644 index f119c4a2f315c..0000000000000 --- a/paddle/fluid/operators/rank_attention_op.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
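The forward pass above reduces rank attention to one strided-batch GEMM with M = 1: after expand_rank_attention_input and expand_rank_attention_param lay out a gathered feature row (input_help) and a per-instance parameter block (param_help), each instance's output row is a single vector-matrix product. A plain-loop reference of that last step, assuming row-major buffers shaped as in the kernel (the function name is illustrative):

#include <cstdint>

// Plain-loop reference for the BatchedGEMM call above (transA = transB =
// NoTrans, M = 1, N = para_col, K = block_matrix_row, batch = ins_num,
// strideA = K, strideB = K * N, alpha = 1, beta = 0):
//   out[i] = input_help[i] (1 x K)  *  param_help[i] (K x N)
void RankAttentionForwardReference(const float* input_help,  // [ins_num, K]
                                   const float* param_help,  // [ins_num*K, N]
                                   float* out,               // [ins_num, N]
                                   int64_t ins_num, int64_t K, int64_t N) {
  for (int64_t i = 0; i < ins_num; ++i) {
    const float* a = input_help + i * K;
    const float* b = param_help + i * K * N;
    float* c = out + i * N;
    for (int64_t n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int64_t k = 0; k < K; ++k) acc += a[k] * b[k * N + n];
      c[n] = acc;
    }
  }
}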
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class RankAttentionKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - phi::errors::Unimplemented("Rank Attention only supports GPU now.")); - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/similarity_focus_op.cc b/paddle/fluid/operators/similarity_focus_op.cc deleted file mode 100644 index 4889dd9dfbf6b..0000000000000 --- a/paddle/fluid/operators/similarity_focus_op.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/similarity_focus_op.h" - -namespace paddle { -namespace operators { -class SimilarityFocusOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), a 4-D tensor with shape," - " [BatchSize, X, Y, Z]"); - AddOutput("Out", - "(Tensor, default Tensor), the similarity focus mask" - " with the same shape of input X."); - AddAttr("axis", - "(int32), indicating the dimension to be select. It can" - " only be 1, 2, or 3."); - AddAttr>("indexes", - "(std::vector), indicating the indexes" - " of the selected dimension."); - AddComment(R"DOC( -SimilarityFocus Operator. - -Generate a similarity focus mask with the same shape of input using the following method: -1. Extract the 3-D tensor(here the first dimension is BatchSize) corresponding - to the axis according to the indexes. For example, if axis=1 and indexes=[a], - it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X - is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). -2. For each index, find the largest numbers in the tensor T, so that the same - row and same column has at most one number(what it means is that if the - largest number has been found in the i-th row and the j-th column, then - the numbers in the i-th row or j-th column will be skipped. And then the - next largest number will be selected from the remaining numbers. Obviously - there will be min(B, C) numbers), and mark the corresponding position of the - 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for - each index. -3. 
Broadcast the 3-D similarity focus mask to the same shape of input X. - -Refer to `Similarity Focus Layer `_ -)DOC"); - } -}; - -class SimilarityFocusOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SimilarityFocus"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SimilarityFocus"); - - auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 4, - phi::errors::InvalidArgument( - "The dimension size of Input(X) be 4, but received %d.", - x_dims.size())); - ctx->SetOutputDim("Out", x_dims); - ctx->ShareLoD("X", /*->*/ "Out"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - similarity_focus, - ops::SimilarityFocusOp, - ops::SimilarityFocusOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(similarity_focus, - CPU, - ALL_LAYOUT, - ops::SimilarityFocusKernel, - float, - double) {} diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h deleted file mode 100644 index eea1d1953a4b9..0000000000000 --- a/paddle/fluid/operators/similarity_focus_op.h +++ /dev/null @@ -1,187 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
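The kernel in similarity_focus_op.h below implements the selection rule from the DOC above with a full descending sort plus row/column tag vectors. A compact sketch of the same greedy rule for a single rows x cols slice (a hypothetical standalone helper, not the operator's code):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Greedy selection from the DOC above for one row-major slice
// (slice.size() == rows * cols): repeatedly take the largest remaining value
// whose row and column are both unused, until min(rows, cols) positions are
// chosen.  Broadcasting along the selected axis and the elementwise-or across
// indexes are omitted.
std::vector<std::pair<int, int>> GreedyFocus(const std::vector<float>& slice,
                                             int rows, int cols) {
  std::vector<std::pair<float, int>> order(slice.size());
  for (int i = 0; i < rows * cols; ++i) order[i] = {slice[i], i};
  std::sort(order.begin(), order.end(),
            [](const auto& a, const auto& b) { return a.first > b.first; });
  std::vector<bool> row_used(rows, false), col_used(cols, false);
  std::vector<std::pair<int, int>> picks;
  for (const auto& cand : order) {
    int r = cand.second / cols, c = cand.second % cols;
    if (row_used[r] || col_used[c]) continue;
    row_used[r] = col_used[c] = true;
    picks.emplace_back(r, c);
    if (static_cast<int>(picks.size()) == std::min(rows, cols)) break;
  }
  return picks;
}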
*/ - -#pragma once - -#include -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class SimilarityFocusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - phi::DenseTensor* out = context.Output("Out"); - const phi::DenseTensor* x = context.Input("X"); - T* out_data = out->mutable_data(context.GetPlace()); - const T* x_data = x->data(); - - int axis = context.Attr("axis"); - std::vector indexes = context.Attr>("indexes"); - - int64_t batch_size = x->dims()[0]; - int64_t dim[4]; - for (int i = 1; i <= 3; ++i) { - dim[i] = x->dims()[i]; - } - - PADDLE_ENFORCE_GT( - indexes.size(), - 0, - phi::errors::InvalidArgument("The size of Attr(indexes) must be " - "greater than 0, but received %d.", - indexes.size())); - - for (size_t i = 0; i < indexes.size(); i++) { - PADDLE_ENFORCE_GT( - dim[axis], - indexes[i], - phi::errors::InvalidArgument( - "Each value of Attr(indexes) must be less than X.dim[axis], " - "but indexes[%d] received %d.", - i, - indexes[i])); - } - - int64_t array_size = 1; - for (int i = 1; i <= 3; ++i) { - if (i != axis) { - array_size *= dim[i]; - } - } - - std::vector> array(array_size); - - bool (*cmp)(std::pair, std::pair) = - [](std::pair x, std::pair y) { - return x.first > y.first; - }; - - int64_t (*compute_index)(int64_t*, int, int, int, int) = - [](int64_t* dim, int d1, int d2, int d3, int d4) { - return d1 * dim[1] * dim[2] * dim[3] + d2 * dim[2] * dim[3] + - d3 * dim[3] + d4; - }; - - PADDLE_ENFORCE_GT( - axis, - 0, - phi::errors::InvalidArgument( - "The value of Attr(axis) must be 1 or 2 or 3, but received %d.", - axis)); - PADDLE_ENFORCE_LT( - axis, - 4, - phi::errors::InvalidArgument( - "The value of Attr(axis) must be 1 or 2 or 3, but received %d.", - axis)); - memset(out_data, 0, sizeof(T) * batch_size * dim[1] * dim[2] * dim[3]); - for (int i = 0; i < batch_size; ++i) { - for (auto index : indexes) { - if (axis == 1) { - for (int j = 0; j < dim[2]; ++j) { - for (int k = 0; k < dim[3]; ++k) { - array[j * dim[3] + k] = std::make_pair( - x_data[compute_index(dim, i, index, j, k)], j * dim[3] + k); - } - } - - std::sort(array.begin(), array.end(), cmp); - int tag_num = 0; - std::vector tag2(dim[2]), tag3(dim[3]); - for (auto x : array) { - int idx2 = x.second / dim[3]; - int idx3 = x.second % dim[3]; - if (tag2[idx2] || tag3[idx3]) { - continue; - } - tag_num++; - tag2[idx2] = true; - tag3[idx3] = true; - for (int j = 0; j < dim[1]; ++j) { - out_data[compute_index(dim, i, j, idx2, idx3)] = 1; - } - if (tag_num == std::min(dim[2], dim[3])) { - break; - } - } - } else if (axis == 2) { - for (int j = 0; j < dim[1]; ++j) { - for (int k = 0; k < dim[3]; ++k) { - array[j * dim[3] + k] = std::make_pair( - x_data[compute_index(dim, i, j, index, k)], j * dim[3] + k); - } - } - - std::sort(array.begin(), array.end(), cmp); - int tag_num = 0; - std::vector tag1(dim[1]), tag3(dim[3]); - for (auto x : array) { - int idx1 = x.second / dim[3]; - int idx3 = x.second % dim[3]; - if (tag1[idx1] || tag3[idx3]) { - continue; - } - tag_num++; - tag1[idx1] = true; - tag3[idx3] = true; - for (int j = 0; j < dim[2]; ++j) { - out_data[compute_index(dim, i, idx1, j, idx3)] = 1; - } - if (tag_num == std::min(dim[1], dim[3])) { - break; - } - } - } else if (axis == 3) { - for (int j = 0; j < dim[1]; ++j) { - for (int k = 0; k < dim[2]; ++k) { - array[j * dim[2] + k] = std::make_pair( - 
x_data[compute_index(dim, i, j, k, index)], j * dim[2] + k); - } - } - - std::sort(array.begin(), array.end(), cmp); - int tag_num = 0; - std::vector tag1(dim[1]), tag2(dim[2]); - for (auto x : array) { - int idx1 = x.second / dim[2]; - int idx2 = x.second % dim[2]; - if (tag1[idx1] || tag2[idx2]) { - continue; - } - tag_num++; - tag1[idx1] = true; - tag2[idx2] = true; - for (int j = 0; j < dim[3]; ++j) { - out_data[compute_index(dim, i, idx1, idx2, j)] = 1; - } - if (tag_num == std::min(dim[1], dim[2])) { - break; - } - } - } - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/spp_op.cc b/paddle/fluid/operators/spp_op.cc deleted file mode 100644 index ad2ded506cd85..0000000000000 --- a/paddle/fluid/operators/spp_op.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -Indicesou may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/spp_op.h" - -#include -#include -namespace paddle { -namespace operators { - -class SppOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "X", - "(Tensor) The input tensor of spp operator. " - "The format of input tensor is NCHW. Where N is batch size, C is the " - "number of channels, H and W is the height and width of feature."); - AddOutput("Out", - "(Tensor) The output tensor of spp operator." - "N * M." - "M = C * H * W"); - AddAttr("pyramid_height", "(int), multi level pooling"); - AddAttr( - "pooling_type", - "(string), pooling type, can be \"max\" for max-pooling " - "and \"avg\" for average-pooling.") - .InEnum({"max", "avg"}); - AddComment(R"DOC( - "With spatial pyramid pooling, the input image can - be of any sizes. This not only allows arbitrary aspect - ratios, but also allows arbitrary scales. We can resize - the input image to any scale (e.g., min(w, h)=180, 224, - ...) and apply the same deep network. When the - input image is at different scales, the network (with - the same filter sizes) will extract features at different - scales. The scales play important roles in traditional - methods. 
-        Input shape: $(N, C_{in}, H_{in}, W_{in})$
-        Output shape: $(H_{out}, W_{out})$
-        Where
-          $$
-          H_{out} = N \\
-          W_{out} = (((4^{pyramid\_height}) - 1) / (4 - 1)) * C_{in}
-          $$
-        Reference: https://arxiv.org/pdf/1406.4729v4.pdf
-        )DOC");
-  }
-};
-
-class SppOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        phi::errors::InvalidArgument("Input(X) of SppOp should not be null."));
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"),
-                      true,
-                      phi::errors::InvalidArgument(
-                          "Output(Out) of SppOp should not be null."));
-    auto in_x_dims = ctx->GetInputDim("X");
-    int pyramid_height = ctx->Attrs().Get<int>("pyramid_height");
-    PADDLE_ENFORCE_EQ(in_x_dims.size(),
-                      4,
-                      phi::errors::InvalidArgument(
-                          "Input(X) of spp must be 4-dimensional."));
-    int outlen =
-        ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1];  // NOLINT
-    std::vector<int64_t> output_shape({in_x_dims[0], outlen});
-    ctx->SetOutputDim("Out", common::make_ddim(output_shape));
-  }
-};
-
-class SppOpGrad : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        phi::errors::InvalidArgument("Input(X) must not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput(framework::GradVarName("X")),
-        true,
-        phi::errors::InvalidArgument("Output(X@GRAD) should not be null."));
-    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    spp,
-    ops::SppOp,
-    ops::SppOpMaker,
-    paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
-    paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
-REGISTER_OPERATOR(spp_grad, ops::SppOpGrad);
-
-PD_REGISTER_STRUCT_KERNEL(spp, CPU, ALL_LAYOUT, ops::SppKernel, float, double) {
-}
-PD_REGISTER_STRUCT_KERNEL(
-    spp_grad, CPU, ALL_LAYOUT, ops::SppGradKernel, float, double) {}
diff --git a/paddle/fluid/operators/spp_op.cu.cc b/paddle/fluid/operators/spp_op.cu.cc
deleted file mode 100644
index b41fa8ae5fcf7..0000000000000
--- a/paddle/fluid/operators/spp_op.cu.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/spp_op.h"
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(spp, GPU, ALL_LAYOUT, ops::SppKernel, float, double) {
-}
-PD_REGISTER_STRUCT_KERNEL(
-    spp_grad, GPU, ALL_LAYOUT, ops::SppGradKernel, float, double) {}
diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h
deleted file mode 100644
index 5d3f4a78020a0..0000000000000
--- a/paddle/fluid/operators/spp_op.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
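To make the output-width formula above concrete: level p pools the feature map into a 2^p x 2^p grid, contributing 4^p * C_in flattened values, so the level widths form the geometric series the formula sums. For pyramid_height = 3 and C_in = 64, outlen = (1 + 4 + 16) * 64 = ((4^3 - 1) / 3) * 64 = 1344. The per-level window geometry that SppKernel below derives from input_h, input_w, and p can be sketched as (struct and helper names are illustrative):

#include <cmath>

// Per-level pooling geometry used by the spp kernels in spp_op.h below:
// level p pools the H x W map into a bins x bins grid (bins = 2^p) with
// ceil-sized windows and symmetric padding, matching SppKernel's arithmetic.
struct SppLevel {
  int bins, kernel_h, kernel_w, padding_h, padding_w;
};

SppLevel MakeSppLevel(int p, int input_h, int input_w) {
  int bins = 1 << p;
  int kh = static_cast<int>(std::ceil(input_h / static_cast<double>(bins)));
  int kw = static_cast<int>(std::ceil(input_w / static_cast<double>(bins)));
  return {bins, kh, kw, (kh * bins - input_h + 1) / 2,
          (kw * bins - input_w + 1) / 2};
}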
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -Indicesou may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/phi_utils.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/pooling.h" -#include "paddle/phi/kernels/funcs/strided_memcpy.h" - -namespace paddle { -namespace operators { -template -class SppKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* in_x = context.Input("X"); - auto* out = context.Output("Out"); - int pyramid_height = context.template Attr("pyramid_height"); - std::string pooling_type = - context.template Attr("pooling_type"); - out->mutable_data(context.GetPlace()); - auto out_stride = common::stride(out->dims()); - int input_h = in_x->dims()[2]; - int input_w = in_x->dims()[3]; - size_t output_offset = 0; - for (int p = 0; p < pyramid_height; ++p) { - int bins = std::pow(2, p); - int kernel_size_h = std::ceil(input_h / static_cast(bins)); - int kernel_size_w = std::ceil(input_w / static_cast(bins)); - int padding_h = (kernel_size_h * bins - input_h + 1) / 2; - int padding_w = (kernel_size_w * bins - input_w + 1) / 2; - std::vector kernel_size({kernel_size_h, kernel_size_w}); - std::vector strides({kernel_size_h, kernel_size_w}); - std::vector paddings({padding_h, padding_w}); - // pooling output shape - phi::DenseTensor out_level; - std::vector output_shape_vec( - {in_x->dims()[0], in_x->dims()[1], bins, bins}); - framework::DDim output_shape(common::make_ddim(output_shape_vec)); - out_level.mutable_data(output_shape, context.GetPlace()); - // pooling - if (pooling_type == "max") { - phi::funcs::Pool2dFunctor< - typename framework::ConvertToPhiContext::TYPE, - phi::funcs::MaxPool, - T> - pool_forward; - phi::funcs::MaxPool max_process; - pool_forward(context.template device_context(), - *in_x, - kernel_size, - strides, - paddings, - true, - false, - &out_level, - max_process); - } else if (pooling_type == "avg") { - phi::funcs::Pool2dFunctor< - typename framework::ConvertToPhiContext::TYPE, - phi::funcs::AvgPool, - T> - pool_forward; - phi::funcs::AvgPool avg_process; - pool_forward(context.template device_context(), - *in_x, - kernel_size, - strides, - paddings, - true, - false, - &out_level, - avg_process); - } - // flatten pooling output shape - int output_flatten_w = in_x->dims()[1] * bins * bins; - std::vector output_flatten_shape_vec( - {in_x->dims()[0], output_flatten_w}); - framework::DDim output_flatten_shape( - common::make_ddim(output_flatten_shape_vec)); - out_level.Resize(output_flatten_shape); - // concat - auto out_level_stride = common::stride(out_level.dims()); - phi::funcs::StridedMemcpy( - context.template device_context(), - out_level.data(), - out_level_stride, - out_level.dims(), - out_stride, - out->data() + output_offset); - output_offset += out_level.dims()[1] * out_level_stride[1]; - } - } -}; -template -class SppGradKernel : public framework::OpKernel { - public: 
- void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* in_x = context.Input("X"); - const phi::DenseTensor* out = context.Input("Out"); - const phi::DenseTensor* out_grad = - context.Input(framework::GradVarName("Out")); - phi::DenseTensor* in_x_grad = - context.Output(framework::GradVarName("X")); - int pyramid_height = context.template Attr("pyramid_height"); - std::string pooling_type = - context.template Attr("pooling_type"); - auto& device_ctx = context.template device_context(); - phi::funcs::SetConstant< - typename framework::ConvertToPhiContext::TYPE, - T> - zero; - in_x_grad->mutable_data(context.GetPlace()); - zero(device_ctx, in_x_grad, static_cast(0)); - auto out_stride = common::stride(out->dims()); - int input_h = in_x->dims()[2]; - int input_w = in_x->dims()[3]; - size_t out_offset = 0; - for (int p = 0; p < pyramid_height; ++p) { - int bins = std::pow(2, p); - int kernel_size_h = std::ceil(input_h / static_cast(bins)); - int kernel_size_w = std::ceil(input_w / static_cast(bins)); - int padding_h = (kernel_size_h * bins - input_h + 1) / 2; - int padding_w = (kernel_size_w * bins - input_w + 1) / 2; - std::vector kernel_size({kernel_size_h, kernel_size_w}); - std::vector strides({kernel_size_h, kernel_size_w}); - std::vector paddings({padding_h, padding_w}); - // split out and outgrad ... to flatten - phi::DenseTensor out_level; - phi::DenseTensor outgrad_level; - int out_flatten_w = in_x->dims()[1] * bins * bins; - std::vector out_flatten_shape_vec( - {in_x->dims()[0], out_flatten_w}); - framework::DDim out_flatten_shape( - common::make_ddim(out_flatten_shape_vec)); - out_level.mutable_data(out_flatten_shape, context.GetPlace()); - outgrad_level.mutable_data(out_flatten_shape, context.GetPlace()); - auto flatten_stride = common::stride(out_level.dims()); - // memcpy - phi::funcs::StridedMemcpy( - context.template device_context(), - out->data() + out_offset, - out_stride, - out_level.dims(), - flatten_stride, - out_level.data()); - - phi::funcs::StridedMemcpy( - context.template device_context(), - out_grad->data() + out_offset, - out_stride, - outgrad_level.dims(), - flatten_stride, - outgrad_level.data()); - out_offset += out_level.dims()[1] * out_stride[1]; - // flatten backward to nchw - - std::vector out_shape_vec({in_x->dims()[0], in_x->dims()[1]}); - out_shape_vec.push_back( - (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1); - out_shape_vec.push_back( - (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1); - framework::DDim out_shape(common::make_ddim(out_shape_vec)); - out_level.ShareDataWith(out_level); - out_level.Resize(out_shape); - outgrad_level.ShareDataWith(outgrad_level); - outgrad_level.Resize(out_shape); - // pooling backward - if (pooling_type == "max") { - phi::funcs::MaxPool2dGradFunctor< - typename framework::ConvertToPhiContext::TYPE, - T> - pool2d_backward; - pool2d_backward(context.template device_context(), - *in_x, - *&out_level, - *&outgrad_level, - kernel_size, - strides, - paddings, - in_x_grad); - } else if (pooling_type == "avg") { - phi::funcs::Pool2dGradFunctor< - typename framework::ConvertToPhiContext::TYPE, - phi::funcs::AvgPoolGrad, - T> - pool_backward; - phi::funcs::AvgPoolGrad avg_process; - pool_backward(context.template device_context(), - *in_x, - *&out_level, - *&outgrad_level, - kernel_size, - strides, - paddings, - true, - false, - in_x_grad, - avg_process); - } - } - } -}; -} // namespace operators -} // namespace paddle diff --git 
a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc
deleted file mode 100644
index e14dc0e316219..0000000000000
--- a/paddle/fluid/operators/tdm_child_op.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include "paddle/fluid/operators/tdm_child_op.h"
-
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/sampler.h"
-#include "paddle/fluid/platform/enforce.h"
-
-namespace paddle {
-namespace operators {
-class TDMChildOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X",
-             "X(Tensor), dtype support int32/int64, X variable is the "
-             "node id of TDM-Tree");
-    AddInput(
-        "TreeInfo",
-        "TreeInfo(Tensor), dtype support int32/int64, it stores the node "
-        "information in the following format: item_id(shape=1), "
-        "layer_id(shape=1), parent_id(shape=1), child_id(shape=child_nums)");
-    AddAttr<int>("child_nums",
-                 "child_nums(int), the number of children per node; if a "
-                 "node does not have enough children, pad the remaining "
-                 "slots with 0 until child_nums entries are filled");
-    AddOutput("Child",
-              "Return the children's node_id of the input node; "
-              "if the input node has no children, return 0");
-    AddOutput("LeafMask",
-              "LeafMask has the same shape as Child. "
-              "If the child is a leaf node, the LeafMask value is 1, else 0");
-    AddAttr<int>("dtype",
-                 "(int, default INT32) "
-                 "Output data type.")
-        .SetDefault(2);
-    AddComment(R"DOC(
-  **Tdm Child**
-  According to the input node_id on the given tree, return the corresponding
-  child node_id and, via LeafMask, whether each child is a leaf node.)DOC");
-  }
-};
-
-class TDMChildOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
-                      true,
-                      phi::errors::InvalidArgument(
-                          "Input(X) of TdmChild should not be null."));
-    PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"),
-                      true,
-                      phi::errors::InvalidArgument(
-                          "Input(TreeInfo) of TdmChild should not be null."));
-
-    int child_nums = ctx->Attrs().Get<int>("child_nums");
-    PADDLE_ENFORCE_GT(
-        child_nums,
-        0,
-        phi::errors::InvalidArgument(
-            "ValueError: The value of 'child_nums' must be greater than 0. "
-            "But received child_nums value = %d.",
-            child_nums));
-
-    auto info_dims = ctx->GetInputDim("TreeInfo");
-    auto input_dims = ctx->GetInputDim("X");
-
-    PADDLE_ENFORCE_EQ(
-        info_dims.size(),
-        2,
-        phi::errors::InvalidArgument(
-            "ShapeError: The dimensions of the 'tree info' must be 2.
" - "But received tree info's dimensions = %d, " - "tree info's shape = [%s].", - info_dims.size(), - info_dims)); - - auto output_dims = common::vectorize(input_dims); - output_dims.push_back(child_nums); - ctx->SetOutputDim("Child", common::make_ddim(output_dims)); - ctx->SetOutputDim("LeafMask", common::make_ddim(output_dims)); - - if (ctx->GetOutputsVarType("Child")[0] == - framework::proto::VarType::LOD_TENSOR) { - ctx->ShareLoD("X", /*->*/ "Child"); - ctx->ShareLoD("X", /*->*/ "LeafMask"); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - tdm_child, - ops::TDMChildOp, - ops::TDMChildOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(tdm_child, - CPU, - ALL_LAYOUT, - ops::TDMChildKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h deleted file mode 100644 index 3380062743047..0000000000000 --- a/paddle/fluid/operators/tdm_child_op.h +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/common/flags.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/mixed_vector.h" - -namespace paddle { -namespace operators { - -using DDim = framework::DDim; -using LoD = framework::LoD; - -template -void TDMChildInner(const framework::ExecutionContext &context, - const phi::DenseTensor &input, - const phi::DenseTensor &tree_info, - phi::DenseTensor *child, - phi::DenseTensor *mask) { - auto child_nums = context.Attr("child_nums"); - auto info_dims = tree_info.dims(); - int node_nums = info_dims[0]; - int length = info_dims[1]; - - int input_ids_num = input.numel(); - VLOG(4) << "TDM child op: input numel -> " << input_ids_num; - - std::vector child_vec{}; - std::vector item_mask_vec{}; - - auto *input_data = input.data(); - auto *tree_info_data = tree_info.data(); - - // TreeInfo: node_id : item_id; layer_id; ancestor_id; child_id - for (int input_ids = 0; input_ids < input_ids_num; ++input_ids) { - PADDLE_ENFORCE_LT( - input_data[input_ids], - node_nums, - phi::errors::InvalidArgument( - "input id of OP(paddle.incubate.layers.tdm_child) " - "expected >= 0 and < %ld, but got %ld. Please check input " - "value.", - node_nums, - input_data[input_ids])); - PADDLE_ENFORCE_LE( - 0, - input_data[input_ids], - phi::errors::InvalidArgument( - "input id of OP(paddle.incubate.layers.tdm_child) " - "expected >= 0 and < %ld, but got %ld. 
Please check input " - "value.", - node_nums, - input_data[input_ids])); - - bool has_child = - (input_data[input_ids] == 0 || - tree_info_data[static_cast(input_data[input_ids]) * length + 3] == - 0) - ? false - : true; - - if (has_child) { - for (int child_ids = 0; child_ids < child_nums; ++child_ids) { - OutT child_id = static_cast( - tree_info_data[static_cast(input_data[input_ids]) * length + - 3 + child_ids]); - child_vec.push_back(child_id); - OutT child_is_item = static_cast( - tree_info_data[static_cast(child_id) * length] == 0 ? 0 : 1); - item_mask_vec.push_back(child_is_item); - } - } else { - for (int child_ids = 0; child_ids < child_nums; ++child_ids) { - child_vec.push_back(0); - item_mask_vec.push_back(0); - } - } - } - - int output_nums = child_vec.size(); - auto *child_data = child->mutable_data(context.GetPlace()); - auto *leaf_mask_data = mask->mutable_data(context.GetPlace()); - - memcpy(child_data, &child_vec[0], sizeof(OutT) * output_nums); - memcpy(leaf_mask_data, &item_mask_vec[0], sizeof(OutT) * output_nums); -} - -template -class TDMChildKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *input_var = ctx.InputVar("X"); - auto *tree_info_var = ctx.InputVar("TreeInfo"); - - auto &input_tensor = input_var->Get(); - const auto &input_type = - framework::TransToProtoVarType(input_tensor.dtype()); - bool input_type_match = input_type == framework::proto::VarType::INT32 || - input_type == framework::proto::VarType::INT64; - PADDLE_ENFORCE_EQ(input_type_match, - true, - phi::errors::InvalidArgument( - "Input(X) holds the wrong type, it holds %s, but " - "desires to be %s or %s", - paddle::framework::DataTypeToString(input_type), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT32), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT64))); - - auto &tree_info_tensor = tree_info_var->Get(); - const auto &info_type = - framework::TransToProtoVarType(tree_info_tensor.dtype()); - bool info_type_match = info_type == framework::proto::VarType::INT32 || - info_type == framework::proto::VarType::INT64; - PADDLE_ENFORCE_EQ( - info_type_match, - true, - phi::errors::InvalidArgument( - "Input(TreeInfo) holds the wrong type, it holds %s, but " - "desires to be %s or %s", - paddle::framework::DataTypeToString(info_type), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT32), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT64))); - - auto *child_var = ctx.OutputVar("Child"); - auto *leaf_mask_var = ctx.OutputVar("LeafMask"); - auto *child_tensor = child_var->GetMutable(); - auto *leaf_mask_tensor = leaf_mask_var->GetMutable(); - - auto output_type = - static_cast(ctx.Attr("dtype")); - bool out_type_match = output_type == framework::proto::VarType::INT32 || - output_type == framework::proto::VarType::INT64; - PADDLE_ENFORCE_EQ(out_type_match, - true, - phi::errors::InvalidArgument( - "Output(Child) & Output(LeafMask) holds the wrong " - "type, it holds %s, but " - "desires to be %s or %s", - paddle::framework::DataTypeToString(output_type), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT32), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT64))); - - if (info_type == framework::proto::VarType::INT32 && - output_type == framework::proto::VarType::INT32) { - TDMChildInner( - ctx, input_tensor, tree_info_tensor, child_tensor, leaf_mask_tensor); - } else if (info_type == 
framework::proto::VarType::INT64 && - output_type == framework::proto::VarType::INT32) { - TDMChildInner( - ctx, input_tensor, tree_info_tensor, child_tensor, leaf_mask_tensor); - } else if (info_type == framework::proto::VarType::INT32 && - output_type == framework::proto::VarType::INT64) { - TDMChildInner( - ctx, input_tensor, tree_info_tensor, child_tensor, leaf_mask_tensor); - } else if (info_type == framework::proto::VarType::INT64 && - output_type == framework::proto::VarType::INT64) { - TDMChildInner( - ctx, input_tensor, tree_info_tensor, child_tensor, leaf_mask_tensor); - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc deleted file mode 100644 index 29344b1ace0b0..0000000000000 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ /dev/null @@ -1,263 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/teacher_student_sigmoid_loss_op.h" - -#include - -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("X"), "Input", "X", "teacher_student_sigmoid_loss"); - OP_INOUT_CHECK(ctx->HasInput("Label"), - "Input", - "Label", - "teacher_student_sigmoid_loss"); - OP_INOUT_CHECK( - ctx->HasOutput("Y"), "Output", "Y", "teacher_student_sigmoid_loss"); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ(x_dims.size(), - 2UL, - phi::errors::InvalidArgument( - "Input(X)'s rank should be 2. But received: " - "Input(X)'s rank is [%d]", - x_dims.size())); - PADDLE_ENFORCE_EQ( - label_dims.size(), - 2UL, - phi::errors::InvalidArgument("Input(Label)'s rank should be 2. But " - "received Input(Label)'s rank is [%d]", - label_dims.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - x_dims[0], - label_dims[0], - phi::errors::InvalidArgument( - "The 1st dimension of Input(X) and Input(Label) should " - "be equal. The difference is [%d]: [%d]", - x_dims[0], - label_dims[0])); - PADDLE_ENFORCE_EQ( - label_dims[1], - 1UL, - phi::errors::InvalidArgument("The 2nd dimension of " - "Input(Label) should be 1. But received " - "Input(Label)'s 2nd dim is [%d]", - label_dims[1])); - } - ctx->SetOutputDim("Y", {x_dims[0], 1}); - ctx->ShareLoD("X", /*->*/ "Y"); - } - - protected: - // Explicitly set that the data type of computation kernel of - // teacher_student_sigmoid_loss - // is determined by its input "X". 
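Looking back at tdm_child above: each TreeInfo row is laid out as [item_id, layer_id, parent_id, child_0 .. child_{child_nums-1}], which is why TDMChildInner reads children from offset 3 and treats a zero child slot as padding. A slightly simplified scalar sketch of the lookup for one node (hypothetical helper, int64 only; the real kernel dispatches over int32/int64 as shown above):

#include <cstdint>
#include <vector>

// Slightly simplified scalar sketch of TDMChildInner above.  A child id of 0
// means a padded slot, and a nonzero item_id in the child's own row marks it
// as a leaf.
void TdmChildLookup(const std::vector<int64_t>& tree_info, int64_t row_len,
                    int64_t node, int64_t child_nums,
                    std::vector<int64_t>* child, std::vector<int64_t>* mask) {
  bool has_child = node != 0 && tree_info[node * row_len + 3] != 0;
  for (int64_t k = 0; k < child_nums; ++k) {
    int64_t c = has_child ? tree_info[node * row_len + 3 + k] : 0;
    child->push_back(c);
    mask->push_back((c != 0 && tree_info[c * row_len] != 0) ? 1 : 0);
  }
}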
- phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -template -class TeacherStudentSigmoidLossGradOpMaker - : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("teacher_student_sigmoid_loss_grad"); - - op->SetInput("X", this->Input("X")); - op->SetInput("Label", this->Input("Label")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - - op->SetAttrMap(this->Attrs()); - } -}; - -class TeacherStudentSigmoidLossGradientOp - : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("X"), "Input", "X", "teacher_student_sigmoid_loss_grad"); - OP_INOUT_CHECK(ctx->HasInput("Label"), - "Input", - "X", - "teacher_student_sigmoid_loss_grad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - "Y@Grad", - "teacher_student_sigmoid_loss_grad"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Input", - "X@Grad", - "teacher_student_sigmoid_loss_grad"); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 2, - phi::errors::InvalidArgument( - "Input(X)'s rank should be 2. But received Input(X)'s rank is [%d]", - x_dims.size())); - PADDLE_ENFORCE_EQ(dy_dims.size(), - 2, - phi::errors::InvalidArgument( - "Input(Y@Grad)'s rank should be 2. But received " - "Input(Y@Grad)'s rank is [%d]", - dy_dims.size())); - PADDLE_ENFORCE_EQ(label_dims.size(), - 2, - phi::errors::InvalidArgument( - "Input(Label)'s rank should be 2. But received " - "Input(Y@Grad)'s rank is [%d]", - label_dims.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - x_dims[0], - label_dims[0], - phi::errors::InvalidArgument( - "The 1st dimension of Input(X) and Input(Label) should " - "be equal. The difference is [%d]: [%d]", - x_dims[0], - label_dims[0])); - PADDLE_ENFORCE_EQ( - x_dims[0], - dy_dims[0], - phi::errors::InvalidArgument( - "The 1st dimension of Input(X) and Input(Y@Grad) should " - "be equal. The difference is [%d]: [%d]", - x_dims[0], - dy_dims[0])); - PADDLE_ENFORCE_EQ( - dy_dims[1], - 1, - phi::errors::InvalidArgument( - "The 2nd dimension of Input(Y@Grad) should be 1. " - "But received Input(Y@Grad)'s 2nd dimension is [%d]", - dy_dims[1])); - PADDLE_ENFORCE_EQ( - label_dims[1], - 1, - phi::errors::InvalidArgument( - "When Attr(soft_label) == false, the 2nd dimension of " - "Input(Label) should be 1. But received Input(Label)'s 2nd " - "dimension " - "is [%d]", - label_dims[1])); - } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of - // teacher_student_sigmoid_loss - // is determined by its input "X". 
- phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class TeacherStudentSigmoidLossOpMaker - : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(phi::DenseTensor, default phi::DenseTensor), a 2-D " - "tensor with shape [N x 1]," - " where N is the batch size and D is the output. " - "This input is a probability computed by the previous operator, " - "which is almost always the result of a softmax operator."); - AddInput("Label", - "(phi::DenseTensor), the ground truth which is a 2-D tensor. " - "Label is a phi::DenseTensor with shape [N x 1]. "); - AddOutput("Y", - "(phi::DenseTensor, default phi::DenseTensor), a 2-D " - "tensor with shape " - "[N x 1]. The teacher student sigmoid loss."); - AddAttr( - "soft_max_up_bound", - "fp32, if input > soft_max_up_bound, input will be bound, default 15.0") - .SetDefault(15.0); - AddAttr("soft_max_lower_bound", - "fp32, if input < soft_max_lower_bound, input will be " - "bound, default -15.0") - .SetDefault(-15.0); - AddComment(R"DOC( -TeacherStudentSigmoidLoss Operator. - -It's similarity to SigmoidCrossEntropyWithLogits Operator. The difference is that -we add another label(z') to original. - loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + log(1 + exp(-abs(x))) - z is click or not - z' is teacher value - label = {-2, -1, [0, 2]} - when z' is not exist, clk = 0 : label = -2; - when z' is not exist, clk = 1 : label = -1; - when z' is exist , clk = 0 : label = 0 + z'; - when z' is exist , clk = 1 : label = 1 + z'; - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - teacher_student_sigmoid_loss, - ops::TeacherStudentSigmoidLossOp, - ops::TeacherStudentSigmoidLossOpMaker, - ops::TeacherStudentSigmoidLossGradOpMaker, - ops::TeacherStudentSigmoidLossGradOpMaker); - -REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad, - ops::TeacherStudentSigmoidLossGradientOp); - -PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss, - CPU, - ALL_LAYOUT, - ops::TeacherStudentSigmoidLossOpKernel, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss_grad, - CPU, - ALL_LAYOUT, - ops::TeacherStudentSigmoidLossGradOpKernel, - float, - double) {} diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h deleted file mode 100644 index 7ccb9438d4188..0000000000000 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - phi::DenseTensor* y = context.Output("Y"); - const phi::DenseTensor* x = context.Input("X"); - const phi::DenseTensor* labels = context.Input("Label"); - T* y_data = y->mutable_data(context.GetPlace()); - const T* x_data = x->data(); - const T* label_data = labels->data(); - int64_t batch_size = x->dims()[0]; - // loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + - // log(1 + exp(-abs(x))) - // z is click or not - // z' is value q of feed_fine - // label = {-2, -1, [0, 2]} - // when z' is not exist, clk = 0 : label = -2; - // when z' is not exist, clk = 1 : label = -1; - // when z' is exist , clk = 0 : label = 0 + z'; - // when z' is exist , clk = 1 : label = 1 + z'; - for (int i = 0; i < batch_size; ++i) { - if (label_data[i] < -1.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + - log(1.0 + exp(-fabs(x_data[i]))); - } else if (label_data[i] < 0.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + - log(1.0 + exp(-fabs(x_data[i]))); - } else if (label_data[i] < 1.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + - log(1.0 + exp(-fabs(x_data[i]))) + - (x_data[i] > 0 ? x_data[i] : 0.0) - - x_data[i] * label_data[i] + - log(1.0 + exp(-fabs(x_data[i]))); - } else { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + - log(1.0 + exp(-fabs(x_data[i]))) + - (x_data[i] > 0 ? x_data[i] : 0.0) - - x_data[i] * (label_data[i] - 1.0) + - log(1.0 + exp(-fabs(x_data[i]))); - } - } - } -}; - -template -class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* x = context.Input("X"); - const T* x_data = x->data(); - - phi::DenseTensor* dx = - context.Output(framework::GradVarName("X")); - T* dx_data = dx->mutable_data(context.GetPlace()); - - const phi::DenseTensor* labels = context.Input("Label"); - const T* label_data = labels->data(); - - T soft_max_up_bound = - static_cast(context.Attr("soft_max_up_bound")); - T soft_max_lower_bound = - static_cast(context.Attr("soft_max_lower_bound")); - - int64_t batch_size = x->dims()[0]; - - const phi::DenseTensor* dOut = - context.Input(framework::GradVarName("Y")); - - const T* dout_data = dOut->data(); - - for (int i = 0; i < batch_size; ++i) { - T sum_val = x_data[i]; - if (sum_val > soft_max_up_bound) { - sum_val = soft_max_up_bound; - } else { - if (sum_val < soft_max_lower_bound) { - sum_val = soft_max_lower_bound; - } - } - - T pred = 1.0 / (1.0 + exp(-sum_val)); - if (label_data[i] < -1.0) { - dx_data[i] = 0.0 - pred; - } else if (label_data[i] < 0.0) { - dx_data[i] = 1.0 - pred; - } else { - dx_data[i] = label_data[i] - 2.0 * pred; - } - if (sum_val >= soft_max_up_bound || sum_val <= soft_max_lower_bound) { - dx_data[i] = 0; - } - dx_data[i] *= dout_data[i] * -1; - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/unique_with_counts_op.cc b/paddle/fluid/operators/unique_with_counts_op.cc deleted file mode 100644 index 2e4af44ac8a1f..0000000000000 --- a/paddle/fluid/operators/unique_with_counts_op.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
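The piecewise loss in TeacherStudentSigmoidLossOpKernel above is a sum of one or two sigmoid-cross-entropy-with-logits terms, selected by the label encoding. A compact reference with a worked number (hypothetical helper; the branches follow directly from the kernel):

#include <cmath>

// Reference for the piecewise loss above.  The label encodes click and an
// optional teacher score z':
//   -2 -> no z', clk = 0;   -1 -> no z', clk = 1;
//   [0, 1) -> z', clk = 0;  [1, 2] -> 1 + z', clk = 1.
double TeacherStudentLoss(double x, double label) {
  // sigmoid cross-entropy with logits against target z
  auto ce = [](double v, double z) {
    return std::max(v, 0.0) - v * z + std::log(1.0 + std::exp(-std::fabs(v)));
  };
  if (label < -1.0) return ce(x, 0.0);                // click term only
  if (label < 0.0) return ce(x, 1.0);                 // click term only
  if (label < 1.0) return ce(x, 0.0) + ce(x, label);  // click + teacher
  return ce(x, 1.0) + ce(x, label - 1.0);             // click + teacher
}
// Worked number: x = 2.0, label = 1.7 (clk = 1, z' = 0.7):
// ce(2, 1) = log(1 + e^-2) ~= 0.127, ce(2, 0.7) = 2 - 1.4 + 0.127 ~= 0.727,
// so the loss is ~= 0.854.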
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/unique_with_counts_op.h" - -namespace paddle { -namespace operators { - -class UniqueWithCountsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unique_with_counts"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "Out", "unique_with_counts"); - OP_INOUT_CHECK( - ctx->HasOutput("Index"), "Output", "Index", "unique_with_counts"); - OP_INOUT_CHECK( - ctx->HasOutput("Count"), "Output", "Count", "unique_with_counts"); - - auto in_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - in_dims.size(), - 1, - phi::errors::InvalidArgument("The Input(X) should be 1-D Tensor, " - "But now the dims of Input(X) is %d.", - in_dims.size())); - - ctx->SetOutputDim("Out", {-1}); - ctx->SetOutputDim("Index", in_dims); - ctx->SetOutputDim("Count", {-1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -class UniqueWithCountsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "Input tensor. It should be a 1-D tensor."); - AddAttr("dtype", "data type for output index"); - AddOutput("Out", "A unique subsequence for input tensor."); - AddOutput("Index", - "An index tensor pointing to unique subsequence, which has " - "identical shape with input tensor and the data type is set by " - "the attr `dtype`"); - AddOutput("Count", "A subsequence for the count of unique index"); - AddComment(R"DOC( - Return a unique subsequence for 1-D input tensor, index tensor pointing to this unique subsequence, - and the subsequence for the count of unique index. -)DOC"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(unique_with_counts, - ops::UniqueWithCountsOp, - ops::UniqueWithCountsOpMaker); -PD_REGISTER_STRUCT_KERNEL(unique_with_counts, - CPU, - ALL_LAYOUT, - ops::UniqueWithCountsKernel, - float, - double, - int32_t, - int64_t) {} diff --git a/paddle/fluid/operators/unique_with_counts_op.h b/paddle/fluid/operators/unique_with_counts_op.h deleted file mode 100644 index 4b1fef5e22447..0000000000000 --- a/paddle/fluid/operators/unique_with_counts_op.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
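A concrete example of the three outputs described by the unique_with_counts maker above; first-appearance ordering is assumed here (the exact ordering is determined by UniqueOpFunctor), so treat it as illustrative:

    X     = [2, 3, 3, 1, 5, 3]
    Out   = [2, 3, 1, 5]          (unique values, in order of first appearance)
    Index = [0, 1, 1, 2, 3, 1]    (position of each X element in Out; same shape as X)
    Count = [1, 3, 1, 1]          (occurrences of each Out element)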
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/unique_op.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -class UniqueWithCountsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto data_type = static_cast( - context.Attr("dtype")); - auto* x = context.Input("X"); - auto* out = context.Output("Out"); - auto* index = context.Output("Index"); - auto* count = context.Output("Count"); - framework::VisitDataType(data_type, - UniqueOpFunctor(out, index, x, count)); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/unzip_op.cc b/paddle/fluid/operators/unzip_op.cc deleted file mode 100644 index a72c0c6a878f3..0000000000000 --- a/paddle/fluid/operators/unzip_op.cc +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/unzip_op.h" - -#include - -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -class unzipOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "lod"); - OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "lod"); - auto lod_dims = ctx->GetInputDim("lod"); - PADDLE_ENFORCE_EQ( - lod_dims.size(), - 1UL, - phi::errors::InvalidArgument("Input(X)'s rank should be 1, but got %d", - lod_dims.size())); - auto len = static_cast(ctx->Attrs().Get("len")); - ctx->SetOutputDim("Y", {lod_dims[0] - 1, len}); - } - - protected: - // Explicitly set that the data type of computation kernel of - // unzip - // is determined by its input "X". 
- phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context().GetPlace()); - } -}; - -class unzipGradientOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unzipGradient"); - OP_INOUT_CHECK(ctx->HasInput("lod"), "Input", "unzip", "unzipGradient"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - framework::GradVarName("Y"), - "unzipGradient"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "unzipGradient"); - - auto x_dims = ctx->GetInputDim("X"); - auto lod_dims = ctx->GetInputDim("lod"); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 2, - phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", - x_dims.size())); - PADDLE_ENFORCE_EQ( - lod_dims.size(), - 1, - phi::errors::InvalidArgument("Expect Input(X)'s rank == 1, but got %d", - lod_dims.size())); - - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of - // unzip - // is determined by its input "X". - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Y")), - ctx.device_context().GetPlace()); - } -}; - -class unzipOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(LodTensor, default LodTensor)"); - AddInput("lod", "(Tensor), a 1-D Tensor with shape [K]"); - AddAttr("len", "The len of each original Tensor").SetDefault(1); - AddOutput("Y", - "(LodTensor, default LodTensor), a 2-D tensor with shape " - "[K-1 x len]."); - AddComment(R"DOC( -unzip Operator. -)DOC"); - } -}; - -template -class unzipGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("unzip_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("lod", this->Input("lod")); - op->SetAttr("len", this->GetAttr("len")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(unzip, - ops::unzipOp, - ops::unzipOpMaker, - ops::unzipGradOpMaker, - ops::unzipGradOpMaker); - -REGISTER_OPERATOR(unzip_grad, ops::unzipGradientOp); - -PD_REGISTER_STRUCT_KERNEL(unzip, - CPU, - ALL_LAYOUT, - ops::unzipOpKernel, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} -PD_REGISTER_STRUCT_KERNEL(unzip_grad, - CPU, - ALL_LAYOUT, - ops::unzipGradOpKernel, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} diff --git a/paddle/fluid/operators/unzip_op.cu b/paddle/fluid/operators/unzip_op.cu deleted file mode 100644 index 5be9bdea2b752..0000000000000 --- a/paddle/fluid/operators/unzip_op.cu +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/operators/unzip_op.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" -namespace paddle { -namespace operators { - -using phi::PADDLE_CUDA_NUM_THREADS; - -template -__global__ void unzipKernel( - const T* X, const LodType* lod, T* Y, size_t col_size, size_t n) { - CUDA_KERNEL_LOOP(i, n) { - int lod_idx = i / col_size; - int len = lod[lod_idx + 1] - lod[lod_idx]; - if (i >= lod_idx * col_size + len) { - Y[i] = 0; - } else { - Y[i] = X[lod[lod_idx] + i % col_size]; - } - } -} - -template -class unzipCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const auto* x = context.Input("X"); - const T* x_data = x->data(); - - const auto* lod = context.Input("lod"); - const LodType* lod_data = lod->data(); - - auto col_size = context.Attr("len"); - auto row_size = lod->dims()[0] - 1; - auto y_numel = col_size * row_size; - - auto* y = context.Output("Y"); - T* y_data = y->mutable_data(context.GetPlace()); - - // for Input X do not have lod Information. - auto stream = context.template device_context().stream(); - unzipKernel<<<(y_numel + PADDLE_CUDA_NUM_THREADS - 1) / - PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, - 0, - stream>>>(x_data, lod_data, y_data, col_size, y_numel); - } -}; - -template -class unzipGradCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_THROW(phi::errors::Unimplemented("unzip_grad is unimplemented")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -PD_REGISTER_STRUCT_KERNEL(unzip, - GPU, - ALL_LAYOUT, - ops::unzipCUDAKernel, - float, - double, - phi::dtype::float16, - bool, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} -PD_REGISTER_STRUCT_KERNEL(unzip_grad, - GPU, - ALL_LAYOUT, - ops::unzipGradCUDAKernel, - float, - double, - phi::dtype::float16, - bool, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} diff --git a/paddle/fluid/operators/unzip_op.h b/paddle/fluid/operators/unzip_op.h deleted file mode 100644 index 6829d00dccf56..0000000000000 --- a/paddle/fluid/operators/unzip_op.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class unzipOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_THROW(phi::errors::Unimplemented("unzip is unimplemented")); - } -}; - -template -class unzipGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_THROW(phi::errors::Unimplemented("unzip_grad is unimplemented")); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 1a083c30fcef9..e2de6183f01e6 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -804,10 +804,6 @@ XPUOpMap& get_kl2_ops() { phi::DataType::INT32, phi::DataType::BOOL, phi::DataType::FLOAT32})}, - {"resnet_unit", - XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, - {"resnet_unit_grad", - XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"rmsprop", XPUKernelSet({phi::DataType::FLOAT32})}, {"rnn", XPUKernelSet({phi::DataType::FLOAT32})}, {"rnn_grad", XPUKernelSet({phi::DataType::FLOAT32})}, @@ -1194,8 +1190,6 @@ XPUOpMap& get_kl2_ops() { // Fused op {"squeeze_excitation_block", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, - {"resnet_basic_block_grad", XPUKernelSet({phi::DataType::FLOAT32})}, - {"resnet_basic_block", XPUKernelSet({phi::DataType::FLOAT32})}, {"fused_gemm_epilogue", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"fused_gemm_epilogue_grad", diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py index c1d8c54c6b4b2..8232ae1317c6c 100644 --- a/python/paddle/distributed/passes/auto_parallel_fp16.py +++ b/python/paddle/distributed/passes/auto_parallel_fp16.py @@ -103,8 +103,6 @@ def _keep_fp32_input(op, in_name): return in_name != 'X' if op_type == 'fused_bn_add_activation': return in_name not in {'X', 'Z'} - if op_type == 'resnet_unit': - return in_name not in {'X', 'FilterX', 'Z', 'FilterZ'} if op_type in ['fused_attention', 'fused_feedforward']: return in_name in { 'LnScale', @@ -132,8 +130,6 @@ def _keep_fp32_output(op, out_name): return out_name != 'Y' if op_type == 'layer_norm' and _keep_layer_norm_scale_bias_to_fp32(): return out_name != 'Y' - if op_type == 'resnet_unit': - return out_name not in {'Y', 'ConvX', 'ConvZ'} if op_type in ['fused_attention', 'fused_feedforward']: return out_name in { 'LnMean', diff --git a/python/paddle/incubate/__init__.py b/python/paddle/incubate/__init__.py index e6e2dc766fc87..ff434a6fffc00 100644 --- a/python/paddle/incubate/__init__.py +++ b/python/paddle/incubate/__init__.py @@ -43,7 +43,6 @@ LookAhead, ModelAverage, ) -from .passes import fuse_resnet_unit_pass # noqa: F401 from .tensor import ( _npu_identity, # noqa: F401 segment_max, diff --git a/python/paddle/incubate/layers/__init__.py b/python/paddle/incubate/layers/__init__.py index f25a845d0a4dc..f3645718720db 100644 --- a/python/paddle/incubate/layers/__init__.py +++ b/python/paddle/incubate/layers/__init__.py @@ -27,10 +27,8 @@ partial_concat, partial_sum, pow2_decay_with_linear_warmup, - rank_attention, search_pyramid_hash, shuffle_batch, - tdm_child, tdm_sampler, ) diff --git a/python/paddle/incubate/layers/nn.py 
b/python/paddle/incubate/layers/nn.py index 5b6236567e649..78b6b6034ff24 100644 --- a/python/paddle/incubate/layers/nn.py +++ b/python/paddle/incubate/layers/nn.py @@ -646,91 +646,6 @@ def partial_sum(input, start_index=0, length=-1): return out -def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): - """ - **Tdm Child** - According to the input node_id on the given tree, return the corresponding child node_id and - whether child is a leaf node by leaf_mask value. - - .. code-block:: text - - Given: - tree[[0], [1, 2], [3, 4], [5, 6]] # A binary tree with seven nodes - x = [[2], [3]] - node_nums = 7 - child_nums = 2 - - We get: - child = [[5, 6], - [0, 0]] - leaf_mask = [[1, 1], - [0, 0]] - - Args: - x (Tensor): Tensor contained the node_id information, dtype support int32/int64. - node_nums (int): Number of total nodes. - child_nums (int): Maximum number of child nodes per node. - param_attr (ParamAttr, optional): To specify the tdm-tree-info parameter property. Default: None, which means the - default weight parameter property is used. See usage for details in: ref: `api_paddle_ParamAttr`, should - has shape (node_nums, 3 + child_nums), dtype support int32/int64. - The dimension[1] of tdm-tree-info contains the following: - 1. Item_id (int, shape(1)), if node is a leaf node, give its item_id corresponding to node_id, else give 0. - 2. Layer_id (int, shape(1)), indicates which layer the node is on. - 3. Parent_id (int, shape(1)), node's parent node. - 4. Child_id (int, shape(child_nums)), all child node's node_id of this node should be given. - If the number of child nodes is insufficient, padding 0 until child nums equal to child_nums. - dtype (str, optional): The data type of output child and leaf_mask, support int32/int64. Default: int32. - - Returns: - tuple: A tuple including input node's child(Tensor) and leaf_mask(Tensor). - If child is a leaf node, leaf_mask equal ot 1, otherwise equal to 0. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - - >>> x = paddle.static.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) - >>> tree_info = [[0,0,0,1,2], - ... [0,1,0,3,4],[0,1,0,5,6], - ... [0,2,1,0,0],[1,2,1,0,0],[2,2,2,0,0],[3,2,2,0,0]] - >>> tree_info_np = np.array(tree_info) - >>> tree_info_np = np.reshape(tree_info_np, (7,5)) - >>> node_nums = 7 - >>> child_nums = 2 - >>> child, leaf_mask = paddle.incubate.layers.tdm_child(x, node_nums, child_nums, - ... param_attr=paddle.ParamAttr( - ... 
initializer=paddle.nn.initializer.Assign(tree_info_np))) - - """ - helper = LayerHelper("tdm_child", **locals()) - check_dtype( - dtype, 'dtype', ['int32', 'int64'], 'paddle.incubate.layers.tdm_child' - ) - c_dtype = convert_np_dtype_to_dtype_(dtype) - tree_info = helper.create_parameter( - attr=helper.param_attr, - shape=[node_nums, 3 + child_nums], - dtype=dtype, - default_initializer=paddle.nn.initializer.Constant(0), - ) - tree_info.stop_gradient = True - - child = helper.create_variable_for_type_inference(dtype=dtype) - leaf_mask = helper.create_variable_for_type_inference(dtype=dtype) - - helper.append_op( - type='tdm_child', - inputs={'X': x, 'TreeInfo': tree_info}, - outputs={'Child': child, 'LeafMask': leaf_mask}, - attrs={'child_nums': child_nums, 'dtype': c_dtype}, - stop_gradient=True, - ) - return (child, leaf_mask) - - def tdm_sampler( x, neg_samples_num_list, @@ -949,75 +864,6 @@ def tdm_sampler( return (out, labels, mask) -def rank_attention( - input, - rank_offset, - rank_param_shape, - rank_param_attr, - max_rank=3, - max_size=0, -): - """ - **Rank Attention layer** - This Op can calculate rank attention between input and rank_param, and - rank_param gives the organization of data. Notice: It currently supports - GPU device. - This Op exists in incubate layers, which means that it is not shown to the public. - - Args: - input (Tensor): Tensor with data type float32, float64. - rank_offset (Tensor): Tensor with data type int32. - rank_para_shape (list[int]): The shape of rank_param. - rank_param_attr (ParamAttr): Attribute initializer of rank_param. - max_rank (int, optional): The max rank of input's ranks. Default is 3. - max_size (int, optional): The max size of input's ranks. Default is 0. - Returns: - Tensor: A Tensor with the same data type as input's. - - Examples: - .. code-block:: python - - >>> import paddle - >>> paddle.enable_static() - - >>> input = paddle.static.data(name="input", shape=[None, 2], dtype="float32") - >>> rank_offset = paddle.static.data(name="rank_offset", shape=[None, 7], dtype="int32") - >>> out = paddle.incubate.layers.rank_attention(input=input, - ... rank_offset=rank_offset, - ... rank_param_shape=[18,3], - ... rank_param_attr= - ... paddle.ParamAttr(learning_rate=1.0, - ... name="ubm_rank_param.w_0"), - ... max_rank=3, - ... 
max_size=0) - """ - helper = LayerHelper('rank_attention', **locals()) - dtype = helper.input_dtype(input_param_name='input') - input_shape = input.shape - assert input_shape[1] * max_rank * max_rank == rank_param_shape[0] - - rank_param = helper.create_parameter( - attr=rank_param_attr, shape=rank_param_shape, dtype=dtype - ) - rank_param.stop_gradient = False - - output = helper.create_variable_for_type_inference(dtype) - input_help = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True - ) - ins_rank = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True - ) - - helper.append_op( - type="rank_attention", - inputs={"X": input, "RankOffset": rank_offset, "RankParam": rank_param}, - outputs={"Out": output, "InputHelp": input_help, "InsRank": ins_rank}, - attrs={"MaxRank": max_rank, "MaxSize": max_size}, - ) - return output - - def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None): """ **Batch FC layer** diff --git a/python/paddle/incubate/operators/__init__.py b/python/paddle/incubate/operators/__init__.py index 653dc97ed6193..df1c3a47d78a5 100644 --- a/python/paddle/incubate/operators/__init__.py +++ b/python/paddle/incubate/operators/__init__.py @@ -16,7 +16,6 @@ from .graph_reindex import graph_reindex # noqa: F401 from .graph_sample_neighbors import graph_sample_neighbors # noqa: F401 from .graph_send_recv import graph_send_recv # noqa: F401 -from .resnet_unit import ResNetUnit # noqa: F401 from .softmax_mask_fuse import softmax_mask_fuse # noqa: F401 from .softmax_mask_fuse_upper_triangle import ( # noqa: F401 softmax_mask_fuse_upper_triangle, diff --git a/python/paddle/incubate/operators/resnet_unit.py b/python/paddle/incubate/operators/resnet_unit.py deleted file mode 100644 index af2faa4cac44a..0000000000000 --- a/python/paddle/incubate/operators/resnet_unit.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
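-
-# `resnet_unit` wraps a single fused cuDNN v8 kernel covering Conv2D +
-# BatchNorm on the main branch X, an optional Conv2D + BatchNorm shortcut
-# branch Z, an optional residual add, and the final activation. A rough
-# unfused sketch of the forward computation, inferred from the inputs and
-# attrs assembled below (illustrative pseudo-code, not a drop-in
-# equivalent):
-#
-#   out = act(bn_x(conv_x(x)) + bn_z(conv_z(z)))  # has_shortcut=True
-#   out = act(bn_x(conv_x(x)) + z)                # fuse_add=True, no shortcut
-#   out = act(bn_x(conv_x(x)))                    # otherwise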
- -import numpy as np - -import paddle -from paddle import base -from paddle.base.layer_helper import LayerHelper -from paddle.base.param_attr import ParamAttr -from paddle.nn import ( - Layer, - initializer as I, -) - - -def resnet_unit( - x, - filter_x, - scale_x, - bias_x, - mean_x, - var_x, - z, - filter_z, - scale_z, - bias_z, - mean_z, - var_z, - stride, - stride_z, - padding, - dilation, - groups, - momentum, - eps, - data_format, - fuse_add, - has_shortcut, - use_global_stats, - is_test, - act, -): - helper = LayerHelper('resnet_unit', **locals()) - bn_param_dtype = base.core.VarDesc.VarType.FP32 - bit_mask_dtype = base.core.VarDesc.VarType.INT32 - out = helper.create_variable_for_type_inference(x.dtype) - bit_mask = helper.create_variable_for_type_inference( - dtype=bit_mask_dtype, stop_gradient=True - ) - # intermediate_out for x - conv_x = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True - ) - saved_mean_x = helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True - ) - saved_invstd_x = helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True - ) - running_mean_x = mean_x - running_var_x = var_x - # intermediate_out for z - conv_z = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True - ) - saved_mean_z = helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True - ) - saved_invstd_z = helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True - ) - running_mean_z = ( - helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True - ) - if mean_z is None - else mean_z - ) - running_var_z = ( - helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True - ) - if var_z is None - else var_z - ) - - inputs = { - 'X': x, - 'FilterX': filter_x, - 'ScaleX': scale_x, - 'BiasX': bias_x, - 'MeanX': mean_x, - 'VarX': var_x, - 'Z': z, - 'FilterZ': filter_z, - 'ScaleZ': scale_z, - 'BiasZ': bias_z, - 'MeanZ': mean_z, - 'VarZ': var_z, - } - - attrs = { - 'stride': stride, - 'stride_z': stride_z, - 'padding': padding, - 'dilation': dilation, - 'group': groups, - 'momentum': momentum, - 'epsilon': eps, - 'data_format': data_format, - 'fuse_add': fuse_add, - 'has_shortcut': has_shortcut, - 'use_global_stats': use_global_stats, - 'is_test': is_test, - 'act_type': act, - } - - outputs = { - 'Y': out, - 'BitMask': bit_mask, - 'ConvX': conv_x, - 'SavedMeanX': saved_mean_x, - 'SavedInvstdX': saved_invstd_x, - 'RunningMeanX': running_mean_x, - 'RunningVarX': running_var_x, - 'ConvZ': conv_z, - 'SavedMeanZ': saved_mean_z, - 'SavedInvstdZ': saved_invstd_z, - 'RunningMeanZ': running_mean_z, - 'RunningVarZ': running_var_z, - } - - helper.append_op( - type='resnet_unit', inputs=inputs, outputs=outputs, attrs=attrs - ) - - return out - - -class ResNetUnit(Layer): - r""" - ******Temporary version******. - ResNetUnit is designed for optimize the performance by using cudnnv8 API. 
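-
-    It executes Conv2D + BatchNorm on the main branch (and, when
-    `has_shortcut=True`, a second Conv2D + BatchNorm on the shortcut branch,
-    plus the residual add and activation) as one fused kernel.
-
-    Examples:
-        .. code-block:: python
-
-            >>> # An illustrative sketch only; it assumes a CUDA build with
-            >>> # cuDNN >= 8.0 and a static-graph program.
-            >>> import paddle
-            >>> from paddle.incubate.operators.resnet_unit import ResNetUnit
-            >>> paddle.enable_static()
-            >>> x = paddle.static.data("x", [1, 56, 56, 8], dtype="float16")
-            >>> unit = ResNetUnit(num_channels_x=8, num_filters=32, filter_size=1)
-            >>> out = unit(x)  # fused conv + batch_norm + relu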
- """ - - def __init__( - self, - num_channels_x, - num_filters, - filter_size, - stride=1, - momentum=0.9, - eps=1e-5, - data_format='NHWC', - act='relu', - fuse_add=False, - has_shortcut=False, - use_global_stats=False, - is_test=False, - filter_x_attr=None, - scale_x_attr=None, - bias_x_attr=None, - moving_mean_x_name=None, - moving_var_x_name=None, - num_channels_z=1, - stride_z=1, - filter_z_attr=None, - scale_z_attr=None, - bias_z_attr=None, - moving_mean_z_name=None, - moving_var_z_name=None, - ): - super().__init__() - self._stride = stride - self._stride_z = stride_z - self._dilation = 1 - self._kernel_size = paddle.utils.convert_to_list( - filter_size, 2, 'kernel_size' - ) - self._padding = (filter_size - 1) // 2 - self._groups = 1 - self._momentum = momentum - self._eps = eps - self._data_format = data_format - self._act = act - self._fuse_add = fuse_add - self._has_shortcut = has_shortcut - self._use_global_stats = use_global_stats - self._is_test = is_test - - # check format - valid_format = {'NHWC', 'NCHW'} - if data_format not in valid_format: - raise ValueError( - f"conv_format must be one of {valid_format}, but got conv_format='{data_format}'" - ) - - def _get_default_param_initializer(channels): - filter_elem_num = np.prod(self._kernel_size) * channels - std = (2.0 / filter_elem_num) ** 0.5 - return I.Normal(0.0, std) - - is_nchw = data_format == 'NCHW' - # initial filter - bn_param_dtype = base.core.VarDesc.VarType.FP32 - if not is_nchw: - bn_param_shape = [1, 1, 1, num_filters] - filter_x_shape = [ - num_filters, - filter_size, - filter_size, - num_channels_x, - ] - filter_z_shape = [ - num_filters, - filter_size, - filter_size, - num_channels_z, - ] - else: - bn_param_shape = [1, num_filters, 1, 1] - filter_x_shape = [ - num_filters, - num_channels_x, - filter_size, - filter_size, - ] - filter_z_shape = [ - num_filters, - num_channels_z, - filter_size, - filter_size, - ] - - self.filter_x = self.create_parameter( - shape=filter_x_shape, - attr=filter_x_attr, - default_initializer=_get_default_param_initializer(num_channels_x), - ) - self.scale_x = self.create_parameter( - shape=bn_param_shape, - attr=scale_x_attr, - dtype=bn_param_dtype, - default_initializer=I.Constant(1.0), - ) - self.bias_x = self.create_parameter( - shape=bn_param_shape, - attr=bias_x_attr, - dtype=bn_param_dtype, - is_bias=True, - ) - self.mean_x = self.create_parameter( - attr=ParamAttr( - name=moving_mean_x_name, - initializer=I.Constant(0.0), - trainable=False, - ), - shape=bn_param_shape, - dtype=bn_param_dtype, - ) - self.mean_x.stop_gradient = True - self.var_x = self.create_parameter( - attr=ParamAttr( - name=moving_var_x_name, - initializer=I.Constant(1.0), - trainable=False, - ), - shape=bn_param_shape, - dtype=bn_param_dtype, - ) - self.var_x.stop_gradient = True - if has_shortcut: - self.filter_z = self.create_parameter( - shape=filter_z_shape, - attr=filter_z_attr, - default_initializer=_get_default_param_initializer( - num_channels_z - ), - ) - self.scale_z = self.create_parameter( - shape=bn_param_shape, - attr=scale_z_attr, - dtype=bn_param_dtype, - default_initializer=I.Constant(1.0), - ) - self.bias_z = self.create_parameter( - shape=bn_param_shape, - attr=bias_z_attr, - dtype=bn_param_dtype, - is_bias=True, - ) - self.mean_z = self.create_parameter( - attr=ParamAttr( - name=moving_mean_z_name, - initializer=I.Constant(0.0), - trainable=False, - ), - shape=bn_param_shape, - dtype=bn_param_dtype, - ) - self.mean_z.stop_gradient = True - self.var_z = self.create_parameter( - 
attr=ParamAttr( - name=moving_var_z_name, - initializer=I.Constant(1.0), - trainable=False, - ), - shape=bn_param_shape, - dtype=bn_param_dtype, - ) - self.var_z.stop_gradient = True - else: - self.filter_z = None - self.scale_z = None - self.bias_z = None - self.mean_z = None - self.var_z = None - - def forward(self, x, z=None): - if self._fuse_add and z is None: - raise ValueError("z can not be None") - - out = resnet_unit( - x, - self.filter_x, - self.scale_x, - self.bias_x, - self.mean_x, - self.var_x, - z, - self.filter_z, - self.scale_z, - self.bias_z, - self.mean_z, - self.var_z, - self._stride, - self._stride_z, - self._padding, - self._dilation, - self._groups, - self._momentum, - self._eps, - self._data_format, - self._fuse_add, - self._has_shortcut, - self._use_global_stats, - self._is_test, - self._act, - ) - return out diff --git a/python/paddle/incubate/passes/fuse_resnet_unit_pass.py b/python/paddle/incubate/passes/fuse_resnet_unit_pass.py deleted file mode 100644 index 042e4dc7e85aa..0000000000000 --- a/python/paddle/incubate/passes/fuse_resnet_unit_pass.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.incubate.passes import ir - - -def set_resnet_unit_attrs(resnet_unit, has_shortcut): - resnet_unit.SetAttr("fuse_add", False) - resnet_unit.SetAttr("act_type", "relu") - resnet_unit.SetAttr("has_shortcut", has_shortcut) - resnet_unit.SetAttr("data_format", 'NHWC') - resnet_unit.SetAttr("dilation", 1) - resnet_unit.Attr("stride").MappedPattern( - op="conv2d", name="strides", element_index=0 - ) - resnet_unit.Attr("padding").MappedPattern( - op="conv2d", name="paddings", element_index=0 - ) - resnet_unit.Attr("group").MappedPattern(op="conv2d", name="groups") - resnet_unit.Attr("op_device").MappedPattern(op="conv2d", name="op_device") - resnet_unit.Attr("op_namescope").MappedPattern( - op="conv2d", name="op_namescope" - ) - resnet_unit.Attr("momentum").MappedPattern(op="batch_norm", name="momentum") - resnet_unit.Attr("epsilon").MappedPattern(op="batch_norm", name="epsilon") - resnet_unit.Attr("use_global_stats").MappedPattern( - op="batch_norm", name="use_global_stats" - ) - - -def set_resnet_unit_outputs(resnet_unit, meanX, varX, meanZ=None, varZ=None): - resnet_unit.SetOutputs( - RunningMeanX=meanX, - RunningVarX=varX, - RunningMeanZ=meanZ, - RunningVarZ=varZ, - ) - - -@ir.RegisterPass -def fuse_resnet_unit(): - def pattern_conv_bn(x, filter, scale, bias, mean, var): - filter.Attr("shape")[0].Mod(32).EQ(0) - filter.Attr("shape")[1].Mod(8).EQ(0) - filter.Attr("shape")[2].EQ(1) - filter.Attr("shape")[3].EQ(1) - conv2d = ir.PassDesc.OP.conv2d(Input=x, Filter=filter) - conv2d.SetAttr("data_format", 'NHWC') - bn = ir.PassDesc.OP.batch_norm( - X=conv2d, Bias=bias, Mean=mean, Scale=scale, Variance=var - ) - return bn - - def pattern_one_input(x, filter, scale, bias, mean, var): - bn = pattern_conv_bn(x, filter, scale, bias, mean, var) - relu = 
ir.PassDesc.OP.relu(X=bn.Output("Y")) - return relu - - def replace_one_input(x, filter, scale, bias, mean, var): - resnet_unit = ir.PassDesc.OP.resnet_unit( - X=x, FilterX=filter, ScaleX=scale, BiasX=bias, MeanX=mean, VarX=var - ) - set_resnet_unit_attrs(resnet_unit, False) - set_resnet_unit_outputs(resnet_unit, mean, var) - return resnet_unit.Output("Y") - - def pattern_two_input( - x, - filterX, - scaleX, - biasX, - meanX, - varX, - z, - filterZ, - scaleZ, - biasZ, - meanZ, - varZ, - ): - bnX = pattern_conv_bn(x, filterX, scaleX, biasX, meanX, varX) - bnZ = pattern_conv_bn(z, filterZ, scaleZ, biasZ, meanZ, varZ) - ewadd = ir.PassDesc.OP.elementwise_add( - X=bnX.Output("Y"), Y=bnZ.Output("Y") - ) - relu = ir.PassDesc.OP.relu(X=ewadd) - return relu - - def replace_two_input( - x, - filterX, - scaleX, - biasX, - meanX, - varX, - z, - filterZ, - scaleZ, - biasZ, - meanZ, - varZ, - ): - resnet_unit = ir.PassDesc.OP.resnet_unit( - X=x, - FilterX=filterX, - ScaleX=scaleX, - BiasX=biasX, - MeanX=meanX, - VarX=varX, - Z=z, - FilterZ=filterZ, - ScaleZ=scaleZ, - BiasZ=biasZ, - MeanZ=meanZ, - VarZ=varZ, - ) - set_resnet_unit_attrs(resnet_unit, True) - set_resnet_unit_outputs(resnet_unit, meanX, varX, meanZ, varZ) - return resnet_unit.Output("Y") - - return (pattern_one_input, replace_one_input), ( - pattern_two_input, - replace_two_input, - ) diff --git a/python/paddle/static/amp/fp16_lists.py b/python/paddle/static/amp/fp16_lists.py index bec67fd7a7414..53f7d034ed193 100644 --- a/python/paddle/static/amp/fp16_lists.py +++ b/python/paddle/static/amp/fp16_lists.py @@ -127,7 +127,7 @@ def _get_unsupported_list(dtype): # The set of ops that support fp16 calculation and are considered numerically- # safe and performance-critical. These ops are always converted to fp16. 
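# Given the name, `_get_white_list(dtype)` below is expected to merge this
# set into the white list only when `dtype` is float16, since these fused
# ops are fp16-only.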
-_only_supported_fp16_list = {'resnet_unit', 'fused_bn_add_activation'} +_only_supported_fp16_list = {'fused_bn_add_activation'} def _get_white_list(dtype): diff --git a/python/paddle/static/amp/fp16_utils.py b/python/paddle/static/amp/fp16_utils.py index f12f125462e48..5e6ddc6d7779d 100644 --- a/python/paddle/static/amp/fp16_utils.py +++ b/python/paddle/static/amp/fp16_utils.py @@ -132,8 +132,6 @@ def _keep_fp32_input(op, in_name): return in_name != 'X' if op_type == 'fused_bn_add_activation': return in_name not in {'X', 'Z'} - if op_type == 'resnet_unit': - return in_name not in {'X', 'FilterX', 'Z', 'FilterZ'} if op_type in ['fused_attention', 'fused_feedforward']: return in_name in { 'LnScale', @@ -154,8 +152,6 @@ def _keep_fp32_output(op, out_name): return out_name != 'Y' if op_type == 'layer_norm' and _keep_layer_norm_scale_bias_to_fp32(): return out_name != 'Y' - if op_type == 'resnet_unit': - return out_name not in {'Y', 'ConvX', 'ConvZ'} if op_type in ['fused_attention', 'fused_feedforward']: return out_name in { 'LnMean', diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt index 948cbcc233dfe..088a4a9ec0789 100644 --- a/test/cpp/fluid/CMakeLists.txt +++ b/test/cpp/fluid/CMakeLists.txt @@ -7,20 +7,16 @@ if(WITH_CINN) add_subdirectory(cinn) endif() add_subdirectory(controlflow) -add_subdirectory(detection) + if(WITH_DLNNE) add_subdirectory(dlnne) endif() add_subdirectory(elementwise) add_subdirectory(fused) -if(WITH_LITE) - add_subdirectory(lite) -endif() add_subdirectory(math) if(WITH_ONEDNN) add_subdirectory(mkldnn) endif() -add_subdirectory(nccl) if(WITH_PSCORE) add_subdirectory(pscore) endif() diff --git a/test/cpp/fluid/detection/CMakeLists.txt b/test/cpp/fluid/detection/CMakeLists.txt deleted file mode 100644 index 6a69241e7846e..0000000000000 --- a/test/cpp/fluid/detection/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -paddle_test(mask_util_test SRCS mask_util_test.cc) - -if(WITH_ONNXRUNTIME AND WIN32) - # Copy onnxruntime for some c++ test in Windows, since the test will - # be build only in CI, so suppose the generator in Windows is Ninja. - copy_onnx(mask_util_test) -endif() diff --git a/test/cpp/fluid/detection/mask_util_test.cc b/test/cpp/fluid/detection/mask_util_test.cc deleted file mode 100644 index 274850c0a67dc..0000000000000 --- a/test/cpp/fluid/detection/mask_util_test.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/detection/mask_util.h" - -#include - -#include "paddle/fluid/memory/memory.h" - -namespace paddle { -namespace operators { - -template -void Compare(const T* a, const T* b, const int n) { - for (int i = 0; i < n; i++) { - EXPECT_EQ(a[i], b[i]); - } -} - -TEST(MaskUtil, Poly2MaskTest) { - float polys[] = {// NOLINT - 1.97f, - 1.88f, - 5.81f, - 1.88f, - 1.69f, - 6.53f, - 5.94f, - 6.38f, - 1.97f, - 1.88f}; - int h = 8, w = 8; - int k = 5; // length(polys) / 2 - // clang-format off - uint8_t expect_mask[] = { // NOLINT - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - // clang-format on - - // the ground-truth mask is computed by coco API: - // - // import pycocotools.mask as mask_util - // import numpy as np - // segm = [1.97, 1.88, 5.81, 1.88, 1.69, 6.53, 5.94, 6.38, 1.97, 1.88] - // rles = mask_util.frPyObjects([segm], im_h, im_w) - // mask = mask_util.decode(rles) - // print mask - platform::CPUPlace cpu; - auto allocation = memory::Alloc(cpu, sizeof(expect_mask)); - uint8_t* mask = reinterpret_cast(allocation->ptr()); - Poly2Mask(polys, k, h, w, mask); - Compare(expect_mask, mask, h * w); -} - -TEST(MaskUtil, Poly2BoxesTest) { - // clang-format off - std::vector>> polys = { - {{1.97f, 1.88f, 5.81f, 1.88f, 1.69f, 6.53f, 5.94f, 6.38f, 1.97f, 1.88f}}, - {{2.97f, 1.88f, 3.81f, 1.68f, 1.69f, 6.63f, 6.94f, 6.58f, 2.97f, 0.88f}} - }; - float expect_boxes[] = { // NOLINT - 1.69f, 1.88f, 5.94f, 6.53f, - 1.69f, 0.88f, 6.94f, 6.63f - }; - // clang-format on - - platform::CPUPlace cpu; - auto allocation = memory::Alloc(cpu, sizeof(expect_boxes)); - float* boxes = reinterpret_cast(allocation->ptr()); - Poly2Boxes(polys, boxes); - Compare(expect_boxes, boxes, 8); -} - -TEST(MaskUtil, Polys2MaskWrtBoxTest) { - // clang-format off - std::vector>> polys = {{ - {1.97f, 1.88f, 5.81f, 1.88f, 1.69f, 6.53f, 5.94f, 6.38f, 1.97f, 1.88f}, - {2.97f, 1.88f, 3.81f, 1.68f, 1.69f, 6.63f, 6.94f, 6.58f, 2.97f, 0.88f}}}; - float expect_boxes[] = { // NOLINT - 1.69f, 0.88f, 6.94f, 6.63f - }; - uint8_t expect_mask[] = { // NOLINT - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1 - }; - // clang-format on - - platform::CPUPlace cpu; - auto allocation = memory::Alloc(cpu, sizeof(expect_boxes)); - float* boxes = reinterpret_cast(allocation->ptr()); - Poly2Boxes(polys, boxes); - Compare(expect_boxes, boxes, 4); - - auto allocation_mask = memory::Alloc(cpu, sizeof(expect_mask)); - uint8_t* mask = reinterpret_cast(allocation_mask->ptr()); - int M = 8; - Polys2MaskWrtBox(polys[0], expect_boxes, M, mask); - Compare(expect_mask, mask, M * M); -} - -} // namespace operators -} // namespace paddle diff --git a/test/cpp/fluid/lite/CMakeLists.txt b/test/cpp/fluid/lite/CMakeLists.txt deleted file mode 100644 index 6533073258ff5..0000000000000 --- a/test/cpp/fluid/lite/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -paddle_test(test_lite_engine_op SRCS lite_engine_op_test.cc) - -if(WITH_ONNXRUNTIME AND WIN32) - # Copy onnxruntime for some c++ test in Windows, since the test will - # be build only in CI, so suppose the generator in Windows is Ninja. 
- copy_onnx(test_lite_engine_op) -endif() diff --git a/test/cpp/fluid/lite/lite_engine_op_test.cc b/test/cpp/fluid/lite/lite_engine_op_test.cc deleted file mode 100644 index ca4dd444335d0..0000000000000 --- a/test/cpp/fluid/lite/lite_engine_op_test.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. */ - -#include "paddle/fluid/operators/lite/lite_engine_op.h" - -#include - -#include "paddle/fluid/framework/block_desc.h" -#include "paddle/fluid/framework/op_desc.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/operators/lite/ut_helper.h" - -USE_NO_KERNEL_OP(lite_engine) - -using paddle::inference::lite::AddFetchListToBlockDesc; -using paddle::inference::lite::AddTensorToBlockDesc; -using paddle::inference::lite::CreateTensor; -using paddle::inference::lite::serialize_params; -namespace paddle { -namespace operators { - -TEST(LiteEngineOp, engine_op) { - framework::ProgramDesc program; - auto* block_ = program.Proto()->mutable_blocks(0); - framework::BlockDesc block_desc(&program, block_); - auto* feed0 = block_desc.AppendOp(); - feed0->SetType("feed"); - feed0->SetInput("X", {"feed"}); - feed0->SetOutput("Out", {"x"}); - feed0->SetAttr("col", 0); - auto* feed1 = block_desc.AppendOp(); - feed1->SetType("feed"); - feed1->SetInput("X", {"feed"}); - feed1->SetOutput("Out", {"y"}); - feed1->SetAttr("col", 1); - LOG(INFO) << "create elementwise_add op"; - auto* elt_add = block_desc.AppendOp(); - elt_add->SetType("elementwise_add"); - elt_add->SetInput("X", std::vector({"x"})); - elt_add->SetInput("Y", std::vector({"y"})); - elt_add->SetOutput("Out", std::vector({"z"})); - elt_add->SetAttr("axis", -1); - LOG(INFO) << "create fetch op"; - auto* fetch = block_desc.AppendOp(); - fetch->SetType("fetch"); - fetch->SetInput("X", std::vector({"z"})); - fetch->SetOutput("Out", std::vector({"out"})); - fetch->SetAttr("col", 0); - // Set inputs' variable shape in BlockDesc - AddTensorToBlockDesc(block_, "x", std::vector({2, 4}), true); - AddTensorToBlockDesc(block_, "y", std::vector({2, 4}), true); - AddTensorToBlockDesc(block_, "z", std::vector({2, 4}), false); - AddFetchListToBlockDesc(block_, "out"); - *block_->add_ops() = *feed1->Proto(); - *block_->add_ops() = *feed0->Proto(); - *block_->add_ops() = *elt_add->Proto(); - *block_->add_ops() = *fetch->Proto(); - framework::Scope scope; - platform::CPUPlace place; - phi::CPUContext ctx(place); - // Prepare variables. 
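-  //
-  // The block assembled above is, schematically:
-  //
-  //   feed(col=0) -> x --+
-  //                      +--> elementwise_add -> z -> fetch(col=0) -> out
-  //   feed(col=1) -> y --+
-  //
-  // with x, y and z all of shape {2, 4}.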
- CreateTensor(&scope, "x", std::vector({2, 4})); - CreateTensor(&scope, "y", std::vector({2, 4})); - CreateTensor(&scope, "out", std::vector({2, 4})); - - ASSERT_EQ(block_->ops_size(), 4); - - std::vector repetitive_params{"x", "y"}; - inference::lite::EngineConfig config; - config.valid_places = { -#if defined(PADDLE_WITH_ARM) - paddle::lite_api::Place({TARGET(kARM), PRECISION(kFloat)}), -#else - paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}), -#endif - paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}), - }; - serialize_params(&(config.param), &scope, repetitive_params); - config.model = program.Proto()->SerializeAsString(); - LOG(INFO) << "create lite_engine desc"; - framework::OpDesc engine_op_desc(nullptr); - engine_op_desc.SetType("lite_engine"); - engine_op_desc.SetInput("Xs", std::vector({"x", "y"})); - engine_op_desc.SetOutput("Ys", std::vector({"out"})); - std::string engine_key = "engine_0"; - engine_op_desc.SetAttr("engine_key", engine_key); - engine_op_desc.SetAttr("enable_int8", false); - engine_op_desc.SetAttr("use_gpu", true); - engine_op_desc.SetAttr("zero_copy", true); - engine_op_desc.SetBlockAttr("sub_block", &block_desc); - // TODO(wilber): The ut is out of date, we need to a new lite subgraph test. - // inference::Singleton::Global().Create( - // engine_key, config); - // LOG(INFO) << "create engine op"; - // auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc); - // LOG(INFO) << "engine_op " << engine_op.get(); - // // Execute them. - // LOG(INFO) << "engine_op run"; - // engine_op->Run(scope, place); - // LOG(INFO) << "done"; -} - -} // namespace operators -} // namespace paddle diff --git a/test/cpp/fluid/nccl/CMakeLists.txt b/test/cpp/fluid/nccl/CMakeLists.txt deleted file mode 100644 index a8bd7b7f55634..0000000000000 --- a/test/cpp/fluid/nccl/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -if(NOT (WITH_NCCL OR WITH_RCCL)) - return() -endif() - -if(WITH_GPU AND NOT WIN32) - nv_test( - nccl_op_test - SRCS nccl_op_test.cu.cc - DEPS nccl_op gpu_info device_context) -endif() - -if(WITH_ROCM AND NOT WIN32) - hip_test( - nccl_op_test - SRCS nccl_op_test.cu.cc - DEPS nccl_op gpu_info device_context) -endif() diff --git a/test/cpp/fluid/nccl/nccl_op_test.cu.cc b/test/cpp/fluid/nccl/nccl_op_test.cu.cc deleted file mode 100644 index b8a47b9703165..0000000000000 --- a/test/cpp/fluid/nccl/nccl_op_test.cu.cc +++ /dev/null @@ -1,318 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include - -#include -#include // NOLINT -#include // NOLINT -#include - -#include "paddle/fluid/framework/op_desc.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/nccl/nccl_gpu_common.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/init.h" -#include "paddle/fluid/platform/place.h" - -USE_NO_KERNEL_OP(ncclInit); -USE_OP_ITSELF(ncclAllReduce); -USE_OP_ITSELF(ncclReduce); -USE_OP_ITSELF(ncclBcast); -PD_DECLARE_KERNEL(ncclAllReduce, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(ncclReduce, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(ncclBcast, GPU, ALL_LAYOUT); - -namespace f = paddle::framework; -namespace p = paddle::platform; - -// test data amount -const f::DDim kDims = {20, 20}; - -// nccl op common tester, init communicator. -class NCCLTester : public ::testing::Test { - public: - void SetUp() override { - int count = p::GetGPUDeviceCount(); - if (count <= 0) { - LOG(WARNING) << "Cannot test gpu nccl, because the CUDA device count is " - << count; - exit(0); - } - for (int i = 0; i < count; ++i) { - gpu_list_.emplace_back(i); - } - - p::CPUPlace cpu_place; - f::InitDevices(); - pool_ptr_ = &p::DeviceContextPool::Instance(); - - NCCLInitOp(); - } - - void NCCLInitOp() { - paddle::platform::CPUPlace cpu_place; - std::unique_ptr op1(new f::OpDesc); - - op1->SetType("ncclInit"); - op1->SetInput("parallel_scopes", {"p_scopes"}); - op1->SetOutput("Communicator", {"comm"}); - - auto *var = g_scope_.Var("comm"); - var->GetMutable(); - - auto *scope_var = g_scope_.Var("p_scopes"); - auto *p_scopes = scope_var->GetMutable>(); - (*p_scopes).resize(gpu_list_.size()); - - auto op = f::OpRegistry::CreateOp(*op1); - VLOG(1) << "invoke NCCLInitOp."; - op->Run(g_scope_, cpu_place); - VLOG(1) << "NCCLInitOp finished."; - } - - int GetGPUData(int gpu_id) { return gpu_id + 42; } - - template - void PerThreadProgram(int gpu_id, const f::OpDesc &op_desc, f::Scope *scope) { - std::unique_lock lk(mu_); - const f::OpDesc *op1 = &op_desc; - - p::CUDAPlace place(gpu_id); - const auto &ctx = pool_ptr_->Get(place); - - auto *send_tensor = scope->Var("st")->GetMutable(); - auto *recv_tensor = scope->Var("rt")->GetMutable(); - - if (!send_tensor->numel()) { - send_tensor->mutable_data(kDims, place); - - std::vector send_vector(common::product(kDims), GetGPUData(gpu_id)); - paddle::framework::TensorFromVector(send_vector, *ctx, send_tensor); - VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel(); - } - - lk.unlock(); - - PADDLE_ENFORCE_EQ( - send_tensor->numel(), - common::product(kDims), - paddle::platform::errors::InvalidArgument("Tensor numel not match!")); - - auto op = f::OpRegistry::CreateOp(*op1); - - VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type(); - VLOG(1) << " send_tensor : " << send_tensor->numel() - << " recv_tensor : " << recv_tensor->numel(); - op->Run(*scope, place); - VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type(); - } - - void testNcclReduceOp(); - void testNcclAllReduceOp(); - void testNcclBcastOp(); - - public: - p::DeviceContextPool *pool_ptr_; - f::Scope g_scope_; - std::mutex mu_; - std::vector gpu_list_; -}; - -void NCCLTester::testNcclAllReduceOp() { - std::unique_ptr op2(new f::OpDesc); - op2->SetType("ncclAllReduce"); - op2->SetInput("X", {"st"}); - op2->SetInput("Communicator", {"comm"}); - op2->SetOutput("Out", 
{"rt"}); - - std::vector dev_scopes; - - std::vector ths; - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - dev_scopes.emplace_back(&g_scope_.NewScope()); - std::thread th(&NCCLTester::PerThreadProgram, - this, - gpu_list_[i], - *op2.get(), - dev_scopes[i]); - ths.emplace_back(std::move(th)); - } - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - ths[i].join(); - } - - float expected_result = 0.0; - for (int gpu_id : gpu_list_) { - expected_result = expected_result + GetGPUData(gpu_id); - } - - for (size_t i = 0; i < dev_scopes.size(); ++i) { - p::CPUPlace cpu_place; - p::CUDAPlace gpu_place(gpu_list_[i]); - - auto &recv_tensor = dev_scopes[i]->FindVar("rt")->Get(); - auto *rt = recv_tensor.data(); - auto *result_tensor = - dev_scopes[i]->Var("ct")->GetMutable(); - result_tensor->Resize(kDims); - auto *ct = result_tensor->mutable_data(cpu_place); - - auto *dev_ctx = static_cast(pool_ptr_->Get(gpu_place)); - paddle::memory::Copy(cpu_place, - ct, - p::CUDAPlace(gpu_list_[i]), - rt, - recv_tensor.numel() * sizeof(float), - dev_ctx->stream()); - dev_ctx->Wait(); - - for (int64_t j = 0; j < common::product(kDims); ++j) { - ASSERT_NEAR(ct[j], expected_result, 1e-5); - } - } -} - -void NCCLTester::testNcclReduceOp() { - std::unique_ptr op2(new f::OpDesc); - const int kRoot = 0; - op2->SetType("ncclReduce"); - op2->SetInput("X", {"st"}); - op2->SetInput("Communicator", {"comm"}); - op2->SetOutput("Out", {"rt"}); - op2->SetAttr("root", kRoot); - - std::vector dev_scopes; - - std::vector ths; - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - dev_scopes.emplace_back(&g_scope_.NewScope()); - std::thread th(&NCCLTester::PerThreadProgram, - this, - gpu_list_[i], - *op2.get(), - dev_scopes[i]); - ths.emplace_back(std::move(th)); - } - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - ths[i].join(); - } - - float expected_result = 0.0; - for (int gpu_id : gpu_list_) { - expected_result = expected_result + GetGPUData(gpu_id); - } - - p::CPUPlace cpu_place; - p::CUDAPlace gpu_place(gpu_list_[kRoot]); - - auto &recv_tensor = dev_scopes[kRoot]->FindVar("rt")->Get(); - auto *rt = recv_tensor.data(); - auto *result_tensor = - dev_scopes[kRoot]->Var("ct")->GetMutable(); - result_tensor->Resize(kDims); - auto *ct = result_tensor->mutable_data(cpu_place); - - auto *dev_ctx = static_cast(pool_ptr_->Get(gpu_place)); - paddle::memory::Copy(cpu_place, - ct, - p::CUDAPlace(gpu_list_[kRoot]), - rt, - recv_tensor.numel() * sizeof(float), - dev_ctx->stream()); - dev_ctx->Wait(); - - for (int64_t j = 0; j < common::product(kDims); ++j) { - ASSERT_NEAR(ct[j], expected_result, 1e-5); - } -} - -void NCCLTester::testNcclBcastOp() { - std::unique_ptr op2(new f::OpDesc); - const int kRoot = 0; - op2->SetType("ncclBcast"); - op2->SetInput("X", {"st"}); - op2->SetInput("Communicator", {"comm"}); - op2->SetOutput("Out", {"rt"}); - op2->SetAttr("root", kRoot); - - std::vector dev_scopes; - - std::vector ths; - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - dev_scopes.emplace_back(&g_scope_.NewScope()); - std::thread th(&NCCLTester::PerThreadProgram, - this, - gpu_list_[i], - *op2.get(), - dev_scopes[i]); - ths.emplace_back(std::move(th)); - } - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - ths[i].join(); - } - - const int idx = gpu_list_.size() - 1; - float result = GetGPUData(kRoot); - - p::CPUPlace cpu_place; - p::CUDAPlace gpu_place(gpu_list_[idx]); - - std::string rt_str = "rt"; - if (idx == kRoot) { - rt_str = "st"; - } - auto &recv_tensor = dev_scopes[idx]->FindVar(rt_str)->Get(); - auto *rt = 
recv_tensor.data(); - auto *result_tensor = - dev_scopes[idx]->Var("ct")->GetMutable(); - result_tensor->Resize(kDims); - auto *ct = result_tensor->mutable_data(cpu_place); - - auto *dev_ctx = static_cast(pool_ptr_->Get(gpu_place)); - paddle::memory::Copy(cpu_place, - ct, - p::CUDAPlace(gpu_list_[idx]), - rt, - recv_tensor.numel() * sizeof(float), - dev_ctx->stream()); - dev_ctx->Wait(); - - for (int64_t j = 0; j < common::product(kDims); ++j) { - ASSERT_NEAR(ct[j], result, 1e-5); - } -} - -// ncclInitOp with desc -TEST_F(NCCLTester, ncclInitOp) {} - -TEST_F(NCCLTester, ncclOp) { - // Serial execution is required for the same nccl comm. - - testNcclReduceOp(); - - testNcclAllReduceOp(); - - testNcclBcastOp(); -} diff --git a/test/cpp/inference/api/CMakeLists.txt b/test/cpp/inference/api/CMakeLists.txt index 14392a60feaf7..14052463f2b0c 100644 --- a/test/cpp/inference/api/CMakeLists.txt +++ b/test/cpp/inference/api/CMakeLists.txt @@ -772,14 +772,6 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) inference_analysis_api_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC}) - # mobilenet-ssd int8 - set(INT8_MOBILENET_SSD_MODEL_DIR "${INT8_DATA_DIR}/mobilenet-ssd") - download_int8_data_without_verify(${INT8_MOBILENET_SSD_MODEL_DIR} - "mobilenet_ssd_int8_model.tar.gz") - inference_analysis_api_object_dection_int8_test_run( - test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} - ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH}) - ### Lexcial analysis GRU model set(GRU_PATH "${INFERENCE_DEMO_INSTALL_DIR}/gru") download_gru_data_without_verify("${GRU_PATH}" "GRU_eval_data.tar.gz") @@ -1328,8 +1320,6 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) if(WITH_ONEDNN) set_tests_properties(test_analyzer_int8_resnet50 PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_int8_mobilenet_ssd PROPERTIES TIMEOUT - 120) set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120) diff --git a/test/ir/CMakeLists.txt b/test/ir/CMakeLists.txt index 134783e11c35d..44af6ff2518a0 100644 --- a/test/ir/CMakeLists.txt +++ b/test/ir/CMakeLists.txt @@ -18,7 +18,6 @@ else() set_tests_properties(${target} PROPERTIES LABELS "RUN_TYPE=INFER") endforeach() add_subdirectory(pir) - set_tests_properties(test_fuse_resnet_unit PROPERTIES TIMEOUT 120) set_tests_properties(test_convert_to_mixed_precision PROPERTIES TIMEOUT 300) endif() diff --git a/test/ir/test_fuse_resnet_unit.py b/test/ir/test_fuse_resnet_unit.py deleted file mode 100644 index 7e5885116e087..0000000000000 --- a/test/ir/test_fuse_resnet_unit.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
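-
-# The test below builds a Conv2D + BatchNorm(act='relu') block in NHWC with
-# float16 data, applies the `fuse_resnet_unit` IR pass to its graph, and
-# checks that the fused program matches the unfused one numerically
-# (rtol=1e-05, atol=0.005).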
- -import unittest - -import numpy as np - -import paddle -import paddle.incubate -from paddle.base import core - -paddle.enable_static() -np.random.seed(0) - - -@unittest.skipIf( - not paddle.is_compiled_with_cuda() - or paddle.get_cudnn_version() < 8000 - or paddle.device.cuda.get_device_capability()[0] < 7 - or paddle.device.cuda.get_device_capability()[0] >= 9, - "only support with cuda and cudnn version is at least 8.0 " - "and device's compute capability is at least 7.0 and less than 9.0", -) -class TestFuseResNetUnit(unittest.TestCase): - def test_fuse_resnet_unit(self): - place = paddle.CUDAPlace(0) - program = paddle.static.Program() - startup_program = paddle.static.Program() - with paddle.static.amp.fp16_guard(): - with paddle.static.program_guard(program, startup_program): - x = paddle.static.data("x", [1, 64, 64, 8], dtype="float16") - conv2d = paddle.nn.Conv2D( - 8, 32, 1, bias_attr=False, data_format='NHWC' - ) - batch_norm = paddle.nn.BatchNorm( - 32, act='relu', data_layout='NHWC' - ) - out = batch_norm(conv2d(x)) - graph = core.Graph(program.desc) - core.get_pass("fuse_resnet_unit").apply(graph) - after_program = paddle.base.framework.IrGraph(graph).to_program() - params = paddle.static.amp.cast_model_to_fp16(program) - after_params = paddle.static.amp.cast_model_to_fp16(after_program) - exe = paddle.static.Executor(place) - exe.run(startup_program) - paddle.static.amp.cast_parameters_to_fp16( - place, program, to_fp16_var_names=params - ) - paddle.static.amp.cast_parameters_to_fp16( - place, after_program, to_fp16_var_names=after_params - ) - feed = {"x": np.random.randn(1, 64, 64, 8).astype("float16")} - before_out = exe.run(program, feed=feed, fetch_list=[out.name]) - after_out = exe.run(after_program, feed=feed, fetch_list=[out.name]) - np.testing.assert_allclose( - before_out[0], after_out[0], rtol=1e-05, atol=0.005 - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index 63d84ece4aa98..a0ab822524de4 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -191,8 +191,6 @@ endif() if((NOT WITH_GPU) AND (NOT WITH_ROCM)) list(REMOVE_ITEM TEST_OPS test_fused_conv2d_add_act_op) - list(REMOVE_ITEM TEST_OPS test_rank_attention_op) - # TODO(shenliang03): rank_attention_op support CPU device in future list(REMOVE_ITEM TEST_OPS test_batch_fc_op) # TODO(shenliang03): batch_fc_op support CPU device in future # TODO(Yancey1989): parallel dygraph support CPU device in future @@ -251,7 +249,6 @@ if(APPLE) ) # this op is not support on mac list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op) - list(REMOVE_ITEM TEST_OPS test_detection_map_op) list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass) endif() if(NOT WITH_MKLML) @@ -525,7 +522,6 @@ set(TEST_OPS_WITH_GC test_fill_zeros_like2_op test_gather_op test_gather_nd_op - test_linear_chain_crf_op test_lod_reset_op test_lookup_table_op test_mean_op @@ -1187,9 +1183,6 @@ if((WITH_ROCM OR WITH_GPU) AND NOT WIN32) set_tests_properties(test_pipeline_parallel PROPERTIES TIMEOUT 120) set_tests_properties(test_fleet_perf_test PROPERTIES TIMEOUT 120) endif() -if(WITH_GPU OR WITH_ROCM) - set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) -endif() if(WITH_GPU AND NOT WIN32) set_tests_properties(test_fused_multi_transformer_int8_op PROPERTIES TIMEOUT 60) diff --git a/test/legacy_test/test_ctc_align.py b/test/legacy_test/test_ctc_align.py deleted file mode 100644 index 699b176518be1..0000000000000 --- 
a/test/legacy_test/test_ctc_align.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - -import paddle - - -def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None): - if input_length is None: - lod0 = lod[0] - result = [] - cur_offset = 0 - for i in range(len(lod0)): - prev_token = -1 - for j in range(cur_offset, cur_offset + lod0[i]): - token = input[j][0] - if (token != blank) and not ( - merge_repeated and token == prev_token - ): - result.append(token) - prev_token = token - cur_offset += lod0[i] - result = np.array(result).reshape([len(result), 1]).astype("int32") - if len(result) == 0: - result = np.array([[-1]]) - return result - else: - result = [[] for i in range(len(input))] - output_length = [] - for i in range(len(input)): - prev_token = -1 - for j in range(input_length[i][0]): - token = input[i][j] - if (token != blank) and not ( - merge_repeated and token == prev_token - ): - result[i].append(token) - prev_token = token - start = len(result[i]) - output_length.append([start]) - for j in range(start, len(input[i])): - result[i].append(padding) - result = ( - np.array(result) - .reshape([len(input), len(input[0])]) - .astype("int32") - ) - output_length = ( - np.array(output_length).reshape([len(input), 1]).astype("int32") - ) - - return result, output_length - - -class TestCTCAlignOp(OpTest): - def config(self): - self.op_type = "ctc_align" - self.input_lod = [[11, 7]] - self.blank = 0 - self.merge_repeated = False - self.input = ( - np.array([0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0]) - .reshape([18, 1]) - .astype("int32") - ) - - def setUp(self): - self.config() - output = CTCAlign( - self.input, self.input_lod, self.blank, self.merge_repeated - ) - - self.inputs = { - "Input": (self.input, self.input_lod), - } - self.outputs = {"Output": output} - self.attrs = { - "blank": self.blank, - "merge_repeated": self.merge_repeated, - } - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. 
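-        # Worked example for the config above (blank=0,
-        # merge_repeated=False, lod=[[11, 7]]): only blanks are dropped and
-        # repeats are kept, so
-        #   [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6] -> [1, 2, 2, 4, 4, 5, 6]
-        #   [6, 0, 0, 7, 7, 7, 0]             -> [6, 7, 7, 7]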
- self.check_output(check_dygraph=False) - - -class TestCTCAlignOpCase1(TestCTCAlignOp): - def config(self): - self.op_type = "ctc_align" - self.input_lod = [[11, 8]] - self.blank = 0 - self.merge_repeated = True - self.input = ( - np.array([0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0, 0]) - .reshape([19, 1]) - .astype("int32") - ) - - -class TestCTCAlignOpCase2(TestCTCAlignOp): - def config(self): - self.op_type = "ctc_align" - self.input_lod = [[4]] - self.blank = 0 - self.merge_repeated = True - self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32") - - -class TestCTCAlignPaddingOp(OpTest): - def config(self): - self.op_type = "ctc_align" - self.input_lod = [] - self.blank = 0 - self.padding_value = 0 - self.merge_repeated = True - self.input = ( - np.array( - [ - [0, 2, 4, 4, 0, 6, 3, 6, 6, 0, 0], - [1, 1, 3, 0, 0, 4, 5, 6, 0, 0, 0], - ] - ) - .reshape([2, 11]) - .astype("int32") - ) - self.input_length = np.array([[9], [8]]).reshape([2, 1]).astype("int32") - - def setUp(self): - self.config() - output, output_length = CTCAlign( - self.input, - self.input_lod, - self.blank, - self.merge_repeated, - self.padding_value, - self.input_length, - ) - self.inputs = { - "Input": (self.input, self.input_lod), - "InputLength": self.input_length, - } - self.outputs = {"Output": output, "OutputLength": output_length} - self.attrs = { - "blank": self.blank, - "merge_repeated": self.merge_repeated, - "padding_value": self.padding_value, - } - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - -class TestCTCAlignOpCase3(TestCTCAlignPaddingOp): - def config(self): - self.op_type = "ctc_align" - self.blank = 0 - self.input_lod = [] - self.merge_repeated = True - self.padding_value = 0 - self.input = ( - np.array( - [[0, 1, 2, 2, 0, 4], [0, 4, 5, 0, 6, 0], [0, 7, 7, 7, 0, 0]] - ) - .reshape([3, 6]) - .astype("int32") - ) - self.input_length = ( - np.array([[6], [5], [4]]).reshape([3, 1]).astype("int32") - ) - - -class TestCTCAlignOpCase4(TestCTCAlignPaddingOp): - ''' - # test tensor input which has attr input padding_value - ''' - - def config(self): - self.op_type = "ctc_align" - self.blank = 0 - self.input_lod = [] - self.merge_repeated = False - self.padding_value = 0 - self.input = ( - np.array( - [[0, 1, 2, 2, 0, 4], [0, 4, 5, 0, 6, 0], [0, 7, 7, 7, 0, 0]] - ) - .reshape([3, 6]) - .astype("int32") - ) - self.input_length = ( - np.array([[6], [5], [4]]).reshape([3, 1]).astype("int32") - ) - - -class TestCTCAlignOpCase5(TestCTCAlignPaddingOp): - def config(self): - self.op_type = "ctc_align" - self.blank = 0 - self.input_lod = [] - self.merge_repeated = False - self.padding_value = 1 - self.input = ( - np.array( - [[0, 1, 2, 2, 0, 4], [0, 4, 5, 0, 6, 0], [0, 7, 1, 7, 0, 0]] - ) - .reshape([3, 6]) - .astype("int32") - ) - self.input_length = ( - np.array([[6], [5], [4]]).reshape([3, 1]).astype("int32") - ) - - -if __name__ == "__main__": - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/test_detection_map_op.py b/test/legacy_test/test_detection_map_op.py deleted file mode 100644 index 376b9876cd46a..0000000000000 --- a/test/legacy_test/test_detection_map_op.py +++ /dev/null @@ -1,360 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import collections -import math -import unittest - -import numpy as np -from op_test import OpTest - - -class TestDetectionMAPOp(OpTest): - def set_data(self): - self.class_num = 4 - self.init_test_case() - self.mAP = [self.calc_map(self.tf_pos, self.tf_pos_lod)] - self.label = np.array(self.label).astype('float32') - self.detect = np.array(self.detect).astype('float32') - self.mAP = np.array(self.mAP).astype('float32') - - if len(self.class_pos_count) > 0: - self.class_pos_count = np.array(self.class_pos_count).astype( - 'int32' - ) - self.true_pos = np.array(self.true_pos).astype('float32') - self.false_pos = np.array(self.false_pos).astype('float32') - self.has_state = np.array([1]).astype('int32') - - self.inputs = { - 'Label': (self.label, self.label_lod), - 'DetectRes': (self.detect, self.detect_lod), - 'HasState': self.has_state, - 'PosCount': self.class_pos_count, - 'TruePos': (self.true_pos, self.true_pos_lod), - 'FalsePos': (self.false_pos, self.false_pos_lod), - } - else: - self.inputs = { - 'Label': (self.label, self.label_lod), - 'DetectRes': (self.detect, self.detect_lod), - } - - self.attrs = { - 'overlap_threshold': self.overlap_threshold, - 'evaluate_difficult': self.evaluate_difficult, - 'ap_type': self.ap_type, - 'class_num': self.class_num, - } - - self.out_class_pos_count = np.array(self.out_class_pos_count).astype( - 'int' - ) - self.out_true_pos = np.array(self.out_true_pos).astype('float32') - self.out_false_pos = np.array(self.out_false_pos).astype('float32') - - self.outputs = { - 'MAP': self.mAP, - 'AccumPosCount': self.out_class_pos_count, - 'AccumTruePos': (self.out_true_pos, self.out_true_pos_lod), - 'AccumFalsePos': (self.out_false_pos, self.out_false_pos_lod), - } - - def init_test_case(self): - self.overlap_threshold = 0.3 - self.evaluate_difficult = True - self.ap_type = "integral" - - self.label_lod = [[2, 2]] - # label difficult xmin ymin xmax ymax - self.label = [ - [1, 0, 0.1, 0.1, 0.3, 0.3], - [1, 1, 0.6, 0.6, 0.8, 0.8], - [2, 0, 0.3, 0.3, 0.6, 0.5], - [1, 0, 0.7, 0.1, 0.9, 0.3], - ] - - # label score xmin ymin xmax ymax difficult - self.detect_lod = [[3, 4]] - self.detect = [ - [1, 0.3, 0.1, 0.0, 0.4, 0.3], - [1, 0.7, 0.0, 0.1, 0.2, 0.3], - [1, 0.9, 0.7, 0.6, 0.8, 0.8], - [2, 0.8, 0.2, 0.1, 0.4, 0.4], - [2, 0.1, 0.4, 0.3, 0.7, 0.5], - [1, 0.2, 0.8, 0.1, 1.0, 0.3], - [3, 0.2, 0.8, 0.1, 1.0, 0.3], - ] - - # label score true_pos false_pos - self.tf_pos_lod = [[3, 4]] - self.tf_pos = [ - [1, 0.9, 1, 0], - [1, 0.7, 1, 0], - [1, 0.3, 0, 1], - [1, 0.2, 1, 0], - [2, 0.8, 0, 1], - [2, 0.1, 1, 0], - [3, 0.2, 0, 1], - ] - - self.class_pos_count = [] - self.true_pos_lod = [[]] - self.true_pos = [[]] - self.false_pos_lod = [[]] - self.false_pos = [[]] - - def calc_map(self, tf_pos, tf_pos_lod): - mAP = 0.0 - count = 0 - - def get_input_pos( - class_pos_count, true_pos, true_pos_lod, false_pos, false_pos_lod - ): - class_pos_count_dict = collections.Counter() - true_pos_dict = collections.defaultdict(list) - false_pos_dict = collections.defaultdict(list) - for i, count in enumerate(class_pos_count): - class_pos_count_dict[i] = count - - cur_pos = 0 - 
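# (Editor's note, not part of the deleted file: true_pos_lod / false_pos_lod
# are length-based LoDs -- entry i is the number of (score, count) rows that
# belong to class i -- so the loops below turn lengths such as [3, 2] into
# the row ranges [0, 3) and [3, 5).)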
for i in range(len(true_pos_lod[0])): - start = cur_pos - cur_pos += true_pos_lod[0][i] - end = cur_pos - for j in range(start, end): - true_pos_dict[i].append(true_pos[j]) - - cur_pos = 0 - for i in range(len(false_pos_lod[0])): - start = cur_pos - cur_pos += false_pos_lod[0][i] - end = cur_pos - for j in range(start, end): - false_pos_dict[i].append(false_pos[j]) - - return class_pos_count_dict, true_pos_dict, false_pos_dict - - def get_output_pos(label_count, true_pos, false_pos): - label_number = self.class_num - - out_class_pos_count = [] - out_true_pos_lod = [] - out_true_pos = [] - out_false_pos_lod = [] - out_false_pos = [] - - for i in range(label_number): - out_class_pos_count.append([label_count[i]]) - true_pos_list = true_pos[i] - out_true_pos += true_pos_list - out_true_pos_lod.append(len(true_pos_list)) - false_pos_list = false_pos[i] - out_false_pos += false_pos_list - out_false_pos_lod.append(len(false_pos_list)) - - return ( - out_class_pos_count, - out_true_pos, - [out_true_pos_lod], - out_false_pos, - [out_false_pos_lod], - ) - - def get_accumulation(pos_list): - sorted_list = sorted(pos_list, key=lambda pos: pos[0], reverse=True) - sum = 0 - accu_list = [] - for score, count in sorted_list: - sum += count - accu_list.append(sum) - return accu_list - - label_count, true_pos, false_pos = get_input_pos( - self.class_pos_count, - self.true_pos, - self.true_pos_lod, - self.false_pos, - self.false_pos_lod, - ) - for v in self.label: - label = v[0] - difficult = False if len(v) == 5 else v[1] - if self.evaluate_difficult: - label_count[label] += 1 - elif not difficult: - label_count[label] += 1 - - for label, score, tp, fp in tf_pos: - true_pos[label].append([score, tp]) - false_pos[label].append([score, fp]) - - for label, label_pos_num in label_count.items(): - if label_pos_num == 0: - continue - if label not in true_pos: - count += 1 - continue - label_true_pos = true_pos[label] - label_false_pos = false_pos[label] - - accu_tp_sum = get_accumulation(label_true_pos) - accu_fp_sum = get_accumulation(label_false_pos) - - precision = [] - recall = [] - - for i in range(len(accu_tp_sum)): - precision.append( - float(accu_tp_sum[i]) - / float(accu_tp_sum[i] + accu_fp_sum[i]) - ) - recall.append(float(accu_tp_sum[i]) / label_pos_num) - - if self.ap_type == "11point": - max_precisions = [0.0] * 11 - start_idx = len(accu_tp_sum) - 1 - for j in range(10, -1, -1): - for i in range(start_idx, -1, -1): - if recall[i] < float(j) / 10.0: - start_idx = i - if j > 0: - max_precisions[j - 1] = max_precisions[j] - break - else: - if max_precisions[j] < precision[i]: - max_precisions[j] = precision[i] - for j in range(10, -1, -1): - mAP += max_precisions[j] / 11 - count += 1 - elif self.ap_type == "integral": - average_precisions = 0.0 - prev_recall = 0.0 - for i in range(len(accu_tp_sum)): - if math.fabs(recall[i] - prev_recall) > 1e-6: - average_precisions += precision[i] * math.fabs( - recall[i] - prev_recall - ) - prev_recall = recall[i] - - mAP += average_precisions - count += 1 - pcnt, tp, tp_lod, fp, fp_lod = get_output_pos( - label_count, true_pos, false_pos - ) - self.out_class_pos_count = pcnt - self.out_true_pos = tp - self.out_true_pos_lod = tp_lod - self.out_false_pos = fp - self.out_false_pos_lod = fp_lod - if count != 0: - mAP /= count - return mAP - - def setUp(self): - self.op_type = "detection_map" - self.set_data() - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. 
- self.check_output(check_dygraph=False) - - -class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): - def init_test_case(self): - super().init_test_case() - - self.evaluate_difficult = False - - self.tf_pos_lod = [[2, 4]] - # label score true_pos false_pos - self.tf_pos = [ - [1, 0.7, 1, 0], - [1, 0.3, 0, 1], - [1, 0.2, 1, 0], - [2, 0.8, 0, 1], - [2, 0.1, 1, 0], - [3, 0.2, 0, 1], - ] - - -class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp): - def init_test_case(self): - super().init_test_case() - - # label xmin ymin xmax ymax - self.label = [ - [1, 0.1, 0.1, 0.3, 0.3], - [1, 0.6, 0.6, 0.8, 0.8], - [2, 0.3, 0.3, 0.6, 0.5], - [1, 0.7, 0.1, 0.9, 0.3], - ] - - -class TestDetectionMAPOp11Point(TestDetectionMAPOp): - def init_test_case(self): - super().init_test_case() - - self.ap_type = "11point" - - -class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp): - def init_test_case(self): - super().init_test_case() - self.class_pos_count = [0, 2, 1, 0] - self.true_pos_lod = [[0, 3, 2]] - self.true_pos = [ - [0.7, 1.0], - [0.3, 0.0], - [0.2, 1.0], - [0.8, 0.0], - [0.1, 1.0], - ] - self.false_pos_lod = [[0, 3, 2]] - self.false_pos = [ - [0.7, 0.0], - [0.3, 1.0], - [0.2, 0.0], - [0.8, 1.0], - [0.1, 0.0], - ] - - -class TestDetectionMAPOp11PointWithClassNoTP(TestDetectionMAPOp): - def init_test_case(self): - self.overlap_threshold = 0.3 - self.evaluate_difficult = True - self.ap_type = "11point" - - self.label_lod = [[2]] - # label difficult xmin ymin xmax ymax - self.label = [[2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]] - - # label score xmin ymin xmax ymax difficult - self.detect_lod = [[1]] - self.detect = [[1, 0.2, 0.8, 0.1, 1.0, 0.3]] - - # label score true_pos false_pos - self.tf_pos_lod = [[3, 4]] - self.tf_pos = [[1, 0.2, 1, 0]] - - self.class_pos_count = [] - self.true_pos_lod = [[]] - self.true_pos = [[]] - self.false_pos_lod = [[]] - self.false_pos = [[]] - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_generate_mask_labels_op.py b/test/legacy_test/test_generate_mask_labels_op.py deleted file mode 100644 index 86ab3cb088879..0000000000000 --- a/test/legacy_test/test_generate_mask_labels_op.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
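Editor's note: the deleted calc_map above reconstructs per-class AP from
(score, tp/fp) pairs. As a compact cross-check, here is a self-contained
sketch of its "integral" mode; integral_ap and its argument names are
editorial inventions, not Paddle APIs:

    import numpy as np

    def integral_ap(scored_hits, num_gt):
        # scored_hits: (score, is_true_positive) pairs for one class, with
        # is_true_positive in {0.0, 1.0}; num_gt: ground-truth count for it.
        scored_hits = sorted(scored_hits, key=lambda p: p[0], reverse=True)
        tp = np.cumsum([hit for _, hit in scored_hits])
        fp = np.cumsum([1.0 - hit for _, hit in scored_hits])
        precision = tp / (tp + fp)
        recall = tp / float(num_gt)
        ap, prev_recall = 0.0, 0.0
        for p, r in zip(precision, recall):
            if abs(r - prev_recall) > 1e-6:  # same tolerance as the test above
                ap += p * abs(r - prev_recall)
                prev_recall = r
        return ap

    # e.g. integral_ap([(0.9, 1.0), (0.8, 0.0), (0.7, 1.0)], num_gt=2)
    # gives 1.0 * 0.5 + (2/3) * 0.5 = 0.8333...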
- -import math -import unittest - -import numpy as np - -''' -# Equivalent code -rles = mask_util.frPyObjects([segm], im_h, im_w) -mask = mask_util.decode(rles) -''' - - -def decode(cnts, m): - v = 0 - mask = [] - for j in range(m): - for k in range(cnts[j]): - mask.append(v) - v = 1 - v - return mask - - -def poly2mask(xy, k, h, w): - scale = 5.0 - x = [int(scale * p + 0.5) for p in xy[::2]] - x = x + [x[0]] - y = [int(scale * p + 0.5) for p in xy[1::2]] - y = y + [y[0]] - m = sum( - [ - int(max(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1]))) + 1 - for j in range(k) - ] - ) - - u, v = [], [] - for j in range(k): - xs = x[j] - xe = x[j + 1] - ys = y[j] - ye = y[j + 1] - dx = abs(xe - xs) - dy = abs(ys - ye) - flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye) - if flip: - xs, xe = xe, xs - ys, ye = ye, ys - - if dx >= dy: - if dx == 0: - assert ye - ys == 0 - s = 0 if dx == 0 else float(ye - ys) / dx - else: - if dy == 0: - assert xe - xs == 0 - s = 0 if dy == 0 else float(xe - xs) / dy - - if dx >= dy: - ts = [dx - d if flip else d for d in range(dx + 1)] - u.extend([xs + t for t in ts]) - v.extend([int(ys + s * t + 0.5) for t in ts]) - else: - ts = [dy - d if flip else d for d in range(dy + 1)] - v.extend([t + ys for t in ts]) - u.extend([int(xs + s * t + 0.5) for t in ts]) - - k = len(u) - x = np.zeros((k), np.int_) - y = np.zeros((k), np.int_) - m = 0 - for j in range(1, k): - if u[j] != u[j - 1]: - xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1)) - xd = (xd + 0.5) / scale - 0.5 - if math.floor(xd) != xd or xd < 0 or xd > (w - 1): - continue - yd = float(v[j] if v[j] < v[j - 1] else v[j - 1]) - yd = (yd + 0.5) / scale - 0.5 - yd = math.ceil(0 if yd < 0 else (h if yd > h else yd)) - x[m] = int(xd) - y[m] = int(yd) - m += 1 - k = m - a = [int(x[i] * h + y[i]) for i in range(k)] - a.append(h * w) - a.sort() - b = [0] + a[: len(a) - 1] - a = [c - d for (c, d) in zip(a, b)] - - k += 1 - b = [0 for i in range(k)] - b[0] = a[0] - m, j = 1, 1 - while j < k: - if a[j] > 0: - b[m] = a[j] - m += 1 - j += 1 - else: - j += 1 - if j < k: - b[m - 1] += a[j] - j += 1 - mask = decode(b, m) - mask = np.array(mask, dtype=np.int_).reshape((w, h)) - mask = mask.transpose((1, 0)) - return mask - - -def polys_to_boxes(polys): - """Convert a list of polygons into an array of tight bounding boxes.""" - boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) - for i in range(len(polys)): - poly = polys[i] - x0 = min(min(p[::2]) for p in poly) - x1 = max(max(p[::2]) for p in poly) - y0 = min(min(p[1::2]) for p in poly) - y1 = max(max(p[1::2]) for p in poly) - boxes_from_polys[i, :] = [x0, y0, x1, y1] - return boxes_from_polys - - -def bbox_overlaps(boxes, query_boxes): - N = boxes.shape[0] - K = query_boxes.shape[0] - overlaps = np.zeros((N, K), dtype=boxes.dtype) - for k in range(K): - box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * ( - query_boxes[k, 3] - query_boxes[k, 1] + 1 - ) - for n in range(N): - iw = ( - min(boxes[n, 2], query_boxes[k, 2]) - - max(boxes[n, 0], query_boxes[k, 0]) - + 1 - ) - if iw > 0: - ih = ( - min(boxes[n, 3], query_boxes[k, 3]) - - max(boxes[n, 1], query_boxes[k, 1]) - + 1 - ) - if ih > 0: - ua = float( - (boxes[n, 2] - boxes[n, 0] + 1) - * (boxes[n, 3] - boxes[n, 1] + 1) - + box_area - - iw * ih - ) - overlaps[n, k] = iw * ih / ua - return overlaps - - -def polys_to_mask_wrt_box(polygons, box, M): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. 
The polygon segmentation - is understood to be enclosed in the given box and rasterized to an M x M - mask. The resulting mask is therefore of shape (M, M). - """ - w = box[2] - box[0] - h = box[3] - box[1] - - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - polygons_norm = [] - for poly in polygons: - p = np.array(poly, dtype=np.float32) - p[0::2] = (p[0::2] - box[0]) * M / w - p[1::2] = (p[1::2] - box[1]) * M / h - polygons_norm.append(p) - - mask = [] - for polygons in polygons_norm: - assert polygons.shape[0] % 2 == 0 - k = polygons.shape[0] // 2 - mask.append(poly2mask(polygons, k, M, M)) - mask = np.array(mask) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=0) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def expand_mask_targets(masks, mask_class_labels, resolution, num_classes): - """Expand masks from shape (#masks, resolution ** 2) - to (#masks, #classes * resolution ** 2) to encode class - specific mask targets. - """ - assert masks.shape[0] == mask_class_labels.shape[0] - - # Target values of -1 are "don't care" / ignore labels - mask_targets = -np.ones( - (masks.shape[0], num_classes * resolution**2), dtype=np.int32 - ) - for i in range(masks.shape[0]): - cls = int(mask_class_labels[i]) - start = resolution**2 * cls - end = start + resolution**2 - # Ignore background instance - # (only happens when there is no fg samples in an image) - if cls > 0: - mask_targets[i, start:end] = masks[i, :] - return mask_targets - - -def generate_mask_labels( - num_classes, - im_info, - gt_classes, - is_crowd, - label_int32, - gt_polys, - resolution, - rois, - roi_lod, - gt_lod, -): - mask_rois = [] - roi_has_mask_int32 = [] - mask_int32 = [] - new_lod = [] - for i in range(len(im_info)): - roi_s = roi_lod[i] - roi_e = roi_lod[i + 1] - gt_s = gt_lod[i] - gt_e = gt_lod[i + 1] - mask_blob = _sample_mask( - num_classes, - im_info[i], - gt_classes[gt_s:gt_e], - is_crowd[gt_s:gt_e], - label_int32[roi_s:roi_e], - gt_polys[i], - resolution, - rois[roi_s:roi_e], - ) - new_lod.append(mask_blob['mask_rois'].shape[0]) - mask_rois.append(mask_blob['mask_rois']) - roi_has_mask_int32.append(mask_blob['roi_has_mask_int32']) - mask_int32.append(mask_blob['mask_int32']) - return mask_rois, roi_has_mask_int32, mask_int32, new_lod - - -def _sample_mask( - num_classes, - im_info, - gt_classes, - is_crowd, - label_int32, - gt_polys, # [[[], []], []] - resolution, - rois, -): - mask_blob = {} - im_scale = im_info[2] - sample_boxes = rois - polys_gt_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0] - polys_gt = [gt_polys[i] for i in polys_gt_inds] - boxes_from_polys = polys_to_boxes(polys_gt) - - fg_inds = np.where(label_int32 > 0)[0] - roi_has_mask = fg_inds.copy() - if fg_inds.shape[0] > 0: - mask_class_labels = label_int32[fg_inds] - masks = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32) - rois_fg = sample_boxes[fg_inds] - overlaps_bbfg_bbpolys = bbox_overlaps( - rois_fg.astype(np.float32), boxes_from_polys.astype(np.float32) - ) - fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) - for i in range(rois_fg.shape[0]): - fg_polys_ind = fg_polys_inds[i] - poly_gt = polys_gt[fg_polys_ind] - roi_fg = rois_fg[i] - mask = polys_to_mask_wrt_box(poly_gt, roi_fg, resolution) - mask = np.array(mask > 0, dtype=np.int32) - masks[i, :] = np.reshape(mask, resolution**2) - else: - bg_inds = np.where(label_int32 == 0)[0] - rois_fg = sample_boxes[bg_inds[0]].reshape((1, -1)) - masks = -np.ones((1, resolution**2), dtype=np.int32) - mask_class_labels = np.zeros((1,)) - 
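# (Editor's note, not part of the deleted file: this else branch is the
# no-foreground fallback -- a single background RoI is kept and its mask
# targets are all -1, which expand_mask_targets above documents as the
# "don't care" / ignore label, so downstream shapes stay valid without the
# sample contributing to the loss.)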
roi_has_mask = np.append(roi_has_mask, 0) - masks = expand_mask_targets( - masks, mask_class_labels, resolution, num_classes - ) - rois_fg *= im_scale - mask_blob['mask_rois'] = rois_fg - mask_blob['roi_has_mask_int32'] = roi_has_mask - mask_blob['mask_int32'] = masks - return mask_blob - - -def trans_lod(lod): - new_lod = [0] - for i in range(len(lod)): - new_lod.append(lod[i] + new_lod[i]) - return new_lod - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_generate_proposal_labels_op.py b/test/legacy_test/test_generate_proposal_labels_op.py deleted file mode 100644 index 903201b9856a7..0000000000000 --- a/test/legacy_test/test_generate_proposal_labels_op.py +++ /dev/null @@ -1,553 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -def generate_proposal_labels_in_python( - rpn_rois, - gt_classes, - is_crowd, - gt_boxes, - im_info, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - use_random, - is_cls_agnostic, - is_cascade_rcnn, - max_overlaps=None, -): - rois = [] - labels_int32 = [] - bbox_targets = [] - bbox_inside_weights = [] - bbox_outside_weights = [] - max_overlap_with_gt = [] - lod = [] - assert len(rpn_rois) == len( - im_info - ), 'batch size of rpn_rois and ground_truth is not matched' - - for im_i in range(len(im_info)): - max_overlap = max_overlaps[im_i] if is_cascade_rcnn else None - frcn_blobs = _sample_rois( - rpn_rois[im_i], - gt_classes[im_i], - is_crowd[im_i], - gt_boxes[im_i], - im_info[im_i], - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - use_random, - is_cls_agnostic, - is_cascade_rcnn, - max_overlap, - ) - lod.append(frcn_blobs['rois'].shape[0]) - rois.append(frcn_blobs['rois']) - labels_int32.append(frcn_blobs['labels_int32']) - bbox_targets.append(frcn_blobs['bbox_targets']) - bbox_inside_weights.append(frcn_blobs['bbox_inside_weights']) - bbox_outside_weights.append(frcn_blobs['bbox_outside_weights']) - max_overlap_with_gt.append(frcn_blobs['max_overlap']) - - return ( - rois, - labels_int32, - bbox_targets, - bbox_inside_weights, - bbox_outside_weights, - max_overlap_with_gt, - lod, - ) - - -def filter_roi(rois, max_overlap): - ws = rois[:, 2] - rois[:, 0] + 1 - hs = rois[:, 3] - rois[:, 1] + 1 - keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1.0))[0] - if len(keep) > 0: - return rois[keep, :] - return np.zeros((1, 4)).astype('float32') - - -def _sample_rois( - rpn_rois, - gt_classes, - is_crowd, - gt_boxes, - im_info, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - use_random, - is_cls_agnostic, - is_cascade_rcnn, - max_overlap, -): - rois_per_image = int(batch_size_per_im) - fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) - - # Roidb - im_scale = im_info[2] - 
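# (Editor's note, not part of the deleted file: im_info is laid out as
# [height, width, scale] -- see init_test_input below, which sets the third
# entry to 0.8. RPN proposals arrive in scaled-image coordinates, so they
# are mapped back by 1/scale here to match gt_boxes, sampled and matched in
# the unscaled frame, and multiplied by im_scale again just before being
# packed into frcn_blobs.)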
inv_im_scale = 1.0 / im_scale - rpn_rois = rpn_rois * inv_im_scale - - if is_cascade_rcnn: - rpn_rois = filter_roi(rpn_rois, max_overlap) - - boxes = np.vstack([gt_boxes, rpn_rois]) - - gt_overlaps = np.zeros((boxes.shape[0], class_nums)) - box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32) - proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes) - - overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1) - overlaps_max = proposal_to_gt_overlaps.max(axis=1) - # Boxes which with non-zero overlap with gt boxes - overlapped_boxes_ind = np.where(overlaps_max > 0)[0] - overlapped_boxes_gt_classes = gt_classes[ - overlaps_argmax[overlapped_boxes_ind] - ] - gt_overlaps[ - overlapped_boxes_ind, overlapped_boxes_gt_classes - ] = overlaps_max[overlapped_boxes_ind] - box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ - overlapped_boxes_ind - ] - - crowd_ind = np.where(is_crowd)[0] - gt_overlaps[crowd_ind] = -1.0 - max_overlaps = gt_overlaps.max(axis=1) - max_classes = gt_overlaps.argmax(axis=1) - - if is_cascade_rcnn: - # Cascade RCNN Decode Filter - fg_inds = np.where(max_overlaps >= fg_thresh)[0] - bg_inds = np.where( - (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo) - )[0] - fg_rois_per_this_image = fg_inds.shape[0] - bg_rois_per_this_image = bg_inds.shape[0] - else: - # Foreground - fg_inds = np.where(max_overlaps >= fg_thresh)[0] - fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) - # Sample foreground if there are too many - if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random: - fg_inds = np.random.choice( - fg_inds, size=fg_rois_per_this_image, replace=False - ) - fg_inds = fg_inds[:fg_rois_per_this_image] - # Background - bg_inds = np.where( - (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo) - )[0] - bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image - bg_rois_per_this_image = np.minimum( - bg_rois_per_this_image, bg_inds.shape[0] - ) - # Sample background if there are too many - if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random: - bg_inds = np.random.choice( - bg_inds, size=bg_rois_per_this_image, replace=False - ) - bg_inds = bg_inds[:bg_rois_per_this_image] - - keep_inds = np.append(fg_inds, bg_inds) - sampled_labels = max_classes[keep_inds] - sampled_labels[fg_rois_per_this_image:] = 0 - sampled_boxes = boxes[keep_inds] - sampled_max_overlap = max_overlaps[keep_inds] - sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]] - sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0] - bbox_label_targets = _compute_targets( - sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights - ) - bbox_targets, bbox_inside_weights = _expand_bbox_targets( - bbox_label_targets, class_nums, is_cls_agnostic - ) - bbox_outside_weights = np.array( - bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype - ) - # Scale rois - sampled_rois = sampled_boxes * im_scale - - # Faster RCNN blobs - frcn_blobs = { - 'rois': sampled_rois, - 'labels_int32': sampled_labels, - 'bbox_targets': bbox_targets, - 'bbox_inside_weights': bbox_inside_weights, - 'bbox_outside_weights': bbox_outside_weights, - 'max_overlap': sampled_max_overlap, - } - return frcn_blobs - - -def _bbox_overlaps(roi_boxes, gt_boxes): - w1 = np.maximum(roi_boxes[:, 2] - roi_boxes[:, 0] + 1, 0) - h1 = np.maximum(roi_boxes[:, 3] - roi_boxes[:, 1] + 1, 0) - w2 = np.maximum(gt_boxes[:, 2] - gt_boxes[:, 0] + 1, 0) - h2 = np.maximum(gt_boxes[:, 3] - gt_boxes[:, 1] + 1, 0) - area1 = w1 * h1 - area2 = w2 * h2 - - overlaps = 
np.zeros((roi_boxes.shape[0], gt_boxes.shape[0])) - for ind1 in range(roi_boxes.shape[0]): - for ind2 in range(gt_boxes.shape[0]): - inter_x1 = np.maximum(roi_boxes[ind1, 0], gt_boxes[ind2, 0]) - inter_y1 = np.maximum(roi_boxes[ind1, 1], gt_boxes[ind2, 1]) - inter_x2 = np.minimum(roi_boxes[ind1, 2], gt_boxes[ind2, 2]) - inter_y2 = np.minimum(roi_boxes[ind1, 3], gt_boxes[ind2, 3]) - inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0) - inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0) - inter_area = inter_w * inter_h - iou = inter_area / (area1[ind1] + area2[ind2] - inter_area) - overlaps[ind1, ind2] = iou - return overlaps - - -def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights): - assert roi_boxes.shape[0] == gt_boxes.shape[0] - assert roi_boxes.shape[1] == 4 - assert gt_boxes.shape[1] == 4 - - targets = np.zeros(roi_boxes.shape) - bbox_reg_weights = np.asarray(bbox_reg_weights) - targets = _box_to_delta( - ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights - ) - - return np.hstack([labels[:, np.newaxis], targets]).astype( - np.float32, copy=False - ) - - -def _box_to_delta(ex_boxes, gt_boxes, weights): - ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1 - ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1 - ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w - ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h - - gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1 - gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1 - gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w - gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h - - dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0] - dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1] - dw = (np.log(gt_w / ex_w)) / weights[2] - dh = (np.log(gt_h / ex_h)) / weights[3] - - targets = np.vstack([dx, dy, dw, dh]).transpose() - return targets - - -def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic): - class_labels = bbox_targets_input[:, 0] - fg_inds = np.where(class_labels > 0)[0] - # if is_cls_agnostic: - # class_labels = [1 if ll > 0 else 0 for ll in class_labels] - # class_labels = np.array(class_labels, dtype=np.int32) - # class_nums = 2 - bbox_targets = np.zeros( - ( - class_labels.shape[0], - 4 * class_nums if not is_cls_agnostic else 4 * 2, - ) - ) - bbox_inside_weights = np.zeros(bbox_targets.shape) - for ind in fg_inds: - class_label = int(class_labels[ind]) if not is_cls_agnostic else 1 - start_ind = class_label * 4 - end_ind = class_label * 4 + 4 - bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:] - bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0) - return bbox_targets, bbox_inside_weights - - -class TestGenerateProposalLabelsOp(OpTest): - def set_data(self): - # self.use_random = False - self.init_use_random() - self.init_test_params() - self.init_test_input() - self.init_test_cascade() - self.init_test_output() - - self.inputs = { - 'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod), - 'GtClasses': (self.gt_classes[0], self.gts_lod), - 'IsCrowd': (self.is_crowd[0], self.gts_lod), - 'GtBoxes': (self.gt_boxes[0], self.gts_lod), - 'ImInfo': self.im_info, - } - if self.max_overlaps is not None: - self.inputs['MaxOverlap'] = ( - self.max_overlaps[0], - self.rpn_rois_lod, - ) - - self.attrs = { - 'batch_size_per_im': self.batch_size_per_im, - 'fg_fraction': self.fg_fraction, - 'fg_thresh': self.fg_thresh, - 'bg_thresh_hi': self.bg_thresh_hi, - 'bg_thresh_lo': self.bg_thresh_lo, - 'bbox_reg_weights': self.bbox_reg_weights, - 'class_nums': self.class_nums, - 'use_random': self.use_random, - 'is_cls_agnostic': self.is_cls_agnostic, - 'is_cascade_rcnn': 
self.is_cascade_rcnn, - } - self.outputs = { - 'Rois': (self.rois, [self.lod]), - 'LabelsInt32': (self.labels_int32, [self.lod]), - 'BboxTargets': (self.bbox_targets, [self.lod]), - 'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]), - 'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]), - 'MaxOverlapWithGT': (self.max_overlap_with_gt, [self.lod]), - } - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - def setUp(self): - self.op_type = 'generate_proposal_labels' - self.set_data() - - def init_test_cascade( - self, - ): - self.is_cascade_rcnn = False - self.max_overlaps = None - - def init_use_random(self): - self.use_random = False - - def init_test_params(self): - self.batch_size_per_im = 100 - self.fg_fraction = 0.25 - self.fg_thresh = 0.5 - self.bg_thresh_hi = 0.5 - self.bg_thresh_lo = 0.0 - self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] - self.is_cls_agnostic = False - self.class_nums = 2 if self.is_cls_agnostic else 81 - - def init_test_input(self): - np.random.seed(0) - gt_nums = 6 # Keep same with batch_size_per_im for unittest - proposal_nums = 200 - images_shape = [[64, 64]] - self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) - for i in range(len(images_shape)): - self.im_info[i, 0] = images_shape[i][0] - self.im_info[i, 1] = images_shape[i][1] - self.im_info[i, 2] = 0.8 # scale - - self.rpn_rois, self.rpn_rois_lod = _generate_proposals( - images_shape, proposal_nums - ) - ground_truth, self.gts_lod = _generate_groundtruth( - images_shape, self.class_nums, gt_nums - ) - - self.gt_classes = [gt['gt_classes'] for gt in ground_truth] - self.gt_boxes = [gt['boxes'] for gt in ground_truth] - self.is_crowd = [gt['is_crowd'] for gt in ground_truth] - - def init_test_output(self): - ( - self.rois, - self.labels_int32, - self.bbox_targets, - self.bbox_inside_weights, - self.bbox_outside_weights, - self.max_overlap_with_gt, - self.lod, - ) = generate_proposal_labels_in_python( - self.rpn_rois, - self.gt_classes, - self.is_crowd, - self.gt_boxes, - self.im_info, - self.batch_size_per_im, - self.fg_fraction, - self.fg_thresh, - self.bg_thresh_hi, - self.bg_thresh_lo, - self.bbox_reg_weights, - self.class_nums, - self.use_random, - self.is_cls_agnostic, - self.is_cascade_rcnn, - self.max_overlaps, - ) - self.rois = np.vstack(self.rois) - self.labels_int32 = np.hstack(self.labels_int32) - self.labels_int32 = self.labels_int32[:, np.newaxis] - self.bbox_targets = np.vstack(self.bbox_targets) - self.bbox_inside_weights = np.vstack(self.bbox_inside_weights) - self.bbox_outside_weights = np.vstack(self.bbox_outside_weights) - self.max_overlap_with_gt = np.concatenate(self.max_overlap_with_gt) - - -class TestCascade(TestGenerateProposalLabelsOp): - def init_test_cascade(self): - self.is_cascade_rcnn = True - roi_num = len(self.rpn_rois[0]) - self.max_overlaps = [] - max_overlap = np.random.rand(roi_num).astype('float32') - # Make GT samples with overlap = 1 - max_overlap[max_overlap > 0.9] = 1.0 - self.max_overlaps.append(max_overlap) - - -class TestUseRandom(TestGenerateProposalLabelsOp): - def init_use_random(self): - self.use_random = True - self.is_cascade_rcnn = False - - def test_check_output(self): - self.check_output_customized(self.verify_out) - - def verify_out(self, outs): - print("skip") - - def init_test_params(self): - self.batch_size_per_im = 512 - self.fg_fraction = 0.025 - self.fg_thresh = 0.5 - self.bg_thresh_hi = 0.5 - self.bg_thresh_lo = 0.0 - self.bbox_reg_weights = [0.1, 
0.1, 0.2, 0.2] - self.is_cls_agnostic = False - self.class_nums = 2 if self.is_cls_agnostic else 81 - - -class TestClsAgnostic(TestCascade): - def init_test_params(self): - self.batch_size_per_im = 512 - self.fg_fraction = 0.25 - self.fg_thresh = 0.5 - self.bg_thresh_hi = 0.5 - self.bg_thresh_lo = 0.0 - self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] - self.is_cls_agnostic = True - self.class_nums = 2 if self.is_cls_agnostic else 81 - - -class TestOnlyGT(TestCascade): - def init_test_input(self): - np.random.seed(0) - gt_nums = 6 # Keep same with batch_size_per_im for unittest - proposal_nums = 6 - images_shape = [[64, 64]] - self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) - for i in range(len(images_shape)): - self.im_info[i, 0] = images_shape[i][0] - self.im_info[i, 1] = images_shape[i][1] - self.im_info[i, 2] = 0.8 # scale - - ground_truth, self.gts_lod = _generate_groundtruth( - images_shape, self.class_nums, gt_nums - ) - - self.gt_classes = [gt['gt_classes'] for gt in ground_truth] - self.gt_boxes = [gt['boxes'] for gt in ground_truth] - self.is_crowd = [gt['is_crowd'] for gt in ground_truth] - self.rpn_rois = self.gt_boxes - self.rpn_rois_lod = self.gts_lod - - -class TestOnlyGT2(TestCascade): - def init_test_cascade(self): - self.is_cascade_rcnn = True - roi_num = len(self.rpn_rois[0]) - self.max_overlaps = [] - max_overlap = np.ones(roi_num).astype('float32') - self.max_overlaps.append(max_overlap) - - -def _generate_proposals(images_shape, proposal_nums): - rpn_rois = [] - rpn_rois_lod = [] - num_proposals = 0 - for i, image_shape in enumerate(images_shape): - proposals = _generate_boxes(image_shape, proposal_nums) - rpn_rois.append(proposals) - num_proposals = len(proposals) - rpn_rois_lod.append(num_proposals) - return rpn_rois, [rpn_rois_lod] - - -def _generate_groundtruth(images_shape, class_nums, gt_nums): - ground_truth = [] - gts_lod = [] - num_gts = 0 - for i, image_shape in enumerate(images_shape): - # Avoid background - gt_classes = np.random.randint( - low=1, high=class_nums, size=gt_nums - ).astype(np.int32) - gt_boxes = _generate_boxes(image_shape, gt_nums) - is_crowd = np.zeros((gt_nums), dtype=np.int32) - is_crowd[0] = 1 - ground_truth.append( - {'gt_classes': gt_classes, 'boxes': gt_boxes, 'is_crowd': is_crowd} - ) - num_gts += len(gt_classes) - gts_lod.append(num_gts) - return ground_truth, [gts_lod] - - -def _generate_boxes(image_size, box_nums): - width = image_size[0] - height = image_size[1] - xywh = np.random.rand(box_nums, 4) - xy1 = xywh[:, [0, 1]] * image_size - wh = xywh[:, [2, 3]] * (image_size - xy1) - xy2 = xy1 + wh - boxes = np.hstack([xy1, xy2]) - boxes[:, [0, 2]] = np.minimum( - width - 1.0, np.maximum(0.0, boxes[:, [0, 2]]) - ) - boxes[:, [1, 3]] = np.minimum( - height - 1.0, np.maximum(0.0, boxes[:, [1, 3]]) - ) - return boxes.astype(np.float32) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_layers.py b/test/legacy_test/test_layers.py index b2e3691eac705..9d60992c186d9 100644 --- a/test/legacy_test/test_layers.py +++ b/test/legacy_test/test_layers.py @@ -30,7 +30,6 @@ batch_fc, partial_concat, partial_sum, - rank_attention, shuffle_batch, ) from paddle.pir_utils import test_with_pir_api @@ -2266,27 +2265,6 @@ def test_batch_fc(self): ) return out - def test_rank_attention(self): - with self.static_graph(): - input = paddle.static.data( - name="input", shape=[None, 2], dtype="float32" - ) - rank_offset = paddle.static.data( - name="rank_offset", shape=[None, 7], dtype="int32" - ) - out = 
rank_attention( - input=input, - rank_offset=rank_offset, - rank_param_shape=[18, 3], - rank_param_attr=base.ParamAttr( - learning_rate=1.0, - name="ubm_rank_param.w_0", - initializer=paddle.nn.initializer.XavierNormal(), - ), - max_rank=3, - ) - return out - def test_row_conv(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): diff --git a/test/legacy_test/test_linear_chain_crf_op.py b/test/legacy_test/test_linear_chain_crf_op.py deleted file mode 100755 index 6899a34063378..0000000000000 --- a/test/legacy_test/test_linear_chain_crf_op.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import unittest - -import numpy as np -from op_test import OpTest - - -class LinearChainCrfForward: - def __init__( - self, - seq_start_positions, - emission_weights, - emission_row_max, - emission_exps, - transition_weights, - transition_exps, - labels, - ): - self.tag_num = emission_weights.shape[1] - self.seq_num = len(seq_start_positions) - 1 - - self.seq_start_positions = seq_start_positions - self.labels = labels - self.x = emission_weights - - self.x_row_max = emission_row_max - self.x_exps = emission_exps - - # unnormalized logits of the transition weights for the start mark. - self.a = transition_weights[0, :] - self.a_exps = transition_exps[0, :] - # unnormalized logits of the transition weights for the end mark. - self.b = transition_weights[1, :] - self.b_exps = transition_exps[1, :] - # unnormalized logits of the transition weights for all the other tags. - self.w = transition_weights[2:, :] - self.w_exps = transition_exps[2:, :] - - # The output of linear chain crf operator. - # alpha is a memo table in dynamic programming to calculate - # nomalization factor. - self.alpha = np.zeros( - (seq_start_positions[-1], self.tag_num), dtype="float64" - ) - self.log_likelihood = np.zeros((self.seq_num, 1)) - - def _l1_norm(self, x): - s = np.sum(x) - x /= s - return s - - def _forward_a_sequence(self, x, x_row_max, x_exps, label, alpha): - seq_len = x_row_max.shape[0] - log_likelihood = 0.0 - - for i in range(self.tag_num): - alpha[0, i] = self.a_exps[i] * x_exps[0, i] - log_likelihood = -x_row_max[0] - np.log(self._l1_norm(alpha[0, :])) - - # calculate the unnormalized logits of the normalization factor. - for k in range(1, seq_len): - for i in range(self.tag_num): - s = 0.0 - for j in range(self.tag_num): - s += alpha[k - 1, j] * self.w_exps[j, i] - alpha[k, i] = x_exps[k, i] * s - log_likelihood -= x_row_max[k] + np.log(self._l1_norm(alpha[k, :])) - s = 0.0 - for i in range(self.tag_num): - s += alpha[-1, i] * self.b_exps[i] - log_likelihood -= np.log(s) - - # calculate the nominator part. 
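# (Editor's note, not part of the deleted file: "nominator" here means the
# numerator, i.e. the unnormalized score of the gold path
#     s(y) = a[y_0] + sum_k x[k, y_k] + sum_{k>0} w[y_{k-1}, y_k] + b[y_T].
# At this point log_likelihood holds -log Z from the alpha recursion above,
# so after adding s(y) it equals log p(y|x), and the function returns the
# negative log-likelihood log Z - s(y).)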
- log_likelihood += self.a[label[0]] + x[0, label[0]] + self.b[label[-1]] - - for k in range(1, seq_len): - log_likelihood += x[k, label[k]] + self.w[label[k - 1], label[k]] - return -log_likelihood - - def crf_forward_compute(self): - for i in range(self.seq_num): - start = self.seq_start_positions[i] - end = self.seq_start_positions[i + 1] - if start >= end: - continue - self.log_likelihood[i] = self._forward_a_sequence( - self.x[start:end, :], - self.x_row_max[start:end, :], - self.x_exps[start:end, :], - self.labels[start:end, :], - self.alpha[start:end, :], - ) - return self.alpha, self.log_likelihood - - -class TestLinearChainCrfOp(OpTest): - def set_test_data(self): - # TODO(caoying) Fix the unittest by: add the boundary cases when - # sequence lengths are 1, 2, and 3. - - SEQ_NUM = 3 - TAG_NUM = 17 - MAX_SEQ_LEN = 5 - - # the linear_chain_crf operator only supports sequence (LoD level = 1) - lod = [[]] - seq_start_pos = [0] - for i in range(SEQ_NUM): - lod[-1].append(random.randint(1, MAX_SEQ_LEN)) - seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1]) - emission = np.random.uniform( - -1, 1, [seq_start_pos[-1], TAG_NUM] - ).astype("float64") - emission_row_max = np.amax(emission, axis=1, keepdims=True) - emission_exps = np.exp(emission - emission_row_max) - - transition = np.random.uniform( - -0.5, 0.5, [TAG_NUM + 2, TAG_NUM] - ).astype("float64") - transition_exps = np.exp(transition) - - labels = np.random.randint( - low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64" - ) - - self.inputs = { - "Emission": (emission, lod), - "Transition": transition, - "Label": (labels, lod), - } - crf = LinearChainCrfForward( - seq_start_pos, - emission, - emission_row_max, - emission_exps, - transition, - transition_exps, - labels, - ) - alpha, log_likelihood = crf.crf_forward_compute() - - self.outputs = { - "Alpha": alpha, - "EmissionExps": emission_exps, - "TransitionExps": transition_exps, - "LogLikelihood": log_likelihood, - } - - def setUp(self): - self.op_type = "linear_chain_crf" - self.set_test_data() - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(["Emission", "Transition"], "LogLikelihood") - - def test_check_grad_ignore_transition(self): - self.check_grad( - ["Emission"], "LogLikelihood", no_grad_set=set("Transition") - ) - - -class TestLinearChainCrfPaddingTensor(OpTest): - def seq_pad(self, data, length): - max_len = np.max(length) - shape = [len(length), max_len] + list(data.shape[1:]) - padded = np.zeros(shape).astype(data.dtype) - offset = 0 - for i, l in enumerate(length): - padded[i, 0:l] = data[offset : offset + l] - offset += l - return padded - - def seq_pad_exps(self, data, length): - # Adding for transition_exps - max_len = np.max(length) - shape = [len(length), max_len] + list(data.shape[1:]) - padded = np.ones(shape).astype(data.dtype) - offset = 0 - for i, l in enumerate(length): - padded[i, 0:l] = data[offset : offset + l] - offset += l - return padded - - def set_test_data_1(self): - # Fix the unittest by: add padding tensor in inputs - SEQ_NUM = 3 - TAG_NUM = 17 - MAX_SEQ_LEN = 5 - - # the linear_chain_crf operator only supports sequence (LoD level = 1) - lod = [[]] - seq_start_pos = [0] - for i in range(SEQ_NUM): - lod[-1].append(random.randint(1, MAX_SEQ_LEN)) - seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1]) - emission = np.random.uniform( - -1, 1, [seq_start_pos[-1], TAG_NUM] - ).astype("float64") - emission_row_max = np.amax(emission, axis=1, keepdims=True) - emission_exps = 
np.exp(emission - emission_row_max) - transition = np.random.uniform( - -0.5, 0.5, [TAG_NUM + 2, TAG_NUM] - ).astype("float64") - transition_exps = np.exp(transition) - - labels = np.random.randint( - low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64" - ) - self.inputs = { - "Emission": self.seq_pad(emission, lod[0]), - "Transition": transition, - "Label": self.seq_pad(labels, lod[0]), - "Length": np.array(lod).astype("int64"), - } - crf = LinearChainCrfForward( - seq_start_pos, - emission, - emission_row_max, - emission_exps, - transition, - transition_exps, - labels, - ) - alpha, log_likelihood = crf.crf_forward_compute() - self.outputs = { - "Alpha": self.seq_pad(alpha, lod[0]), - "EmissionExps": self.seq_pad_exps(emission_exps, lod[0]), - "TransitionExps": transition_exps, - "LogLikelihood": log_likelihood, - } - - def setUp(self): - self.op_type = "linear_chain_crf" - self.set_test_data_1() - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(["Emission", "Transition"], "LogLikelihood") - - def test_check_grad_ignore_transition(self): - self.check_grad( - ["Emission"], "LogLikelihood", no_grad_set=set("Transition") - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_minus_op.py b/test/legacy_test/test_minus_op.py deleted file mode 100644 index 26d01a179ff46..0000000000000 --- a/test/legacy_test/test_minus_op.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - -import paddle - - -class TestMinusOp(OpTest): - def setUp(self): - self.op_type = "minus" - self.inputs = { - 'X': np.random.random((32, 84)).astype("float32"), - 'Y': np.random.random((32, 84)).astype("float32"), - } - self.outputs = {'Out': (self.inputs['X'] - self.inputs['Y'])} - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X', 'Y'], 'Out', check_dygraph=False) - - -if __name__ == "__main__": - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/test_precision_recall_op.py b/test/legacy_test/test_precision_recall_op.py deleted file mode 100644 index 97f3d7e7724a4..0000000000000 --- a/test/legacy_test/test_precision_recall_op.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -def calc_precision(tp_count, fp_count): - if tp_count > 0.0 or fp_count > 0.0: - return tp_count / (tp_count + fp_count) - return 1.0 - - -def calc_recall(tp_count, fn_count): - if tp_count > 0.0 or fn_count > 0.0: - return tp_count / (tp_count + fn_count) - return 1.0 - - -def calc_f1_score(precision, recall): - if precision > 0.0 or recall > 0.0: - return 2 * precision * recall / (precision + recall) - return 0.0 - - -def get_states(idxs, labels, cls_num, weights=None): - ins_num = idxs.shape[0] - # TP FP TN FN - states = np.zeros((cls_num, 4)).astype('float32') - for i in range(ins_num): - w = weights[i] if weights is not None else 1.0 - idx = idxs[i][0] - label = labels[i][0] - if idx == label: - states[idx][0] += w - for j in range(cls_num): - states[j][2] += w - states[idx][2] -= w - else: - states[label][3] += w - states[idx][1] += w - for j in range(cls_num): - states[j][2] += w - states[label][2] -= w - states[idx][2] -= w - return states - - -def compute_metrics(states, cls_num): - total_tp_count = 0.0 - total_fp_count = 0.0 - total_fn_count = 0.0 - macro_avg_precision = 0.0 - macro_avg_recall = 0.0 - for i in range(cls_num): - total_tp_count += states[i][0] - total_fp_count += states[i][1] - total_fn_count += states[i][3] - macro_avg_precision += calc_precision(states[i][0], states[i][1]) - macro_avg_recall += calc_recall(states[i][0], states[i][3]) - metrics = [] - macro_avg_precision /= cls_num - macro_avg_recall /= cls_num - metrics.append(macro_avg_precision) - metrics.append(macro_avg_recall) - metrics.append(calc_f1_score(macro_avg_precision, macro_avg_recall)) - micro_avg_precision = calc_precision(total_tp_count, total_fp_count) - metrics.append(micro_avg_precision) - micro_avg_recall = calc_recall(total_tp_count, total_fn_count) - metrics.append(micro_avg_recall) - metrics.append(calc_f1_score(micro_avg_precision, micro_avg_recall)) - return np.array(metrics).astype('float32') - - -class TestPrecisionRecallOp_0(OpTest): - def setUp(self): - self.op_type = "precision_recall" - ins_num = 64 - cls_num = 10 - max_probs = np.random.uniform(0, 1.0, (ins_num, 1)).astype('float32') - idxs = ( - np.random.choice(range(cls_num), ins_num) - .reshape((ins_num, 1)) - .astype('int32') - ) - labels = ( - np.random.choice(range(cls_num), ins_num) - .reshape((ins_num, 1)) - .astype('int32') - ) - states = get_states(idxs, labels, cls_num) - metrics = compute_metrics(states, cls_num) - - self.attrs = {'class_number': cls_num} - - self.inputs = {'MaxProbs': max_probs, 'Indices': idxs, 'Labels': labels} - - self.outputs = { - 'BatchMetrics': metrics, - 'AccumMetrics': metrics, - 'AccumStatesInfo': states, - } - - def test_check_output(self): - self.check_output() - - -class TestPrecisionRecallOp_1(OpTest): - def setUp(self): - self.op_type = "precision_recall" - ins_num = 64 - cls_num = 10 - max_probs = np.random.uniform(0, 1.0, (ins_num, 1)).astype('float32') - idxs = ( - np.random.choice(range(cls_num), ins_num) - .reshape((ins_num, 1)) - .astype('int32') - ) - weights = np.random.uniform(0, 1.0, (ins_num, 1)).astype('float32') - labels = ( - np.random.choice(range(cls_num), ins_num) - .reshape((ins_num, 1)) - .astype('int32') - ) - - states = get_states(idxs, labels, cls_num, weights) - metrics = compute_metrics(states, cls_num) - - self.attrs = {'class_number': cls_num} - - self.inputs = { - 'MaxProbs': 
max_probs, - 'Indices': idxs, - 'Labels': labels, - 'Weights': weights, - } - - self.outputs = { - 'BatchMetrics': metrics, - 'AccumMetrics': metrics, - 'AccumStatesInfo': states, - } - - def test_check_output(self): - self.check_output() - - -class TestPrecisionRecallOp_2(OpTest): - def setUp(self): - self.op_type = "precision_recall" - ins_num = 64 - cls_num = 10 - max_probs = np.random.uniform(0, 1.0, (ins_num, 1)).astype('float32') - idxs = ( - np.random.choice(range(cls_num), ins_num) - .reshape((ins_num, 1)) - .astype('int32') - ) - weights = np.random.uniform(0, 1.0, (ins_num, 1)).astype('float32') - labels = ( - np.random.choice(range(cls_num), ins_num) - .reshape((ins_num, 1)) - .astype('int32') - ) - states = np.random.randint(0, 30, (cls_num, 4)).astype('float32') - - accum_states = get_states(idxs, labels, cls_num, weights) - batch_metrics = compute_metrics(accum_states, cls_num) - accum_states += states - accum_metrics = compute_metrics(accum_states, cls_num) - - self.attrs = {'class_number': cls_num} - - self.inputs = { - 'MaxProbs': max_probs, - 'Indices': idxs, - 'Labels': labels, - 'Weights': weights, - 'StatesInfo': states, - } - - self.outputs = { - 'BatchMetrics': batch_metrics, - 'AccumMetrics': accum_metrics, - 'AccumStatesInfo': accum_states, - } - - def test_check_output(self): - self.check_output() - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_queue.py b/test/legacy_test/test_queue.py deleted file mode 100644 index 5a1cbd53d43aa..0000000000000 --- a/test/legacy_test/test_queue.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -import paddle -from paddle import base -from paddle.base import core - - -class TestQueue(unittest.TestCase): - def test_eq(self): - """ - test queue_generator op, enqueue op and dequeue op. 
- """ - - main_program = base.Program() - startup_program = base.Program() - value = np.random.rand(1) - with base.program_guard(main_program, startup_program): - data_in = paddle.static.create_global_var( - shape=[2, 3], - value=value, - dtype="float32", - persistable=True, - name='var_in', - ) - data_out = paddle.static.create_global_var( - shape=[2, 3], - value=value - 1.0, - dtype="float32", - persistable=True, - name='var_out', - ) - startup_block = startup_program.block(0) - queue_name = 'blocking_queue' - startup_block.create_var( - name=queue_name, persistable=True, type=core.VarDesc.VarType.RAW - ) - startup_block.append_op( - type="queue_generator", attrs={'names': [queue_name]} - ) - block = main_program.block(0) - block.append_op( - type='enqueue', - inputs={'X': data_in}, - attrs={'queue_name': queue_name}, - ) - block.append_op( - type='dequeue', - outputs={'Out': [data_out]}, - attrs={'queue_name': queue_name}, - ) - - place = ( - base.CUDAPlace(0) - if core.is_compiled_with_cuda() - else base.CPUPlace() - ) - exe = base.Executor(place) - exe.run(startup_program) - (ret,) = exe.run(main_program, fetch_list=[data_out.name]) - np.testing.assert_allclose( - np.asarray(ret), np.full((2, 3), value, np.float32), rtol=1e-05 - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_rank_attention_op.py b/test/legacy_test/test_rank_attention_op.py deleted file mode 100644 index 514463b0cbae4..0000000000000 --- a/test/legacy_test/test_rank_attention_op.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import random -import unittest - -import numpy as np -from op_test import OpTest - -from paddle.base import core - - -def gen_input_help(input, rank_offset, max_rank, max_size): - input_row, input_col = input.shape - max_ins = np.max((max_size, input_row)) - input_help = np.zeros(max_ins * max_rank * input_col) - ins_rank = np.zeros((max_ins, 1)) - ins_rank.fill(-1) - - output_col = max_rank * input_col - output_row = input_row - - for idx in range(output_col * output_row): - output_col_idx = idx % output_col - output_row_idx = int(idx / output_col) - k = int(output_col_idx / input_col) - faster = rank_offset[output_row_idx, 2 * k + 1] - 1 - - if output_col_idx == 0: - ins_rank[output_row_idx] = rank_offset[output_row_idx, 0] - - if rank_offset[output_row_idx, 0] - 1 < 0 or faster < 0: - continue - - rank_input_col_idx = output_col_idx % input_col - index = rank_offset[output_row_idx, 2 * k + 2] - input_help[idx] = input[index, rank_input_col_idx] - input_help = input_help.reshape([max_ins, max_rank * input_col]) - - return input_help, ins_rank - - -def gen_param_help(input, rank_offset, param, max_rank): - input_row, input_col = input.shape - rank_offset_row, rank_offset_col = rank_offset.shape - param_row, param_col = param.shape - - block_matrix_row = input_col * max_rank - - output_param_row = block_matrix_row * input_row - output_param_col = param_col - - output_param = np.zeros((output_param_row * output_param_col,)) - - for idx in range(output_param_row * output_param_col): - output_col_idx = idx % output_param_col - output_row_idx = int(idx / output_param_col) - ins_idx = int(output_row_idx / block_matrix_row) - start_offset = output_row_idx % block_matrix_row - k = int(start_offset / input_col) - k_offset = start_offset % input_col - - lower = rank_offset[ins_idx, 0] - 1 - faster = rank_offset[ins_idx, 2 * k + 1] - 1 - if lower < 0 or faster < 0: - continue - start = lower * max_rank + faster - ori_idx = ( - start * param_col * input_col - + k_offset * param_col - + output_col_idx - ) - output_param[idx] = param[int(ori_idx / param_col), ori_idx % param_col] - - output_param = output_param.reshape([output_param_row, output_param_col]) - return output_param - - -def np_rank_attention(input, rank_offset, rank_para, max_rank, max_size): - input_row, input_col = input.shape - rank_offset_row, rank_offset_col = rank_offset.shape - rank_para_row, rank_para_col = rank_para.shape - - assert input_row == rank_offset_row - assert max_rank == ((rank_offset_col - 1) / 2) - assert rank_para_row == max_rank * max_rank * input_col - - input_help, ins_rank = gen_input_help( - input, rank_offset, max_rank, max_size - ) - param_help = gen_param_help(input, rank_offset, rank_para, max_rank) - block_matrix_row = input_col * max_rank - - res = np.zeros((input_row, rank_para_col)) - for ins in range(input_row): - res[ins, :] = np.dot( - input_help[ins, :], - param_help[ - int(block_matrix_row * ins) : int(block_matrix_row * (ins + 1)), - :, - ], - ) - return res, input_help, param_help, ins_rank - - -def gen_rank_offset(pv_nums, max_rank): - all_ins_num = 0 - pv_rank_msg = [] - for _ in range(pv_nums): - ins_pv = np.random.randint(1, max_rank + 2) # 1~4 - rank_list = list(range(1, ins_pv + 1)) - random.shuffle(rank_list) - all_ins_num = all_ins_num + ins_pv - pv_rank_msg.append(rank_list) - - rank_offset = np.zeros((all_ins_num, max_rank * 2 + 1)).astype("int32") - rank_offset.fill(-1) - index = 0 - for pv_number in range(len(pv_rank_msg)): - pv_ins = pv_rank_msg[pv_number] - ad_num = len(pv_ins) - 
index_start = index - - for j in range(ad_num): - rank = -1 - if pv_ins[j] <= max_rank: - rank = pv_ins[j] - rank_offset[index, 0] = rank - - if rank > 0: - for k in range(ad_num): - fast_rank = -1 - if pv_ins[k] <= max_rank: - fast_rank = pv_ins[k] - if fast_rank > 0: - m = fast_rank - 1 - rank_offset[index, 2 * m + 1] = pv_ins[k] - rank_offset[index, 2 * m + 2] = index_start + k - index = index + 1 - return all_ins_num, rank_offset - - -class TestRankAttentionOpComplex(OpTest): - def config(self): - self.pv_num = 100 - self.x_feat = 10 - self.y_feat = 15 - self.max_rank = 3 - self.dtype = "float64" - - def setUp(self): - self.op_type = "rank_attention" - self.config() - ins_num, rank_offset = gen_rank_offset(self.pv_num, self.max_rank) - input = np.random.random((ins_num, self.x_feat)).astype(self.dtype) - rank_para_shape = [ - self.max_rank * self.max_rank * self.x_feat, - self.y_feat, - ] - rank_para = np.random.random(rank_para_shape).astype(self.dtype) - np_out, np_input_help, np_param_help, np_ins_rank = np_rank_attention( - input, - np.array(rank_offset), - rank_para, - self.max_rank, - self.pv_num * 7, - ) - self.inputs = { - "X": input, - "RankOffset": np.array(rank_offset).astype("int32"), - "RankParam": rank_para, - } - self.attrs = {'MaxRank': self.max_rank, 'MaxSize': self.pv_num * 7} - self.outputs = { - "Out": np_out, - "InputHelp": np_input_help, - "InsRank": np_ins_rank, - } - - def test_check_output_gpu(self): - if core.is_compiled_with_cuda(): - self.check_output_with_place(core.CUDAPlace(0)) - - def test_check_grad_gpu(self): - if core.is_compiled_with_cuda(): - self.check_grad_with_place(core.CUDAPlace(0), ["RankParam"], "Out") - - -class TestRankAttentionOpCpu(OpTest): - def config(self): - self.pv_num = 100 - self.x_feat = 10 - self.y_feat = 15 - self.max_rank = 3 - self.dtype = "float64" - - def setUp(self): - self.op_type = "rank_attention" - self.config() - ins_num, rank_offset = gen_rank_offset(self.pv_num, self.max_rank) - input = np.random.random((ins_num, self.x_feat)).astype(self.dtype) - rank_para_shape = [ - self.max_rank * self.max_rank * self.x_feat, - self.y_feat, - ] - rank_para = np.random.random(rank_para_shape).astype(self.dtype) - np_out, np_input_help, np_param_help, np_ins_rank = np_rank_attention( - input, - np.array(rank_offset), - rank_para, - self.max_rank, - self.pv_num * 7, - ) - self.inputs = { - "X": input, - "RankOffset": np.array(rank_offset).astype("int32"), - "RankParam": rank_para, - } - self.attrs = {'MaxRank': self.max_rank, 'MaxSize': self.pv_num * 7} - self.outputs = { - "Out": np_out, - "InputHelp": np_input_help, - "InsRank": np_ins_rank, - } - - def test_check_output_cpu(self): - try: - self.check_output_with_place(place=core.CPUPlace()) - except: - print("do not support cpu test, skip") - - def test_check_grad_cpu(self): - try: - self.check_grad_with_place(core.CPUPlace(), ["RankParam"], "Out") - except: - print("do not support cpu test, skip") - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_retinanet_detection_output.py b/test/legacy_test/test_retinanet_detection_output.py deleted file mode 100644 index a120dfd50eefc..0000000000000 --- a/test/legacy_test/test_retinanet_detection_output.py +++ /dev/null @@ -1,511 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License") -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import unittest - -import numpy as np -from op_test import OpTest -from test_anchor_generator_op import anchor_generator_in_python -from test_multiclass_nms_op import nms - -import paddle - - -def multiclass_nms(prediction, class_num, keep_top_k, nms_threshold): - selected_indices = {} - num_det = 0 - for c in range(class_num): - if c not in prediction.keys(): - continue - cls_dets = prediction[c] - all_scores = np.zeros(len(cls_dets)) - for i in range(all_scores.shape[0]): - all_scores[i] = cls_dets[i][4] - indices = nms(cls_dets, all_scores, 0.0, nms_threshold, -1, False, 1.0) - selected_indices[c] = indices - num_det += len(indices) - - score_index = [] - for c, indices in selected_indices.items(): - for idx in indices: - score_index.append((prediction[c][idx][4], c, idx)) - - sorted_score_index = sorted( - score_index, key=lambda tup: tup[0], reverse=True - ) - if keep_top_k > -1 and num_det > keep_top_k: - sorted_score_index = sorted_score_index[:keep_top_k] - num_det = keep_top_k - nmsed_outs = [] - for s, c, idx in sorted_score_index: - xmin = prediction[c][idx][0] - ymin = prediction[c][idx][1] - xmax = prediction[c][idx][2] - ymax = prediction[c][idx][3] - nmsed_outs.append([c + 1, s, xmin, ymin, xmax, ymax]) - - return nmsed_outs, num_det - - -def retinanet_detection_out( - boxes_list, - scores_list, - anchors_list, - im_info, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, -): - class_num = scores_list[0].shape[-1] - im_height, im_width, im_scale = im_info - - num_level = len(scores_list) - prediction = {} - for lvl in range(num_level): - scores_per_level = scores_list[lvl] - scores_per_level = scores_per_level.flatten() - bboxes_per_level = boxes_list[lvl] - bboxes_per_level = bboxes_per_level.flatten() - anchors_per_level = anchors_list[lvl] - anchors_per_level = anchors_per_level.flatten() - - thresh = score_threshold if lvl < (num_level - 1) else 0.0 - selected_indices = np.argwhere(scores_per_level > thresh) - scores = scores_per_level[selected_indices] - sorted_indices = np.argsort(-scores, axis=0, kind='mergesort') - if nms_top_k > -1 and nms_top_k < sorted_indices.shape[0]: - sorted_indices = sorted_indices[:nms_top_k] - - for i in range(sorted_indices.shape[0]): - idx = selected_indices[sorted_indices[i]] - idx = idx[0][0] - a = int(idx / class_num) - c = int(idx % class_num) - box_offset = a * 4 - anchor_box_width = ( - anchors_per_level[box_offset + 2] - - anchors_per_level[box_offset] - + 1 - ) - anchor_box_height = ( - anchors_per_level[box_offset + 3] - - anchors_per_level[box_offset + 1] - + 1 - ) - anchor_box_center_x = ( - anchors_per_level[box_offset] + anchor_box_width / 2 - ) - anchor_box_center_y = ( - anchors_per_level[box_offset + 1] + anchor_box_height / 2 - ) - - target_box_center_x = ( - bboxes_per_level[box_offset] * anchor_box_width - + anchor_box_center_x - ) - target_box_center_y = ( - bboxes_per_level[box_offset + 1] * anchor_box_height - + anchor_box_center_y - ) - target_box_width = ( - math.exp(bboxes_per_level[box_offset + 2]) * anchor_box_width - ) - target_box_height = ( - 
math.exp(bboxes_per_level[box_offset + 3]) * anchor_box_height - ) - - pred_box_xmin = target_box_center_x - target_box_width / 2 - pred_box_ymin = target_box_center_y - target_box_height / 2 - pred_box_xmax = target_box_center_x + target_box_width / 2 - 1 - pred_box_ymax = target_box_center_y + target_box_height / 2 - 1 - - pred_box_xmin = pred_box_xmin / im_scale - pred_box_ymin = pred_box_ymin / im_scale - pred_box_xmax = pred_box_xmax / im_scale - pred_box_ymax = pred_box_ymax / im_scale - - pred_box_xmin = max( - min(pred_box_xmin, np.round(im_width / im_scale) - 1), 0.0 - ) - pred_box_ymin = max( - min(pred_box_ymin, np.round(im_height / im_scale) - 1), 0.0 - ) - pred_box_xmax = max( - min(pred_box_xmax, np.round(im_width / im_scale) - 1), 0.0 - ) - pred_box_ymax = max( - min(pred_box_ymax, np.round(im_height / im_scale) - 1), 0.0 - ) - - if c not in prediction.keys(): - prediction[c] = [] - prediction[c].append( - [ - pred_box_xmin, - pred_box_ymin, - pred_box_xmax, - pred_box_ymax, - scores_per_level[idx], - ] - ) - - nmsed_outs, nmsed_num = multiclass_nms( - prediction, class_num, keep_top_k, nms_threshold - ) - return nmsed_outs, nmsed_num - - -def batched_retinanet_detection_out( - boxes, - scores, - anchors, - im_info, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, -): - batch_size = scores[0].shape[0] - det_outs = [] - lod = [] - - for n in range(batch_size): - boxes_per_batch = [] - scores_per_batch = [] - - num_level = len(scores) - for lvl in range(num_level): - boxes_per_batch.append(boxes[lvl][n]) - scores_per_batch.append(scores[lvl][n]) - - nmsed_outs, nmsed_num = retinanet_detection_out( - boxes_per_batch, - scores_per_batch, - anchors, - im_info[n], - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - ) - lod.append(nmsed_num) - if nmsed_num == 0: - continue - - det_outs.extend(nmsed_outs) - return det_outs, lod - - -class TestRetinanetDetectionOutOp1(OpTest): - def set_argument(self): - self.score_threshold = 0.05 - self.min_level = 3 - self.max_level = 7 - self.nms_threshold = 0.3 - self.nms_top_k = 1000 - self.keep_top_k = 200 - - self.scales_per_octave = 3 - self.aspect_ratios = [1.0, 2.0, 0.5] - self.anchor_scale = 4 - self.anchor_strides = [8, 16, 32, 64, 128] - - self.box_size = 4 - self.class_num = 80 - self.batch_size = 1 - self.input_channels = 20 - - self.layer_h = [] - self.layer_w = [] - num_levels = self.max_level - self.min_level + 1 - for i in range(num_levels): - self.layer_h.append(2 ** (num_levels - i)) - self.layer_w.append(2 ** (num_levels - i)) - - def init_test_input(self): - anchor_num = len(self.aspect_ratios) * self.scales_per_octave - num_levels = self.max_level - self.min_level + 1 - self.scores_list = [] - self.bboxes_list = [] - self.anchors_list = [] - - for i in range(num_levels): - layer_h = self.layer_h[i] - layer_w = self.layer_w[i] - - input_feat = np.random.random( - (self.batch_size, self.input_channels, layer_h, layer_w) - ).astype('float32') - score = np.random.random( - (self.batch_size, self.class_num * anchor_num, layer_h, layer_w) - ).astype('float32') - score = np.transpose(score, [0, 2, 3, 1]) - score = score.reshape((self.batch_size, -1, self.class_num)) - box = np.random.random( - (self.batch_size, self.box_size * anchor_num, layer_h, layer_w) - ).astype('float32') - box = np.transpose(box, [0, 2, 3, 1]) - box = box.reshape((self.batch_size, -1, self.box_size)) - anchor_sizes = [] - for octave in range(self.scales_per_octave): - anchor_sizes.append( - float(self.anchor_strides[i] * 
(2**octave)) - / float(self.scales_per_octave) - * self.anchor_scale - ) - anchor, var = anchor_generator_in_python( - input_feat=input_feat, - anchor_sizes=anchor_sizes, - aspect_ratios=self.aspect_ratios, - variances=[1.0, 1.0, 1.0, 1.0], - stride=[self.anchor_strides[i], self.anchor_strides[i]], - offset=0.5, - ) - anchor = np.reshape(anchor, [-1, 4]) - self.scores_list.append(score.astype('float32')) - self.bboxes_list.append(box.astype('float32')) - self.anchors_list.append(anchor.astype('float32')) - - self.im_info = np.array([[256.0, 256.0, 1.5]]).astype( - 'float32' - ) # im_height, im_width, scale - - def setUp(self): - self.set_argument() - self.init_test_input() - - nmsed_outs, lod = batched_retinanet_detection_out( - self.bboxes_list, - self.scores_list, - self.anchors_list, - self.im_info, - self.score_threshold, - self.nms_threshold, - self.nms_top_k, - self.keep_top_k, - ) - nmsed_outs = np.array(nmsed_outs).astype('float32') - self.op_type = 'retinanet_detection_output' - self.inputs = { - 'BBoxes': [ - ('b0', self.bboxes_list[0]), - ('b1', self.bboxes_list[1]), - ('b2', self.bboxes_list[2]), - ('b3', self.bboxes_list[3]), - ('b4', self.bboxes_list[4]), - ], - 'Scores': [ - ('s0', self.scores_list[0]), - ('s1', self.scores_list[1]), - ('s2', self.scores_list[2]), - ('s3', self.scores_list[3]), - ('s4', self.scores_list[4]), - ], - 'Anchors': [ - ('a0', self.anchors_list[0]), - ('a1', self.anchors_list[1]), - ('a2', self.anchors_list[2]), - ('a3', self.anchors_list[3]), - ('a4', self.anchors_list[4]), - ], - 'ImInfo': ( - self.im_info, - [ - [ - 1, - ] - ], - ), - } - self.outputs = {'Out': (nmsed_outs, [lod])} - self.attrs = { - 'score_threshold': self.score_threshold, - 'nms_top_k': self.nms_top_k, - 'nms_threshold': self.nms_threshold, - 'keep_top_k': self.keep_top_k, - 'nms_eta': 1.0, - } - - def test_check_output(self): - self.check_output() - - -class TestRetinanetDetectionOutOp2(OpTest): - def set_argument(self): - self.score_threshold = 0.05 - self.min_level = 3 - self.max_level = 7 - self.nms_threshold = 0.3 - self.nms_top_k = 1000 - self.keep_top_k = 200 - - self.scales_per_octave = 3 - self.aspect_ratios = [1.0, 2.0, 0.5] - self.anchor_scale = 4 - self.anchor_strides = [8, 16, 32, 64, 128] - - self.box_size = 4 - self.class_num = 80 - self.batch_size = 1 - self.input_channels = 20 - # Here test the case there the shape of each FPN level - # is irrelevant. - self.layer_h = [1, 4, 8, 8, 16] - self.layer_w = [1, 4, 8, 8, 16] - - -class TestRetinanetDetectionOutOpNo3(TestRetinanetDetectionOutOp1): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. 
- # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - self.min_level = 3 - self.max_level = 7 - self.nms_threshold = 0.3 - self.nms_top_k = 1000 - self.keep_top_k = 200 - - self.scales_per_octave = 3 - self.aspect_ratios = [1.0, 2.0, 0.5] - self.anchor_scale = 4 - self.anchor_strides = [8, 16, 32, 64, 128] - - self.box_size = 4 - self.class_num = 80 - self.batch_size = 1 - self.input_channels = 20 - - self.layer_h = [] - self.layer_w = [] - num_levels = self.max_level - self.min_level + 1 - for i in range(num_levels): - self.layer_h.append(2 ** (num_levels - i)) - self.layer_w.append(2 ** (num_levels - i)) - - -class TestRetinanetDetectionOutOpNo4(TestRetinanetDetectionOutOp1): - def set_argument(self): - self.score_threshold = 0.05 - self.min_level = 2 - self.max_level = 5 - self.nms_threshold = 0.3 - self.nms_top_k = 1000 - self.keep_top_k = 200 - - self.scales_per_octave = 3 - self.aspect_ratios = [1.0, 2.0, 0.5] - self.anchor_scale = 4 - self.anchor_strides = [8, 16, 32, 64, 128] - - self.box_size = 4 - self.class_num = 80 - self.batch_size = 1 - self.input_channels = 20 - - self.layer_h = [] - self.layer_w = [] - num_levels = self.max_level - self.min_level + 1 - for i in range(num_levels): - self.layer_h.append(2 ** (num_levels - i)) - self.layer_w.append(2 ** (num_levels - i)) - - def setUp(self): - self.set_argument() - self.init_test_input() - - nmsed_outs, lod = batched_retinanet_detection_out( - self.bboxes_list, - self.scores_list, - self.anchors_list, - self.im_info, - self.score_threshold, - self.nms_threshold, - self.nms_top_k, - self.keep_top_k, - ) - nmsed_outs = np.array(nmsed_outs).astype('float32') - self.op_type = 'retinanet_detection_output' - self.inputs = { - 'BBoxes': [ - ('b0', self.bboxes_list[0]), - ('b1', self.bboxes_list[1]), - ('b2', self.bboxes_list[2]), - ('b3', self.bboxes_list[3]), - ], - 'Scores': [ - ('s0', self.scores_list[0]), - ('s1', self.scores_list[1]), - ('s2', self.scores_list[2]), - ('s3', self.scores_list[3]), - ], - 'Anchors': [ - ('a0', self.anchors_list[0]), - ('a1', self.anchors_list[1]), - ('a2', self.anchors_list[2]), - ('a3', self.anchors_list[3]), - ], - 'ImInfo': ( - self.im_info, - [ - [ - 1, - ] - ], - ), - } - self.outputs = {'Out': (nmsed_outs, [lod])} - self.attrs = { - 'score_threshold': self.score_threshold, - 'nms_top_k': self.nms_top_k, - 'nms_threshold': self.nms_threshold, - 'keep_top_k': self.keep_top_k, - 'nms_eta': 1.0, - } - - def test_check_output(self): - self.check_output() - - -class TestRetinanetDetectionOutOpNo5(TestRetinanetDetectionOutOp1): - def set_argument(self): - self.score_threshold = 0.05 - self.min_level = 3 - self.max_level = 7 - self.nms_threshold = 0.3 - self.nms_top_k = 100 - self.keep_top_k = 10 - - self.scales_per_octave = 3 - self.aspect_ratios = [1.0, 2.0, 0.5] - self.anchor_scale = 4 - self.anchor_strides = [8, 16, 32, 64, 128] - - self.box_size = 4 - self.class_num = 80 - self.batch_size = 1 - self.input_channels = 20 - - self.layer_h = [] - self.layer_w = [] - num_levels = self.max_level - self.min_level + 1 - for i in range(num_levels): - self.layer_h.append(2 ** (num_levels - i)) - self.layer_w.append(2 ** (num_levels - i)) - - -if __name__ == '__main__': - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/test_rpn_target_assign_op.py b/test/legacy_test/test_rpn_target_assign_op.py deleted file mode 100644 index d0147d8b700f1..0000000000000 --- a/test/legacy_test/test_rpn_target_assign_op.py +++ /dev/null @@ -1,486 +0,0 @@ -# Copyright (c) 2018 
PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest -from test_anchor_generator_op import anchor_generator_in_python -from test_generate_proposal_labels_op import ( - _bbox_overlaps, - _box_to_delta, - _generate_groundtruth, -) - - -def rpn_target_assign( - anchor_by_gt_overlap, - rpn_batch_size_per_im, - rpn_positive_overlap, - rpn_negative_overlap, - rpn_fg_fraction, - use_random=True, -): - anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) - anchor_to_gt_max = anchor_by_gt_overlap[ - np.arange(anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax - ] - - gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) - gt_to_anchor_max = anchor_by_gt_overlap[ - gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1]) - ] - anchors_with_max_overlap = np.where( - anchor_by_gt_overlap == gt_to_anchor_max - )[0] - - labels = np.ones((anchor_by_gt_overlap.shape[0],), dtype=np.int32) * -1 - labels[anchors_with_max_overlap] = 1 - labels[anchor_to_gt_max >= rpn_positive_overlap] = 1 - - num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im) - fg_inds = np.where(labels == 1)[0] - if len(fg_inds) > num_fg and use_random: - disable_inds = np.random.choice( - fg_inds, size=(len(fg_inds) - num_fg), replace=False - ) - else: - disable_inds = fg_inds[num_fg:] - - labels[disable_inds] = -1 - fg_inds = np.where(labels == 1)[0] - bbox_inside_weight = np.zeros((len(fg_inds), 4), dtype=np.float32) - - num_bg = rpn_batch_size_per_im - np.sum(labels == 1) - bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0] - if len(bg_inds) > num_bg and use_random: - enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)] - else: - enable_inds = bg_inds[:num_bg] - - fg_fake_inds = np.array([], np.int32) - fg_value = np.array([fg_inds[0]], np.int32) - fake_num = 0 - for bg_id in enable_inds: - if bg_id in fg_inds: - fake_num += 1 - fg_fake_inds = np.hstack([fg_fake_inds, fg_value]) - labels[enable_inds] = 0 - - bbox_inside_weight[fake_num:, :] = 1 - fg_inds = np.where(labels == 1)[0] - bg_inds = np.where(labels == 0)[0] - loc_index = np.hstack([fg_fake_inds, fg_inds]) - score_index = np.hstack([fg_inds, bg_inds]) - labels = labels[score_index] - assert not np.any(labels == -1), "Wrong labels with -1" - - gt_inds = anchor_to_gt_argmax[loc_index] - - return loc_index, score_index, labels, gt_inds, bbox_inside_weight - - -def get_anchor(n, c, h, w): - input_feat = np.random.random((n, c, h, w)).astype('float32') - anchors, _ = anchor_generator_in_python( - input_feat=input_feat, - anchor_sizes=[32.0, 64.0], - aspect_ratios=[0.5, 1.0], - variances=[1.0, 1.0, 1.0, 1.0], - stride=[16.0, 16.0], - offset=0.5, - ) - return anchors - - -def rpn_target_assign_in_python( - all_anchors, - gt_boxes, - is_crowd, - im_info, - lod, - rpn_straddle_thresh, - rpn_batch_size_per_im, - rpn_positive_overlap, - rpn_negative_overlap, - rpn_fg_fraction, - use_random=True, -): - anchor_num = all_anchors.shape[0] - 
batch_size = len(lod) - 1 - for i in range(batch_size): - im_height = im_info[i][0] - im_width = im_info[i][1] - im_scale = im_info[i][2] - if rpn_straddle_thresh >= 0: - # Only keep anchors inside the image by a margin of straddle_thresh - inds_inside = np.where( - (all_anchors[:, 0] >= -rpn_straddle_thresh) - & (all_anchors[:, 1] >= -rpn_straddle_thresh) - & (all_anchors[:, 2] < im_width + rpn_straddle_thresh) - & (all_anchors[:, 3] < im_height + rpn_straddle_thresh) - )[0] - # keep only inside anchors - inside_anchors = all_anchors[inds_inside, :] - else: - inds_inside = np.arange(all_anchors.shape[0]) - inside_anchors = all_anchors - - b, e = lod[i], lod[i + 1] - gt_boxes_slice = gt_boxes[b:e, :] * im_scale - is_crowd_slice = is_crowd[b:e] - - not_crowd_inds = np.where(is_crowd_slice == 0)[0] - gt_boxes_slice = gt_boxes_slice[not_crowd_inds] - iou = _bbox_overlaps(inside_anchors, gt_boxes_slice) - - ( - loc_inds, - score_inds, - labels, - gt_inds, - bbox_inside_weight, - ) = rpn_target_assign( - iou, - rpn_batch_size_per_im, - rpn_positive_overlap, - rpn_negative_overlap, - rpn_fg_fraction, - use_random, - ) - # unmap to all anchor - loc_inds = inds_inside[loc_inds] - score_inds = inds_inside[score_inds] - - sampled_gt = gt_boxes_slice[gt_inds] - sampled_anchor = all_anchors[loc_inds] - box_deltas = _box_to_delta( - sampled_anchor, sampled_gt, [1.0, 1.0, 1.0, 1.0] - ) - - if i == 0: - loc_indexes = loc_inds - score_indexes = score_inds - tgt_labels = labels - tgt_bboxes = box_deltas - bbox_inside_weights = bbox_inside_weight - else: - loc_indexes = np.concatenate( - [loc_indexes, loc_inds + i * anchor_num] - ) - score_indexes = np.concatenate( - [score_indexes, score_inds + i * anchor_num] - ) - tgt_labels = np.concatenate([tgt_labels, labels]) - tgt_bboxes = np.vstack([tgt_bboxes, box_deltas]) - bbox_inside_weights = np.vstack( - [bbox_inside_weights, bbox_inside_weight] - ) - - return ( - loc_indexes, - score_indexes, - tgt_bboxes, - tgt_labels, - bbox_inside_weights, - ) - - -def retinanet_target_assign( - anchor_by_gt_overlap, gt_labels, positive_overlap, negative_overlap -): - anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) - anchor_to_gt_max = anchor_by_gt_overlap[ - np.arange(anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax - ] - - gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) - gt_to_anchor_max = anchor_by_gt_overlap[ - gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1]) - ] - anchors_with_max_overlap = np.where( - anchor_by_gt_overlap == gt_to_anchor_max - )[0] - - labels = np.ones((anchor_by_gt_overlap.shape[0],), dtype=np.int32) * -1 - labels[anchors_with_max_overlap] = 1 - labels[anchor_to_gt_max >= positive_overlap] = 1 - - fg_inds = np.where(labels == 1)[0] - bbox_inside_weight = np.zeros((len(fg_inds), 4), dtype=np.float32) - - bg_inds = np.where(anchor_to_gt_max < negative_overlap)[0] - enable_inds = bg_inds - - fg_fake_inds = np.array([], np.int32) - fg_value = np.array([fg_inds[0]], np.int32) - fake_num = 0 - for bg_id in enable_inds: - if bg_id in fg_inds: - fake_num += 1 - fg_fake_inds = np.hstack([fg_fake_inds, fg_value]) - labels[enable_inds] = 0 - - bbox_inside_weight[fake_num:, :] = 1 - fg_inds = np.where(labels == 1)[0] - bg_inds = np.where(labels == 0)[0] - loc_index = np.hstack([fg_fake_inds, fg_inds]) - score_index = np.hstack([fg_inds, bg_inds]) - score_index_tmp = np.hstack([fg_inds]) - labels = labels[score_index] - - gt_inds = anchor_to_gt_argmax[loc_index] - label_inds = anchor_to_gt_argmax[score_index_tmp] - labels[0 
: len(fg_inds)] = np.squeeze(gt_labels[label_inds]) - fg_num = len(fg_fake_inds) + len(fg_inds) + 1 - assert not np.any(labels == -1), "Wrong labels with -1" - return loc_index, score_index, labels, gt_inds, bbox_inside_weight, fg_num - - -def retinanet_target_assign_in_python( - all_anchors, - gt_boxes, - gt_labels, - is_crowd, - im_info, - lod, - positive_overlap, - negative_overlap, -): - anchor_num = all_anchors.shape[0] - batch_size = len(lod) - 1 - for i in range(batch_size): - im_scale = im_info[i][2] - - inds_inside = np.arange(all_anchors.shape[0]) - inside_anchors = all_anchors - b, e = lod[i], lod[i + 1] - gt_boxes_slice = gt_boxes[b:e, :] * im_scale - gt_labels_slice = gt_labels[b:e, :] - is_crowd_slice = is_crowd[b:e] - - not_crowd_inds = np.where(is_crowd_slice == 0)[0] - gt_boxes_slice = gt_boxes_slice[not_crowd_inds] - gt_labels_slice = gt_labels_slice[not_crowd_inds] - iou = _bbox_overlaps(inside_anchors, gt_boxes_slice) - - ( - loc_inds, - score_inds, - labels, - gt_inds, - bbox_inside_weight, - fg_num, - ) = retinanet_target_assign( - iou, gt_labels_slice, positive_overlap, negative_overlap - ) - # unmap to all anchor - loc_inds = inds_inside[loc_inds] - score_inds = inds_inside[score_inds] - - sampled_gt = gt_boxes_slice[gt_inds] - sampled_anchor = all_anchors[loc_inds] - box_deltas = _box_to_delta( - sampled_anchor, sampled_gt, [1.0, 1.0, 1.0, 1.0] - ) - - if i == 0: - loc_indexes = loc_inds - score_indexes = score_inds - tgt_labels = labels - tgt_bboxes = box_deltas - bbox_inside_weights = bbox_inside_weight - fg_nums = [[fg_num]] - else: - loc_indexes = np.concatenate( - [loc_indexes, loc_inds + i * anchor_num] - ) - score_indexes = np.concatenate( - [score_indexes, score_inds + i * anchor_num] - ) - tgt_labels = np.concatenate([tgt_labels, labels]) - tgt_bboxes = np.vstack([tgt_bboxes, box_deltas]) - bbox_inside_weights = np.vstack( - [bbox_inside_weights, bbox_inside_weight] - ) - fg_nums = np.concatenate([fg_nums, [[fg_num]]]) - - return ( - loc_indexes, - score_indexes, - tgt_bboxes, - tgt_labels, - bbox_inside_weights, - fg_nums, - ) - - -class TestRpnTargetAssignOp(OpTest): - def setUp(self): - n, c, h, w = 2, 4, 14, 14 - all_anchors = get_anchor(n, c, h, w) - gt_num = 10 - all_anchors = all_anchors.reshape(-1, 4) - anchor_num = all_anchors.shape[0] - - images_shape = [[64, 64], [64, 64]] - # images_shape = [[64, 64]] - groundtruth, lod = _generate_groundtruth(images_shape, 3, 4) - lod = [0, 4, 8] - # lod = [0, 4] - - im_info = np.ones((len(images_shape), 3)).astype(np.float32) - for i in range(len(images_shape)): - im_info[i, 0] = images_shape[i][0] - im_info[i, 1] = images_shape[i][1] - im_info[i, 2] = 0.8 # scale - gt_boxes = np.vstack([v['boxes'] for v in groundtruth]) - is_crowd = np.hstack([v['is_crowd'] for v in groundtruth]) - - all_anchors = all_anchors.astype('float32') - gt_boxes = gt_boxes.astype('float32') - - rpn_straddle_thresh = 0.0 - rpn_batch_size_per_im = 256 - rpn_positive_overlap = 0.7 - rpn_negative_overlap = 0.3 - rpn_fg_fraction = 0.5 - use_random = False - - ( - loc_index, - score_index, - tgt_bbox, - labels, - bbox_inside_weights, - ) = rpn_target_assign_in_python( - all_anchors, - gt_boxes, - is_crowd, - im_info, - lod, - rpn_straddle_thresh, - rpn_batch_size_per_im, - rpn_positive_overlap, - rpn_negative_overlap, - rpn_fg_fraction, - use_random, - ) - labels = labels[:, np.newaxis] - - self.op_type = "rpn_target_assign" - self.inputs = { - 'Anchor': all_anchors, - 'GtBoxes': (gt_boxes, [[4, 4]]), - 'IsCrowd': (is_crowd, [[4, 4]]), - 
'ImInfo': (im_info, [[1, 1]]), - } - self.attrs = { - 'rpn_batch_size_per_im': rpn_batch_size_per_im, - 'rpn_straddle_thresh': rpn_straddle_thresh, - 'rpn_positive_overlap': rpn_positive_overlap, - 'rpn_negative_overlap': rpn_negative_overlap, - 'rpn_fg_fraction': rpn_fg_fraction, - 'use_random': use_random, - } - self.outputs = { - 'LocationIndex': loc_index.astype('int32'), - 'ScoreIndex': score_index.astype('int32'), - 'TargetBBox': tgt_bbox.astype('float32'), - 'TargetLabel': labels.astype('int32'), - 'BBoxInsideWeight': bbox_inside_weights.astype('float32'), - } - - def test_check_output(self): - self.check_output() - - -class TestRetinanetTargetAssignOp(OpTest): - def setUp(self): - n, c, h, w = 2, 4, 14, 14 - all_anchors = get_anchor(n, c, h, w) - gt_num = 10 - all_anchors = all_anchors.reshape(-1, 4) - anchor_num = all_anchors.shape[0] - - images_shape = [[64, 64], [64, 64]] - groundtruth, lod = _generate_groundtruth(images_shape, 3, 4) - lod = [0, 4, 8] - - im_info = np.ones((len(images_shape), 3)).astype(np.float32) - for i in range(len(images_shape)): - im_info[i, 0] = images_shape[i][0] - im_info[i, 1] = images_shape[i][1] - im_info[i, 2] = 0.8 # scale - gt_boxes = np.vstack([v['boxes'] for v in groundtruth]) - is_crowd = np.hstack([v['is_crowd'] for v in groundtruth]) - gt_labels = np.vstack( - [ - v['gt_classes'].reshape(len(v['gt_classes']), 1) - for v in groundtruth - ] - ) - gt_labels = gt_labels.reshape(len(gt_labels), 1) - all_anchors = all_anchors.astype('float32') - gt_boxes = gt_boxes.astype('float32') - gt_labels = gt_labels.astype('int32') - - positive_overlap = 0.5 - negative_overlap = 0.4 - - ( - loc_index, - score_index, - tgt_bbox, - labels, - bbox_inside_weights, - fg_num, - ) = retinanet_target_assign_in_python( - all_anchors, - gt_boxes, - gt_labels, - is_crowd, - im_info, - lod, - positive_overlap, - negative_overlap, - ) - labels = labels[:, np.newaxis] - self.op_type = "retinanet_target_assign" - self.inputs = { - 'Anchor': all_anchors, - 'GtBoxes': (gt_boxes, [[4, 4]]), - 'GtLabels': (gt_labels, [[4, 4]]), - 'IsCrowd': (is_crowd, [[4, 4]]), - 'ImInfo': (im_info, [[1, 1]]), - } - self.attrs = { - 'positive_overlap': positive_overlap, - 'negative_overlap': negative_overlap, - } - self.outputs = { - 'LocationIndex': loc_index.astype('int32'), - 'ScoreIndex': score_index.astype('int32'), - 'TargetBBox': tgt_bbox.astype('float32'), - 'TargetLabel': labels.astype('int32'), - 'BBoxInsideWeight': bbox_inside_weights.astype('float32'), - 'ForegroundNumber': fg_num.astype('int32'), - } - - def test_check_output(self): - self.check_output() - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_similarity_focus_op.py b/test/legacy_test/test_similarity_focus_op.py deleted file mode 100755 index 1227a48949341..0000000000000 --- a/test/legacy_test/test_similarity_focus_op.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
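
The file below re-implements the similarity_focus op in plain numpy. Its core step is a greedy selection per indexed channel: repeatedly take the largest remaining value whose row and column are both still unmarked, until min(rows, cols) cells are chosen. A minimal standalone sketch of that step, distilled from the reference loop below (the helper name is ours, not from the test):

import numpy as np

def similarity_focus_mask(channel_2d):
    # Greedy selection mirroring the deleted reference loop: pick the largest
    # remaining cell whose row and column are both unused, mark it, and stop
    # once min(rows, cols) cells are marked.
    h, w = channel_2d.shape
    flat = channel_2d.reshape(-1).astype("float64")
    mask = np.zeros(h * w, dtype="float32")
    row_used = np.zeros(h, dtype=bool)
    col_used = np.zeros(w, dtype=bool)
    marked = 0
    for _ in range(flat.size):
        idx = flat.argmax()
        r, c = divmod(idx, w)
        if not row_used[r] and not col_used[c]:
            row_used[r] = col_used[c] = True
            mask[idx] = 1
            marked += 1
            if marked == min(h, w):
                break
        flat[idx] = -np.inf  # exclude this cell from later argmax calls
    return mask.reshape(h, w)
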
- -import unittest - -import numpy as np -from op_test import OpTest - - -class TestSimilarityFocusOp(OpTest): - def setUp(self): - self.op_type = "similarity_focus" - batch_size = 2 - x_dim, y_dim, z_dim = 3, 2, 2 - self.inputs = { - 'X': np.array( - [ - [ - [[0.8, 0.1], [0.4, 0.5]], - [[0.9, 0.7], [0.9, 0.9]], - [[0.8, 0.9], [0.1, 0.2]], - ], - [ - [[0.2, 0.5], [0.3, 0.4]], - [[0.9, 0.7], [0.8, 0.4]], - [[0.0, 0.2], [0.4, 0.7]], - ], - ] - ), - } - self.attrs = { - 'axis': 1, - 'indexes': [0], - } - - output = None - for batch in range(batch_size): - res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) - for index in self.attrs['indexes']: - channel = ( - self.inputs['X'][batch, index, :, :].reshape(-1).copy() - ) - tag1 = [0 for i in range(y_dim)] - tag2 = [0 for i in range(z_dim)] - cnt = 0 - for i in range(channel.size): - index = channel.argmax() - idx1 = index // z_dim - idx2 = index % z_dim - if tag1[idx1] + tag2[idx2] == 0: - tag1[idx1] = 1 - tag2[idx2] = 1 - res[index] = 1 - cnt += 1 - if cnt == min(y_dim, z_dim): - break - channel[index] = -1 - res = res.reshape(1, y_dim, z_dim).repeat([x_dim], axis=0) - res = res.reshape(1, x_dim, y_dim, z_dim) - if output is not None: - output = np.concatenate((output, res), axis=0) - else: - output = res - self.outputs = {'Out': output} - - def test_check_output(self): - self.check_output() - - -class TestSimilarityFocusOp_axis1(OpTest): - def setUp(self): - self.op_type = "similarity_focus" - batch_size = 3 - x_dim, y_dim, z_dim = 4, 5, 6 - self.inputs = { - 'X': np.random.random((batch_size, x_dim, y_dim, z_dim)).astype( - "float32" - ), - } - self.attrs = { - 'axis': 1, - 'indexes': [0, 3], - } - - output = None - for batch in range(batch_size): - res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) - for index in self.attrs['indexes']: - channel = ( - self.inputs['X'][batch, index, :, :].reshape(-1).copy() - ) - tag1 = [0 for i in range(y_dim)] - tag2 = [0 for i in range(z_dim)] - cnt = 0 - for i in range(channel.size): - index = channel.argmax() - idx1 = index // z_dim - idx2 = index % z_dim - if tag1[idx1] + tag2[idx2] == 0: - tag1[idx1] = 1 - tag2[idx2] = 1 - res[index] = 1 - cnt += 1 - if cnt == min(y_dim, z_dim): - break - channel[index] = -1 - res = res.reshape(1, y_dim, z_dim) - res = res.repeat([x_dim], axis=0) - res = res.reshape(1, x_dim, y_dim, z_dim) - if output is not None: - output = np.concatenate((output, res), axis=0) - else: - output = res - self.outputs = {'Out': output} - - def test_check_output(self): - self.check_output() - - -class TestSimilarityFocusOp_axis2(OpTest): - def setUp(self): - self.op_type = "similarity_focus" - batch_size = 6 - x_dim, y_dim, z_dim = 7, 8, 9 - self.inputs = { - 'X': np.random.random((batch_size, x_dim, y_dim, z_dim)).astype( - "float32" - ), - } - self.attrs = { - 'axis': 2, - 'indexes': [0, 3, 5], - } - - output = None - for batch in range(batch_size): - res = np.zeros((x_dim, 1, z_dim)).astype("float32").reshape(-1) - for index in self.attrs['indexes']: - channel = ( - self.inputs['X'][batch, :, index, :].reshape(-1).copy() - ) - tag1 = [0 for i in range(x_dim)] - tag2 = [0 for i in range(z_dim)] - cnt = 0 - for i in range(channel.size): - index = channel.argmax() - idx1 = index // z_dim - idx2 = index % z_dim - if tag1[idx1] + tag2[idx2] == 0: - tag1[idx1] = 1 - tag2[idx2] = 1 - res[index] = 1 - cnt += 1 - if cnt == min(x_dim, z_dim): - break - channel[index] = -1 - res = res.reshape(x_dim, 1, z_dim) - res = res.repeat([y_dim], axis=1) - res = res.reshape(1, 
x_dim, y_dim, z_dim) - if output is not None: - output = np.concatenate((output, res), axis=0) - else: - output = res - self.outputs = {'Out': output} - - def test_check_output(self): - self.check_output() - - -class TestSimilarityFocusOp_axis3(OpTest): - def setUp(self): - self.op_type = "similarity_focus" - batch_size = 64 - x_dim, y_dim, z_dim = 48, 48, 13 - self.inputs = { - 'X': np.random.random((batch_size, x_dim, y_dim, z_dim)).astype( - "float32" - ), - } - self.attrs = { - 'axis': 3, - 'indexes': [0, 2, 7, 9], - } - - output = None - for batch in range(batch_size): - res = np.zeros((x_dim, y_dim, 1)).astype("float32").reshape(-1) - for index in self.attrs['indexes']: - channel = ( - self.inputs['X'][batch, :, :, index].reshape(-1).copy() - ) - tag1 = [0 for i in range(x_dim)] - tag2 = [0 for i in range(y_dim)] - cnt = 0 - for i in range(channel.size): - index = channel.argmax() - idx1 = index // y_dim - idx2 = index % y_dim - if tag1[idx1] + tag2[idx2] == 0: - tag1[idx1] = 1 - tag2[idx2] = 1 - res[index] = 1 - cnt += 1 - if cnt == min(x_dim, y_dim): - break - channel[index] = -1 - res = res.reshape(x_dim, y_dim, 1) - res = res.repeat([z_dim], axis=2) - res = res.reshape(1, x_dim, y_dim, z_dim) - if output is not None: - output = np.concatenate((output, res), axis=0) - else: - output = res - self.outputs = {'Out': output} - - def test_check_output(self): - self.check_output() - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_spp_op.py b/test/legacy_test/test_spp_op.py deleted file mode 100644 index fbf3440352590..0000000000000 --- a/test/legacy_test/test_spp_op.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
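
The spp (spatial pyramid pooling) test deleted below derives each level's kernel and padding from the bin count, and its flattened output width from the pyramid height. A short sketch of that arithmetic, with hypothetical helper names:

import math

def spp_level_geometry(size, level):
    # Level i pools one spatial dim into 2**i bins; the deleted reference
    # uses kernel = ceil(size / bins) and pads so kernel * bins covers size.
    bins = 2 ** level
    kernel = math.ceil(size / bins)
    padding = (kernel * bins - size + 1) // 2
    return kernel, padding

def spp_output_dim(channels, pyramid_height):
    # Each level contributes channels * (2**i)**2 features after flattening.
    return channels * sum(4 ** i for i in range(pyramid_height))

# For the test's input shape [3, 2, 16, 16] with pyramid_height = 3:
# 2 * (1 + 4 + 16) == 42 output features per sample.
assert spp_output_dim(2, 3) == 42
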
- -import unittest - -import numpy as np -from op_test import OpTest -from test_pool2d_op import avg_pool2D_forward_naive, max_pool2D_forward_naive - - -class TestSppOp(OpTest): - def setUp(self): - self.op_type = "spp" - self.init_test_case() - nsize, csize, hsize, wsize = self.shape - data = np.array(list(range(nsize * csize * hsize * wsize))) - input = data.reshape(self.shape) - input_random = np.random.random(self.shape).astype("float64") - input = input + input_random - out_level_flatten = [] - for i in range(self.pyramid_height): - bins = np.power(2, i) - kernel_size = [0, 0] - padding = [0, 0] - kernel_size[0] = np.ceil(hsize / bins.astype("double")).astype( - "int32" - ) - padding[0] = ((kernel_size[0] * bins - hsize + 1) / 2).astype( - "int32" - ) - - kernel_size[1] = np.ceil(wsize / bins.astype("double")).astype( - "int32" - ) - padding[1] = ((kernel_size[1] * bins - wsize + 1) / 2).astype( - "int32" - ) - out_level = self.pool2D_forward_naive( - input, kernel_size, kernel_size, padding - ) - out_level_flatten.append( - out_level.reshape(nsize, bins * bins * csize) - ) - if i == 0: - output = out_level_flatten[i] - else: - output = np.concatenate((output, out_level_flatten[i]), 1) - # output = np.concatenate(out_level_flatten.tolist(), 0); - self.inputs = { - 'X': input.astype('float64'), - } - self.attrs = { - 'pyramid_height': self.pyramid_height, - 'pooling_type': self.pool_type, - } - self.outputs = {'Out': output.astype('float64')} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out') - - def init_test_case(self): - self.shape = [3, 2, 16, 16] - self.pyramid_height = 3 - self.pool2D_forward_naive = max_pool2D_forward_naive - self.pool_type = "max" - - -class TestCase2(TestSppOp): - def init_test_case(self): - self.shape = [3, 2, 16, 16] - self.pyramid_height = 3 - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.pool_type = "avg" - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_tdm_child_op.py b/test/legacy_test/test_tdm_child_op.py deleted file mode 100644 index b1c100a2a789f..0000000000000 --- a/test/legacy_test/test_tdm_child_op.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
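
In the tdm_child test deleted below, each row of tree_info appears to encode [item_id, layer_id, parent, child_0, child_1]: children sit in columns 3 and 4, and a node is a leaf exactly when its column-0 item id is nonzero. A minimal sketch of the lookup the reference loop performs (helper name ours, column semantics inferred from the test data):

def tdm_children(tree_info, node, child_nums=2):
    # Node 0 is the padding node: it reports zero children with zero masks.
    if node == 0:
        children = [0] * child_nums
    else:
        children = [tree_info[node][3 + k] for k in range(child_nums)]
    # A child is a leaf iff its item id (column 0) is nonzero.
    leaf_mask = [int(tree_info[child][0] != 0) for child in children]
    return children, leaf_mask
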
- -import unittest - -import numpy as np -from op_test import OpTest, paddle_static_guard - -import paddle -from paddle import base -from paddle.incubate.layers.nn import tdm_child - - -def create_tdm_tree(): - """Create tdm tree info""" - tree_info = [ - [0, 0, 0, 1, 2], - [0, 1, 0, 3, 4], - [0, 1, 0, 5, 6], - [0, 2, 1, 7, 8], - [0, 2, 1, 9, 10], - [0, 2, 2, 11, 12], - [0, 2, 2, 13, 0], - [0, 3, 3, 14, 15], - [0, 3, 3, 16, 17], - [0, 3, 4, 18, 19], - [0, 3, 4, 20, 21], - [0, 3, 5, 22, 23], - [0, 3, 5, 24, 25], - [12, 3, 6, 0, 0], - [0, 4, 7, 0, 0], - [1, 4, 7, 0, 0], - [2, 4, 8, 0, 0], - [3, 4, 8, 0, 0], - [4, 4, 9, 0, 0], - [5, 4, 9, 0, 0], - [6, 4, 10, 0, 0], - [7, 4, 10, 0, 0], - [8, 4, 11, 0, 0], - [9, 4, 11, 0, 0], - [10, 4, 12, 0, 0], - [11, 4, 12, 0, 0], - ] - return tree_info - - -class TestTDMChildOp(OpTest): - def setUp(self): - self.__class__.op_type = "tdm_child" - self.config() - tree_info = create_tdm_tree() - tree_info_np = np.array(tree_info).astype(self.info_type) - - x_np = np.random.randint(low=0, high=26, size=self.x_shape).astype( - self.x_type - ) - children_res = [] - leaf_mask_res = [] - for batch in x_np: - for node in batch: - children = [] - if node != 0: - children.append(tree_info[node][3]) - children.append(tree_info[node][4]) - else: - children.append(0) - children.append(0) - mask = [] - for child in children: - m = int(tree_info[child][0] != 0) - mask.append(m) - children_res += children - leaf_mask_res += mask - children_res_np = np.array(children_res).astype(self.info_type) - leaf_mask_res_np = np.array(leaf_mask_res).astype(self.info_type) - - child = np.reshape(children_res_np, self.child_shape) - leaf_mask = np.reshape(leaf_mask_res_np, self.child_shape) - - self.attrs = {'child_nums': 2} - self.inputs = {'X': x_np, 'TreeInfo': tree_info_np} - self.outputs = {'Child': child, 'LeafMask': leaf_mask} - - def config(self): - """set test shape & type""" - self.x_shape = (10, 20) - self.child_shape = (10, 20, 2) - self.x_type = 'int32' - self.info_type = 'int32' - - def test_check_output(self): - self.check_output() - - -class TestCase1(TestTDMChildOp): - def config(self): - """check int int64_t""" - self.x_shape = (10, 20) - self.child_shape = (10, 20, 2) - self.x_type = 'int32' - self.info_type = 'int64' - - -class TestCase2(TestTDMChildOp): - def config(self): - """check int64_t int64_t""" - self.x_shape = (10, 20) - self.child_shape = (10, 20, 2) - self.x_type = 'int64' - self.info_type = 'int64' - - -class TestCase3(TestTDMChildOp): - def config(self): - """check int64 int32""" - self.x_shape = (10, 20) - self.child_shape = (10, 20, 2) - self.x_type = 'int64' - self.info_type = 'int32' - - -class TestCase4(TestTDMChildOp): - def config(self): - """check large shape""" - self.x_shape = (100, 20) - self.child_shape = (100, 20, 2) - self.x_type = 'int32' - self.info_type = 'int32' - - -class TestTDMChildShape(unittest.TestCase): - def test_shape(self): - with paddle_static_guard(): - x = paddle.static.data( - name='x', shape=[-1, 1], dtype='int32', lod_level=1 - ) - tdm_tree_info = create_tdm_tree() - tree_info_np = np.array(tdm_tree_info).astype('int32') - - child, leaf_mask = tdm_child( - x=x, - node_nums=26, - child_nums=2, - param_attr=base.ParamAttr( - initializer=paddle.nn.initializer.Assign(tree_info_np) - ), - ) - - place = base.CPUPlace() - exe = base.Executor(place=place) - exe.run(base.default_startup_program()) - - feed = { - 'x': np.array( - [ - [1], - [2], - [3], - [4], - [5], - [6], - [7], - [8], - [9], - [10], - [11], - [12], - ] - 
).astype('int32') - } - exe.run(feed=feed) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_teacher_student_sigmoid_loss_op.py b/test/legacy_test/test_teacher_student_sigmoid_loss_op.py deleted file mode 100644 index 984a47831064e..0000000000000 --- a/test/legacy_test/test_teacher_student_sigmoid_loss_op.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from math import exp, log - -import numpy as np -from op_test import OpTest -from scipy.special import logit - - -class TestTeacherStudentSigmoidLossOp(OpTest): - """ - Test teacher_student_sigmoid_loss with discrete one-hot labels. - """ - - def setUp(self): - self.op_type = "teacher_student_sigmoid_loss" - batch_size = 100 - num_classes = 1 - self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)).astype( - "float64" - ) - ), - 'Label': np.random.uniform(0, 2, (batch_size, num_classes)).astype( - "float64" - ), - } - outs = [] - for index, label in enumerate(self.inputs["Label"]): - x = self.inputs["X"][index] - if label < -1.0: - outs.append(max(x, 0.0) + log(1.0 + exp(-abs(x)))) - elif label < 0.0: - outs.append(max(x, 0.0) - x + log(1.0 + exp(-abs(x)))) - elif label < 1.0: - outs.append( - max(x, 0.0) - + log(1.0 + exp(-abs(x))) - + max(x, 0.0) - - x * label - + log(1.0 + exp(-abs(x))) - ) - else: - outs.append( - max(x, 0.0) - - x - + log(1.0 + exp(-abs(x))) - + max(x, 0.0) - - x * (label - 1.0) - + log(1.0 + exp(-abs(x))) - ) - self.outputs = {'Y': np.array(outs)} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(["X"], "Y", numeric_grad_delta=0.005) diff --git a/test/legacy_test/test_unique_with_counts.py b/test/legacy_test/test_unique_with_counts.py deleted file mode 100644 index 4cc2879bfab7a..0000000000000 --- a/test/legacy_test/test_unique_with_counts.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
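
unique_with_counts returns unique values in order of first appearance, plus an inverse index and per-value counts; the deleted test builds this by re-sorting np.unique's sorted output by first occurrence. The same reference logic as a compact sketch (function name ours):

import numpy as np

def unique_with_counts_ref(x):
    # np.unique sorts the values, so re-order everything by the position of
    # each value's first appearance to get first-occurrence ordering.
    uniq, first_idx, inverse, counts = np.unique(
        x, return_index=True, return_inverse=True, return_counts=True
    )
    order = np.argsort(first_idx)
    rank = np.empty_like(order)
    rank[order] = np.arange(len(order))
    return uniq[order], rank[inverse], counts[order]

# Matches the expected values in the first test case below:
out, index, count = unique_with_counts_ref(np.array([2, 3, 3, 1, 5, 3]))
# out -> [2 3 1 5], index -> [0 1 1 2 3 1], count -> [1 3 1 1]
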
- -import unittest - -import numpy as np -from op_test import OpTest, paddle_static_guard - -import paddle -from paddle.base import core - - -class TestUniqueWithCountsOp(OpTest): - def setUp(self): - self.op_type = "unique_with_counts" - self.init_config() - - def test_check_output(self): - self.check_output() - - def init_config(self): - self.inputs = { - 'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), - } - self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} - self.outputs = { - 'Out': np.array([2, 3, 1, 5], dtype='int64'), - 'Index': np.array([0, 1, 1, 2, 3, 1], dtype='int32'), - 'Count': np.array([1, 3, 1, 1], dtype='int32'), - } - - -class TestOne(TestUniqueWithCountsOp): - def init_config(self): - self.inputs = { - 'X': np.array([2], dtype='int64'), - } - self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} - self.outputs = { - 'Out': np.array([2], dtype='int64'), - 'Index': np.array([0], dtype='int32'), - 'Count': np.array([1], dtype='int32'), - } - - -class TestRandom(TestUniqueWithCountsOp): - def init_config(self): - input_data = np.random.randint(0, 100, (2000,), dtype='int64') - self.inputs = {'X': input_data} - self.attrs = {'dtype': int(core.VarDesc.VarType.INT64)} - np_unique, np_index, reverse_index = np.unique( - self.inputs['X'], True, True - ) - np_tuple = [(np_unique[i], np_index[i]) for i in range(len(np_unique))] - np_tuple.sort(key=lambda x: x[1]) - target_out = np.array([i[0] for i in np_tuple], dtype='int64') - target_index = np.array( - [list(target_out).index(i) for i in self.inputs['X']], dtype='int64' - ) - count = [0 for i in range(len(np_unique))] - for i in range(target_index.shape[0]): - count[target_index[i]] += 1 - target_count = np.array(count, dtype='int64') - self.outputs = { - 'Out': target_out, - 'Index': target_index, - 'Count': target_count, - } - - -class TestUniqueWithCountsRaiseError(unittest.TestCase): - def test_errors(self): - with paddle_static_guard(): - - def test_dtype(): - data = paddle.static.data( - shape=[10], dtype="int16", name="input" - ) - paddle.unique(data) - - self.assertRaises(TypeError, test_dtype) - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestOneGPU(TestUniqueWithCountsOp): - def init_config(self): - self.inputs = { - 'X': np.array([2], dtype='int64'), - } - self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} - self.outputs = { - 'Out': np.array([2], dtype='int64'), - 'Index': np.array([0], dtype='int32'), - 'Count': np.array([1], dtype='int32'), - } - - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - self.check_output_with_place(place, atol=1e-5) - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestRandomGPU(TestUniqueWithCountsOp): - def init_config(self): - input_data = np.random.randint(0, 100, (2000,), dtype='int64') - self.inputs = {'X': input_data} - self.attrs = {'dtype': int(core.VarDesc.VarType.INT64)} - np_unique, np_index, reverse_index = np.unique( - self.inputs['X'], True, True - ) - np_tuple = [(np_unique[i], np_index[i]) for i in range(len(np_unique))] - np_tuple.sort(key=lambda x: x[1]) - target_out = np.array([i[0] for i in np_tuple], dtype='int64') - target_index = np.array( - [list(target_out).index(i) for i in self.inputs['X']], dtype='int64' - ) - count = [0 for i in range(len(np_unique))] - for i in range(target_index.shape[0]): - count[target_index[i]] += 1 - target_count = np.array(count, dtype='int64') - self.outputs = { - 'Out': 
target_out, - 'Index': target_index, - 'Count': target_count, - } - - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - self.check_output_with_place(place, atol=1e-5) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_unzip_op.py b/test/legacy_test/test_unzip_op.py deleted file mode 100644 index fd564fe6f3578..0000000000000 --- a/test/legacy_test/test_unzip_op.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -import paddle -from paddle import base -from paddle.base import core - - -class TestUnzipOp(unittest.TestCase): - def test_result(self): - """ - For unzip op - """ - paddle.enable_static() - if core.is_compiled_with_cuda(): - place = base.CUDAPlace(0) - x = paddle.static.data(name='X', shape=[6], dtype='float64') - lod = paddle.static.data(name='lod', shape=[6], dtype='int64') - len = 4 - output = paddle.incubate.operators.unzip(x, lod, len) - - input = [1.0, 2.0, 3.0, 1.0, 2.0, 4.0] - lod = [0, 3, 3, 3, 4, 6] - - feed = { - 'X': np.array(input).astype("float64"), - 'lod': np.array(lod).astype("int64"), - } - - exe = base.Executor(place=place) - exe.run(base.default_startup_program()) - res = exe.run(feed=feed, fetch_list=[output]) - out = [ - [1.0, 2.0, 3.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [2.0, 4.0, 0.0, 0.0], - ] - out_np = np.array(out, dtype="float64") - assert (res == out_np).all(), "output is not right" - - -class TestUnzipOp_Complex(unittest.TestCase): - def test_result(self): - """ - For unzip op - """ - self.dtype = self.get_dtype() - paddle.enable_static() - prog = paddle.static.Program() - startup_prog = paddle.static.Program() - with paddle.static.program_guard(prog, startup_prog): - if core.is_compiled_with_cuda(): - place = base.CUDAPlace(0) - x = paddle.static.data( - name='Complex64_X', shape=[6], dtype=self.dtype - ) - lod = paddle.static.data(name='lodx', shape=[6], dtype='int64') - len = 4 - output = paddle.incubate.operators.unzip(x, lod, len) - input = [ - 1.0 + 1.0j, - 2.0 + 2.0j, - 3.0 + 3.0j, - 1.0 + 1.0j, - 2.0 + 2.0j, - 4.0 + 4.0j, - ] - lod = [0, 3, 3, 3, 4, 6] - - feed = { - 'Complex64_X': np.array(input).astype(self.dtype), - 'lodx': np.array(lod).astype("int64"), - } - - exe = base.Executor(place=place) - exe.run(base.default_startup_program()) - res = exe.run(prog, feed=feed, fetch_list=[output]) - out = [ - [1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j, 0.0j], - [0.0j, 0.0j, 0.0j, 0.0j], - [0.0j, 0.0j, 0.0j, 0.0j], - [1.0 + 1.0j, 0.0j, 0.0j, 0.0j], - [2.0 + 2.0j, 4.0 + 4.0j, 0.0j, 0.0j], - ] - out_np = np.array(out, dtype=self.dtype) - assert (res == out_np).all(), "output is not right" - - def get_dtype(self): - return np.complex64 - - -class TestUnzipOp_Complex128(TestUnzipOp_Complex): - def get_dtype(self): - return np.complex128 - - -if __name__ == '__main__': - unittest.main() diff --git 
a/test/xpu/get_test_cover_info.py b/test/xpu/get_test_cover_info.py index c6f3756a69456..628691711ccd8 100644 --- a/test/xpu/get_test_cover_info.py +++ b/test/xpu/get_test_cover_info.py @@ -87,8 +87,6 @@ "grad_add_float32", # no api for grad_add, skip "lamb_float16", "lars_momentum_float32", - "resnet_unit", - "resnet_unit_grad", "c_embedding_float32", # unittests of collective ops do not using xpu testing framework "c_sync_comm_stream_float32", "c_sync_calc_stream_float32", diff --git a/test/xpu/test_fused_resnet_basic_block_op_xpu.py b/test/xpu/test_fused_resnet_basic_block_op_xpu.py deleted file mode 100644 index 83aa25f54018f..0000000000000 --- a/test/xpu/test_fused_resnet_basic_block_op_xpu.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import unittest - -import numpy as np -from get_test_cover_info import ( - XPUOpTestWrapper, - create_test_class, - get_xpu_op_support_types, -) -from op_test import OpTest - -import paddle -from paddle import base, nn -from paddle.base import core -from paddle.base.framework import default_main_program -from paddle.incubate.xpu.resnet_block import ResNetBasicBlock - - -class XPUTestResNetBasicBlockOp(XPUOpTestWrapper): - def __init__(self): - self.op_name = "resnet_basic_block" - self.use_dynamic_create_class = False - - class TestResNetBasicBlockOp(OpTest): - def setUp(self): - self.dtype = self.in_type - self.place = paddle.XPUPlace(0) - self.__class__.op_type = "resnet_basic_block" - self.__class__.no_need_check_grad = True - self.getShape() - self.getDiff() - self.getShortcut() - paddle.set_default_dtype(self.dtype) - - self.src = np.random.random(self.input_size).astype(self.dtype) - self.dout = np.random.random(self.output_size).astype(self.dtype) - - def getShape(self): - self.in_channels = 8 - self.out_channels = 8 - self.stride = 1 - self.input_size = [2, 8, 32, 32] # NCHW - self.output_size = [2, 8, 32, 32] # NCHW - - def getDiff(self): - self.rtol = 1e-3 - self.atol = 1e-3 - - def getShortcut(self): - self.has_shortcut = False - - def Base(self): - conv1_weight = base.ParamAttr( - initializer=paddle.nn.initializer.XavierNormal(), - learning_rate=0.001, - ) - conv2_weight = base.ParamAttr( - initializer=paddle.nn.initializer.XavierNormal(), - learning_rate=0.001, - ) - conv3_weight = base.ParamAttr( - initializer=paddle.nn.initializer.XavierNormal(), - learning_rate=0.001, - ) - bn1_weight = base.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0) - ) - bn1_bias = base.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=0.0) - ) - bn2_weight = base.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0) - ) - bn2_bias = base.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=0.0) - ) - bn3_weight = base.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0) - ) - bn3_bias = base.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=0.0) - ) - - self.conv1 = nn.Conv2D( - 
-                in_channels=self.in_channels,
-                out_channels=self.out_channels,
-                kernel_size=3,
-                stride=self.stride,
-                padding=1,
-                weight_attr=conv1_weight,
-                bias_attr=None,
-                data_format='NCHW',
-            )
-            self.bn1 = paddle.nn.BatchNorm(
-                self.out_channels,
-                act='relu',
-                param_attr=bn1_weight,
-                bias_attr=bn1_bias,
-                data_layout='NCHW',
-            )
-            self.conv2 = nn.Conv2D(
-                in_channels=self.out_channels,
-                out_channels=self.out_channels,
-                kernel_size=3,
-                stride=1,
-                padding=1,
-                weight_attr=conv2_weight,
-                bias_attr=None,
-                data_format='NCHW',
-            )
-            self.bn2 = paddle.nn.BatchNorm(
-                self.out_channels,
-                act=None,
-                param_attr=bn2_weight,
-                bias_attr=bn2_bias,
-                data_layout='NCHW',
-            )
-            self.conv3 = nn.Conv2D(
-                in_channels=self.in_channels,
-                out_channels=self.out_channels,
-                kernel_size=1,
-                stride=self.stride,
-                padding=0,
-                weight_attr=conv3_weight,
-                bias_attr=None,
-                data_format='NCHW',
-            )
-            self.bn3 = paddle.nn.BatchNorm(
-                self.out_channels,
-                act=None,
-                param_attr=bn3_weight,
-                bias_attr=bn3_bias,
-                data_layout='NCHW',
-            )
-            self.relu = nn.ReLU()
-
-            tensor_src = paddle.to_tensor(self.src, stop_gradient=False)
-            if self.has_shortcut:
-                z_out = self.bn3(self.conv3(tensor_src))
-            else:
-                z_out = tensor_src
-            bn1_out = self.bn1(self.conv1(tensor_src))
-            bn2_out = self.bn2(self.conv2(bn1_out))
-            result = self.relu(bn2_out + z_out)
-            paddle.autograd.backward(
-                [result], [paddle.to_tensor(self.dout)], True
-            )
-            return result, tensor_src.grad
-
-        def FusedResNetBasicBlock(self):
-            fused_conv1_weight = base.ParamAttr(
-                initializer=paddle.nn.initializer.XavierNormal(),
-                learning_rate=0.001,
-            )
-            fused_conv2_weight = base.ParamAttr(
-                initializer=paddle.nn.initializer.XavierNormal(),
-                learning_rate=0.001,
-            )
-            fused_conv3_weight = base.ParamAttr(
-                initializer=paddle.nn.initializer.XavierNormal(),
-                learning_rate=0.001,
-            )
-            fused_bn1_weight = base.ParamAttr(
-                initializer=paddle.nn.initializer.Constant(value=1.0)
-            )
-            fused_bn1_bias = base.ParamAttr(
-                initializer=paddle.nn.initializer.Constant(value=0.0)
-            )
-            fused_bn2_weight = base.ParamAttr(
-                initializer=paddle.nn.initializer.Constant(value=1.0)
-            )
-            fused_bn2_bias = base.ParamAttr(
-                initializer=paddle.nn.initializer.Constant(value=0.0)
-            )
-            fused_bn3_weight = base.ParamAttr(
-                initializer=paddle.nn.initializer.Constant(value=1.0)
-            )
-            fused_bn3_bias = base.ParamAttr(
-                initializer=paddle.nn.initializer.Constant(value=0.0)
-            )
-
-            if self.has_shortcut:
-                self.resnet_basic_block = ResNetBasicBlock(
-                    num_channels1=self.in_channels,
-                    num_filter1=self.out_channels,
-                    filter1_size=3,
-                    num_channels2=self.out_channels,
-                    num_filter2=self.out_channels,
-                    filter2_size=3,
-                    num_channels3=self.in_channels,
-                    num_filter3=self.out_channels,
-                    filter3_size=1,
-                    filter1_attr=fused_conv1_weight,
-                    scale1_attr=fused_bn1_weight,
-                    bias1_attr=fused_bn1_bias,
-                    filter2_attr=fused_conv2_weight,
-                    scale2_attr=fused_bn2_weight,
-                    bias2_attr=fused_bn2_bias,
-                    filter3_attr=fused_conv3_weight,
-                    scale3_attr=fused_bn3_weight,
-                    bias3_attr=fused_bn3_bias,
-                    stride1=self.stride,
-                    stride2=1,
-                    stride3=self.stride,
-                    act='relu',
-                    padding1=1,
-                    padding2=1,
-                    padding3=0,
-                    has_shortcut=True,
-                )
-            else:
-                self.resnet_basic_block = ResNetBasicBlock(
-                    num_channels1=self.in_channels,
-                    num_filter1=self.out_channels,
-                    filter1_size=3,
-                    num_channels2=self.out_channels,
-                    num_filter2=self.out_channels,
-                    filter2_size=3,
-                    num_channels3=self.in_channels,
-                    num_filter3=self.out_channels,
-                    filter3_size=1,
-                    filter1_attr=fused_conv1_weight,
-                    scale1_attr=fused_bn1_weight,
-                    bias1_attr=fused_bn1_bias,
-                    filter2_attr=fused_conv2_weight,
-                    scale2_attr=fused_bn2_weight,
-                    bias2_attr=fused_bn2_bias,
-                    filter3_attr=fused_conv3_weight,
-                    scale3_attr=fused_bn3_weight,
-                    bias3_attr=fused_bn3_bias,
-                    stride1=self.stride,
-                    stride2=1,
-                    stride3=self.stride,
-                    act='relu',
-                    padding1=1,
-                    padding2=1,
-                    padding3=1,
-                    has_shortcut=False,
-                )
-
-            x = paddle.to_tensor(self.src, stop_gradient=False)
-            out = self.resnet_basic_block.forward(x)
-            paddle.autograd.backward([out], [paddle.to_tensor(self.dout)])
-            return out, x.grad
-
-        def test_out_and_grad_has_shortcut(self):
-            self.has_shortcut = True
-            default_main_program().random_seed = 1
-            base_out, base_grad = self.Base()
-            fused_out, fused_grad = self.FusedResNetBasicBlock()
-            np.testing.assert_allclose(
-                base_out.numpy(),
-                fused_out.numpy(),
-                rtol=self.rtol,
-                atol=self.atol,
-            )
-            np.testing.assert_allclose(
-                base_grad.numpy(),
-                fused_grad.numpy(),
-                rtol=self.rtol,
-                atol=self.atol,
-            )
-
-        def test_out_and_grad(self):
-            self.has_shortcut = False
-            default_main_program().random_seed = 1
-            base_out, base_grad = self.Base()
-            fused_out, fused_grad = self.FusedResNetBasicBlock()
-            np.testing.assert_allclose(
-                base_out.numpy(),
-                fused_out.numpy(),
-                rtol=self.rtol,
-                atol=self.atol,
-            )
-            np.testing.assert_allclose(
-                base_grad.numpy(),
-                fused_grad.numpy(),
-                rtol=self.rtol,
-                atol=self.atol,
-            )
-
-
-support_types = get_xpu_op_support_types('resnet_basic_block')
-for stype in support_types:
-    create_test_class(
-        globals(),
-        XPUTestResNetBasicBlockOp,
-        stype,
-        ignore_device_version=[core.XPUVersion.XPU1],
-    )
-
-if __name__ == '__main__':
-    unittest.main()
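For orientation, the deleted XPU test checked ResNetBasicBlock against the unfused path it builds in Base(): relu(bn2(conv2(bn1_relu(conv1(x)))) + shortcut(x)), where the shortcut is bn3(conv3(x)) when has_shortcut is true and the identity otherwise. A minimal dynamic-graph sketch of that reference path for the identity case, using only the layers and default shapes the test itself declares (a sketch of the reference math, not the fused kernel):

import numpy as np
import paddle
from paddle import nn

# Shapes from the test's getShape(): NCHW [2, 8, 32, 32], 8 -> 8 channels, stride 1.
conv1 = nn.Conv2D(8, 8, kernel_size=3, stride=1, padding=1, bias_attr=None)
bn1 = paddle.nn.BatchNorm(8, act='relu')  # conv1 -> bn -> relu
conv2 = nn.Conv2D(8, 8, kernel_size=3, stride=1, padding=1, bias_attr=None)
bn2 = paddle.nn.BatchNorm(8, act=None)    # no activation before the residual add
relu = nn.ReLU()

x = paddle.to_tensor(
    np.random.random([2, 8, 32, 32]).astype('float32'), stop_gradient=False
)
out = relu(bn2(conv2(bn1(conv1(x)))) + x)  # identity shortcut (has_shortcut=False)
out.sum().backward()  # gradients reach x through both branches, as the test asserted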