Merge pull request #122 from kaih70/master

add mean normalize
PaddlePaddle · Sep 16, 2020 · 3105a2b · 3105a2b
2 parents 944cb37 + a74730a
commit 3105a2b
Show file tree

Hide file tree

Showing 19 changed files with 905 additions and 30 deletions.
diff --git a/core/paddlefl_mpc/mpc_protocol/aby3_operators.h b/core/paddlefl_mpc/mpc_protocol/aby3_operators.h
@@ -328,6 +328,17 @@ class Aby3OperatorsImpl : public MpcOperators {
         a_->max_pooling(out_, b_);
     }
 
+    // Column-wise max of `in`, written into `out`.
+    // Delegates to max_pooling on the input share, passing nullptr as the
+    // second argument.
+    void max(const Tensor* in, Tensor* out) override {
+        // The tuples stay alive until the end of the call because the raw
+        // pointers below are borrowed from their first elements.
+        auto input_parts = from_tensor(in);
+        auto input_share = std::get<0>(input_parts).get();
+
+        auto output_parts = from_tensor(out);
+        auto output_share = std::get<0>(output_parts).get();
+
+        input_share->max_pooling(output_share, nullptr);
+    }
+
     void inverse_square_root(const Tensor* in, Tensor* out) override {
         auto x_tuple = from_tensor(in);
         auto x_ = std::get<0>(x_tuple).get();
@@ -377,6 +388,20 @@ class Aby3OperatorsImpl : public MpcOperators {
         FixedTensor::calc_precision_recall(in, &out_);
     }
 
+    // Writes the result of lhs->long_div(rhs, out) into `out`.
+    // All three tensors are first wrapped via from_tensor.
+    void div(const Tensor *lhs, const Tensor *rhs, Tensor *out) override {
+        // The tuples must outlive the raw pointers taken from them below.
+        auto dividend_parts = from_tensor(lhs);
+        auto divisor_parts = from_tensor(rhs);
+        auto quotient_parts = from_tensor(out);
+
+        auto dividend = std::get<0>(dividend_parts).get();
+        auto divisor = std::get<0>(divisor_parts).get();
+        auto quotient = std::get<0>(quotient_parts).get();
+
+        dividend->long_div(divisor, quotient);
+    }
+
 private:
     template <typename T>
     std::tuple<

diff --git a/core/paddlefl_mpc/mpc_protocol/mpc_operators.h b/core/paddlefl_mpc/mpc_protocol/mpc_operators.h
@@ -82,6 +82,10 @@ class MpcOperators {
     // for filter in other shape, reshape input first
     virtual void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) {}
 
+    // Column-wise max: `in` has shape [n, ...], `out` has shape [1, ...].
+    // The default body is empty (no-op); a concrete protocol overrides it.
+    virtual void max(const Tensor* in, Tensor* out) {}
+
     virtual void inverse_square_root(const Tensor* in, Tensor* out) = 0;  // pure virtual: each concrete protocol must implement it
 
     virtual void predicts_to_indices(const Tensor* in,
@@ -93,6 +97,8 @@ class MpcOperators {
                                Tensor* out) = 0;
 
     virtual void calc_precision_recall(const Tensor* tp_fp_fn, Tensor* out) = 0;  // pure virtual: each concrete protocol must implement it
+
+    virtual void div(const Tensor *lhs, const Tensor *rhs, Tensor *out) = 0;  // pure virtual; presumably out = lhs / rhs (confirm against the protocol implementation)
 };
 
 } // mpc

diff --git a/core/paddlefl_mpc/operators/mpc_mean_normalize_op.cc b/core/paddlefl_mpc/operators/mpc_mean_normalize_op.cc
@@ -0,0 +1,178 @@
+
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "mpc_mean_normalize_op.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include <string>
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+// Operator definition for mpc_mean_normalize.
+// Validates that all inputs and outputs are present, checks the input shapes
+// at runtime and infers the shapes of Output(Range) and Output(MeanOut).
+class MpcMeanNormalizationOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  // Shape inference.
+  // Expected input shapes (enforced only when ctx->IsRuntime()):
+  //   Min / Max / Mean : (share_num, party_num, feature_num)
+  //   SampleNum        : (share_num, party_num)
+  //   TotalNum         : (share_num, 1)
+  // Both outputs are set to (mean_dims[0], mean_dims[2]).
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Min"), true,
+                      platform::errors::InvalidArgument(
+                          "Input(Min) should not be null."));
+    PADDLE_ENFORCE_EQ(
+        ctx->HasInput("Max"), true,
+        platform::errors::InvalidArgument("Input(Max) should not be null."));
+    PADDLE_ENFORCE_EQ(
+        ctx->HasInput("Mean"), true,
+        platform::errors::InvalidArgument("Input(Mean) should not be null."));
+    // Message fixed: it used to name a non-existent Input(Sample).
+    PADDLE_ENFORCE_EQ(
+        ctx->HasInput("SampleNum"), true,
+        platform::errors::InvalidArgument(
+            "Input(SampleNum) should not be null."));
+    PADDLE_ENFORCE_EQ(
+        ctx->HasInput("TotalNum"), true,
+        platform::errors::InvalidArgument("Input(TotalNum) should not be null."));
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Range"), true,
+                      platform::errors::InvalidArgument(
+                          "Output(Range) should not be null."));
+    // Message fixed: it used to read "Output(Meanor)".
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("MeanOut"), true,
+                      platform::errors::InvalidArgument(
+                          "Output(MeanOut) should not be null."));
+
+    auto min_dims = ctx->GetInputDim("Min");
+    auto max_dims = ctx->GetInputDim("Max");
+    auto mean_dims = ctx->GetInputDim("Mean");
+    auto sample_num_dims = ctx->GetInputDim("SampleNum");
+    auto total_num_dims = ctx->GetInputDim("TotalNum");
+
+    if (ctx->IsRuntime()) {
+      PADDLE_ENFORCE_EQ(min_dims, max_dims,
+                        platform::errors::InvalidArgument(
+                            "The dimension of Input(Min) and "
+                            "Input(Max) should be the same."
+                            "But received (%d) != (%d)",
+                            min_dims, max_dims));
+      // Message fixed: this check compares Min against Mean, not Max.
+      PADDLE_ENFORCE_EQ(min_dims, mean_dims,
+                        platform::errors::InvalidArgument(
+                            "The dimension of Input(Min) and "
+                            "Input(Mean) should be the same."
+                            "But received (%d) != (%d)",
+                            min_dims, mean_dims));
+      PADDLE_ENFORCE_EQ(
+          min_dims.size(), 3,
+          platform::errors::InvalidArgument(
+              "The dimension of Input(Min) should be equal to 3 "
+              "(share_num, party_num, feature_num). But received (%d)",
+              min_dims.size()));
+
+      PADDLE_ENFORCE_EQ(
+          sample_num_dims.size(), 2,
+          platform::errors::InvalidArgument(
+              "The dimension of Input(SampleNum) should be equal to 2 "
+              "(share_num, party_num). But received (%d)",
+              sample_num_dims.size()));
+
+      PADDLE_ENFORCE_EQ(
+          sample_num_dims[1], min_dims[1],
+          platform::errors::InvalidArgument(
+              "The party num of Input(SampleNum) and Input(Min) "
+              "should be equal But received (%d) != (%d)",
+              sample_num_dims[1], min_dims[1]));
+
+      PADDLE_ENFORCE_EQ(
+          total_num_dims.size(), 2,
+          platform::errors::InvalidArgument(
+              "The dimension of Input(TotalNum) "
+              "should be 2, But received (%d) != (%d)",
+              total_num_dims.size(), 2));
+
+      PADDLE_ENFORCE_EQ(
+          sample_num_dims[0], total_num_dims[0],
+          platform::errors::InvalidArgument(
+              "The share num of Input(SampleNum) and Input(TotalNum) "
+              "should be equal But received (%d) != (%d)",
+              sample_num_dims[0], total_num_dims[0]));
+
+      PADDLE_ENFORCE_EQ(
+          total_num_dims[1], 1,
+          platform::errors::InvalidArgument(
+              "The shape of Input(TotalNum) "
+              "should be [share_num,  1] But dims[1] received (%d) != (%d)",
+              total_num_dims[1], 1));
+    }
+
+    // NOTE(review): mean_dims[2] is read even when the rank check above was
+    // skipped (non-runtime); confirm Mean always has rank 3 at compile time.
+    ctx->SetOutputDim("Range", {mean_dims[0], mean_dims[2]});
+    ctx->SetOutputDim("MeanOut", {mean_dims[0], mean_dims[2]});
+  }
+
+ protected:
+  // The kernel is selected by the data type of Input(Min) and the device
+  // context of the execution context.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(ctx, "Min"),
+        ctx.device_context());
+  }
+};
+
+// Declares the inputs, outputs and documentation of mpc_mean_normalize.
+// The documented shapes follow what MpcMeanNormalizationOp::InferShape
+// enforces: the leading dimension S of every tensor is the share num.
+class MpcMeanNormalizationOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("Min",
+             "(Tensor, default Tensor<int64_t>) A 3-D tensor with shape "
+             "[S, P, N], where S is the share num, P is the party num and N "
+             "is the feature num. Each row contains "
+             "the local min feature val of N features.");
+    AddInput("Max",
+             "(Tensor, default Tensor<int64_t>) A 3-D tensor with shape "
+             "[S, P, N], where S is the share num, P is the party num and N "
+             "is the feature num. Each row contains "
+             "the local max feature val of N features.");
+    AddInput("Mean",
+             "(Tensor, default Tensor<int64_t>) A 3-D tensor with shape "
+             "[S, P, N], where S is the share num, P is the party num and N "
+             "is the feature num. Each row contains "
+             "the local mean feature val of N features.");
+    AddInput("SampleNum",
+             "(Tensor, default Tensor<int64_t>) A 2-D tensor with shape "
+             "[S, P], where S is the share num and P is the party num. "
+             "Each element contains "
+             "sample num of party_i.");
+    AddInput("TotalNum",
+             "(Tensor, default Tensor<int64_t>) A 2-D tensor with shape "
+             "[S, 1], where S is the share num. "
+             "Element contains sum of sample num of party_i.");
+    AddOutput("Range",
+              "(Tensor, default Tensor<int64_t>) A 2-D tensor with shape "
+              "[S, N], where S is the share num and N is the feature num. "
+              "Each element contains "
+              "global range of feature_i.");
+    AddOutput("MeanOut",
+              "(Tensor, default Tensor<int64_t>) A 2-D tensor with shape "
+              "[S, N], where S is the share num and N is the feature num. "
+              "Each element contains "
+              "global mean of feature_i.");
+    AddComment(R"DOC(
+Mean normalization Operator.
+When given Input(Min), Input(Max), Input(Mean), Input(SampleNum) and Input(TotalNum)
+this operator can be used to compute global range and mean for further feature
+scaling.
+Output(Range) is the global range of all features.
+Output(MeanOut) is the global mean of all features.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;  // short alias used by the registration macros below
+REGISTER_OPERATOR(
+    mpc_mean_normalize, ops::MpcMeanNormalizationOp, ops::MpcMeanNormalizationOpMaker,  // op name, op class, proto maker
+    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,  // presumably: no gradient op for static-graph mode
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);  // presumably: no gradient op for imperative mode
+REGISTER_OP_CPU_KERNEL(
+    mpc_mean_normalize,  // same op name as registered above
+    ops::MpcMeanNormalizationKernel<paddle::platform::CPUPlace, int64_t>);  // only a CPU kernel over int64_t is registered here
diff --git a/core/paddlefl_mpc/operators/mpc_mean_normalize_op.h b/core/paddlefl_mpc/operators/mpc_mean_normalize_op.h
@@ -0,0 +1,106 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "mpc_op.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+// Kernel for mpc_mean_normalize. From the per-party Min / Max / Mean /
+// SampleNum inputs and TotalNum it produces Output(Range) and
+// Output(MeanOut) using the operators of the active MPC protocol.
+template <typename DeviceContext, typename T>
+class MpcMeanNormalizationKernel : public MpcOpKernel<T> {
+ public:
+  // Range   = max(Max) + max(-Min)            (reduced over the party axis)
+  // MeanOut = matmul(SampleNum, Mean) / TotalNum
+  // The order of the protocol operator calls is kept exactly as written.
+  void ComputeImpl(const framework::ExecutionContext& context) const override {
+    const Tensor* min_in = context.Input<Tensor>("Min");
+    const Tensor* max_in = context.Input<Tensor>("Max");
+    const Tensor* mean_in = context.Input<Tensor>("Mean");
+    const Tensor* sample_num_in = context.Input<Tensor>("SampleNum");
+    const Tensor* total_num_in = context.Input<Tensor>("TotalNum");
+
+    Tensor* range_out = context.Output<Tensor>("Range");
+    Tensor* mean_result = context.Output<Tensor>("MeanOut");
+
+    // Min is indexed as (share_num, party_num, feat_num).
+    const auto& in_dims = min_in->dims();
+    int share_num = in_dims[0];
+    int party_num = in_dims[1];
+    int feat_num = in_dims[2];
+
+    const auto& place = context.GetPlace();
+    const auto reduced_dims = framework::make_ddim({share_num, 1, feat_num});
+    const auto flat_dims = framework::make_ddim({share_num, feat_num});
+    auto ops = mpc::MpcInstance::mpc_instance()->mpc_protocol()
+                   ->mpc_operators();
+
+    // Scratch tensors: -Min, its column-wise max, and the column-wise max
+    // of Max.
+    Tensor negated_min;
+    negated_min.mutable_data<T>(in_dims, place, 0);
+
+    Tensor negated_min_reduced;
+    Tensor max_reduced;
+    negated_min_reduced.mutable_data<T>(reduced_dims, place, 0);
+    max_reduced.mutable_data<T>(reduced_dims, place, 0);
+
+    ops->neg(min_in, &negated_min);
+    ops->max(&negated_min, &negated_min_reduced);
+    ops->max(max_in, &max_reduced);
+
+    // Compute the range in the [share_num, 1, feat_num] layout, then
+    // re-dimension the output to [share_num, feat_num].
+    range_out->mutable_data<T>(reduced_dims, place, 0);
+    ops->add(&max_reduced, &negated_min_reduced, range_out);
+    range_out->mutable_data<T>(flat_dims, place, 0);
+
+    // View SampleNum as [share_num, 1, party_num] so it can be the left
+    // operand of matmul against Mean.
+    Tensor sample_num_view;
+    sample_num_view.ShareDataWith(*sample_num_in);
+    sample_num_view.mutable_data<T>(
+        framework::make_ddim({share_num, 1, party_num}), place, 0);
+
+    mean_result->mutable_data<T>(reduced_dims, place, 0);
+    ops->matmul(&sample_num_view, mean_in, mean_result);
+    mean_result->mutable_data<T>(flat_dims, place, 0);
+
+    // Broadcast TotalNum to shape [share_num, feat_num]: row i is filled
+    // with the i-th element of TotalNum.
+    Tensor total_num_broadcast;
+    total_num_broadcast.mutable_data<T>(flat_dims, place, 0);
+
+    T* broadcast_data = total_num_broadcast.data<T>();
+    const T* total_data = total_num_in->data<T>();
+    for (int row = 0; row < share_num; ++row) {
+      std::fill_n(broadcast_data + row * feat_num, feat_num, total_data[row]);
+    }
+
+    // In-place division: MeanOut is both the dividend and the destination.
+    ops->div(mean_result, &total_num_broadcast, mean_result);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle