add unzip op (PaddlePaddle#183)
Co-authored-by: root <[email protected]>
huwei02 and root authored Dec 8, 2022
1 parent 8b632a2 commit 5e845c5
Showing 9 changed files with 464 additions and 3 deletions.
8 changes: 8 additions & 0 deletions paddle/fluid/distributed/ps/table/common_graph_table.cc
@@ -490,6 +490,8 @@ void GraphTable::export_partition_files(int idx, std::string file_path) {

for (int i = 0; i < (int)tasks.size(); i++) tasks[i].get();
}
#endif

void GraphTable::clear_graph(int idx) {
for (auto p : edge_shards[idx]) {
p->clear();
@@ -502,6 +504,7 @@ void GraphTable::clear_graph(int idx) {
}
}

#ifdef PADDLE_WITH_HETERPS
void GraphTable::release_graph() {
// Before releasing graph, prepare for sampling ids and embedding keys.
build_graph_type_keys();
@@ -541,6 +544,7 @@ void GraphTable::release_graph_node() {
feature_shrink_to_fit();
}
}
#endif

void GraphTable::clear_edge_shard() {
VLOG(0) << "begin clear edge shard";
@@ -586,6 +590,7 @@ void GraphTable::clear_feature_shard() {
VLOG(0) << "finish clear feature shard";
}

#ifdef PADDLE_WITH_HETERPS
void GraphTable::feature_shrink_to_fit() {
std::vector<std::future<int>> tasks;
for (auto &type_shards : feature_shards) {
@@ -615,13 +620,16 @@ void GraphTable::merge_feature_shard() {
feature_shards.resize(1);
}

#endif

void GraphTable::clear_graph() {
VLOG(0) << "begin clear_graph";
clear_edge_shard();
clear_feature_shard();
VLOG(0) << "finish clear_graph";
}

#ifdef PADDLE_WITH_HETERPS
int32_t GraphTable::load_next_partition(int idx) {
if (next_partition >= (int)partitions[idx].size()) {
VLOG(0) << "partition iteration is done";
8 changes: 6 additions & 2 deletions paddle/fluid/framework/data_feed.cc
@@ -2119,11 +2119,15 @@ void SlotRecordInMemoryDataFeed::Init(const DataFeedDesc& data_feed_desc) {
}

void SlotRecordInMemoryDataFeed::InitGraphResource() {
#if defined(PADDLE_WITH_GPU_GRAPH) && defined(PADDLE_WITH_HETERPS)
gpu_graph_data_generator_.AllocResource(thread_id_, feed_vec_);
#endif
}

void SlotRecordInMemoryDataFeed::InitGraphTrainResource() {
#if defined(PADDLE_WITH_GPU_GRAPH) && defined(PADDLE_WITH_HETERPS)
gpu_graph_data_generator_.AllocTrainResource(thread_id_);
#endif
}

void SlotRecordInMemoryDataFeed::LoadIntoMemory() {
@@ -2704,11 +2708,11 @@ int SlotRecordInMemoryDataFeed::Next() {
#endif
}

void SlotRecordInMemoryDataFeed::DoWalkandSage() {
#if defined(PADDLE_WITH_GPU_GRAPH) && defined(PADDLE_WITH_HETERPS)
gpu_graph_data_generator_.DoWalkandSage();
#endif
}

#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
2 changes: 2 additions & 0 deletions paddle/fluid/framework/data_feed.h
@@ -1151,6 +1151,8 @@ class DataFeed {
virtual bool get_epoch_finish() {
#if defined(PADDLE_WITH_GPU_GRAPH) && defined(PADDLE_WITH_HETERPS)
return gpu_graph_data_generator_.get_epoch_finish();
#else
return false;
#endif
}

4 changes: 4 additions & 0 deletions paddle/fluid/operators/unity_build_rule.cmake
@@ -65,6 +65,7 @@ register_unity_group(
cudnn_lstm_op.cc
cumsum_op.cc
cvm_op.cc
unzip_op.cc
data_norm_op.cc
deformable_conv_op.cc
deformable_conv_v1_op.cc
@@ -406,6 +407,7 @@ register_unity_group(
ctc_align_op.cu
cumsum_op.cu
cvm_op.cu
unzip_op.cu
data_norm_op.cu
deformable_conv_op.cu
deformable_conv_v1_op.cu
@@ -585,3 +587,5 @@ register_unity_group(cu expand_op.cu)
register_unity_group(cu matmul_v2_op.cu)
register_unity_group(cu top_k_v2_op.cu)
register_unity_group(cu set_value_op.cu)
register_unity_group(cu unzip_op.cu)
register_unity_group(cc unzip_op.cc)
179 changes: 179 additions & 0 deletions paddle/fluid/operators/unzip_op.cc
@@ -0,0 +1,179 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/unzip_op.h"

#include <memory>

#include "paddle/phi/kernels/funcs/math_function.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

class unzipOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unzip");
OP_INOUT_CHECK(ctx->HasInput("lod"), "Input", "lod", "unzip");
OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "unzip");

auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(
x_dims.size(),
2UL,
platform::errors::InvalidArgument(
"Input(X)'s rank should be 2, but got %d", x_dims.size()));

auto lod_dims = ctx->GetInputDim("lod");
PADDLE_ENFORCE_EQ(
lod_dims.size(),
1UL,
platform::errors::InvalidArgument(
"Input(X)'s rank should be 1, but got %d", lod_dims.size()));

ctx->SetOutputDim("Y", {lod_dims[0] - 1, x_dims[1]});
}

protected:
// Explicitly set that the data type of the unzip kernel is determined
// by its input "X".
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"),
ctx.device_context());
}
};
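To make the InferShape rule above concrete with illustrative numbers (not taken from the commit): for lod = {0, 4, 4, 8, 12}, i.e. K = 5 offsets, Y is given K - 1 = 4 rows of width x_dims[1], one row per lod segment, including the empty segment [4, 4).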

class unzipGradientOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unzipGradient");
OP_INOUT_CHECK(ctx->HasInput("lod"), "Input", "unzip", "unzipGradient");
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")),
"Input",
framework::GradVarName("Y"),
"unzipGradient");
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")),
"Output",
framework::GradVarName("X"),
"unzipGradient");

auto x_dims = ctx->GetInputDim("X");
auto lod_dims = ctx->GetInputDim("lod");
auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
PADDLE_ENFORCE_EQ(
x_dims.size(),
2,
platform::errors::InvalidArgument(
"Expect Input(X)'s rank == 2, but got %d", x_dims.size()));
PADDLE_ENFORCE_EQ(
dy_dims.size(),
2,
platform::errors::InvalidArgument(
"Expect Input(X)'s rank == 2, but got %d", dy_dims.size()));
PADDLE_ENFORCE_EQ(
lod_dims.size(),
1,
platform::errors::InvalidArgument(
"Expect Input(X)'s rank == 1, but got %d", lod_dims.size()));

PADDLE_ENFORCE_EQ(
x_dims[1],
dy_dims[1],
platform::errors::InvalidArgument(
"The 1st dimension of Input(X) and Input(Y@Grad) should "
"be equal, X is %d, Y@Grad is %d",
x_dims[1],
dy_dims[1]));

ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->ShareLoD("X", framework::GradVarName("X"));
}

protected:
// Explicitly set that the data type of the unzip_grad kernel is
// determined by its input gradient "Y@Grad".
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Y")),
ctx.device_context());
}
};

class unzipOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(LodTensor, default LodTensor<float>), a 2-D tensor with shape "
"[M x N],"
" where N is the batch size and D is the emebdding dim. ");
AddInput("lod",
"(Tensor), a 1-D Tensor with shape [K]");
AddOutput("Y",
"(LodTensor, default LodTensor<float>), a 2-D tensor with shape "
"[K-1 x N].");
AddComment(R"DOC(
unzip Operator.
)DOC");
}
};
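The one-line DOC comment is terse, so here is a sketch of the semantics the shapes suggest: X packs only the rows of non-empty lod segments, and Y is the expanded [K-1 x N] tensor with zero rows wherever a segment is empty. This is an inference from InferShape, not the shipped unzipOpKernel from unzip_op.h; the helper name UnzipRowsCpu and the int64_t lod type are assumptions.

#include <algorithm>
#include <cstdint>

// Illustrative sketch only: expand the packed rows of x into y, writing a
// zero row wherever lod marks an empty segment. lod holds k offsets; y has
// k - 1 rows of width n; x holds just the non-empty rows, in order.
template <typename T>
void UnzipRowsCpu(const T* x, const int64_t* lod, int64_t k, int64_t n, T* y) {
  int64_t x_row = 0;
  for (int64_t i = 0; i + 1 < k; ++i) {
    if (lod[i + 1] - lod[i] > 0) {  // non-empty segment: copy next packed row
      std::copy(x + x_row * n, x + (x_row + 1) * n, y + i * n);
      ++x_row;
    } else {  // empty segment: zero-fill this row of y
      std::fill(y + i * n, y + (i + 1) * n, static_cast<T>(0));
    }
  }
}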

template <typename T>
class unzipGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("unzip_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("lod", this->Input("lod"));
op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
}
};
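By the same (assumed) semantics, the backward pass packs the rows of Y@Grad that correspond to non-empty segments back into X@Grad, matching the grad op wiring above. Again a hedged sketch reusing the includes from the forward sketch, not the shipped unzipGradOpKernel:

// Illustrative sketch only: gather the rows of dy (k - 1 rows of width n)
// that belong to non-empty lod segments back into dx, the gradient of the
// packed input X.
template <typename T>
void UnzipGradRowsCpu(const T* dy, const int64_t* lod, int64_t k, int64_t n,
                      T* dx) {
  int64_t x_row = 0;
  for (int64_t i = 0; i + 1 < k; ++i) {
    if (lod[i + 1] - lod[i] > 0) {  // this row of y came from a row of x
      std::copy(dy + i * n, dy + (i + 1) * n, dx + x_row * n);
      ++x_row;
    }
  }
}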

DECLARE_NO_NEED_BUFFER_VARS_INFERER(unzipNoNeedBufferVarInferer, "lod");
DECLARE_NO_NEED_BUFFER_VARS_INFERER(unzipGradNoNeedBufferVarInferer, "X");

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(unzip,
ops::unzipOp,
ops::unzipOpMaker,
ops::unzipGradOpMaker<paddle::framework::OpDesc>,
ops::unzipGradOpMaker<paddle::imperative::OpBase>,
ops::unzipNoNeedBufferVarInferer);

REGISTER_OPERATOR(unzip_grad,
ops::unzipGradientOp,
ops::unzipGradNoNeedBufferVarInferer);

REGISTER_OP_CPU_KERNEL(unzip, ops::unzipOpKernel<int64_t>);

REGISTER_OP_CPU_KERNEL(unzip_grad, ops::unzipGradOpKernel<int64_t>);
