Skip to content

Commit

Permalink
[Phi] Remove shared_storage (PaddlePaddle#42821)
Browse files Browse the repository at this point in the history
* remove shared_storage

* fix bug

* fix rnn bug
  • Loading branch information
zyfncg authored and Yaocheng committed May 19, 2022
1 parent 845b437 commit 3fa0183
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 181 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,7 @@ void ScaleAPI(const paddle::experimental::Tensor& x, float scale, float bias,
size_t bytes_size =
phi::product(dense_tensor->dims()) * SizeOf(dense_tensor->dtype());
auto dense_out = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size)),
std::move(tensor_meta));
paddle::memory::Alloc(place, bytes_size), std::move(tensor_meta));
// Handle Device Context
const paddle::platform::Place& expected_kernel_place =
Controller::Instance().GetExpectedPlace();
Expand Down
4 changes: 1 addition & 3 deletions paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ paddle::experimental::Tensor hook_function(
auto place = t_dense->place();
size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));

float* t_ptr = t_dense->mutable_data<float>(place);
float* ret_ptr = ret_dense->mutable_data<float>(place);
Expand Down
4 changes: 1 addition & 3 deletions paddle/fluid/eager/tests/task_tests/hook_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ paddle::experimental::Tensor hook_function(
auto place = t_dense->place();
size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));

float* t_ptr = t_dense->mutable_data<float>(place);
float* ret_ptr = ret_dense->mutable_data<float>(place);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ paddle::experimental::Tensor hook_function(
auto place = t_dense->place();
size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));

float* t_ptr = t_dense->mutable_data<float>(place);
float* ret_ptr = ret_dense->mutable_data<float>(place);
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/eager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
} else {
// TODO(dev): we need enhance check for ddims.
dense_tensor = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::make_shared<phi::Allocation>(),
phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
ddims));
}
Expand Down
34 changes: 11 additions & 23 deletions paddle/phi/api/lib/sparse_api_custom_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,10 @@ Tensor to_sparse_coo_impl(const Tensor& x, const int64_t sparse_dim) {

// 5. Prepare outputs
// create empty SparseCooTensor
phi::DenseTensor non_zero_indices(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_key.backend())),
std::move(indices_meta));
phi::DenseTensor non_zero_elements(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_key.backend())),
std::move(elements_meta));
phi::DenseTensor non_zero_indices(std::make_shared<phi::Allocation>(),
std::move(indices_meta));
phi::DenseTensor non_zero_elements(std::make_shared<phi::Allocation>(),
std::move(elements_meta));
auto coo = std::make_shared<phi::SparseCooTensor>(
non_zero_indices, non_zero_elements, x.dims());

Expand Down Expand Up @@ -127,18 +123,12 @@ Tensor to_sparse_csr_impl(const Tensor& x) {

// 5. Prepare outputs
// create empty SparseCsrTensor
phi::DenseTensor non_zero_crows(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_key.backend())),
std::move(crows_meta));
phi::DenseTensor non_zero_cols(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_key.backend())),
std::move(cols_meta));
phi::DenseTensor non_zero_elements(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_key.backend())),
std::move(elements_meta));
phi::DenseTensor non_zero_crows(std::make_shared<phi::Allocation>(),
std::move(crows_meta));
phi::DenseTensor non_zero_cols(std::make_shared<phi::Allocation>(),
std::move(cols_meta));
phi::DenseTensor non_zero_elements(std::make_shared<phi::Allocation>(),
std::move(elements_meta));
auto csr = std::make_shared<phi::SparseCsrTensor>(
non_zero_crows, non_zero_cols, non_zero_elements, x.dims());

Expand Down Expand Up @@ -192,9 +182,7 @@ Tensor to_dense_impl(const Tensor& x) {
// 5. Prepare outputs
// create empty DenseTensor
auto dense_out = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_key.backend())),
std::move(dense_meta));
std::make_shared<phi::Allocation>(), std::move(dense_meta));

kernel_context.EmplaceBackOutput(dense_out.get());
Tensor out;
Expand Down
73 changes: 0 additions & 73 deletions paddle/phi/api/lib/utils/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,79 +65,6 @@ class ExternalStorage : public phi::Storage {
int64_t size_{0};
};

// A non-owning phi::Storage implementation that shares an existing
// paddle::memory::Allocation (OwnsMemory() returns false). It caches the
// allocation's place and size so both stay queryable even after the shared
// data pointer has been cleared or moved out.
class SharedStorage : public phi::Storage {
public:
// Wraps an existing allocation; CHECK rejects a null allocation here
// (use the place-only constructor below for an uninitialized storage).
explicit SharedStorage(
const std::shared_ptr<paddle::memory::Allocation>& allocation)
: Storage(allocation) {
CHECK(allocation);
place_ = allocation->place();
size_ = allocation->size();
}

// In order to be compatible with the original Tensor design and execution
// system, we need to allow the uninitialized SharedStorage to exist,
// and it can be removed after the compatibility phase is over in the future
explicit SharedStorage(const phi::Place& place) { place_ = place; }

// Drops any current allocation, then acquires a fresh shared allocation of
// n bytes at the cached place.
void Realloc(size_t n) override {
this->Clear();
data_ = paddle::memory::AllocShared(place(), n);
size_ = n;
}

static const char* name() { return "SharedStorage"; }

// Releases the reference to the shared allocation and zeroes the cached
// size; place_ is deliberately kept so place() still answers afterwards.
void Clear() override {
data_ = nullptr;
size_ = 0;
}

// Re-points this storage at `holder`, refreshing the cached size/place.
// A null holder leaves the previously cached values untouched.
void set_data_shared(
const std::shared_ptr<paddle::memory::Allocation>& holder) override {
data_ = holder;
if (holder) {
size_ = holder->size();
place_ = holder->place();
}
}

// Transfers the shared allocation out of this storage, resetting the
// cached size and place to their defaults before handing it over.
std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
size_ = 0;
place_ = phi::Place();
return std::move(data_);
}

// Prefer the live allocation's size/place; fall back to the cached copies
// when no allocation is currently held (uninitialized/cleared state).
size_t size() const noexcept override {
return data_ ? data_->size() : size_;
}
const phi::Place& place() const override {
return data_ ? data_->place() : place_;
}
// Shared (non-owning) semantics: the allocation's lifetime is managed by
// whoever created it, never by this storage.
bool OwnsMemory() const noexcept override { return false; }

const std::shared_ptr<paddle::memory::Allocation>& GetAllocation() {
return data_;
}

// Temporary method: For compatible with fluid Tensor and improve performance
void ResetAllocation(std::shared_ptr<paddle::memory::Allocation> allocation) {
data_ = allocation;
size_ = allocation->size();
place_ = allocation->place();
}

// Temporary method: For compatible with fluid Tensor and improve performance
void ResetAllocationPlace(const phi::Place& place) { place_ = place; }

// Temporary method: For compatible with fluid Tensor and improve performance
void Reset() { this->Clear(); }

private:
// Cached copies of the allocation's place and byte size, consulted when no
// allocation is held. NOTE(review): data_ itself is presumably a member of
// the phi::Storage base class — confirm in the Storage declaration.
phi::Place place_;
int64_t size_{0};
};

class TensorStorage : public paddle::memory::allocation::Allocation {
public:
explicit TensorStorage(phi::intrusive_ptr<phi::Storage> storage)
Expand Down
7 changes: 2 additions & 5 deletions paddle/phi/kernels/cpu/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ void GetShuffledInput(const DeviceContext& dev_ctx,
std::vector<int> perm_axis(input.dims().size());
GetShuffledDim(input.dims(), &shuffled_dims, dims, &perm_axis);

shuffled_input->ResizeAndAllocate(shuffled_dims);
shuffled_input->Resize(shuffled_dims);
dev_ctx.template Alloc<OutT>(shuffled_input);

phi::funcs::TransposeNormal<DeviceContext, OutT> trans;
Expand All @@ -132,10 +132,7 @@ void HandleLargeDim(const DeviceContext& dev_ctx,
const std::vector<int64_t>& dims,
bool keep_dim) {
// shuffle the reduced dim to the end
phi::DenseTensor shuffled_input = phi::DenseTensor(
phi::make_intrusive<paddle::experimental::SharedStorage>(input.place()),
input.meta());

phi::DenseTensor shuffled_input;
GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &shuffled_input, dims);

// transpose to 2D tensor whose shape is {unreduced, reduced}.
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/rnn_functor.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class RNNDescriptors {

// ------------------- cudnn dropout descriptors ---------------------
size_t state_size;
bool is_initialized = dropout_state->IsInitialized();
bool is_initialized = dropout_state->initialized();
if (!is_test_ && !is_initialized) {
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(
Expand Down
29 changes: 11 additions & 18 deletions paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,24 +171,17 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];

const auto place = dev_ctx.GetPlace();
DenseTensorMeta crows_meta(
DataType::INT64, {batchs * (rows + 1)}, DataLayout::NCHW);
DenseTensorMeta cols_meta(DataType::INT64, {non_zero_num}, DataLayout::NCHW);
DenseTensorMeta values_meta(
x.dtype(), {non_zero_num}, x.non_zero_elements().layout());
phi::DenseTensor non_zero_crows(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(crows_meta));
phi::DenseTensor non_zero_cols(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(cols_meta));
phi::DenseTensor non_zero_elements(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(values_meta));
int64_t* csr_crows_data = non_zero_crows.mutable_data<int64_t>(place);
int64_t* csr_cols_data = non_zero_cols.mutable_data<int64_t>(place);
T* csr_values_data = non_zero_elements.mutable_data<T>(place);
phi::DenseTensor non_zero_crows;
non_zero_crows.Resize({batchs * (rows + 1)});
int64_t* csr_crows_data = dev_ctx.template Alloc<int64_t>(&non_zero_crows);

phi::DenseTensor non_zero_cols;
non_zero_cols.Resize({non_zero_num});
int64_t* csr_cols_data = dev_ctx.template Alloc<int64_t>(&non_zero_cols);

phi::DenseTensor non_zero_elements;
non_zero_elements.Resize({non_zero_num});
T* csr_values_data = dev_ctx.template Alloc<T>(&non_zero_elements);

const auto& coo_indices = x.non_zero_indices();
const auto& coo_values = x.non_zero_elements();
Expand Down
51 changes: 15 additions & 36 deletions paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -173,20 +173,12 @@ void DenseToSparseCooKernel(const Context& dev_ctx,

const auto values_dims =
phi::funcs::sparse::InferDenseDims(x_dims, sparse_dim, non_zero_num);
DenseTensorMeta indices_meta(DataType::INT64,
{sparse_dim, static_cast<int64_t>(non_zero_num)},
DataLayout::NCHW);
DenseTensorMeta values_meta(x.meta().dtype, values_dims, x.meta().layout);
phi::DenseTensor indices(
phi::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(indices_meta));
phi::DenseTensor values(
phi::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(values_meta));
int64_t* indices_data = indices.mutable_data<int64_t>(place);
T* sparse_data = values.mutable_data<T>(place);
phi::DenseTensor indices = phi::Empty<int64_t>(
dev_ctx, {sparse_dim, static_cast<int64_t>(non_zero_num)});
int64_t* indices_data = indices.data<int64_t>();
phi::DenseTensor values;
values.Resize(values_dims);
T* sparse_data = dev_ctx.template Alloc<T>(&values);

// 3. calc indices by indexs and get values by indexs
config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num, 1);
Expand Down Expand Up @@ -382,24 +374,13 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];

const auto place = dev_ctx.GetPlace();
DenseTensorMeta crows_meta(
DataType::INT64, {batchs * (rows + 1)}, DataLayout::NCHW);
DenseTensorMeta cols_meta(DataType::INT64, {non_zero_num}, DataLayout::NCHW);
DenseTensorMeta values_meta(
x.dtype(), {non_zero_num}, x.non_zero_elements().layout());
phi::DenseTensor non_zero_crows(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(crows_meta));
phi::DenseTensor non_zero_cols(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(cols_meta));
phi::DenseTensor non_zero_elements(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(values_meta));
int64_t* csr_crows_data = non_zero_crows.mutable_data<int64_t>(place);
int64_t* csr_cols_data = non_zero_cols.mutable_data<int64_t>(place);
T* csr_values_data = non_zero_elements.mutable_data<T>(place);
phi::DenseTensor non_zero_crows =
phi::Empty<int64_t>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor non_zero_cols = phi::Empty<int64_t>(dev_ctx, {non_zero_num});
phi::DenseTensor non_zero_elements = phi::Empty<T>(dev_ctx, {non_zero_num});
int64_t* csr_crows_data = non_zero_crows.data<int64_t>();
int64_t* csr_cols_data = non_zero_cols.data<int64_t>();
T* csr_values_data = non_zero_elements.data<T>();

const auto& coo_indices = x.non_zero_indices();
const auto& coo_values = x.non_zero_elements();
Expand All @@ -416,10 +397,8 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, batchs, 1);
if (batchs > 1) {
DenseTensorMeta batchs_meta(DataType::INT64, {batchs}, DataLayout::NCHW);
phi::DenseTensor batchs_offset(
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
std::move(batchs_meta));
int64_t* batchs_offset_ptr = batchs_offset.mutable_data<int64_t>(place);
phi::DenseTensor batchs_offset = phi::Empty<int64_t>(dev_ctx, {batchs});
int64_t* batchs_offset_ptr = batchs_offset.data<int64_t>();
GetBatchsOffset<<<config.block_per_grid.x,
config.thread_per_block.x,
0,
Expand Down
10 changes: 2 additions & 8 deletions paddle/phi/tests/api/scale_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
kernel_context.EmplaceBackAttr(bias);
kernel_context.EmplaceBackAttr(bias_after_scale);

auto dense_out = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_backend)),
phi::DenseTensorMeta());
auto dense_out = std::make_shared<phi::DenseTensor>();
phi::MetaTensor meta_out(dense_out.get());
phi::UnchangedInferMeta(*dense_x, &meta_out);
kernel_context.EmplaceBackOutput(dense_out.get());
Expand Down Expand Up @@ -236,10 +233,7 @@ Tensor scale_switch_case(const Tensor& x,

auto dense_x = std::dynamic_pointer_cast<phi::DenseTensor>(x.impl());

auto dense_out = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(kernel_backend)),
phi::DenseTensorMeta());
auto dense_out = std::make_shared<phi::DenseTensor>();
phi::MetaTensor meta_out(dense_out.get());
phi::UnchangedInferMeta(*dense_x, &meta_out);

Expand Down
5 changes: 1 addition & 4 deletions paddle/phi/tests/core/test_custom_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,7 @@ TEST(CustomKernel, custom_kernel_dot) {
kernel_context.EmplaceBackAttr(fake_attr_int64_vec);
kernel_context.EmplaceBackAttr(fake_attr_int_vec);

auto dense_out = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(backend)),
phi::DenseTensorMeta());
auto dense_out = std::make_shared<phi::DenseTensor>();

phi::MetaTensor meta_out(dense_out.get());
phi::DotInferMeta(*dense_x, *dense_y, &meta_out);
Expand Down

0 comments on commit 3fa0183

Please sign in to comment.