Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/copytensor #5455

Merged
merged 19 commits into from
Nov 26, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions paddle/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)

# cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place paddle_memory device_context)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No commented out codes please.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
# cc_library(tensor_util SRCS tensor_util.cc DEPS tensor)

cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor)

cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)

cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto)
Expand Down
16 changes: 8 additions & 8 deletions paddle/framework/lod_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <glog/logging.h>
#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/framework/tensor_util.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"

Expand Down Expand Up @@ -56,8 +57,6 @@ using Vector = thrust::host_vector<
*/
using LoD = std::vector<Vector<size_t>>;

std::ostream& operator<<(std::ostream& os, const LoD& lod);

/*
* Slice levels from a LoD.
* NOTE the lowest level should always be the absolute offsets of the underlying
Expand Down Expand Up @@ -175,18 +174,19 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1);
for (size_t ins = 0; ins < num_instances; ins++) {
for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
tensor.Slice(elem, elem + 1)
.CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(),
platform::CPUDeviceContext());
auto t = tensor.Slice(elem, elem + 1);
CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(),
platform::CPUDeviceContext(), &t);
}
}
return tensor;
}

std::pair<LoD, std::pair<size_t, size_t>> GetSubLoDAndAbsoluteOffset(
const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level);
void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
std::vector<std::vector<size_t>>* lod_length,
size_t* start_offset);

void AppendLoD(LoD* lod, const LoD& lod_length);
void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length);

} // namespace framework
} // namespace paddle
29 changes: 0 additions & 29 deletions paddle/framework/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,34 +89,6 @@ class Tensor {
/*! The internal of two tensors share the same memory block. */
inline Tensor& ShareDataWith(const Tensor& src);

/**
* @brief Copy the content of external tensor to a new place.
*
* @param[in] src The external tensor.
* @param[in] dst_place The dst place.
* @param[in] ctx The device context contains device resources.
*
* @note CopyFrom supports CPU <-> GPU, GPU <-> GPU.
*/
// TODO(qijun): https://github.com/PaddlePaddle/Paddle/issues/4647
// Remove `CopyFrom` and `CopyFromVector` from Tensor interface
// and make them global functions
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx);

/**
* @brief Copy the content of an external vector to a tensor.
*
* @param[in] src The external tensor.
* @param[in] ctx The device context contains device resources.
*
* * @note CopyFromVector assumes that the tensor has been resized
* before invoking.
*/
template <typename T>
inline void CopyFromVector(const std::vector<T>& src,
const platform::DeviceContext& ctx);

/**
* @brief Return a sub-tensor of the given tensor.
*
Expand All @@ -141,7 +113,6 @@ class Tensor {

size_t memory_size() const;

private:
inline void check_memory_size() const;

private:
Expand Down
78 changes: 0 additions & 78 deletions paddle/framework/tensor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,84 +150,6 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
return *this;
}

inline void Tensor::CopyFrom(const Tensor& src,
const platform::Place& dst_place,
const platform::DeviceContext& ctx) {
src.check_memory_size();
Resize(src.dims());

auto src_place = src.holder_->place();
auto src_ptr = src.data<void>();

auto dst_ptr = mutable_data(dst_place, src.type());

auto size = src.numel() * SizeOfType(src.type());

if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) &&
platform::is_cpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
memory::Copy(
dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
memory::Copy(
dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#endif
}

template <typename T>
inline void Tensor::CopyFromVector(const std::vector<T>& src,
const platform::DeviceContext& ctx) {
auto dst_place = ctx.GetPlace();
auto src_ptr = static_cast<const void*>(src.data());
platform::CPUPlace src_place;
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
auto size = src.size() * sizeof(T);

if (platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, src_place,
src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(dst_place)) {
memory::Copy(
boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place, src_ptr,
size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#endif
}

inline Tensor Tensor::Slice(int begin_idx, int end_idx) const {
check_memory_size();
PADDLE_ENFORCE_GE(begin_idx, 0,
Expand Down
172 changes: 0 additions & 172 deletions paddle/framework/tensor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,178 +188,6 @@ TEST(Tensor, Slice) {
#endif
}

TEST(Tensor, CopyFrom) {
using namespace paddle::framework;
using namespace paddle::platform;
{
Tensor src_tensor;
Tensor dst_tensor;
CPUDeviceContext cpu_ctx((CPUPlace()));

int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());

int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));

auto cpu_place = new paddle::platform::CPUPlace();
dst_tensor.CopyFrom(src_tensor, *cpu_place, cpu_ctx);

const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}

Tensor slice_tensor = src_tensor.Slice(1, 2);
dst_tensor.CopyFrom(slice_tensor, *cpu_place, cpu_ctx);
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
}
#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
Tensor gpu_tensor;
Tensor dst_tensor;

int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());

int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));

// CPU Tensor to GPU Tensor
auto gpu_place = new paddle::platform::GPUPlace(0);
CUDADeviceContext gpu_ctx(*gpu_place);
gpu_tensor.CopyFrom(src_tensor, *gpu_place, gpu_ctx);

// GPU Tensor to CPU Tensor
auto cpu_place = new paddle::platform::CPUPlace();
dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx);

// Sync before Compare Tensors
gpu_ctx.Wait();
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}

Tensor slice_tensor = src_tensor.Slice(1, 2);

// CPU Slice Tensor to GPU Tensor
gpu_tensor.CopyFrom(slice_tensor, *gpu_place, gpu_ctx);

// GPU Tensor to CPU Tensor
dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx);

// Sync before Compare Slice Tensors
gpu_ctx.Wait();
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
}
#endif
}

TEST(Tensor, CopyFromVector) {
using namespace paddle::framework;
using namespace paddle::platform;
{
std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
Tensor cpu_tensor;

// Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace();
CPUDeviceContext cpu_ctx(*cpu_place);
cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);

// Compare Tensors
const int* cpu_ptr = cpu_tensor.data<int>();
const int* src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
}

src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
cpu_tensor.Resize(make_ddim({2, 2}));
cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);
cpu_ptr = cpu_tensor.data<int>();
src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr);
for (size_t i = 0; i < 5; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
}

delete cpu_place;
}

#ifdef PADDLE_WITH_CUDA
{
std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
Tensor cpu_tensor;
Tensor gpu_tensor;
Tensor dst_tensor;

// Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace();
CPUDeviceContext cpu_ctx(*cpu_place);
cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);

// Copy to GPUTensor
gpu_tensor.Resize(make_ddim({3, 3}));
auto gpu_place = new paddle::platform::GPUPlace();
CUDADeviceContext gpu_ctx(*gpu_place);
gpu_tensor.CopyFromVector<int>(src_vec, gpu_ctx);
// Copy from GPU to CPU tensor for comparison
dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx);

// Sync before Compare Tensors
gpu_ctx.Wait();
const int* src_ptr = src_vec.data();
const int* cpu_ptr = cpu_tensor.data<int>();
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, cpu_ptr);
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}

src_vec.erase(src_vec.begin(), src_vec.begin() + 5);

cpu_tensor.Resize(make_ddim({2, 2}));
cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);
gpu_tensor.Resize(make_ddim({2, 2}));
gpu_tensor.CopyFromVector<int>(src_vec, gpu_ctx);
dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx);

// Sync before Compare Tensors
gpu_ctx.Wait();
src_ptr = src_vec.data();
cpu_ptr = cpu_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, cpu_ptr);
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 5; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}

delete cpu_place;
delete gpu_place;
}
#endif
}

TEST(Tensor, ReshapeToMatrix) {
using namespace paddle::framework;
using namespace paddle::platform;
Expand Down
Loading