Memory CPU allocator #2596

Merged 23 commits on Jun 28, 2017 (diff shown from the first 18 commits)

Commits
84d1c73
add paddle/memory/detail/cpu_allocator*
wangkuiyi Jun 25, 2017
67481ca
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
wangkuiyi Jun 25, 2017
db128c4
Pass cpu_allocator_test
wangkuiyi Jun 26, 2017
ce938ae
FIX: Pinned memory
gangliao Jun 26, 2017
ce70df8
Add gpu_allocator
gangliao Jun 26, 2017
e02859c
Replace {cpu,gpu}_allocator.h and {cpu,gpu}_allocator_test.cc by syst…
Jun 26, 2017
f7530e8
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
Jun 27, 2017
6250d10
FIX: clang-format
gangliao Jun 27, 2017
f329454
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
gangliao Jun 27, 2017
f149d18
Add system_allocator
Jun 27, 2017
cd16192
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
Jun 27, 2017
e14e687
Resolve conflict
Jun 27, 2017
09d9794
Merge remote-tracking branch 'wangkuiyi/memory_cpu_allocator' into cp…
gangliao Jun 27, 2017
dd08d33
FIX: fix cmake type error
gangliao Jun 27, 2017
dde0da9
ENH: Add cuda.h in platform
gangliao Jun 27, 2017
29c7512
FIX: fix memory.h/cc
gangliao Jun 27, 2017
b22dd12
ENH: Add buddy allocator draft
gangliao Jun 27, 2017
79373da
TEST: Add test for system allocator and deleter
gangliao Jun 27, 2017
b8f5922
Make CPUAllocator and GPUAllocator subclasses of SystemAllocator
Jun 27, 2017
3e087f7
Add buddy_allocator.cc and system_allocator.cc
Jun 27, 2017
55648b4
Merge remote-tracking branch 'wangkuiyi/memory_cpu_allocator' into cp…
gangliao Jun 28, 2017
3e9aa7f
FIX: Pass CI
gangliao Jun 28, 2017
9490d24
ENH: clang-format
gangliao Jun 28, 2017
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -27,6 +27,7 @@ if(NOT CMAKE_CROSSCOMPILING)
endif(NOT CMAKE_CROSSCOMPILING)
find_package(Git REQUIRED)
find_package(Threads REQUIRED)
find_package(Boost QUIET)

include(simd)

@@ -109,6 +110,7 @@ include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient")
include_directories(${Boost_INCLUDE_DIRS})

set(EXTERNAL_LIBS
${GFLAGS_LIBRARIES}
4 changes: 4 additions & 0 deletions cmake/generic.cmake
@@ -78,6 +78,10 @@
#
# cc_test(example_test SRCS example_test.cc DEPS example glog gflags)

if(WITH_GPU)
add_definitions(-DPADDLE_WITH_GPU)
Review (Contributor): Why do we need a new flag PADDLE_WITH_GPU? I think it is a duplicate.

Reply (Author): Do you mean PADDLE_ONLY_CPU? OK, switching to use it.

endif()

if(NOT APPLE)
find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT})
9 changes: 1 addition & 8 deletions paddle/CMakeLists.txt
@@ -11,15 +11,8 @@ add_subdirectory(scripts)
add_subdirectory(optimizer)
add_subdirectory(strings)

# Do not build go directory until go cmake is working smoothly.
# if(CMAKE_Go_COMPILER)
# add_subdirectory(go)
# endif()

find_package(Boost QUIET)

if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
add_subdirectory(memory)
add_subdirectory(platform)
add_subdirectory(framework)
endif()
5 changes: 5 additions & 0 deletions paddle/memory/.clang-format
@@ -0,0 +1,5 @@
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
7 changes: 7 additions & 0 deletions paddle/memory/CMakeLists.txt
@@ -0,0 +1,7 @@
add_subdirectory(detail)

if(${WITH_GPU})
nv_library(memory SRCS memory.cc)
else(${WITH_GPU})
cc_library(memory SRCS memory.cc)
endif(${WITH_GPU})
1 change: 1 addition & 0 deletions paddle/memory/README.md
@@ -97,6 +97,7 @@ class BuddyAllocator {
struct Block {
size_t size;
Block *left, *right;
size_t index; // allocator id
Review (Contributor): Different allocators have different malloc and free methods.

};
...
};
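The reviewer's point is that a freed block must be routed back to the allocator that produced it, which is what the `index` field in the README's `Block` struct enables. A minimal standalone sketch of that idea (hypothetical names, not the PR's implementation) tags each allocation with an allocator id in a header placed just before the payload:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <vector>

// Each block records the index of the allocator that produced it,
// so the free path can dispatch to the matching free method.
struct BlockHeader {
  size_t size;
  size_t index;  // allocator id, as in the README's Block struct
};

// A registry of (alloc, free) method pairs, one per allocator kind.
struct AllocatorVTable {
  void* (*alloc)(size_t);
  void (*free)(void*, size_t);
};

static std::vector<AllocatorVTable> g_allocators = {
    {[](size_t n) { return std::malloc(n); },
     [](void* p, size_t) { std::free(p); }},
};

void* TaggedAlloc(size_t index, size_t size) {
  // Over-allocate so the header sits directly in front of the payload.
  auto* h = static_cast<BlockHeader*>(
      g_allocators[index].alloc(sizeof(BlockHeader) + size));
  if (h == nullptr) return nullptr;
  h->size = size;
  h->index = index;
  return h + 1;  // payload starts right after the header
}

void TaggedFree(void* payload) {
  if (payload == nullptr) return;
  BlockHeader* h = static_cast<BlockHeader*>(payload) - 1;
  // The stored index selects the correct free method.
  g_allocators[h->index].free(h, sizeof(BlockHeader) + h->size);
}
```

With this layout, `Free(void*)` needs no size or allocator argument; both are recovered from the header, which matches the design discussed in the PR.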
5 changes: 5 additions & 0 deletions paddle/memory/detail/CMakeLists.txt
@@ -0,0 +1,5 @@
if(${WITH_GPU})
Review (Contributor): It's unnecessary to wrap nv_test in if(${WITH_GPU}); the internal implementation of nv_test already handles this.

Reply (Author): nv_test was designed to handle *.cu files, but this is a .cc file because it doesn't contain CUDA code. However, the source calls cudaMallocHost, which is defined in the CUDA libraries, and we don't have an external/cuda.cmake.

I think a complete solution here would be a single line:

cc_library(cpu_allocator_test SRCS cpu_allocator_test.cc DEPS cuda cudart)

Actually, I tried and succeeded in adding cmake/external/cuda.cmake, which defines two CMake targets -- cuda and cudart -- but the build of the cpu_allocator_test target then complains that it cannot find libpthread.

Reply (Contributor): I will try this.

nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
else(${WITH_GPU})
cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS gflags glog)
endif(${WITH_GPU})
79 changes: 79 additions & 0 deletions paddle/memory/detail/buddy_allocator.h
@@ -0,0 +1,79 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <mutex>

#include "paddle/memory/detail/system_allocator.h"

namespace paddle {
namespace memory {
namespace detail {

template<typename Allocator>
class BuddyAllocator {
public:
// TODO(gangliao): This is a draft, add Buddy Allocator Algorithm soon
BuddyAllocator() {}
~BuddyAllocator() {}

public:
void* Alloc(size_t size) {
return Allocator::Alloc(size);
}
void Free(void*) {
// All metadata, including the size, is stored in the block
// header, so adding a `size` parameter to `Free(void*)` would
// be redundant.
}
size_t Used();

public:
BuddyAllocator(const BuddyAllocator&) = delete;
BuddyAllocator& operator=(const BuddyAllocator&) = delete;

private:
size_t min_alloc_size_;
size_t max_alloc_size_;

private:
std::mutex mutex_;
};

BuddyAllocator<CPUAllocator>* GetCPUBuddyAllocator() {
static BuddyAllocator<CPUAllocator>* a = nullptr;
if (a == nullptr) {
a = new BuddyAllocator<CPUAllocator>();
}
return a;
}

#ifndef PADDLE_ONLY_CPU // The following code are for CUDA.

BuddyAllocator<GPUAllocator>* GetGPUBuddyAllocator(int gpu_id) {
static BuddyAllocator<GPUAllocator>** as = NULL;
if (as == NULL) {
int gpu_num = platform::GetDeviceCount();
as = new BuddyAllocator<GPUAllocator>*[gpu_num];
for (int gpu = 0; gpu < gpu_num; gpu++) {
as[gpu] = new BuddyAllocator<GPUAllocator>();
}
}
return as[gpu_id];
}

#endif // PADDLE_ONLY_CPU

} // namespace detail
} // namespace memory
} // namespace paddle
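The BuddyAllocator above is explicitly a draft that forwards straight to the underlying system allocator. For reference, the core of a buddy system — rounding requests up to powers of two, splitting larger blocks, and merging freed buddies — can be sketched as follows. This is a hypothetical, self-contained toy over offsets in a fixed arena, not the PR's eventual implementation:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <set>

// Toy buddy bookkeeping over a fixed-size arena. Offsets (not real
// pointers) keep the example self-contained. Blocks are aligned to
// their own size, so a block's buddy differs in exactly one bit.
class ToyBuddy {
 public:
  explicit ToyBuddy(size_t arena_size) : arena_size_(arena_size) {
    free_[arena_size].insert(0);  // one free block covering the arena
  }

  // Returns the offset of an allocated block, or SIZE_MAX on failure.
  size_t Alloc(size_t size) {
    size_t need = RoundUp(size);
    // Find the smallest free block that fits.
    for (size_t sz = need; sz <= arena_size_; sz *= 2) {
      auto it = free_.find(sz);
      if (it == free_.end() || it->second.empty()) continue;
      size_t off = *it->second.begin();
      it->second.erase(it->second.begin());
      // Split repeatedly, returning each right half to the free lists.
      while (sz > need) {
        sz /= 2;
        free_[sz].insert(off + sz);
      }
      allocated_[off] = need;
      return off;
    }
    return SIZE_MAX;
  }

  void Free(size_t off) {
    size_t sz = allocated_.at(off);
    allocated_.erase(off);
    // Merge with the buddy while it is free, doubling the block.
    while (sz < arena_size_) {
      size_t buddy = off ^ sz;  // buddy offset differs in one bit
      auto it = free_.find(sz);
      if (it == free_.end() || !it->second.count(buddy)) break;
      it->second.erase(buddy);
      off = std::min(off, buddy);
      sz *= 2;
    }
    free_[sz].insert(off);
  }

 private:
  static size_t RoundUp(size_t n) {
    size_t p = 1;
    while (p < n) p *= 2;
    return p;
  }
  size_t arena_size_;
  std::map<size_t, std::set<size_t>> free_;  // block size -> free offsets
  std::map<size_t, size_t> allocated_;       // offset -> rounded size
};
```

After both halves of a split block are freed, they merge back, so the whole arena becomes allocatable again — the invariant a real BuddyAllocator::Free must maintain.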
91 changes: 91 additions & 0 deletions paddle/memory/detail/system_allocator.h
@@ -0,0 +1,91 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <stddef.h> // for size_t
#include <sys/mman.h> // for mlock and munlock
#include <cstdlib> // for malloc and free

#include <gflags/gflags.h>
#include "paddle/platform/assert.h"
#include "paddle/platform/cuda.h"

DEFINE_bool(uses_pinned_memory, false,
"If set, allocate cpu/gpu pinned memory.");

namespace paddle {
namespace memory {
namespace detail {

// If uses_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as a staging area for data
// exchange between host and device. Allocating too much pinned
// memory reduces the amount of memory available to the system for
// paging, so uses_pinned_memory defaults to false.
class CPUAllocator {
public:
static void* Alloc(size_t size) {
void* p = std::malloc(size);
if (p != nullptr && FLAGS_uses_pinned_memory) {
mlock(p, size);
}
return p;
}

static void Free(void* p, size_t size) {
if (p != nullptr && FLAGS_uses_pinned_memory) {
munlock(p, size);
}
std::free(p);
}
};

#ifndef PADDLE_ONLY_CPU // The following code are for CUDA.

// If uses_pinned_memory is true, GPUAllocator calls cudaMallocHost,
// which returns pinned and locked memory as a staging area for data
// exchange between host and device. Allocating too much pinned
// memory reduces the amount of memory available to the system for
// paging, so by default uses_pinned_memory is false.
class GPUAllocator {
public:
static void* Alloc(size_t size) {
void* p = 0;
cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size)
: cudaMalloc(&p, size);
if (result != cudaSuccess) {
cudaGetLastError(); // clear error if there is any.
}
return result == cudaSuccess ? p : nullptr;
}

static void Free(void* p, size_t size) {
// Purposefully allow cudaErrorCudartUnloading, because that is
// returned if you ever call cudaFree after the driver has already
// shut down. This happens only while the process is terminating,
// in which case we don't care whether cudaFree succeeds.
cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
if (err != cudaErrorCudartUnloading) {
platform::throw_on_error(err, "cudaFree{Host} failed");
}
}
};

#endif // PADDLE_ONLY_CPU

} // namespace detail
} // namespace memory
} // namespace paddle
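The CPU pinned-memory path above boils down to malloc followed by mlock, with the mlock result deliberately ignored. A standalone sketch of that pattern (POSIX-only; hypothetical free functions standing in for the PR's static CPUAllocator methods):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <sys/mman.h>  // mlock / munlock (POSIX)

// Minimal stand-in for the pinned-memory path of CPUAllocator.
// The mlock result is ignored on purpose: if locking fails (e.g.
// RLIMIT_MEMLOCK is exceeded), the allocation still succeeds; it
// is simply not pinned.
void* PinnedAlloc(size_t size, bool use_pinned) {
  void* p = std::malloc(size);
  if (p != nullptr && use_pinned) {
    mlock(p, size);
  }
  return p;
}

void PinnedFree(void* p, size_t size, bool use_pinned) {
  if (p != nullptr && use_pinned) {
    munlock(p, size);  // must unlock with the same size before freeing
  }
  std::free(p);
}
```

Note that the caller must pass the original size to the free path so munlock covers the same range — which is why the PR's SystemAllocator interface takes a size in Free, unlike plain free().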
60 changes: 60 additions & 0 deletions paddle/memory/detail/system_allocator_test.cc
@@ -0,0 +1,60 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/memory/detail/system_allocator.h"

#include <memory>
#include <vector>

#include "glog/logging.h"
#include "gtest/gtest.h"

template <typename Allocator>
void TestAllocator(void* p) {
p = Allocator::Alloc(1024);

int* i = static_cast<int*>(p);
std::shared_ptr<int> ptr(i, [](int* p) { Allocator::Free(p, 1024); });
Review (Contributor): @wangkuiyi we can use this method to replace the deleter.

Reply (wangkuiyi, Author, Jun 27, 2017): It is a good idea! But wouldn't a lambda be too lengthy for the callers of Alloc?

Reply (Contributor): Yeah, maybe. But you can name it as follows:

auto deleter = [](int* p) { Allocator::Free(p, 1024); };

int* i = static_cast<int*>(p);
std::shared_ptr<int> ptr(i, deleter);

EXPECT_NE(p, nullptr);
}

TEST(CPUAllocator, NoLockMem) {
void* p = nullptr;
FLAGS_uses_pinned_memory = false;
TestAllocator<paddle::memory::detail::CPUAllocator>(p);
EXPECT_EQ(p, nullptr);
}

TEST(CPUAllocator, LockMem) {
void* p = nullptr;
FLAGS_uses_pinned_memory = true;
TestAllocator<paddle::memory::detail::CPUAllocator>(p);
EXPECT_EQ(p, nullptr);
}

#ifndef PADDLE_ONLY_CPU
TEST(GPUAllocator, NoStaging) {
void* p = nullptr;
FLAGS_uses_pinned_memory = false;
TestAllocator<paddle::memory::detail::GPUAllocator>(p);
EXPECT_EQ(p, nullptr);
}
TEST(GPUAllocator, Staging) {
void* p = nullptr;
FLAGS_uses_pinned_memory = true;
TestAllocator<paddle::memory::detail::GPUAllocator>(p);
EXPECT_EQ(p, nullptr);
}
#endif // PADDLE_ONLY_CPU
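The named-deleter pattern from the review thread works with any allocator that exposes the static Alloc/Free shape used in this PR. A self-contained illustration (MallocAllocator and MakeInt are hypothetical names for this sketch):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <memory>

// Stand-in with the same static Alloc/Free interface as the PR's
// CPUAllocator (illustration only; no pinning).
struct MallocAllocator {
  static void* Alloc(size_t size) { return std::malloc(size); }
  static void Free(void* p, size_t /*size*/) { std::free(p); }
};

template <typename Allocator>
std::shared_ptr<int> MakeInt(size_t size) {
  // Name the lambda, as suggested in the review, so the shared_ptr
  // construction stays short. Capturing `size` lets the deleter pass
  // the allocation size back to Allocator::Free.
  auto deleter = [size](int* p) { Allocator::Free(p, size); };
  int* i = static_cast<int*>(Allocator::Alloc(size));
  return std::shared_ptr<int>(i, deleter);
}
```

When the last shared_ptr copy goes out of scope, the captured deleter runs Allocator::Free with the original size, so ownership and the size bookkeeping travel together.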
59 changes: 59 additions & 0 deletions paddle/memory/memory.cc
@@ -0,0 +1,59 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/memory/memory.h"
#include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h"

#include <boost/variant.hpp>

namespace paddle {
namespace memory {

void* Alloc(platform::Place pl, size_t size) {
#ifndef PADDLE_ONLY_CPU
if (paddle::platform::is_gpu_place(pl)) {
size_t gpu_id = boost::get<platform::GPUPlace>(pl).device;
return detail::GetGPUBuddyAllocator(gpu_id)->Alloc(size);
}
#endif // PADDLE_ONLY_CPU
PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
return detail::GetCPUBuddyAllocator()->Alloc(size);
}

void Free(paddle::platform::Place pl, void* p) {
#ifndef PADDLE_ONLY_CPU
if (paddle::platform::is_gpu_place(pl)) {
size_t gpu_id = boost::get<platform::GPUPlace>(pl).device;
detail::GetGPUBuddyAllocator(gpu_id)->Free(p);
return;  // don't fall through to the CPU branch below
}
#endif // PADDLE_ONLY_CPU
PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
detail::GetCPUBuddyAllocator()->Free(p);
}

size_t Used(paddle::platform::Place pl) {
#ifndef PADDLE_ONLY_CPU
if (paddle::platform::is_gpu_place(pl)) {
size_t gpu_id = boost::get<platform::GPUPlace>(pl).device;
return detail::GetGPUBuddyAllocator(gpu_id)->Used();
}
#endif // PADDLE_ONLY_CPU
PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
return detail::GetCPUBuddyAllocator()->Used();
}

} // namespace memory
} // namespace paddle
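The Alloc/Free/Used trio above dispatches on the Place variant: test the alternative with is_gpu_place, then extract the device id with get<> before delegating to the per-device allocator. A minimal standalone model of that dispatch, using std::variant as a stand-in for boost::variant (CpuPlace, GpuPlace, and ToyAlloc are hypothetical names):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <variant>

// Toy stand-ins for the PR's platform::CPUPlace / platform::GPUPlace.
struct CpuPlace {};
struct GpuPlace { int device; };
using Place = std::variant<CpuPlace, GpuPlace>;

bool IsGpuPlace(const Place& pl) {
  return std::holds_alternative<GpuPlace>(pl);
}

// Mirrors the shape of memory::Alloc: branch on the place, then
// extract the device id with get<> before delegating.
void* ToyAlloc(const Place& pl, size_t size) {
  if (IsGpuPlace(pl)) {
    int gpu_id = std::get<GpuPlace>(pl).device;
    (void)gpu_id;  // a real implementation picks a per-device allocator
  }
  return std::malloc(size);  // CPU path stands in for both here
}
```

This is why memory.cc includes boost/variant.hpp: get<platform::GPUPlace>(pl) is what recovers the .device field from the type-erased Place.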
27 changes: 27 additions & 0 deletions paddle/memory/memory.h
@@ -0,0 +1,27 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/platform/place.h"

namespace paddle {
namespace memory {

void* Alloc(paddle::platform::Place, size_t);
void Free(paddle::platform::Place, void*);
size_t Used(paddle::platform::Place);

} // namespace memory
} // namespace paddle