Refactor Device to not depend on Backend. (pytorch#10478)

Summary:
Pull Request resolved: pytorch#10478

- Removed Backend constructor from Device, and fixed all
  use-sites to use DeviceType::CPU instead of kCPU, or
  use a new function backendToDeviceType to perform
  the conversion.
- New method device_type() on Type; it gives you the
  underlying device type, e.g., CPU for SparseCPU.
- We add backward compatibility for kCPU/kCUDA uses
  by introducing a new special type which is implicitly
  convertible to both DeviceType and Backend.  As long as
  you don't define a function that's overloaded on both
  DeviceType and Backend (but not on BackendOrDeviceType),
  the implicit conversions will ensure that uses
  of at::Device(at::kCPU) keep working. We fixed use-sites in
  the library, but did NOT fix sites in the test code, so that
  we can exercise this BC code. (A sketch of this shim follows
  below.)
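  (Illustration, not part of the diff: the shim's definition is not
  visible in the hunks shown below, so the following is only a minimal
  sketch of what an implicitly convertible BackendOrDeviceType could
  look like; the details are assumed, not taken from the PR.)

      #include <ATen/Backend.h>

      namespace at {
      // Sketch of the BC shim: constructible from either enum and
      // convertible to either enum, so at::Device(at::kCPU) resolves
      // whether the callee wants a DeviceType or a Backend.
      struct BackendOrDeviceType {
        /* implicit */ BackendOrDeviceType(Backend backend)
            : device_type_(backendToDeviceType(backend)) {}
        /* implicit */ BackendOrDeviceType(DeviceType device_type)
            : device_type_(device_type) {}
        operator DeviceType() const { return device_type_; }
        operator Backend() const { return deviceTypeToBackend(device_type_); }
       private:
        DeviceType device_type_;
      };
      } // namespace at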

Reviewed By: Yangqing

Differential Revision: D9301861

fbshipit-source-id: 9a9d88620500715c7b37e655b4fd761f6dd72716
ezyang authored and facebook-github-bot committed Aug 19, 2018
1 parent f1420ad commit 6bdbad9
Showing 79 changed files with 290 additions and 202 deletions.
2 changes: 1 addition & 1 deletion aten/src/ATen/Allocator.h
@@ -23,7 +23,7 @@ class DataPtr {
 public:
   // Choice of CPU here is arbitrary; if there's an "undefined" device
   // we could use that too
-  DataPtr() : ptr_(), device_(kCPU) {}
+  DataPtr() : ptr_(), device_(DeviceType::CPU) {}
   DataPtr(void* data, Device device)
     : ptr_(data), device_(device) {}
   DataPtr(void* data, void* ctx, DeleterFnPtr ctx_deleter, Device device)
82 changes: 77 additions & 5 deletions aten/src/ATen/Backend.h
@@ -3,18 +3,25 @@
 #include <ATen/core/TensorTypeId.h>
 #include <ATen/core/TensorTypeIdRegistration.h>
 #include <ATen/core/Error.h>
+#include <ATen/core/DeviceType.h>
 
 #include <stdexcept>
 
 namespace at {
 
+/**
+ * This legacy enum class defines the set of backends supported by
+ * old school, code generated Type-based ATen. The reason we are
+ * sunsetting this enum class is because it doesn't allow for
+ * open registration of backends. TensorTypeId is the replacement
+ * for Backend which supports open registration.
+ *
+ * ARE YOU SURE YOU WANT TO USE THIS TYPE? Think about if SparseCPU/SparseCUDA
+ * would make sense in your use case. If it doesn't make sense, maybe
+ * you want DeviceType.
+ */
 enum class Backend { CPU, CUDA, SparseCPU, SparseCUDA, Undefined, NumOptions };
 
-constexpr Backend kCPU = Backend::CPU;
-constexpr Backend kCUDA = Backend::CUDA;
-constexpr Backend kSparseCPU = Backend::SparseCPU;
-constexpr Backend kSparseCUDA = Backend::SparseCUDA;
-
 static inline Backend toSparse(Backend b) {
   switch (b) {
     case Backend::CPU:
@@ -78,6 +85,71 @@ static inline TensorTypeId backendToTensorTypeId(Backend b) {
   }
 }
 
+static inline DeviceType backendToDeviceType(Backend b) {
+  switch (b) {
+    case Backend::CPU:
+      return DeviceType::CPU;
+    case Backend::CUDA:
+      return DeviceType::CUDA;
+    case Backend::SparseCPU:
+      return DeviceType::CPU;
+    case Backend::SparseCUDA:
+      return DeviceType::CUDA;
+    case Backend::Undefined:
+      AT_ERROR("Undefined backend is not a valid device type");
+    default:
+      AT_ERROR("Unknown backend");
+  }
+}
+
+static inline Backend deviceTypeToBackend(DeviceType d) {
+  switch (d) {
+    case DeviceType::CPU:
+      return Backend::CPU;
+    case DeviceType::CUDA:
+      return Backend::CUDA;
+    default:
+      AT_ERROR("Unknown device type ", d);
+  }
+}
+
+static inline Backend backendToCPU(Backend b) {
+  switch (b) {
+    case Backend::CPU:
+      return Backend::CPU;
+    case Backend::CUDA:
+      return Backend::CPU;
+    case Backend::SparseCPU:
+      return Backend::SparseCPU;
+    case Backend::SparseCUDA:
+      return Backend::SparseCPU;
+    case Backend::Undefined:
+      return Backend::Undefined;
+    default:
+      AT_ERROR("Unknown backend");
+  }
+}
+
+static inline Backend backendToCUDA(Backend b) {
+  switch (b) {
+    case Backend::CPU:
+      return Backend::CUDA;
+    case Backend::CUDA:
+      return Backend::CUDA;
+    case Backend::SparseCPU:
+      return Backend::SparseCUDA;
+    case Backend::SparseCUDA:
+      return Backend::SparseCUDA;
+    case Backend::Undefined:
+      return Backend::Undefined;
+    default:
+      AT_ERROR("Unknown backend");
+  }
+}
+
+constexpr DeviceType kCPU = DeviceType::CPU;
+constexpr DeviceType kCUDA = DeviceType::CUDA;
+
 static inline const char* toString(Backend b) {
   switch (b) {
     case Backend::CPU:
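A quick illustration of how the new helpers above compose (a sketch, not
part of the diff): backendToDeviceType erases the sparse/dense
distinction, while backendToCPU/backendToCUDA preserve it.

    #include <ATen/Backend.h>

    void conversion_examples() {
      using namespace at;
      // Sparse backends map to their underlying device type...
      DeviceType d = backendToDeviceType(Backend::SparseCUDA); // DeviceType::CUDA
      // ...so a Backend -> DeviceType -> Backend round trip lands on
      // the dense backend, not the sparse one.
      Backend b = deviceTypeToBackend(d);                      // Backend::CUDA
      // The to-CPU/to-CUDA helpers keep sparseness while switching device.
      Backend s1 = backendToCPU(Backend::SparseCUDA);          // Backend::SparseCPU
      Backend s2 = backendToCUDA(Backend::SparseCPU);          // Backend::SparseCUDA
      (void)b; (void)s1; (void)s2;
    }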
2 changes: 1 addition & 1 deletion aten/src/ATen/Context.cpp
@@ -32,7 +32,7 @@ Context::Context()
   THSetDefaultErrorHandler(errorHandler,nullptr);
   THSetDefaultArgErrorHandler(argErrorHandler,nullptr);
 
-  generator_registry[static_cast<int>(Backend::CPU)]
+  generator_registry[static_cast<int>(DeviceType::CPU)]
     .reset(new CPUGenerator(this));
   Type::registerCPU(this);
 }
23 changes: 14 additions & 9 deletions aten/src/ATen/Context.h
@@ -25,7 +25,7 @@ class AT_API Context {
     return type_registry[static_cast<int>(p)][static_cast<int>(s)].get();
   }
   Type * getTypeOpt(Backend p, ScalarType s) {
-    initCUDAIfNeeded(p);
+    if (p != Backend::Undefined) initCUDAIfNeeded(backendToDeviceType(p));
     auto type = getTypeRaw(p, s);
 
     if(!type) {
@@ -42,11 +42,11 @@
     if (!type) AT_ERROR(toString(p), toString(s), "Type is not enabled.");
     return *type;
   }
-  Generator & defaultGenerator(Backend p) {
-    initCUDAIfNeeded(p);
-    auto & generator = generator_registry[static_cast<int>(p)];
+  Generator & defaultGenerator(DeviceType device_type) {
+    initCUDAIfNeeded(device_type);
+    auto & generator = generator_registry[static_cast<int>(device_type)];
     if(!generator)
-      AT_ERROR(toString(p), " backend type not enabled.");
+      AT_ERROR(DeviceTypeName(device_type), " backend type not enabled.");
     return *generator;
   }
   bool hasMKL() const;
@@ -64,7 +64,7 @@
   THCState* lazyInitCUDA() {
     std::call_once(thc_init,[&] {
       thc_state = detail::getCUDAHooks().initCUDA();
-      generator_registry[static_cast<int>(Backend::CUDA)] =
+      generator_registry[static_cast<int>(DeviceType::CUDA)] =
         detail::getCUDAHooks().initCUDAGenerator(this);
       detail::getCUDAHooks().registerCUDATypes(this);
     });
@@ -95,16 +95,17 @@
   bool deterministicCuDNN() const;
   void setDeterministicCuDNN(bool);
   std::unique_ptr<Generator>
-    generator_registry[static_cast<int>(Backend::NumOptions)];
+    generator_registry[static_cast<int>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)];
 private:
   // NB: type_registry has nullptr for all CUDA backends until
   // CUDA initialization has occurred
   std::unique_ptr<Type> type_registry
     [static_cast<int>(Backend::NumOptions)]
     [static_cast<int>(ScalarType::NumOptions)];
-  void initCUDAIfNeeded(Backend p) {
-    if(p == Backend::CUDA)
+  void initCUDAIfNeeded(DeviceType p) {
+    if (p == DeviceType::CUDA) {
       lazyInitCUDA();
+    }
   }
   std::once_flag thc_init;
   bool enabled_cudnn = true;
@@ -132,6 +133,10 @@ static inline Type& getType(Backend p, ScalarType s) {
   return globalContext().getType(p, s);
 }
 
+static inline Type& getType(DeviceType p, ScalarType s) {
+  return globalContext().getType(deviceTypeToBackend(p), s);
+}
+
 static inline Type& CPU(ScalarType s) {
   return getType(Backend::CPU, s);
 }
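The practical effect of the Context changes: generators are now keyed by
DeviceType rather than Backend, so the dense and sparse variants of a
device share one generator slot, and kCPU/kCUDA (now DeviceType
constants) can be passed directly. A small sketch under those
assumptions:

    #include <ATen/Context.h>

    void generator_example() {
      // kCPU is a DeviceType after this change, so this resolves to
      // defaultGenerator(DeviceType).
      at::Generator& cpu_gen = at::globalContext().defaultGenerator(at::kCPU);
      // Asking for the CUDA generator would first trigger lazyInitCUDA()
      // through initCUDAIfNeeded(DeviceType::CUDA).
      (void)cpu_gen;
    }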
23 changes: 2 additions & 21 deletions aten/src/ATen/Device.h
@@ -1,9 +1,10 @@
 #pragma once
 
 #include <ATen/ScalarType.h>
 #include <ATen/ATenGeneral.h>
-#include <ATen/core/Error.h>
 #include <ATen/core/DeviceType.h>
+#include <ATen/core/Error.h>
+#include <ATen/Backend.h>
 
 #include <cstddef>
 #include <iosfwd>
@@ -24,21 +25,6 @@ namespace at {
 struct Device {
   using Type = at::DeviceType;
 
-  /// Converts a `Backend` to a `DeviceType` if possible.
-  static DeviceType backend_to_type(Backend backend) {
-    switch (backend) {
-      case kCPU:
-      case kSparseCPU:
-        return DeviceType::CPU;
-      case kCUDA:
-      case kSparseCUDA:
-        return DeviceType::CUDA;
-      default:
-        AT_ERROR(
-            "Invalid backend ", toString(backend), " for Device construction");
-    }
-  }
-
   /// Constructs a new `Device` from a `DeviceType` and an optional device
   /// index.
   /* implicit */ Device(DeviceType type, int32_t index = -1)
@@ -60,11 +46,6 @@ struct Device {
   /// `<device-index>` optionally specifies a device index.
   /* implicit */ Device(const std::string& device_string);
 
-  /// Constructs a new `Device` from a `Backend` (which is converted to a
-  /// `DeviceType`, if possible) and an optional device index.
-  /* implicit */ Device(Backend backend, int32_t index = -1)
-      : Device(backend_to_type(backend), index) {}
-
   /// Returns true if the type and index of this `Device` matches that of
   /// `other`.
   bool operator==(const Device& other) const noexcept {
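After this hunk, Device can only be built from a DeviceType. The sketch
below (illustrative only) shows which spellings remain valid:

    #include <ATen/Device.h>

    void device_examples() {
      at::Device a(at::DeviceType::CPU);      // explicit device type
      at::Device b(at::kCPU);                 // still compiles: kCPU is now a DeviceType
      at::Device c(at::DeviceType::CUDA, 1);  // CUDA device with index 1
      // at::Device d(at::Backend::SparseCPU); // would no longer compile:
      //                                       // the Backend constructor is gone
      (void)a; (void)b; (void)c;
    }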
2 changes: 1 addition & 1 deletion aten/src/ATen/Formatting.cpp
@@ -250,7 +250,7 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi
     stream << "size:\n" << tensor_.sizes() << "\n";
     stream << "]";
   } else {
-    Type& cpudouble = tensor_.type().toBackend(kCPU).toScalarType(kDouble);
+    Type& cpudouble = tensor_.type().toBackend(Backend::CPU).toScalarType(kDouble);
     Tensor tensor = tensor_.toType(cpudouble).contiguous();
     if(tensor.ndimension() == 0) {
       stream << defaultfloat << tensor.data<double>()[0] << std::endl;
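The call-site edits in this and the following files all apply one rule:
APIs that genuinely take a Backend (such as Type::toBackend) must now
name the enum explicitly, because kCPU/kCUDA have become DeviceType
constants. For example (a sketch):

    // toBackend still takes a Backend, so spell it out instead of kCPU.
    at::Type& cpu_double_type(const at::Tensor& t) {
      return t.type().toBackend(at::Backend::CPU).toScalarType(at::kDouble);
    }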
14 changes: 10 additions & 4 deletions aten/src/ATen/TensorOptions.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <ATen/Backend.h>
 #include <ATen/Context.h>
 #include <ATen/Device.h>
 #include <ATen/DeviceGuard.h>
@@ -67,7 +68,7 @@ struct AT_API TensorOptions {
       type_ = &type;
     }
     this->dtype(type.scalarType());
-    this->device({type.backend(), device_index});
+    this->device({backendToDeviceType(type.backend()), device_index});
     this->layout(type.layout());
   }
 
@@ -84,7 +85,12 @@
   /// Constructs a `TensorOptions` object from a backend, forwarded to the
   /// `Device` constructor.
   /* implicit */ TensorOptions(Backend backend)
-      : TensorOptions(Device(backend)) {}
+      : TensorOptions(Device(backendToDeviceType(backend))) {}
+
+  /// Constructs a `TensorOptions` object from a device type, forwarded to the
+  /// `Device` constructor.
+  /* implicit */ TensorOptions(DeviceType device_type)
+      : TensorOptions(Device(device_type)) {}
 
   /// Constructs a `TensorOptions` object with the given dtype.
   /* implicit */ TensorOptions(ScalarType dtype) : TensorOptions() {
@@ -190,9 +196,9 @@ struct AT_API TensorOptions {
   Backend backend() const noexcept {
     Backend backend;
     if (device_.type() == Device::Type::CPU) {
-      backend = (layout_ == kStrided) ? kCPU : kSparseCPU;
+      backend = (layout_ == kStrided) ? Backend::CPU : Backend::SparseCPU;
     } else {
-      backend = (layout_ == kStrided) ? kCUDA : kSparseCUDA;
+      backend = (layout_ == kStrided) ? Backend::CUDA : Backend::SparseCUDA;
     }
     return backend;
   }
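Since TensorOptions now derives backend() on demand from the device type
and the layout rather than storing a Backend, both constructors above
end up in the same place. A sketch of the resulting behavior
(illustrative values; the .layout() chaining assumes the setter of that
era returns the options object):

    #include <ATen/TensorOptions.h>

    void tensor_options_examples() {
      at::TensorOptions a(at::kCPU);           // new DeviceType overload
      at::TensorOptions b(at::Backend::CUDA);  // Backend overload, converted
      // With a sparse layout, the derived backend flips to the sparse variant:
      // TensorOptions(at::kCPU).layout(at::kSparse).backend() == Backend::SparseCPU
      (void)a; (void)b;
    }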
2 changes: 1 addition & 1 deletion aten/src/ATen/function_wrapper.py
@@ -293,7 +293,7 @@ def __init__(self, reason):
         'Backend::${Backend}, ScalarType::${ScalarName})'),
     'THGenerator*':
         CodeTemplate(
-            'check_generator<${Backend}Generator>(${arg_name}, &globalContext().defaultGenerator(backend()))'),
+            'check_generator<${Backend}Generator>(${arg_name}, &globalContext().defaultGenerator(device_type()))'),
     # This is a cast done via direct-construction
     'IntListStride': CodeTemplate('at::IntList ${result_name} = get_intlist_stride_th(${arg_name});'),
     'real': CodeTemplate('${arg_name}.to${ScalarName}()'),
8 changes: 4 additions & 4 deletions aten/src/ATen/native/BinaryOps.cpp
@@ -28,7 +28,7 @@ Tensor& add_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar
AT_ERROR("add(sparse, dense) is not supported. Use add(dense, sparse) instead.");
}
auto iter = TensorIterator::binary_op(result, self, other);
add_stub(iter->backend(), *iter, alpha);
add_stub(iter->device_type(), *iter, alpha);
return result;
}

@@ -53,7 +53,7 @@ Tensor& div_out(Tensor& result, const Tensor& self, const Tensor& other) {
     return at::_sparse_div_out(result, self, Scalar(other));
   }
   auto iter = TensorIterator::binary_op(result, self, other);
-  div_stub(iter->backend(), *iter);
+  div_stub(iter->device_type(), *iter);
   return result;
 }

@@ -74,7 +74,7 @@ Tensor& mul_out(Tensor& result, const Tensor& self, const Tensor& other) {
     return at::_sparse_mul_out(result, self, other);
   }
   auto iter = TensorIterator::binary_op(result, self, other);
-  mul_stub(iter->backend(), *iter);
+  mul_stub(iter->device_type(), *iter);
   return result;
 }

@@ -105,7 +105,7 @@ Tensor& sub_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar
AT_ERROR("sub(sparse, dense) is not supported. Use sub(dense, sparse) instead.");
}
auto iter = TensorIterator::binary_op(result, self, other);
sub_stub(iter->backend(), *iter, alpha);
sub_stub(iter->device_type(), *iter, alpha);
return result;
}

Expand Down
8 changes: 4 additions & 4 deletions aten/src/ATen/native/DispatchStub.h
@@ -50,17 +50,17 @@ struct AT_API DispatchStub {
   static_assert(std::is_pointer<FnPtr>::value, "FnPtr should be a pointer type");
 
   template <typename... ArgTypes>
-  void operator()(Backend backend, ArgTypes&&... args) {
-    if (backend == Backend::CPU) {
+  void operator()(DeviceType device_type, ArgTypes&&... args) {
+    if (device_type == DeviceType::CPU) {
       if (!cpu_dispatch_ptr) {
         cpu_dispatch_ptr = choose_cpu_impl();
       }
       (*cpu_dispatch_ptr)(std::forward<ArgTypes>(args)...);
-    } else if (backend == Backend::CUDA) {
+    } else if (device_type == DeviceType::CUDA) {
       AT_ASSERTM(cuda_dispatch_ptr, "DispatchStub: missing CUDA kernel");
       (*cuda_dispatch_ptr)(std::forward<ArgTypes>(args)...);
     } else {
-      AT_ERROR("DispatchStub: unsupported backend", backend);
+      AT_ERROR("DispatchStub: unsupported device type", device_type);
     }
   }
 
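DispatchStub now selects kernels by DeviceType, which is exactly what
the TensorIterator call sites above feed it via iter->device_type(). A
hypothetical stub wired through this mechanism (square_fn/square_stub
are made-up names; the DECLARE/DEFINE/REGISTER macro spellings are
assumed from this header, not shown in the diff):

    #include <ATen/native/DispatchStub.h>

    namespace at { namespace native {

    // Illustrative function-pointer type for the stub.
    using square_fn = void (*)(float* out, const float* in, int64_t n);
    DECLARE_DISPATCH(square_fn, square_stub);  // normally in the header
    DEFINE_DISPATCH(square_stub);              // normally in the .cpp
    // A CPU kernel would be registered elsewhere with:
    //   REGISTER_DISPATCH(square_stub, &square_kernel_cpu);

    void call_square(float* out, const float* in, int64_t n) {
      // Dispatch keys on DeviceType instead of Backend now.
      square_stub(DeviceType::CPU, out, in, n);
    }

    }} // namespace at::native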
2 changes: 1 addition & 1 deletion aten/src/ATen/native/Distributions.cpp
@@ -49,7 +49,7 @@ namespace {
  */
 
 THGenerator* get_generator(at::Generator* gen) {
-  auto default_gen = &at::globalContext().defaultGenerator(at::Backend::CPU);
+  auto default_gen = &at::globalContext().defaultGenerator(at::kCPU);
   auto gen_ = at::check_generator<at::CPUGenerator>(gen, default_gen);
   return gen_->generator;
 }
2 changes: 1 addition & 1 deletion aten/src/ATen/native/Embedding.cpp
@@ -67,7 +67,7 @@ Tensor embedding_sparse_backward(
   int64_t num_features = grad_.size(-1);
   auto weight_size = std::array<int64_t, 2>{{ num_weights, num_features }};
   auto& dense_type = grad.type();
-  auto& sparse_type = dense_type.toBackend(grad.is_cuda() ? kSparseCUDA : kSparseCPU);
+  auto& sparse_type = dense_type.toBackend(grad.is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU);
 
   // check if all our grad come from padding_idx
   if (grad.numel() == 0) {
2 changes: 1 addition & 1 deletion aten/src/ATen/native/LinearAlgebra.cpp
@@ -89,7 +89,7 @@ Tensor inverse(const Tensor& self) {
 }
 
 Tensor& inverse_out(Tensor &result, const Tensor &self) {
-  AT_CHECK(self.type().backend() == kCPU || self.type().backend() == kCUDA,
+  AT_CHECK(self.type().backend() == Backend::CPU || self.type().backend() == Backend::CUDA,
            "tensor should have CPU or CUDA backend");
   AT_CHECK(self.dim() == 2, "tensor should be 2 dimensional");
   AT_CHECK(self.size(0) == self.size(1), "tensor should be square");
4 changes: 2 additions & 2 deletions aten/src/ATen/native/LossCTC.cpp
@@ -364,8 +364,8 @@ Tensor ctc_loss(const Tensor& log_probs, const Tensor& targets, IntList input_le

 // Convenience function accepting Tensors
 Tensor ctc_loss(const Tensor& log_probs, const Tensor& targets, const Tensor& input_lengths, const Tensor& target_lengths, int64_t BLANK, int64_t reduction) {
-  Tensor ilc = input_lengths.toType(kLong).toBackend(kCPU).contiguous();
-  Tensor tlc = target_lengths.toType(kLong).toBackend(kCPU).contiguous();
+  Tensor ilc = input_lengths.toType(kLong).toBackend(Backend::CPU).contiguous();
+  Tensor tlc = target_lengths.toType(kLong).toBackend(Backend::CPU).contiguous();
   IntList il(ilc.data<int64_t>(), ilc.numel());
   IntList tl(tlc.data<int64_t>(), tlc.numel());
   return at::native::ctc_loss(log_probs, targets, il, tl, BLANK, reduction);
2 changes: 1 addition & 1 deletion aten/src/ATen/native/Memory.cpp
@@ -7,7 +7,7 @@ namespace at {
 namespace native {
 
 Tensor pin_memory(const Tensor& self) {
-  if (self.type().backend() != kCPU) {
+  if (self.type().backend() != Backend::CPU) {
     AT_ERROR("cannot pin '", self.type().toString(), "' only CPU memory can be pinned");
   }
   auto* allocator = detail::getCUDAHooks().getPinnedMemoryAllocator();
(Diff truncated; the remaining changed files are not shown.)