diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap index 889aecfa84b04c..aceec5106c56d1 100644 --- a/aten/src/ATen/Declarations.cwrap +++ b/aten/src/ATen/Declarations.cwrap @@ -3266,7 +3266,9 @@ name: alias return: THTensor* cpu_half: True - variants: [function] + variants: + - method + - function options: - cname: newWithTensor arguments: diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp index d7f04e2f261d7c..21a823cb73701f 100644 --- a/aten/src/ATen/SparseTensorImpl.cpp +++ b/aten/src/ATen/SparseTensorImpl.cpp @@ -22,18 +22,18 @@ namespace { // a scalar and have one element) // // Thus, an empty sparse tensor should be a 1-dimensional tensor of size [0]. -// Furthermore, we have dim == sparseDims + denseDims; since this is a sparse -// tensor, let us say that an empty sparse tensor has sparseDims == 1 and -// denseDims == 0. (There is a degree of freedom here, but given that this -// is a sparse dimension, it seems reasonable to demand that sparseDims > 0). +// Furthermore, we have dim == sparse_dim + dense_dim; since this is a sparse +// tensor, let us say that an empty sparse tensor has sparse_dim == 1 and +// dense_dim == 0. (There is a degree of freedom here, but given that this +// is a sparse dimension, it seems reasonable to demand that sparse_dim > 0). // // This means that we allocate a [1,0] size indices tensor and a [0] size // values tensor for such an empty tensor. SparseTensorImpl::SparseTensorImpl(at::TensorTypeId type_id, const caffe2::TypeMeta& data_type) : TensorImpl(type_id, data_type, nullptr, false) , size_{0} - , sparseDims_(1) - , denseDims_(0) + , sparse_dim_(1) + , dense_dim_(0) , indices_(at::empty({1, 0}, at::initialTensorOptions().device(sparseTensorIdToDeviceType(type_id)).dtype(ScalarType::Long))) , values_(at::empty({0}, at::initialTensorOptions().device(sparseTensorIdToDeviceType(type_id)).dtype(dataTypeToScalarType(data_type.id())))) {} @@ -67,7 +67,7 @@ void SparseTensorImpl::set_storage_offset(int64_t storage_offset) { } int64_t SparseTensorImpl::dim() const { - return sparseDims_ + denseDims_; + return sparse_dim_ + dense_dim_; } TensorImpl* SparseTensorImpl::maybe_zero_dim(bool condition_when_zero_dim) { AT_CHECK(condition_when_zero_dim == (dim() == 0), @@ -83,17 +83,22 @@ int64_t SparseTensorImpl::storage_offset() const { AT_ERROR("sparse tensors do not have storage"); } void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) { + AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors! 
+ + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); + AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout()); + AT_CHECK(values.type().toSparse() == type(), "values type must match sparse tensor type"); AT_CHECK(indices.type().scalarType() == kLong, "indices must be an int64 tensor"); AT_CHECK(indices.type().backend() == values.type().backend(), "backend of indices (", indices.type().backend(), ") must match backend of values (", values.type().backend(), ")"); AT_CHECK(!indices.is_cuda() || indices.get_device() == values.get_device(), "device of indices (", indices.get_device(), ") must match device of values (", values.get_device(), ")"); - AT_CHECK(indices.dim() == 2, "indices must be nDim x nnz, but got: ", indices.sizes()); + AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes()); AT_CHECK(indices.size(1) == values.size(0), "indices and values must have same nnz, but got nnz from indices: ", indices.size(1), ", nnz from values: ", values.size(0)); - AT_CHECK(indices.size(0) == sparseDims_, "indices has incorrect first dimension, expected ", sparseDims_, ", got ", indices.size(0)); - AT_CHECK(values.dim() == denseDims_ + 1, "values has incorrect number of dimensions, expected ", denseDims_ + 1, ", got ", values.dim()); + AT_CHECK(indices.size(0) == sparse_dim_, "indices has incorrect first dimension, expected ", sparse_dim_, ", got ", indices.size(0)); + AT_CHECK(values.dim() == dense_dim_ + 1, "values has incorrect number of dimensions, expected ", dense_dim_ + 1, ", got ", values.dim()); - auto dense_size_original = sizes().slice(sparseDims_); + auto dense_size_original = sizes().slice(sparse_dim_); std::vector expected_values_size_vec = {values.size(0)}; expected_values_size_vec.insert(expected_values_size_vec.end(), dense_size_original.begin(), dense_size_original.end()); IntList expected_values_size(expected_values_size_vec); diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index 9a68f80a912b61..1a8fa8b29baeaf 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -9,18 +9,18 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { // Stored in COO format, indices + values. // INVARIANTS: - // _sparseDims: range [0, len(shape)]; _sparseDims + _denseDims = len(shape) - // _denseDims : range [0, len(shape)]; _sparseDims + _denseDims = len(shape) - // _indices.shape: dimensionality: 2, shape: (_sparseDims, nnz) - // _values.shape: dimensionality: 1 + _denseDims. shape: (nnz, shape[_sparseDims:]) + // sparse_dim: range [0, len(shape)]; sparse_dim + dense_dim = len(shape) + // dense_dim : range [0, len(shape)]; sparse_dim + dense_dim = len(shape) + // _indices.shape: dimensionality: 2, shape: (sparse_dim, nnz) + // _values.shape: dimensionality: 1 + dense_dim. shape: (nnz, shape[sparse_dim:]) // The true size of the sparse tensor (e.g., if you called to_dense() // on it). When THTensor merges into TensorImpl, this field // should move to the parent class. 
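// For example, a sparse tensor with size_ == [3, 4, 5], sparse_dim_ == 2 and
// dense_dim_ == 1 that holds nnz == 6 entries stores indices_ of shape [2, 6]
// and values_ of shape [6, 5].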
std::vector size_; - int64_t sparseDims_ = 0; // number of sparse dimensions - int64_t denseDims_ = 0; // number of dense dimensions + int64_t sparse_dim_ = 0; // number of sparse dimensions + int64_t dense_dim_ = 0; // number of dense dimensions Tensor indices_; // always a LongTensor Tensor values_; @@ -39,8 +39,8 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { explicit SparseTensorImpl(at::TensorTypeId, const caffe2::TypeMeta&); int64_t nnz() const { return values_.size(0); } - int64_t sparseDims() const { return sparseDims_; } - int64_t denseDims() const { return denseDims_; } + int64_t sparse_dim() const { return sparse_dim_; } + int64_t dense_dim() const { return dense_dim_; } bool coalesced() const { return coalesced_; } Tensor indices() const { return indices_; } Tensor values() const { return values_; } @@ -60,16 +60,16 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { const Storage& storage() const override; int64_t storage_offset() const override; - // WARNING: This function does NOT preserve invariants of sparseDims/denseDims with + // WARNING: This function does NOT preserve invariants of sparse_dim/dense_dim with // respect to indices and values - void raw_resize_(int64_t sparseDims, int64_t denseDims, IntList size) { + void raw_resize_(int64_t sparse_dim, int64_t dense_dim, IntList size) { size_ = size.vec(); - sparseDims_ = sparseDims; - denseDims_ = denseDims; + sparse_dim_ = sparse_dim; + dense_dim_ = dense_dim; refresh_numel(); } - // NOTE: This function preserves invariants of sparseDims/denseDims with respect to + // NOTE: This function preserves invariants of sparse_dim/dense_dim with respect to // indices and values. // // NOTE: This function supports the following cases: @@ -91,36 +91,36 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { // and for API consistency we don't support it). // 4. When we attempt to shrink the size of any of the sparse dimensions on a non-empty sparse tensor // (this could make some of the stored indices out-of-bound and thus unsafe). - void resize_(int64_t sparseDims, int64_t denseDims, IntList size) { - AT_CHECK(sparseDims + denseDims == size.size(), "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", size.size()); + void resize_(int64_t sparse_dim, int64_t dense_dim, IntList size) { + AT_CHECK(sparse_dim + dense_dim == size.size(), "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size()); if (nnz() > 0) { auto alt_options_msg = "You could try the following options:\n\ -1. If you need an empty sparse tensor of this size, call `x=torch.sparse_coo_tensor(size)`.\n\ +1. If you need an empty sparse tensor of this size, call `x = torch.sparse_coo_tensor(size)`.\n\ 2. If you need to resize this tensor, you have the following options:\n\ 1. For both sparse and dense dimensions, keep the number of them constant and the size of them non-shrinking, and then try the same call again.\n\ 2. 
Or, create a new sparse tensor with the correct indices and values from this sparse tensor."; - AT_CHECK(sparseDims == sparseDims_, - "changing the number of sparse dimensions (from ", sparseDims_, " to ", sparseDims, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); + AT_CHECK(sparse_dim == sparse_dim_, + "changing the number of sparse dimensions (from ", sparse_dim_, " to ", sparse_dim, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); - AT_CHECK(denseDims == denseDims_, - "changing the number of dense dimensions (from ", denseDims_, " to ", denseDims, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); + AT_CHECK(dense_dim == dense_dim_, + "changing the number of dense dimensions (from ", dense_dim_, " to ", dense_dim, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); bool shrinking_sparse_dims = false; - bool shrinking_dense_dims = false; - auto sparse_size_original = sizes().slice(0, sparseDims); - auto sparse_size_new = size.slice(0, sparseDims); - for (int i = 0; i < sparseDims; i++) { + bool shrinking_dense_dim = false; + auto sparse_size_original = sizes().slice(0, sparse_dim); + auto sparse_size_new = size.slice(0, sparse_dim); + for (int i = 0; i < sparse_dim; i++) { if (sparse_size_new[i] < sparse_size_original[i]) { shrinking_sparse_dims = true; break; } } - auto dense_size_original = sizes().slice(sparseDims); - auto dense_size_new = size.slice(sparseDims); - for (int i = 0; i < denseDims; i++) { + auto dense_size_original = sizes().slice(sparse_dim); + auto dense_size_new = size.slice(sparse_dim); + for (int i = 0; i < dense_dim; i++) { if (dense_size_new[i] < dense_size_original[i]) { - shrinking_dense_dims = true; + shrinking_dense_dim = true; break; } } @@ -128,38 +128,36 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { AT_CHECK(!shrinking_sparse_dims, "shrinking the size of sparse dimensions (from ", sparse_size_original, " to ", sparse_size_new, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); - AT_CHECK(!shrinking_dense_dims, + AT_CHECK(!shrinking_dense_dim, "shrinking the size of dense dimensions (from ", dense_size_original, " to ", dense_size_new, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); } - if ((!size.equals(size_)) || (sparseDims != sparseDims_) || (denseDims != denseDims_)) { - std::vector values_size = {values().size(0)}; - auto dense_size = size.slice(sparseDims); + if ((!size.equals(size_)) || (sparse_dim != sparse_dim_) || (dense_dim != dense_dim_)) { + auto nnz = values().size(0); + std::vector values_size = {nnz}; + auto dense_size = size.slice(sparse_dim); values_size.insert(values_size.end(), dense_size.begin(), dense_size.end()); values_.resize_(values_size); - - std::vector indices_size = indices().sizes().vec(); - indices_size[0] = sparseDims; - indices_.resize_(indices_size); + indices_.resize_({sparse_dim, nnz}); } size_ = size.vec(); - sparseDims_ = sparseDims; - denseDims_ = denseDims; + sparse_dim_ = sparse_dim; + dense_dim_ = dense_dim; refresh_numel(); } // NOTE: this function will resize the sparse tensor and also set `indices` and `values` to empty. 
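// For example, resize_and_clear_(1, 1, {4, 3}) leaves the tensor with size [4, 3],
// an indices tensor of shape [1, 0], and a values tensor of shape [0, 3].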
- void resize_and_clear_(int64_t sparseDims, int64_t denseDims, IntList size) { - AT_CHECK(sparseDims + denseDims == size.size(), "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", size.size()); + void resize_and_clear_(int64_t sparse_dim, int64_t dense_dim, IntList size) { + AT_CHECK(sparse_dim + dense_dim == size.size(), "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size()); size_ = size.vec(); - sparseDims_ = sparseDims; - denseDims_ = denseDims; + sparse_dim_ = sparse_dim; + dense_dim_ = dense_dim; - auto empty_indices = at::empty({sparseDims, 0}, indices().options()); + auto empty_indices = at::empty({sparse_dim, 0}, indices().options()); std::vector values_size = {0}; - auto dense_size = sizes().slice(sparseDims); + auto dense_size = sizes().slice(sparse_dim); values_size.insert(values_size.end(), dense_size.begin(), dense_size.end()); auto empty_values = at::empty(values_size, values().options()); set_indices_and_values_unsafe(empty_indices, empty_values); @@ -169,9 +167,10 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { void set_coalesced(bool coalesced) { coalesced_ = coalesced; } // NOTE: this function is only used internally and not exposed to Python frontend - void set_nnz_and_narrow(int64_t nnz) { - indices_ = indices_.narrow(1, 0, nnz); - values_ = values_.narrow(0, 0, nnz); + void set_nnz_and_narrow(int64_t new_nnz) { + AT_ASSERT(new_nnz <= nnz()); + indices_ = indices_.narrow(1, 0, new_nnz); + values_ = values_.narrow(0, 0, new_nnz); } // Takes indices and values and directly puts them into the sparse tensor, no copy. diff --git a/aten/src/ATen/SparseTensorUtils.h b/aten/src/ATen/SparseTensorUtils.h new file mode 100644 index 00000000000000..7bf2e938bc8440 --- /dev/null +++ b/aten/src/ATen/SparseTensorUtils.h @@ -0,0 +1,111 @@ +#include +#include + +namespace at { namespace sparse { + +// Just for documentary purposes +using SparseTensor = Tensor; +using LongTensor = Tensor; +using IntTensor = Tensor; +using SparseType = Type; + +// This is an internal utility function for getting at the SparseTensorImpl, +// so that we can write sparse tensor specific accessors for special fields +// in SparseTensor. You should only use this for writing low level +// setters/getters for SparseTensorImpl fields; otherwise, you should use +// the low level setters/getters that were implemented using this. +// +// This may be called repeatedly, so make sure it's pretty cheap. +inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) { + AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable"); + AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor"); + return static_cast(self.unsafeGetTensorImpl()); +} + +// Port of the old THCSTensor_(checkGPU), but it doesn't really belong here +// because it is more general +// NB: I dropped kernelP2PEnabled support +// NB: This only works if the tensors are KNOWN to be CUDA. +// TODO: Generalize it so it works on CPU as well +inline bool check_device(ArrayRef ts) { + if (ts.empty()) { + return true; + } + int64_t curDevice = current_device(); + for (const Tensor& t : ts) { + if (t.get_device() != curDevice) return false; + } + return true; +} + +// Takes indices and values and directly puts them into the sparse tensor, no +// copy. 
This used to be called THSTensor_(_move)
+inline void alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) {
+ get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values);
+}
+
+// Takes indices and values and makes a (data) copy of them to put into the sparse
+// indices/values. This used to be called THSTensor_(_set)
+inline void copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) {
+ alias_into_sparse(self, self._indices().type().copy(indices, non_blocking), self._values().type().copy(values, non_blocking));
+}
+
+// TODO: put this into the public API
+inline bool is_same_tensor(const Tensor& lhs, const Tensor& rhs) {
+ return lhs.unsafeGetTensorImpl() == rhs.unsafeGetTensorImpl();
+}
+
+inline bool is_same_density(const SparseTensor& self, const SparseTensor& src) {
+ return self.sparse_dim() == src.sparse_dim() && self.dense_dim() == src.dense_dim();
+}
+
+// Give us a new values tensor, with the same dimensionality
+// as 'values' but with a new number of non-zero elements.
+// TODO: Expose this for real in ATen, some day?
+// NB: Doesn't preserve data.
+inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) {
+ std::vector<int64_t> size = values.sizes().vec();
+ size[0] = nnz;
+ return at::empty(size, values.options());
+}
+
+// This helper function flattens a sparse indices tensor (a LongTensor) into a 1D
+// indices tensor. E.g.,
+// input = [[2, 4, 0],
+// [3, 1, 10]]
+// full_size = [2, 12]
+// output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
+//
+// In other words, assuming that each `indices[i, :]` is a valid index into a
+// tensor `t` of shape `full_size`, this returns the corresponding indices into
+// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
+// If force_clone is true, the result will be forced to be a clone of self.
+inline LongTensor flatten_indices(const Tensor& indices, IntList full_size, bool force_clone = false) {
+ int64_t sparse_dim = indices.size(0);
+ if (sparse_dim == 1) {
+ if (force_clone) {
+ return indices.squeeze(0).clone();
+ } else {
+ return indices.squeeze(0);
+ }
+ } else {
+ std::vector<int64_t> indices_mult_cpu_vec;
+ indices_mult_cpu_vec.resize(sparse_dim);
+ int64_t mult = 1;
+ for (int64_t i = sparse_dim - 1; i >= 0; i--) {
+ indices_mult_cpu_vec[i] = mult;
+ mult *= full_size[i];
+ }
+ auto indices_mult_cpu = indices.type().cpu()
+ .tensorFromBlob(indices_mult_cpu_vec.data(), /*size=*/{sparse_dim, 1});
+ // NB: must be blocking because this blob may be freed after this closure,
+ // and non_blocking copy will see garbage.
+ auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
+ // Ideally we want matmul but matmul is slow on CPU Long and not implemented
+ // on CUDA Long. So mul is faster.
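+ // For the example above, indices_mult is [[12], [1]], so mul gives
+ // [[24, 48, 0], [3, 1, 10]] and sum(0) gives [27, 49, 10].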
+ return indices.mul(indices_mult).sum(0); + } +} + +}} // namespace at::sparse diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index e94b2a4152c513..8271e5b1de7e17 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -404,6 +404,7 @@ class CAFFE2_API Tensor { Tensor & log_normal_(double mean=1, double std=2, Generator * generator=nullptr); Tensor & exponential_(double lambd=1, Generator * generator=nullptr); Tensor & geometric_(double p, Generator * generator=nullptr); + Tensor alias() const; Tensor abs() const; Tensor & abs_(); Tensor acos() const; @@ -621,17 +622,22 @@ class CAFFE2_API Tensor { Tensor & sub_(Scalar other, Scalar alpha=1); Tensor addmm(const Tensor & mat1, const Tensor & mat2, Scalar beta=1, Scalar alpha=1) const; Tensor & addmm_(const Tensor & mat1, const Tensor & mat2, Scalar beta=1, Scalar alpha=1); - Tensor & sparse_resize_(IntList size, int64_t sparseDims, int64_t denseDims); - Tensor & sparse_resize_and_clear_(IntList size, int64_t sparseDims, int64_t denseDims); + Tensor & sparse_resize_(IntList size, int64_t sparse_dim, int64_t dense_dim); + Tensor & sparse_resize_and_clear_(IntList size, int64_t sparse_dim, int64_t dense_dim); Tensor sparse_mask(SparseTensorRef mask) const; Tensor to_dense() const; - int64_t _sparseDims() const; - int64_t _denseDims() const; + int64_t sparse_dim() const; + int64_t _dimI() const; + int64_t dense_dim() const; + int64_t _dimV() const; int64_t _nnz() const; Tensor coalesce() const; bool is_coalesced() const; Tensor _indices() const; Tensor _values() const; + Tensor & _coalesced_(bool coalesced); + Tensor indices() const; + Tensor values() const; int64_t numel() const; std::vector unbind(int64_t dim=0) const; int64_t get_device() const; diff --git a/aten/src/ATen/core/TensorMethods.h b/aten/src/ATen/core/TensorMethods.h index ba5044963c0fad..27e5a718d16870 100644 --- a/aten/src/ATen/core/TensorMethods.h +++ b/aten/src/ATen/core/TensorMethods.h @@ -515,6 +515,9 @@ inline Tensor & Tensor::exponential_(double lambd, Generator * generator) { inline Tensor & Tensor::geometric_(double p, Generator * generator) { return type().geometric_(*this, p, generator); } +inline Tensor Tensor::alias() const { + return type().alias(*this); +} inline Tensor Tensor::abs() const { return type().abs(*this); } @@ -1166,11 +1169,11 @@ inline Tensor Tensor::addmm(const Tensor & mat1, const Tensor & mat2, Scalar bet inline Tensor & Tensor::addmm_(const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) { return type().addmm_(*this, mat1, mat2, beta, alpha); } -inline Tensor & Tensor::sparse_resize_(IntList size, int64_t sparseDims, int64_t denseDims) { - return type().sparse_resize_(*this, size, sparseDims, denseDims); +inline Tensor & Tensor::sparse_resize_(IntList size, int64_t sparse_dim, int64_t dense_dim) { + return type().sparse_resize_(*this, size, sparse_dim, dense_dim); } -inline Tensor & Tensor::sparse_resize_and_clear_(IntList size, int64_t sparseDims, int64_t denseDims) { - return type().sparse_resize_and_clear_(*this, size, sparseDims, denseDims); +inline Tensor & Tensor::sparse_resize_and_clear_(IntList size, int64_t sparse_dim, int64_t dense_dim) { + return type().sparse_resize_and_clear_(*this, size, sparse_dim, dense_dim); } inline Tensor Tensor::sparse_mask(SparseTensorRef mask) const { return type().sparse_mask(*this, mask); @@ -1178,11 +1181,17 @@ inline Tensor Tensor::sparse_mask(SparseTensorRef mask) const { inline Tensor Tensor::to_dense() const { return type().to_dense(*this); 
} -inline int64_t Tensor::_sparseDims() const { - return type()._sparseDims(*this); +inline int64_t Tensor::sparse_dim() const { + return type().sparse_dim(*this); +} +inline int64_t Tensor::_dimI() const { + return type()._dimI(*this); +} +inline int64_t Tensor::dense_dim() const { + return type().dense_dim(*this); } -inline int64_t Tensor::_denseDims() const { - return type()._denseDims(*this); +inline int64_t Tensor::_dimV() const { + return type()._dimV(*this); } inline int64_t Tensor::_nnz() const { return type()._nnz(*this); @@ -1199,6 +1208,15 @@ inline Tensor Tensor::_indices() const { inline Tensor Tensor::_values() const { return type()._values(*this); } +inline Tensor & Tensor::_coalesced_(bool coalesced) { + return type()._coalesced_(*this, coalesced); +} +inline Tensor Tensor::indices() const { + return type().indices(*this); +} +inline Tensor Tensor::values() const { + return type().values(*this); +} inline int64_t Tensor::numel() const { return type().numel(*this); } diff --git a/aten/src/ATen/core/TensorOptions.h b/aten/src/ATen/core/TensorOptions.h index e830f3b778a726..cf5845c0a0f7ed 100644 --- a/aten/src/ATen/core/TensorOptions.h +++ b/aten/src/ATen/core/TensorOptions.h @@ -181,6 +181,11 @@ struct CAFFE2_API TensorOptions { return has_device_ ? device_ : getDefaultTensorOptions().device(); } + /// Returns whether the device is specified. + bool has_device() const noexcept { + return has_device_; + } + /// Returns the device of the `TensorOptions`, or `c10::nullopt` if /// device is not specified. optional device_opt() const noexcept { @@ -197,6 +202,11 @@ struct CAFFE2_API TensorOptions { return has_dtype_ ? dtype_ : getDefaultTensorOptions().dtype(); } + /// Returns whether the dtype is specified. + bool has_dtype() const noexcept { + return has_dtype_; + } + /// Returns the dtype of the `TensorOptions`, or `c10::nullopt` if /// device is not specified. optional dtype_opt() const noexcept { @@ -208,6 +218,11 @@ struct CAFFE2_API TensorOptions { return has_layout_ ? layout_ : getDefaultTensorOptions().layout(); } + /// Returns whether the layout is specified. + bool has_layout() const noexcept { + return has_layout_; + } + /// Returns the layout of the `TensorOptions`, or `c10::nullopt` if /// layout is not specified. optional layout_opt() const noexcept { @@ -219,6 +234,11 @@ struct CAFFE2_API TensorOptions { return has_requires_grad_ ? requires_grad_ : getDefaultTensorOptions().requires_grad(); } + /// Returns whether the `requires_grad` is specified. + bool has_requires_grad() const noexcept { + return has_requires_grad_; + } + /// Returns the `requires_grad` property of the `TensorOptions`, or /// `c10::nullopt` if `requires_grad` is not specified. optional requires_grad_opt() const noexcept { @@ -231,6 +251,11 @@ struct CAFFE2_API TensorOptions { return has_is_variable_ ? is_variable_ : getDefaultTensorOptions().is_variable(); } + /// Returns whether the `is_variable` is specified. + bool has_is_variable() const noexcept { + return has_is_variable_; + } + /// Returns the `is_variable` property of the `TensorOptions`, or /// `c10::nullopt` if `is_variable` is not specified. 
optional is_variable_opt() const noexcept { diff --git a/aten/src/ATen/core/Type.h b/aten/src/ATen/core/Type.h index 5f666cdcb61fc1..6ad8913363aa4b 100644 --- a/aten/src/ATen/core/Type.h +++ b/aten/src/ATen/core/Type.h @@ -367,6 +367,7 @@ struct CAFFE2_API Type { virtual Tensor & log_normal_(Tensor & self, double mean, double std, Generator * generator) const = 0; virtual Tensor & exponential_(Tensor & self, double lambd, Generator * generator) const = 0; virtual Tensor & geometric_(Tensor & self, double p, Generator * generator) const = 0; + virtual Tensor alias(const Tensor & self) const = 0; virtual Tensor abs(const Tensor & self) const = 0; virtual Tensor & abs_(Tensor & self) const = 0; virtual Tensor acos(const Tensor & self) const = 0; @@ -584,17 +585,22 @@ struct CAFFE2_API Type { virtual Tensor & sub_(Tensor & self, Scalar other, Scalar alpha) const = 0; virtual Tensor addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) const = 0; virtual Tensor & addmm_(Tensor & self, const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) const = 0; - virtual Tensor & sparse_resize_(Tensor & self, IntList size, int64_t sparseDims, int64_t denseDims) const = 0; - virtual Tensor & sparse_resize_and_clear_(Tensor & self, IntList size, int64_t sparseDims, int64_t denseDims) const = 0; + virtual Tensor & sparse_resize_(Tensor & self, IntList size, int64_t sparse_dim, int64_t dense_dim) const = 0; + virtual Tensor & sparse_resize_and_clear_(Tensor & self, IntList size, int64_t sparse_dim, int64_t dense_dim) const = 0; virtual Tensor sparse_mask(const Tensor & self, SparseTensorRef mask) const = 0; virtual Tensor to_dense(const Tensor & self) const = 0; - virtual int64_t _sparseDims(const Tensor & self) const = 0; - virtual int64_t _denseDims(const Tensor & self) const = 0; + virtual int64_t sparse_dim(const Tensor & self) const = 0; + virtual int64_t _dimI(const Tensor & self) const = 0; + virtual int64_t dense_dim(const Tensor & self) const = 0; + virtual int64_t _dimV(const Tensor & self) const = 0; virtual int64_t _nnz(const Tensor & self) const = 0; virtual Tensor coalesce(const Tensor & self) const = 0; virtual bool is_coalesced(const Tensor & self) const = 0; virtual Tensor _indices(const Tensor & self) const = 0; virtual Tensor _values(const Tensor & self) const = 0; + virtual Tensor & _coalesced_(Tensor & self, bool coalesced) const = 0; + virtual Tensor indices(const Tensor & self) const = 0; + virtual Tensor values(const Tensor & self) const = 0; virtual int64_t numel(const Tensor & self) const = 0; virtual std::vector unbind(const Tensor & self, int64_t dim) const = 0; virtual int64_t get_device(const Tensor & self) const = 0; diff --git a/aten/src/ATen/core/aten_interned_strings.h b/aten/src/ATen/core/aten_interned_strings.h index e7f487f264f64b..0ee9d337e91583 100644 --- a/aten/src/ATen/core/aten_interned_strings.h +++ b/aten/src/ATen/core/aten_interned_strings.h @@ -96,7 +96,6 @@ _(aten, _logspace) \ _(aten, _masked_scale) \ _(aten, _mm) \ _(aten, _mv) \ -_(aten, _native_sparse_coo_tensor_unsafe) \ _(aten, _nnz) \ _(aten, _pack_padded_sequence) \ _(aten, _pack_padded_sequence_backward) \ @@ -118,6 +117,8 @@ _(aten, _sin) \ _(aten, _sinh) \ _(aten, _sparseDims) \ _(aten, _sparse_add) \ +_(aten, _sparse_coo_tensor_with_dims) \ +_(aten, _sparse_coo_tensor_with_dims_and_tensors) \ _(aten, _sparse_coo_tensor_unsafe) \ _(aten, _sparse_dense_add) \ _(aten, _sparse_div_scalar) \ @@ -364,6 +365,7 @@ _(aten, index_copy) \ _(aten, index_fill) 
\ _(aten, index_put) \ _(aten, index_select) \ +_(aten, indices) \ _(aten, instance_norm) \ _(aten, inverse) \ _(aten, irfft) \ @@ -478,7 +480,6 @@ _(aten, native_get_device) \ _(aten, native_norm) \ _(aten, native_pow) \ _(aten, native_resize_as) \ -_(aten, native_sparse_coo_tensor) \ _(aten, native_tensor) \ _(aten, native_zero) \ _(aten, ne) \ @@ -685,6 +686,7 @@ _(aten, upsample_nearest3d_forward) \ _(aten, upsample_trilinear3d) \ _(aten, upsample_trilinear3d_backward) \ _(aten, upsample_trilinear3d_forward) \ +_(aten, values) \ _(aten, var) \ _(aten, view) \ _(aten, view_as) \ @@ -737,7 +739,7 @@ _(attr, cudnn_enabled) \ _(attr, cx) \ _(attr, cy) \ _(attr, data) \ -_(attr, denseDims) \ +_(attr, dense_dim) \ _(attr, descending) \ _(attr, deterministic) \ _(attr, device) \ @@ -931,7 +933,8 @@ _(attr, some) \ _(attr, sorted) \ _(attr, source) \ _(attr, sparse) \ -_(attr, sparseDims) \ +_(attr, sparse_dim) \ +_(attr, sparse_dtype) \ _(attr, spatialScale) \ _(attr, split_size) \ _(attr, split_sizes) \ diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index 132c036b0358f7..ad600b467758c1 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -110,7 +110,7 @@ def TypedDict(name, attrs, total=True): # type: ignore TYPE_DERIVED_DEFINITION_NATIVE = CodeTemplate("""\ ${return_type} ${Type}::${api_name}(${type_method_formals}) const { ${device_guard_declaration} - ${return_call} at::native::${native_type_method_dispatch}(/* actuals */ ${type_derived_call_actuals}); + ${return_call} at::native::${native_type_method_dispatch}(/* actuals */ ${actuals}); } """) TYPE_DERIVED_DEFINITION_NATIVE_MISSING = CodeTemplate("""\ @@ -499,6 +499,7 @@ def __getitem__(self, x): 'native_type_method_dispatch': str, # options should be List[FunctionOption] 'options': Any, + 'requires_tensor': bool, 'return_call': str, 'return_type': str, 'return': ReturnDecl, @@ -527,6 +528,7 @@ def __getitem__(self, x): ('returns', List[ReturnType]), ('inplace', bool), ('abstract', bool), + ('requires_tensor', bool), ('device_guard', bool), ('with_gil', bool), ('deprecated', bool), @@ -559,7 +561,8 @@ def is_mutable_formal_argument(argument, option): def check_methods_do_not_start_with_underscore(name, is_method): - if name in {'_local_scalar', '_values', '_indices', '_nnz', '_sparseDims', '_denseDims'}: + if name in {'_local_scalar', '_values', '_indices', '_nnz', '_dimI', + '_dimV', '_coalesced_'}: return if is_method and name.startswith('_') and not name.startswith('__'): message = "Function '{}' starts with a single underscore and is ".format(name) @@ -923,6 +926,7 @@ def process_option(option, output_options): inplace=option['inplace'], # See Note [Abstract ATen methods] abstract=abstract, + requires_tensor=option.get('requires_tensor', False), device_guard=option.get('device_guard', True), with_gil=option.get('with_gil', False), deprecated=option.get('deprecated', False) @@ -1040,14 +1044,20 @@ def find_formal(formal_name, formals): return formal return None + assert find_formal('Type', formals) is None, \ + "Found Type argument in {}({}). Use TensorOptions instead.".format( + option['name'], ", ".join(option['method_formals_with_defaults'])) + type_method_dispatch = option['type_method_definition_dispatch'] - dispatch_tensor = find_dispatch_tensor(formals) - # we only dispatch via options if there is backend-specific dispatch (otherwise it's a factory function that - # can dispatch directly to the native function). 
backend_dispatch = isinstance(type_method_dispatch, dict) + + # We only dispatch via options if there is backend-specific dispatch + # (otherwise it's a factory function that can dispatch directly to the + # native function). dispatch_options = (find_formal('TensorOptions', formals) - if not dispatch_tensor and backend_dispatch - else None) + if backend_dispatch else None) + # Only dispatch via tensor if there is no Options argument + dispatch_tensor = None if dispatch_options else find_dispatch_tensor(formals) option['type_method_formals'] = [format_formal(f) for f in formals] option['type_method_actuals'] = [f['name'] for f in formals] @@ -1160,6 +1170,7 @@ def find_formal(formal_name, formals): inplace=option['inplace'], # See Note [Abstract ATen methods] abstract=abstract, + requires_tensor=option.get('requires_tensor', False), device_guard=option.get('device_guard', True), with_gil=option.get('with_gil', False), deprecated=option['deprecated'], @@ -1572,15 +1583,8 @@ def process_native(option): TYPE_DERIVED_DEFINITION_NATIVE_MISSING.substitute(env)) else: option['native_type_method_dispatch'] = native_dispatch - type_derived_call_actuals = [] - for actual, arg in zip(option['actuals'], option['arguments']): - if arg.get('is_type_dispatched', False): - type_derived_call_actuals.append('*this') - else: - type_derived_call_actuals.append(actual) type_object_definitions.append( - TYPE_DERIVED_DEFINITION_NATIVE.substitute( - env, type_derived_call_actuals=type_derived_call_actuals)) + TYPE_DERIVED_DEFINITION_NATIVE.substitute(env)) for declaration in declarations: for option in declaration['options']: diff --git a/aten/src/ATen/native/LegacyBridge.cpp b/aten/src/ATen/native/LegacyBridge.cpp index 930832416db17f..1537fcd2db8c87 100644 --- a/aten/src/ATen/native/LegacyBridge.cpp +++ b/aten/src/ATen/native/LegacyBridge.cpp @@ -134,33 +134,6 @@ Tensor& addmm_(Tensor& self, const Tensor& mat1, const Tensor& mat2, Scalar beta } } -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values) { - return at::getType(values.options().layout(at::kSparse)).native_sparse_coo_tensor(indices, values); -} - -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, ArrayRef size) { - return at::getType(values.options().layout(at::kSparse)).native_sparse_coo_tensor(indices, values, size); -} - -Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { - TensorOptions toptions = TensorOptions(options).layout(at::kSparse); - return at::getType(toptions).native_sparse_coo_tensor(size, toptions); -} - -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, const TensorOptions& options) { - TensorOptions toptions = options; - return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(indices, values); -} - -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, ArrayRef size, const TensorOptions& options) { - TensorOptions toptions = options; - return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(indices, values, size); -} - -Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values, ArrayRef size) { - return at::getType(values.options().layout(at::kSparse))._native_sparse_coo_tensor_unsafe(indices, values, size); -} - int64_t get_device(const Tensor& self) { if (_has_native(self)) { return native_get_device(self); diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp index e9241c368c34b8..4894c2cecc129e 100644 --- 
a/aten/src/ATen/native/TensorFactories.cpp +++ b/aten/src/ATen/native/TensorFactories.cpp @@ -153,7 +153,7 @@ Tensor empty_like(const Tensor& self) { Tensor empty_like(const Tensor& self, const TensorOptions& options) { if (options.layout() == kSparse && self.type().is_sparse()) { auto res = at::empty({0}, options); // to be resized - res.sparse_resize_and_clear_(self.sizes(), self._sparseDims(), self._denseDims()); + res.sparse_resize_and_clear_(self.sizes(), self.sparse_dim(), self.dense_dim()); return res; } return at::empty(self.sizes(), options); @@ -525,7 +525,7 @@ Tensor zeros_like(const Tensor& self) { Tensor zeros_like(const Tensor& self, const TensorOptions& options) { if (options.layout() == kSparse && self.type().is_sparse()) { auto res = at::empty({0}, options); // to be resized - res.sparse_resize_and_clear_(self.sizes(), self._sparseDims(), self._denseDims()); + res.sparse_resize_and_clear_(self.sizes(), self.sparse_dim(), self.dense_dim()); return res; } return native::zeros(self.sizes(), options); diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 387bebce3932a1..c8f0d61aee6838 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -9,6 +8,9 @@ #include "ATen/WrapDimUtils.h" #include "c10/util/Exception.h" #include "c10/util/Optional.h" +#include +#include +#include namespace at { namespace native { @@ -163,7 +165,7 @@ Tensor &as_strided_(Tensor& self, IntList size, IntList stride) { return at::as_strided_(self, size, stride, self.storage_offset()); } -Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_t length){ +Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_t length) { int64_t allDim = self.dim(); int64_t end = start+length; AT_CHECK(allDim > 0, "narrow() cannot be applied to a 0-dim tensor."); @@ -171,31 +173,30 @@ Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_ "Dimension ", dim, " out of range. Expecting 0 <= dim < ", allDim, "."); AT_CHECK(start >= 0 && length >= 0 && end <= self.size(dim), "Invalid range to narrow. 
range(start, start+length) must be a subset of range(0, ", self.size(dim), ").") - LongTensor indices = self._indices(); - int64_t sparseDims = self._sparseDims(); + Tensor indices = self._indices(); + int64_t sparse_dim = self.sparse_dim(); - std::vector newSizes = self.sizes().vec(); - newSizes[dim]=length; + std::vector new_sizes = self.sizes().vec(); + new_sizes[dim] = length; - Tensor newValues; - LongTensor newIndices; - if(dim < sparseDims){ + Tensor new_values; + Tensor new_indices; + if (dim < sparse_dim) { Tensor mask = (indices[dim] >= start).__and__((indices[dim] < end)); - newIndices = indices.masked_select(mask).view({sparseDims, -1}); - newIndices[dim].add_(-start); + new_indices = indices.masked_select(mask).view({sparse_dim, -1}); + new_indices[dim].sub_(start); Tensor nzIndices = mask.nonzero().view(-1); - newValues = self._values().index_select(0, nzIndices); - }else{ + new_values = self._values().index_select(0, nzIndices); + } else { /* This means we are narrowing on a dense dim, which is in effect just a regular narrow on _values() */ - newIndices = indices; - int64_t ddim = dim - sparseDims + 1; - newValues = self._values().narrow_copy(ddim, start, length); + new_indices = indices; + int64_t dense_dim = dim - sparse_dim + 1; + new_values = self._values().narrow_copy(dense_dim, start, length); } - SparseTensor newTensor = at::sparse_coo_tensor(newIndices, newValues, newSizes, self.type().options()); - _get_sparse_impl(newTensor)->set_coalesced(self.is_coalesced()); - return newTensor; + auto newTensor = at::sparse_coo_tensor(new_indices, new_values, new_sizes); + return newTensor._coalesced_(self.is_coalesced()); } Tensor narrow_copy_dense(const Tensor& self, int64_t dim, int64_t start, int64_t length){ @@ -397,16 +398,16 @@ Tensor& stack_out(Tensor& result, TensorList tensors, int64_t dim) { } static inline Tensor & sparse_transpose_(Tensor & self, int64_t dim0, int64_t dim1) { - int64_t nsparseDims = self._sparseDims(); - AT_CHECK(dim0 < nsparseDims && dim1 < nsparseDims, + int64_t nsparse_dim = self.sparse_dim(); + AT_CHECK(dim0 < nsparse_dim && dim1 < nsparse_dim, "sparse transpose: transposed dimensions must be sparse ", - "Got sparseDims: ", nsparseDims, ", d0: ", dim0, ", d1: ", dim1); + "Got sparse_dim: ", nsparse_dim, ", d0: ", dim0, ", d1: ", dim1); if (self._indices().numel() == 0 && self._values().numel() == 0) { auto sizes = self.sizes().vec(); std::swap(sizes[dim0], sizes[dim1]); - _get_sparse_impl(self)->raw_resize_(self._sparseDims(), self._denseDims(), sizes); + at::sparse::get_sparse_impl(self)->raw_resize_(self.sparse_dim(), self.dense_dim(), sizes); } else { auto indices = self._indices(); auto row0 = indices.select(0, dim0); @@ -418,12 +419,12 @@ static inline Tensor & sparse_transpose_(Tensor & self, int64_t dim0, int64_t di row0.copy_(row1); row1.copy_(tmp); - _get_sparse_impl(self)->set_coalesced(false); + self._coalesced_(false); auto sizes = self.sizes().vec(); std::swap(sizes[dim0], sizes[dim1]); - _get_sparse_impl(self)->raw_resize_(self._indices().size(0), self._values().dim() - 1, sizes); + at::sparse::get_sparse_impl(self)->raw_resize_(self._indices().size(0), self._values().dim() - 1, sizes); } return self; } @@ -469,11 +470,11 @@ Tensor transpose(const Tensor & self, int64_t dim0, int64_t dim1) { static void check_t(const Tensor& self, const char *fn) { if (self.is_sparse()) { - int64_t sparseDims = self._sparseDims(); - int64_t denseDims = self._denseDims(); - AT_CHECK(sparseDims == 2 && denseDims == 0, + int64_t sparse_dim = 
self.sparse_dim();
+ int64_t dense_dim = self.dense_dim();
+ AT_CHECK(sparse_dim == 2 && dense_dim == 0,
fn, " expects a tensor with 2 sparse and 0 dense dimensions, but got ",
- sparseDims, " sparse and ", denseDims, " dense dimensions");
+ sparse_dim, " sparse and ", dense_dim, " dense dimensions");
} else if (self.dim() != 2) {
AT_ERROR(fn, " expects a 2D tensor, but self is ", self.dim(), "D");
}
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 49c05ecdf6e78b..a95cfc3d817dfb 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -1929,66 +1929,160 @@
- func: addmm_(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
variants: method
-# NB: I have to decompose sparse_coo_tensor into two functions, because
-# it has custom dispatch logic for which Type to dispatch on (we must
-# use the sparse equivalent of the type of the SECOND argument).
-#
-# The actual dispatcher, native_sparse_coo_tensor, has all of its overloads
-# removed so you don't accidentally trigger the default behavior, which
-# is to infer Type based on the first argument (indices), which is ~never
-# what you want. (I guess hypothetically it would work; you'd
-# just only ever dispatch to CPULongTensor or CUDALongTensor, but that
-# seems a bit too finely balanced.)
-
-- func: native_sparse_coo_tensor(IntList size, TensorOptions options) -> Tensor
- variants: []
- dispatch:
- SparseCPU: new_with_size_sparse
- SparseCUDA: new_with_size_sparse
-
-- func: native_sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor
- variants: []
- dispatch:
- SparseCPU: new_with_tensor_sparse
- SparseCUDA: new_with_tensor_sparse
-
-- func: native_sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor
- variants: []
- dispatch:
- SparseCPU: new_with_tensor_and_size_sparse
- SparseCUDA: new_with_tensor_and_size_sparse
-- func: sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor
+# NOTE [ Sparse: autograd and API ]
+#
+#
+# Sparse Tensor Constructors
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The API entry points to sparse tensor construction should be
+# `sparse_coo_tensor` and `_sparse_coo_tensor_unsafe`. Depending on whether the
+# indices and values tensors are given, they eventually dispatch to either
+# `sparse_coo_tensor_with_dims` or `sparse_coo_tensor_with_dims_and_tensors`.
+#
+# The autograd support for the ctor is implemented on `sparse_coo_tensor_with_dims_and_tensors`.
+#
+# The API methods `sparse_coo_tensor` and `_sparse_coo_tensor_unsafe`
+# **must not** have specific type dispatches because otherwise codegen will
+# consider them as abstract methods (see Note [Abstract ATen methods]), dispatch
+# using **Tensor** type, and thus lose autograd tracking on the actual method
+# they dispatch to, e.g., `sparse_coo_tensor_with_dims_and_tensors`.
+#
+# The actual ctors `sparse_coo_tensor_with_dims` and `sparse_coo_tensor_with_dims_and_tensors`,
+# on the other hand, need to create `SparseTensorImpl` and know nothing about
+# how `VariableType`s work. So they need to be dispatched using Tensor types.
+# We thus put `requires_tensor=True` to ensure that `VariableType` will unwrap
+# the given variables and call with the Tensor type.
+#
+#
+# Sparse Methods API Design
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Goals: 1. Flexible API for users to write custom sparse ops
+# 2.
ctor and member accessor with autograd support
+#
+# To achieve 1, we need to provide a set of *dangerous* APIs (dangerous in the
+# sense that misusing them will break sparse tensor invariants and may result in
+# unexpected behavior, e.g., crash). These methods are all prefixed with
+# underscore "_" to indicate that they should be used with care. We provide:
+#
+# + `_indices()`: returns the *raw* indices within the sparse tensor (not just
+# sharing storage). Any inplace operation will change the
+# actual indices, including t_, set_, as_strided_, resize_,
+# etc.
+# + `_values()`: returns the *raw* values within the sparse tensor. Similar
+# semantics as `_indices()`
+# + `_nnz()`: returns the number of non-zero entries. This will always be
+# determined by the shapes of indices and values.
+# + `_coalesced_(bool)`: inplace sets whether the tensor is coalesced, and
+# returns itself.
+#
+# These methods are very useful in writing new operations, e.g., a custom
+# autograd Function.
+#
+# We also provide other public *safe* APIs:
+# + `indices()`: returns a **view** of the indices tensor if the sparse tensor
+# is **coalesced**.
+# + `values()`: returns a **view** of the values tensor if the containing
+# sparse tensor is **coalesced**.
+# + `sparse_dim()`: number of sparse dimensions
+# + `dense_dim()`: number of dense dimensions
+# + `is_coalesced()`: whether the sparse tensor is coalesced
+#
+# `_indices()` and `_values()` should return the raw indices and values dense
+# tensors within a sparse tensor. They can be quite unsafe with inplace
+# operations like `t_()`, and expose uncoalesced indices and values. The public
+# recommended API is `indices()` and `values()`, both of which first check that
+# the tensor is coalesced and return views on those tensors.
+#
+#
+# Autograd Support
+# ~~~~~~~~~~~~~~~~
+#
+# Autograd is supported on `values()` and sparse tensor ctor with indices and
+# values tensors. E.g., `torch.sparse_coo_tensor(i, v).values().sum()` is
+# differentiable w.r.t. `v`.
+#
+# NB: The `values()` and `_values()` operators are special in that they are
+# layout-aware, i.e., the output depends not just on the data it represents, but
+# also on the input layout details (in this case, the `indices` tensor). See
+# NOTE [ as_strided Backward and layout-aware/agnostic autograd ] in Functions.cpp
+# for discussion on layout-aware vs layout-agnostic autograd. Since PyTorch ops
+# operate in the layout-agnostic mode, similar to `as_strided`, backward of
+# these two operators needs to consider them in a layout-agnostic way:
+# + `values()`:
+# Input is coalesced.
+# We just pretend having `input.indices()` as an additional argument
+# `input_indices`, then forward is similar to
+# `input.to(kStrided).index_select(input_indices)` regardless of the layout.
+# Note that `values()` normally is layout-aware even if we constrain
+# ourselves on sparse inputs since it may include all-zero values entries
+# as "present" entries.
+# + `_values()`:
+# Input may be uncoalesced.
+# It is not straightforward to construct a layout-agnostic version because
+# duplicate indices entries may exist and additional parameterization is
+# needed to distribute the value into different values entries. Furthermore,
+# this op is intended to provide ways to write custom sparse ops, rather
+# than being used in the autograd graph, so it is marked as *non-differentiable*
+# in derivatives.yaml.
+# +# Before reading the following, see NOTE [ Autograd Variable Views ] in +# variable.h for details on views that are tracked by autograd, and views that +# are not. +# +# Moreover, these methods return tensors that share storage with inputs, so we +# mark these methods as view ops to support autograd history tracking. +# The sparse tensor ctor output should technically be view of both input indices +# and values tensors, but currently we only support setting as view of a single +# Variable, so it is only view of the values tensor. +# TODO: clone indices in sparse tensor ctor. +# +# For other methods that return outputs that share storage with inputs, i.e., +# `indices()` and `_indices()`. We mark their outputs as non-differentiable, so +# the view relation is not tracked by autograd, but the version counter is still +# shared. In other words, their outputs are non-differentiable views of the +# sparse tensor. -- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given # the default would never make sense. - func: sparse_coo_tensor(IntList size, *, TensorOptions options) -> Tensor -- func: sparse_coo_tensor(IndexTensor indices, Tensor values, *, TensorOptions options) -> Tensor +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, *, TensorOptions options={}) -> Tensor + +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options={}) -> Tensor + +- func: _sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options={}) -> Tensor + -- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options) -> Tensor +- func: _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, IntList size, *, TensorOptions options) -> Tensor + dispatch: + SparseCPU: new_with_dims_sparse + SparseCUDA: new_with_dims_sparse + requires_tensor: True -- func: _native_sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor - variants: [] +- func: _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values, *, TensorOptions options) -> Tensor dispatch: - SparseCPU: new_with_tensor_and_size_unsafe_sparse - SparseCUDA: new_with_tensor_and_size_unsafe_sparse + SparseCPU: new_with_dims_and_tensor_sparse + SparseCUDA: new_with_dims_and_tensor_sparse + requires_tensor: True -- func: _sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor -- func: sparse_resize_(Tensor self, IntList size, int64_t sparseDims, int64_t denseDims) -> Tensor +- func: sparse_resize_(Tensor self, IntList size, int64_t sparse_dim, int64_t dense_dim) -> Tensor variants: method dispatch: SparseCPU: sparse_resize_ SparseCUDA: sparse_resize_ + requires_tensor: True -- func: sparse_resize_and_clear_(Tensor self, IntList size, int64_t sparseDims, int64_t denseDims) -> Tensor +- func: sparse_resize_and_clear_(Tensor self, IntList size, int64_t sparse_dim, int64_t dense_dim) -> Tensor variants: method dispatch: SparseCPU: sparse_resize_and_clear_ SparseCUDA: sparse_resize_and_clear_ + requires_tensor: True - func: sparse_mask(Tensor self, SparseTensorRef mask) -> Tensor @@ -1996,6 +2090,7 @@ dispatch: CPU: sparse_mask_cpu CUDA: sparse_mask_cuda + requires_tensor: True - func: to_dense(Tensor self) -> Tensor @@ -2003,33 +2098,38 @@ dispatch: SparseCPU: sparse_to_dense 
SparseCUDA: sparse_to_dense + requires_tensor: True -- func: _sparseDims(Tensor self) -> int64_t +- func: sparse_dim(Tensor self) -> int64_t variants: method dispatch: - SparseCPU: _sparseDims_sparse - SparseCUDA: _sparseDims_sparse + SparseCPU: sparse_dim_sparse + SparseCUDA: sparse_dim_sparse + requires_tensor: True device_guard: False # legacy method - func: _dimI(Tensor self) -> int64_t - variants: function - dispatch: _sparseDims_sparse + variants: method + dispatch: sparse_dim_sparse + requires_tensor: True device_guard: False -- func: _denseDims(Tensor self) -> int64_t +- func: dense_dim(Tensor self) -> int64_t variants: method dispatch: - SparseCPU: _denseDims_sparse - SparseCUDA: _denseDims_sparse + SparseCPU: dense_dim_sparse + SparseCUDA: dense_dim_sparse + requires_tensor: True device_guard: False # legacy method - func: _dimV(Tensor self) -> int64_t - variants: function - dispatch: _denseDims_sparse + variants: method + dispatch: dense_dim_sparse + requires_tensor: True device_guard: False @@ -2038,6 +2138,7 @@ dispatch: SparseCPU: _nnz_sparse SparseCUDA: _nnz_sparse + requires_tensor: True device_guard: False @@ -2046,6 +2147,7 @@ dispatch: SparseCPU: coalesce_sparse_cpu SparseCUDA: coalesce_sparse_cuda + requires_tensor: True - func: is_coalesced(Tensor self) -> bool @@ -2053,6 +2155,7 @@ dispatch: SparseCPU: is_coalesced_sparse SparseCUDA: is_coalesced_sparse + requires_tensor: True device_guard: False @@ -2061,14 +2164,42 @@ dispatch: SparseCPU: _indices_sparse SparseCUDA: _indices_sparse + requires_tensor: True device_guard: False - - func: _values(Tensor self) -> Tensor variants: method dispatch: SparseCPU: _values_sparse SparseCUDA: _values_sparse + requires_tensor: True + device_guard: False + +# This method doesn't do any check but only directly sets the flag. So it can be +# a bit unsafe. Similar to _indices and _values, this is useful for implementing +# custom sparse operations in Python/C++ extension. 
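+# For example, sparse_transpose_ in TensorShape.cpp swaps two rows of _indices()
+# in place and then calls _coalesced_(false), since the swapped indices are no
+# longer guaranteed to be in sorted (coalesced) order.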
+- func: _coalesced_(Tensor self, bool coalesced) -> Tensor + variants: method + dispatch: + SparseCPU: _coalesced_sparse_ + SparseCUDA: _coalesced_sparse_ + requires_tensor: True + device_guard: False + +- func: indices(Tensor self) -> Tensor + variants: method + dispatch: + SparseCPU: indices_sparse + SparseCUDA: indices_sparse + requires_tensor: True + device_guard: False + +- func: values(Tensor self) -> Tensor + variants: method + dispatch: + SparseCPU: values_sparse + SparseCUDA: values_sparse + requires_tensor: True device_guard: False @@ -2076,17 +2207,20 @@ dispatch: SparseCPU: hspmm_out_sparse_cpu SparseCUDA: hspmm_out_sparse_cuda + requires_tensor: True - func: hspmm(Tensor mat1, Tensor mat2) -> Tensor dispatch: SparseCPU: hspmm_sparse_cpu SparseCUDA: hspmm_sparse_cuda + requires_tensor: True - func: copy_sparse_to_sparse_(Tensor self, Tensor src, bool non_blocking=false) -> Tensor variants: function dispatch: SparseCPU: copy_sparse_ SparseCUDA: copy_sparse_ + requires_tensor: True - func: numel(Tensor self) -> int64_t variants: function, method diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 107233ecfccb48..aba245881b8bfc 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -1,64 +1,74 @@ // Basic functions on sparse tensors #include +#include #include #include #include -#include +#include #include namespace at { namespace native { +using namespace at::sparse; + + /****************************************************************************** * access methods ******************************************************************************/ -int64_t _sparseDims_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->sparseDims(); +int64_t sparse_dim_sparse(const SparseTensor& self) { + return get_sparse_impl(self)->sparse_dim(); } -int64_t _denseDims_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->denseDims(); +int64_t dense_dim_sparse(const SparseTensor& self) { + return get_sparse_impl(self)->dense_dim(); } bool is_coalesced_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->coalesced(); + return get_sparse_impl(self)->coalesced(); } int64_t _nnz_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->nnz(); + return get_sparse_impl(self)->nnz(); } -// TODO: This is wrong: if nnz == 0 but indices/values is not -// empty then we'll return all the values, even the ones that -// are "masked out" by nnz +// Why are there so many methods to get indices and value? +// See Note [ Sparse: different methods to get indices and values ] in native_functions.yaml Tensor _indices_sparse(const SparseTensor& self) { - auto nnz = self._nnz(); - if (nnz == 0) { - // Narrows don't work on 0-length tensors - // TODO: When we handle zero-size dims correctly, this will work and - // we can remove the special case. 
- return _get_sparse_impl(self)->indices(); - } - return _get_sparse_impl(self)->indices().narrow(1, 0, nnz); + return get_sparse_impl(self)->indices(); } Tensor _values_sparse(const SparseTensor& self) { - // See indices for some relevant notes - auto nnz = self._nnz(); - if (nnz == 0) { - return _get_sparse_impl(self)->values(); - } - return _get_sparse_impl(self)->values().narrow(0, 0, nnz); + return get_sparse_impl(self)->values(); +} + +Tensor &_coalesced_sparse_(SparseTensor& self, bool coalesced) { + get_sparse_impl(self)->set_coalesced(coalesced); + return self; +} + +Tensor indices_sparse(const Tensor& self) { + AT_CHECK(self.is_coalesced(), + "Cannot get indices on an uncoalesced tensor, please call .coalesce() first"); + return get_sparse_impl(self)->indices().alias(); +} + +Tensor values_sparse(const Tensor& self) { + AT_CHECK(self.is_coalesced(), + "Cannot get values on an uncoalesced tensor, please call .coalesce() first"); + return get_sparse_impl(self)->values().alias(); } /****************************************************************************** * creation methods + * See NOTE [ Sparse: autograd and API ] for details ******************************************************************************/ -/* Empty init */ +/*** Helper methods ***/ + SparseTensor new_sparse(const TensorOptions& options) { AT_ASSERT(!options.is_variable()); AT_ASSERT(options.layout() == kSparse); @@ -72,184 +82,201 @@ SparseTensor new_sparse(const TensorOptions& options) { type_id, scalarTypeToTypeMeta(options.dtype())); } -/*** Helper methods ***/ +/** Actual dispatched creation methods ***/ + +SparseTensor new_with_dims_sparse(int64_t sparse_dim, int64_t dense_dim, ArrayRef size, const TensorOptions& options) { + SparseTensor self = new_sparse(options); + AT_CHECK(size.size() != 0, + "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ +or you must provide a single-element `values` tensor (e.g. 
x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); + get_sparse_impl(self)->resize_and_clear_(sparse_dim, dense_dim, size); + return self; +} + +// Does NOT make copies of indices and values +SparseTensor new_with_dims_and_tensor_sparse( + int64_t sparse_dim, + int64_t dense_dim, + ArrayRef size, + const LongTensor& indices, + const Tensor& values, + const TensorOptions& options) { + SparseTensor self = new_sparse(options); + get_sparse_impl(self)->resize_(sparse_dim, dense_dim, size); + alias_into_sparse(self, indices, values); + return self; +} + +/** Public creation API that dispatch to methods above **/ + +/** Empty init **/ +Tensor empty_sparse(IntList size, const TensorOptions& options) { + return new_with_dims_sparse(size.size(), 0, size, options); +} + +/* Shape init */ +Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { + return at::_sparse_coo_tensor_with_dims(size.size(), 0, size, options.layout(at::kSparse)); +} /* Pointer-copy init */ -SparseTensor new_with_tensor_sparse(const LongTensor& indices, const Tensor& values_) { - Tensor values; - if (values_.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 1D tensor - values = values_.expand({1}); - } else { - values = values_; + +// helper +namespace { + static inline Tensor expand_values_if_needed(const Tensor& values) { + // expand + if (values.dim() == 0) { + // Mimic Numpy behavior here and treat it as a 1D tensor + return values.expand({1}); + } else { + return values; + } } +} + +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const TensorOptions& options) { + Tensor values = expand_values_if_needed(values_); - const SparseType& dtype = values.type().toSparse(); + // arg checking + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); + // the following checks are redundant because they are also checked in SparseTensorImpl::set_indices_and_values_unsafe + // but we need to ensure them in order to infer the shape. + AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes()) + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); // If sizes are not given, it is inferred as max index of each dim. - int64_t sparseDims = indices.size(0); - int64_t denseDims = values.dim() - 1; + int64_t sparse_dim = indices.size(0); + int64_t dense_dim = values.dim() - 1; - std::vector computed_sizes(sparseDims + denseDims); + std::vector computed_sizes(sparse_dim + dense_dim); if (indices.numel() > 0) { // If the indices has elements in it, we infer the minimum sparse dimension sizes // as the max value of each dim in indices. // NB: It used to keepdim. I think that was wrong. 
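A hedged sketch of the size-inference path in this hunk: when no size is given, each sparse dimension is inferred as max index + 1, and the trailing dimensions of `values` supply the dense sizes (the lines just below also reject negative indices during this inference).

```python
import torch

i = torch.tensor([[0, 1, 4],
                  [2, 0, 3]])          # sparse_dim = indices.size(0) = 2
v = torch.randn(3, 5)                  # dense_dim = values.dim() - 1 = 1

s = torch.sparse_coo_tensor(i, v)      # no explicit size given
print(s.shape)                         # torch.Size([5, 4, 5]): (4+1, 3+1) + values.size(1)
print(s.sparse_dim(), s.dense_dim())   # 2 1
```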
+ LongTensor min_indices = std::get(indices.min(/* dim */ 1, /* keepdim */ false)); LongTensor computed_indices_sizes = std::get(indices.max(/* dim */ 1, /* keepdim */ false)); computed_indices_sizes.add_(1); // len = max_index + 1 - LongTensor cpu_computed_indices_sizes; - if (computed_indices_sizes.is_cuda()) { - cpu_computed_indices_sizes = at::empty(computed_indices_sizes.sizes(), at::initialTensorOptions().dtype(kLong)); - cpu_computed_indices_sizes.copy_(computed_indices_sizes); - } else { - cpu_computed_indices_sizes = computed_indices_sizes; - } + LongTensor cpu_min_indices = min_indices.to(at::DeviceType::CPU); + LongTensor cpu_computed_indices_sizes = computed_indices_sizes.to(at::DeviceType::CPU); + auto cpu_min_indices_accessor = cpu_min_indices.accessor(); auto cpu_computed_indices_sizes_accessor = cpu_computed_indices_sizes.accessor(); - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { + int64_t min_index_in_dim = cpu_min_indices_accessor[d]; + AT_CHECK(min_index_in_dim >= 0, + "found negative index ", min_index_in_dim, " for dim ", d); computed_sizes[static_cast(d)] = cpu_computed_indices_sizes_accessor[d]; } } else { // If the indices doesn't have elements in it, there is not enough information // to know what the minimum sparse dimension sizes should be, and in this case // we set them to 0 - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { computed_sizes[static_cast(d)] = 0; } } - for (int64_t d = 0; d < denseDims; d++) { - computed_sizes[static_cast(sparseDims + d)] = values.size(d+1); + for (int64_t d = 0; d < dense_dim; d++) { + computed_sizes[static_cast(sparse_dim + d)] = values.size(d+1); } - return _new_with_dims_and_tensor_sparse(dtype, sparseDims, denseDims, computed_sizes, indices, values); -} -SparseTensor new_with_dims_and_size_sparse(int64_t sparseDims, int64_t denseDims, ArrayRef size, const TensorOptions& options) { - SparseTensor self = new_sparse(options); - AT_CHECK(size.size() != 0, - "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ -or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); - _get_sparse_impl(self)->resize_and_clear_(sparseDims, denseDims, size); - return self; + return at::_sparse_coo_tensor_with_dims_and_tensors( + sparse_dim, dense_dim, computed_sizes, indices, values, values.options().layout(kSparse)); } -Tensor empty_sparse(IntList size, const TensorOptions& options) { - AT_CHECK(size.size() != 0, - "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ - or you must provide a single-element `values` tensor (e.g. 
x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); - AT_ASSERT(!options.is_variable()); - AT_ASSERT(options.layout() == kSparse); - TensorTypeId type_id; - if (options.device().type() == kCUDA) { - type_id = SparseCUDATensorId(); - } else { - type_id = SparseCPUTensorId(); - } - auto tensor = Tensor(c10::make_intrusive(type_id, scalarTypeToTypeMeta(options.dtype()))); - _get_sparse_impl(tensor)->resize_and_clear_(size.size(), 0, size); - return tensor; -} - -SparseTensor new_with_size_sparse(IntList size, const TensorOptions& options) { - return new_with_dims_and_size_sparse(size.size(), 0, size, options); -} - -// NOTE: new_with_tensor_and_size_unsafe_sparse() differs from new_with_tensor_and_size_sparse() -// in that we don't check whether any indices are out of boundaries of `sizes`, thus avoiding a -// copy from CUDA to CPU. However, this function should ONLY be used where we know that the indices -// are guaranteed to be within bounds. // NB: Got rid of the sizes == NULL case -SparseTensor new_with_tensor_and_size_unsafe_sparse(const LongTensor& indices, const Tensor& values_, ArrayRef sizes) { - Tensor values; - if (values_.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 1D tensor - values = values_.expand({1}); - } else { - values = values_; - } - - const SparseType& dtype = values.type().toSparse(); - - int64_t sparseDims = indices.size(0); - int64_t denseDims = values.dim() - 1; - return _new_with_dims_and_tensor_sparse(dtype, sparseDims, denseDims, sizes, indices, values); -} - -// NB: Got rid of the sizes == NULL case -SparseTensor new_with_tensor_and_size_sparse(const LongTensor& indices, const Tensor& values_, ArrayRef sizes) { - Tensor values; - if (values_.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 1D tensor - values = values_.expand({1}); - } else { - values = values_; - } - - const SparseType& dtype = values.type().toSparse(); - - int64_t sparseDims = indices.size(0); - int64_t denseDims = values.dim() - 1; - AT_CHECK(sizes.size() == sparseDims + denseDims, "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", sizes.size()); - - // Check to make sure all indices are within the boundaries of `sizes` +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { + Tensor values = expand_values_if_needed(values_); + + // arg checking + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); + // the following checks are redundant because they are also checked in SparseTensorImpl::set_indices_and_values_unsafe + // but we need to ensure them in order to infer the shape. 
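For the explicit-size overload added here, the dimension bookkeeping looks roughly like this at the Python level (sketch; the error text paraphrases the AT_CHECK just below):

```python
import torch

i = torch.tensor([[0, 2]])                     # sparse_dim = 1
v = torch.randn(2, 3)                          # dense_dim = 1
s = torch.sparse_coo_tensor(i, v, (4, 3))      # len(size) must equal sparse_dim + dense_dim
print(s.shape, s.sparse_dim(), s.dense_dim())  # torch.Size([4, 3]) 1 1

# torch.sparse_coo_tensor(i, v, (4,)) would be rejected, roughly:
#   "number of dimensions must be sparse_dim (1) + dense_dim (1), but got 1"
```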
+ AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes()) + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); + int64_t sparse_dim = indices.size(0); + int64_t dense_dim = values.dim() - 1; + AT_CHECK(size.size() == sparse_dim + dense_dim, + "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size()); + + // Check to make sure all indices are within the boundaries of `size` if (indices.numel() > 0) { LongTensor min_indices = std::get(indices.min(/* dim */ 1, /* keepdim */ false)); LongTensor max_indices = std::get(indices.max(/* dim */ 1, /* keepdim */ false)); LongTensor cpu_min_indices, cpu_max_indices; if (indices.is_cuda()) { - cpu_min_indices = at::CPU(kLong).copy(min_indices); - cpu_max_indices = at::CPU(kLong).copy(max_indices); + cpu_min_indices = min_indices.to(at::DeviceType::CPU); + cpu_max_indices = max_indices.to(at::DeviceType::CPU); } else { cpu_min_indices = min_indices; cpu_max_indices = max_indices; } auto cpu_min_indices_accessor = cpu_min_indices.accessor(); auto cpu_max_indices_accessor = cpu_max_indices.accessor(); - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { // NB: This used to sync ndim times to access each entry; now we copy // everything to CPU first and then access it. int64_t min_index_in_dim = cpu_min_indices_accessor[d]; AT_CHECK(min_index_in_dim >= 0, "found negative index ", min_index_in_dim, " for dim ", d); int64_t max_index_in_dim = cpu_max_indices_accessor[d]; - int64_t dim_size = sizes[static_cast(d)]; + int64_t dim_size = size[static_cast(d)]; AT_CHECK(max_index_in_dim < dim_size, - "sizes is inconsistent with indices: for dim ", d, ", size is ", dim_size, " but found index ", max_index_in_dim); + "size is inconsistent with indices: for dim ", d, ", size is ", dim_size, " but found index ", max_index_in_dim); } } - return _new_with_dims_and_tensor_sparse(dtype, sparseDims, denseDims, sizes, indices, values); + + return at::_sparse_coo_tensor_with_dims_and_tensors( + sparse_dim, dense_dim, size, indices, values, values.options().layout(kSparse)); +} + +// NOTE: _sparse_coo_tensor_unsafe() differs from sparse_coo_tensor() +// in that we don't check whether any indices are out of boundaries of `size`, thus avoiding a +// copy from CUDA to CPU. However, this function should ONLY be used where we know that the indices +// are guaranteed to be within bounds. 
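The bounds check above (deliberately skipped by the `_sparse_coo_tensor_unsafe` variant noted here, to avoid a CUDA-to-CPU copy) behaves roughly like this; illustrative sketch:

```python
import torch

i = torch.tensor([[0, 5]])          # index 5 is out of range for a size-4 dimension
v = torch.tensor([1.0, 2.0])
try:
    torch.sparse_coo_tensor(i, v, (4,))
except RuntimeError as e:
    print(e)  # ~ "size is inconsistent with indices: for dim 0, size is 4 but found index 5"

# The _sparse_coo_tensor_unsafe() path skips this validation entirely, which is only
# safe when the caller already guarantees in-bounds indices.
```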
+// NB: Got rid of the size == NULL case +Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { + Tensor values = expand_values_if_needed(values_); + + // arg checking + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); + + int64_t sparse_dim = indices.size(0); + int64_t dense_dim = values.dim() - 1; + + return at::_sparse_coo_tensor_with_dims_and_tensors( + sparse_dim, dense_dim, size, indices, values, values.options().layout(kSparse)); } // NB: Deleted newWithSizeNd variants SparseTensor clone_sparse(const SparseTensor& self) { - SparseTensor other = new_with_dims_and_size_sparse(self._sparseDims(), self._denseDims(), self.sizes(), self.options()); - _copy_into_sparse(other, _get_sparse_impl(self)->indices(), _get_sparse_impl(self)->values(), true); - _get_sparse_impl(other)->set_coalesced(self.is_coalesced()); - return other; + SparseTensor other = new_with_dims_sparse(self.sparse_dim(), self.dense_dim(), self.sizes(), self.options()); + copy_into_sparse(other, self._indices(), self._values(), true); + return other._coalesced_(self.is_coalesced()); } /****************************************************************************** * reshaping methods ******************************************************************************/ -SparseTensor& sparse_resize_(SparseTensor& self, ArrayRef size, int64_t sparseDims, int64_t denseDims) { - _get_sparse_impl(self)->resize_(sparseDims, denseDims, size); +SparseTensor& sparse_resize_(SparseTensor& self, ArrayRef size, int64_t sparse_dim, int64_t dense_dim) { + get_sparse_impl(self)->resize_(sparse_dim, dense_dim, size); return self; } -SparseTensor& sparse_resize_and_clear_(SparseTensor& self, ArrayRef size, int64_t sparseDims, int64_t denseDims) { - _get_sparse_impl(self)->resize_and_clear_(sparseDims, denseDims, size); +SparseTensor& sparse_resize_and_clear_(SparseTensor& self, ArrayRef size, int64_t sparse_dim, int64_t dense_dim) { + get_sparse_impl(self)->resize_and_clear_(sparse_dim, dense_dim, size); return self; } namespace { bool _is_same_size_as_sparse(const SparseTensor& self, const SparseTensor& src) { - return self._sparseDims() == src._sparseDims() && self._denseDims() == src._denseDims() && self.sizes().equals(src.sizes()); + return self.sparse_dim() == src.sparse_dim() && self.dense_dim() == src.dense_dim() && self.sizes().equals(src.sizes()); } } SparseTensor& resize_as_sparse_(SparseTensor& self, const SparseTensor& src) { if (!_is_same_size_as_sparse(self, src)) { - sparse_resize_(self, src.sizes(), src._sparseDims(), src._denseDims()); + sparse_resize_(self, src.sizes(), src.sparse_dim(), src.dense_dim()); } return self; } @@ -257,17 +284,15 @@ SparseTensor& resize_as_sparse_(SparseTensor& self, const SparseTensor& src) { // NB: Dropped the resizeNd variants Tensor sparse_to_dense(const SparseTensor& self) { - Tensor dst = at::zeros(self.sizes(), self.type().toDense()); + Tensor dst = at::zeros(self.sizes(), self.options().layout(kStrided)); return dst.add_(self); } SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src, bool non_blocking) { - if (isSameTensor(self, src)) return self; - _get_sparse_impl(self)->resize_(src._sparseDims(), src._denseDims(), src.sizes()); - // NB: This seems to copy the underlying full indices/values buffer - _copy_into_sparse(self, _get_sparse_impl(src)->indices(), _get_sparse_impl(src)->values(), non_blocking); - 
_get_sparse_impl(self)->set_coalesced(src.is_coalesced()); - return self; + if (is_same_tensor(self, src)) return self; + get_sparse_impl(self)->resize_(src.sparse_dim(), src.dense_dim(), src.sizes()); + copy_into_sparse(self, src._indices(), src._values(), non_blocking); + return self._coalesced_(src.is_coalesced()); } SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { @@ -282,31 +307,24 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { // we should keep the original tensor intact and do coalesce on a copy of the tensor if (self._nnz() < 2) { SparseTensor dst = self.clone(); - _get_sparse_impl(dst)->set_coalesced(true); + dst._coalesced_(true); return dst; } LongTensor indices = self._indices(); Tensor values = self._values().contiguous(); - int64_t sparseDims = self._sparseDims(); - int64_t denseDims = self._denseDims(); + int64_t sparse_dim = self.sparse_dim(); + int64_t dense_dim = self.dense_dim(); int64_t nnz = self._nnz(); - LongTensor indices_scalar = at::zeros({nnz}, kLong); - - int64_t factor = 1; - for (int64_t d = sparseDims - 1; d >= 0; d--) { - LongTensor indices_slice = indices.select(0, d); - indices_scalar.add_(indices_slice, factor); // cadd is swapped args - factor *= self.size(d); - } + LongTensor indices_scalar = flatten_indices(indices, self.sizes()); SparseTensor dst = new_sparse(self.options()); - _get_sparse_impl(dst)->resize_(sparseDims, denseDims, self.sizes()); + get_sparse_impl(dst)->resize_(sparse_dim, dense_dim, self.sizes()); // TODO: is there a more idiomatic way to do this? LongTensor newIndices = at::empty(indices.sizes(), indices.options()); Tensor newValues = at::empty(values.sizes(), values.options()); - _alias_into_sparse(dst, newIndices, newValues); + alias_into_sparse(dst, newIndices, newValues); LongTensor indicesBuffer; LongTensor indicesPermutation; @@ -333,7 +351,7 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { } } else { ++i; - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { newIndicesAccessor[d][i] = indicesAccessor[d][pos]; } if (values.numel() > 0) { // if values is an empty tensor, there are no elements to copy @@ -344,8 +362,8 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { } }); - _get_sparse_impl(dst)->set_coalesced(true); - _get_sparse_impl(dst)->set_nnz_and_narrow(i + 1); + dst._coalesced_(true); + get_sparse_impl(dst)->set_nnz_and_narrow(i + 1); return dst; } @@ -363,14 +381,15 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse return r; } int64_t dim = t.dim(); - int64_t sparseDims = mask._sparseDims(); + int64_t sparse_dim = mask.sparse_dim(); LongTensor mask_indices = mask._indices(); Tensor mask_values = mask._values(); Tensor r_values = at::empty(mask_values.sizes(), r._values().options()); - _alias_into_sparse(r, mask_indices.clone(), r_values); - _get_sparse_impl(r)->set_coalesced(mask.is_coalesced()); + alias_into_sparse(r, mask_indices.clone(), r_values); + r._coalesced_(mask.is_coalesced()); int64_t r_nnz = mask._nnz(); - _get_sparse_impl(r)->set_nnz_and_narrow(r_nnz); + get_sparse_impl(r)->set_nnz_and_narrow(r_nnz); + if (t.numel() == 0) { // if t is an empty tensor, there is no need to mask its elements return r; } @@ -378,11 +397,11 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse // NB: Relies on mask._nnz() == 0 test above auto mask_indices_accessor = mask_indices.accessor(); - if (dim > sparseDims) { + if (dim > sparse_dim) { // NB: This used to reuse 
buffers, but I deoptimized it for (int64_t i = 0; i < r_nnz; i++) { Tensor srcBuffer = t; - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { srcBuffer = srcBuffer.select(0, mask_indices_accessor[d][i]); } Tensor dstBuffer = r_values.select(0, i); @@ -398,7 +417,7 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse scalar_t* t_ptr = t.data(); for (int64_t i = 0; i < r_nnz; i++) { int64_t idx = 0; - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { idx += mask_indices_accessor[d][i] * t.stride(d); } scalar_t val = t_ptr[idx]; diff --git a/aten/src/ATen/native/sparse/SparseTensorMath.cpp b/aten/src/ATen/native/sparse/SparseTensorMath.cpp index 053535a976c46a..994a3647d74a12 100644 --- a/aten/src/ATen/native/sparse/SparseTensorMath.cpp +++ b/aten/src/ATen/native/sparse/SparseTensorMath.cpp @@ -3,12 +3,14 @@ #include #include #include -#include +#include #include namespace at { namespace native { +using namespace at::sparse; + // -------------------------------------------------------------------- // Utility functions // -------------------------------------------------------------------- @@ -43,9 +45,8 @@ namespace { // hummu hummu SparseTensor& zero_sparse_(SparseTensor& self) { AT_ASSERT(self.is_sparse()); - at::zeros_out(self, _get_sparse_impl(self)->sizes()); - _get_sparse_impl(self)->set_coalesced(true); // NB: This is new - return self; + at::zeros_out(self, get_sparse_impl(self)->sizes()); + return self._coalesced_(true); } // NB: Don't need zeros, zeros_like, already implemented in TensorFactories @@ -65,16 +66,17 @@ SparseTensor& mul_out_sparse_zerodim(SparseTensor& r, const SparseTensor& t, con AT_ASSERT(t.is_sparse()); AT_ASSERT(value.dim() == 0); - if (isSameTensor(r, t)) { + if (is_same_tensor(r, t)) { r._values().mul_(value); } else { r.resize_as_(t); - r._indices().resize_as_(t._indices()); - r._indices().copy_(t._indices()); + auto indices = r._indices(); + indices.resize_as_(t._indices()); + indices.copy_(t._indices()); Tensor r_values = r._values(); // Sigh... needed because mul_out takes Tensor& at::mul_out(r_values, t._values(), value); - _get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); - _get_sparse_impl(r)->set_coalesced(t.is_coalesced()); + get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); + r._coalesced_(t.is_coalesced()); } return r; } @@ -93,7 +95,7 @@ SparseTensor& log1p_out_sparse(SparseTensor& r, const SparseTensor& t) { AT_ASSERT(r.is_sparse()); AT_ASSERT(t.is_sparse()); - if (isSameTensor(r, t)) { + if (is_same_tensor(r, t)) { // don't have in-place log1p for uncoalesced input because coalesce() is not in-place AT_CHECK( r.is_coalesced(), "log1p: in-place on uncoalesced tensors is not supported yet!"); @@ -125,14 +127,13 @@ SparseTensor& pow_out_sparse_scalar(SparseTensor& r, const SparseTensor& t_, Sca SparseTensor t = t_.coalesce(); r.resize_as_(t); - r._indices().resize_as_(t._indices()); - r._indices().copy_(t._indices()); + auto indices = r._indices(); + indices.resize_as_(t._indices()); + indices.copy_(t._indices()); Tensor r_values = r._values(); // Sigh... 
needed because pow_out takes Tensor& at::pow_out(r_values, t._values(), value); - _get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); - _get_sparse_impl(r)->set_coalesced(t.is_coalesced()); - - return r; + get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); + return r._coalesced_(t.is_coalesced()); } SparseTensor pow_sparse_scalar(const SparseTensor& t, Scalar value) { @@ -150,16 +151,17 @@ SparseTensor& div_out_sparse_zerodim(SparseTensor& r, const SparseTensor& t, con AT_ASSERT(t.is_sparse()); AT_ASSERT(value.dim() == 0); - if (isSameTensor(r, t)) { + if (is_same_tensor(r, t)) { r._values().div_(value); } else { r.resize_as_(t); - r._indices().resize_as_(t._indices()); - r._indices().copy_(t._indices()); + auto indices = r._indices(); + indices.resize_as_(t._indices()); + indices.copy_(t._indices()); Tensor r_values = r._values(); // Sigh... needed because div_out takes Tensor& at::div_out(r_values, t._values(), value); - _get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); - _get_sparse_impl(r)->set_coalesced(t.is_coalesced()); + get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); + r._coalesced_(t.is_coalesced()); } return r; } @@ -199,20 +201,20 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S return mul_out_sparse_scalar(r, src, value); } - AT_CHECK(_is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t._sparseDims(), " sparse dimensions while 'other' has ", src._sparseDims(), " sparse dimensions"); + AT_CHECK(is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t.sparse_dim(), " sparse dimensions while 'other' has ", src.sparse_dim(), " sparse dimensions"); // saving those because they can be overwritten when doing in-place operations int64_t t_nnz = t._nnz(), s_nnz = src._nnz(), max_nnz = t_nnz + s_nnz; bool t_coalesced = t.is_coalesced(), s_coalesced = src.is_coalesced(); - int64_t sparseDims = src._sparseDims(); + int64_t sparse_dim = src.sparse_dim(); LongTensor t_indices = t._indices(); Tensor t_values = t._values(); LongTensor src_indices = src._indices(); Tensor s_values = src._values(); - LongTensor r_indices = at::empty({sparseDims, max_nnz}, t_indices.options()); - Tensor r_values = _new_values_with_size_of(s_values, max_nnz).zero_(); + LongTensor r_indices = at::empty({sparse_dim, max_nnz}, t_indices.options()); + Tensor r_values = new_values_with_size_of(s_values, max_nnz).zero_(); r.resize_as_(src); - _get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); + get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); int64_t blockSize = r_values.stride(0); int64_t cmp, d; @@ -236,7 +238,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S cmp = 1; } else { cmp = 0; - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { if (t_indices_accessor[d][t_i] < src_indices_accessor[d][s_i]) { cmp = 1; break; @@ -248,7 +250,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S } } if (cmp >= 0) { - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { r_indices_accessor[d][r_i] = t_indices_accessor[d][t_i]; } if (t_values.numel() > 0) { // We add all elements from t_values to r_values only if t_values is not an empty tensor @@ -259,7 +261,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S t_i++; } if (cmp <= 0) { - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { 
r_indices_accessor[d][r_i] = src_indices_accessor[d][s_i]; } if (s_values.numel() > 0) { // We add all elements from s_values to r_values only if s_values is not an empty tensor @@ -274,14 +276,12 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S } ); - _get_sparse_impl(r)->set_nnz_and_narrow(r_i); + get_sparse_impl(r)->set_nnz_and_narrow(r_i); // TODO: I think it may be possible to track inside the loop and // detect when we are uncoalesced (e.g., by observing that an // index goes backwards) which may be more precise than using the // coalesced flag here. But this is easy. - _get_sparse_impl(r)->set_coalesced(t_coalesced && s_coalesced); - - return r; + return r._coalesced_(t_coalesced && s_coalesced); } // -------------------------------------------------------------------- @@ -302,7 +302,7 @@ void add_dense_sparse_worker_cpu(Tensor& r, Scalar value, const SparseTensor& sp #pragma omp parallel for private(k) for (k = 0; k < sparse._nnz(); k++) { int64_t index = r.storage_offset(); - for (int64_t d = 0; d < sparse._sparseDims(); d++) { + for (int64_t d = 0; d < sparse.sparse_dim(); d++) { index += r.stride(d) * indices_accessor[d][k]; } r_ptr[index] += cast_value * values_accessor[k]; @@ -329,9 +329,9 @@ Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, SparseTensorRef LongTensor indices = sparse._indices(); Tensor values = sparse._values(); int64_t nDim = dense.dim(); - int64_t nDimI = sparse._sparseDims(); + int64_t nDimI = sparse.sparse_dim(); - if (!isSameTensor(r, dense)) r.copy_(dense); + if (!is_same_tensor(r, dense)) r.copy_(dense); if (sparse._nnz() == 0) return r; // accessors rely on nnz test @@ -339,7 +339,7 @@ Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, SparseTensorRef auto indices_accessor = indices.accessor(); for (int64_t k = 0; k < sparse._nnz(); k++) { Tensor dstBuffer = r; - for (int64_t d = 0; d < sparse._sparseDims(); d++) { + for (int64_t d = 0; d < sparse.sparse_dim(); d++) { dstBuffer = dstBuffer.select(0, indices_accessor[d][k]); } Tensor srcBuffer = values.select(0, k); @@ -383,15 +383,15 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor // saving those because they can be overwritten when doing in-place operations int64_t t_nnz = t._nnz(), s_nnz = src._nnz(); int64_t max_nnz = std::min(t_nnz, s_nnz); // multiply by zero is zero, and can be dropped - int64_t sparseDims = src._sparseDims(); + int64_t sparse_dim = src.sparse_dim(); LongTensor t_indices = t._indices(); Tensor t_values = t._values(); LongTensor src_indices = src._indices(); Tensor s_values = src._values(); - LongTensor r_indices = at::empty({sparseDims, max_nnz}, t_indices.options()); - Tensor r_values = _new_values_with_size_of(t_values, max_nnz).zero_(); + LongTensor r_indices = at::empty({sparse_dim, max_nnz}, t_indices.options()); + Tensor r_values = new_values_with_size_of(t_values, max_nnz).zero_(); r.resize_as_(src); - _get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); + get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); int64_t match, d; int64_t r_i = 0, t_i = 0, s_i = 0; @@ -406,7 +406,7 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor // indices were found. 
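End-to-end, the CPU add kernel above merges the two sorted (coalesced) index lists in a single pass, summing values where the indices match, and marks the result coalesced only when both inputs were. A hedged sketch:

```python
import torch

a = torch.sparse_coo_tensor(torch.tensor([[0, 2]]),
                            torch.tensor([1.0, 2.0]), (4,)).coalesce()
b = torch.sparse_coo_tensor(torch.tensor([[2, 3]]),
                            torch.tensor([10.0, 20.0]), (4,)).coalesce()

c = a + b                       # single merge pass over both index lists on CPU
print(c.coalesce().indices())   # tensor([[0, 2, 3]])
print(c.coalesce().values())    # tensor([ 1., 12., 20.])
```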
auto index_preamble = [&]() { match = 1; - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { if (t_indices_accessor[d][t_i] < src_indices_accessor[d][s_i]) { t_i++; match = 0; @@ -419,7 +419,7 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor } } if (!match) return false; - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { r_indices_accessor[d][r_i] = t_indices_accessor[d][t_i]; } return true; @@ -451,10 +451,8 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor ); } - _get_sparse_impl(r)->set_nnz_and_narrow(r_i); - _get_sparse_impl(r)->set_coalesced(true); - - return r; + get_sparse_impl(r)->set_nnz_and_narrow(r_i); + return r._coalesced_(true); } // -------------------------------------------------------------------- @@ -472,7 +470,7 @@ void s_addmm_out_sparse_dense_worker(int64_t nnz, int64_t dim_i, int64_t dim_j, if (cast_beta == 0) { r.zero_(); } else if (cast_beta == 1) { - if (!isSameTensor(r, t)) { + if (!is_same_tensor(r, t)) { r.copy_(t); } } else { @@ -523,8 +521,8 @@ Tensor& s_addmm_out_sparse_dense_cpu( AT_CHECK(!sparse_.is_cuda(), "addmm: expected 'mat1' to be a CPU tensor, but got a CUDA tensor"); AT_CHECK(!dense.is_cuda(), "addmm: expected 'mat2' to be a CPU tensor, but got a CUDA tensor"); - AT_CHECK(sparse_._sparseDims() == 2, "addmm: matrices expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, "addmm: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, "addmm: matrices expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, "addmm: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "addmm: matrices expected, got ", dense.dim(), "D tensor"); SparseTensor sparse = sparse_.coalesce(); @@ -599,10 +597,10 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, AT_CHECK(!r.is_cuda(), "hspmm: expected 'out' to be CPU tensor, but got CUDA tensor"); AT_CHECK(!dense.is_cuda(), "hspmm: expected 'other' to be a CPU tensor, but got a CUDA tensor"); - AT_CHECK(sparse_._sparseDims() == 2, - "hspmm: Argument #2: matrices expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, - "hspmm: Argument #2: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, + "hspmm: Argument #2: matrices expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, + "hspmm: Argument #2: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "hspmm: Argument #3: matrices expected, got ", dense.dim(), "D tensor"); @@ -613,7 +611,7 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, AT_CHECK(dense.size(0) == k, "hspmm: Argument #3: Expected dim 0 size ", k, ", got ", dense.size(0)); - _get_sparse_impl(r)->raw_resize_(1, 1, {m, n}); + get_sparse_impl(r)->raw_resize_(1, 1, {m, n}); SparseTensor sparse = sparse_.coalesce(); @@ -649,13 +647,13 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, indices.resize_({1, outNnz}); Tensor values = at::empty({outNnz, n}, dense.options()); - std::vector new_size = _get_sparse_impl(newSparse)->sizes().vec(); + std::vector new_size = get_sparse_impl(newSparse)->sizes().vec(); new_size[0] = outNnz; - 
_get_sparse_impl(newSparse)->raw_resize_(_get_sparse_impl(newSparse)->sparseDims(), _get_sparse_impl(newSparse)->denseDims(), new_size); + get_sparse_impl(newSparse)->raw_resize_(get_sparse_impl(newSparse)->sparse_dim(), get_sparse_impl(newSparse)->dense_dim(), new_size); // Compute output values tensor with sparse * dense multiplication s_addmm_out_sparse_dense_cpu(values, values, newSparse, dense, 0, alpha); - _get_sparse_impl(r)->set_indices_and_values_unsafe(indices, values); + get_sparse_impl(r)->set_indices_and_values_unsafe(indices, values); return r; } @@ -683,10 +681,10 @@ SparseTensor& _sspaddmm_out_cpu( AT_CHECK(!sparse_.is_cuda(), "sspaddmm: expected 'mat1' to be a CPU tensor, but got a CUDA tensor"); AT_CHECK(!dense.is_cuda(), "sspaddmm: expected 'mat2' to be a CPU tensor, but got a CUDA tensor"); - AT_CHECK(sparse_._sparseDims() == 2, - "sspaddmm: Argument #2: matrices expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, - "sspaddmm: Argument #2: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, + "sspaddmm: Argument #2: matrices expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, + "sspaddmm: Argument #2: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "sspaddmm: Argument #2: matrices expected, got ", dense.dim(), "D tensor"); @@ -699,7 +697,7 @@ SparseTensor& _sspaddmm_out_cpu( // NB: This has to occur before the checks, because r may alias t. // See test_saddmm - _get_sparse_impl(r)->raw_resize_(2, 0, {dim_i, dim_k}); + get_sparse_impl(r)->raw_resize_(2, 0, {dim_i, dim_k}); AT_CHECK(dense.size(0) == dim_j, "sspaddmm: Argument #3: Expected dim 0 size ", dim_j, ", got ", dense.size(0)); @@ -774,8 +772,8 @@ SparseTensor& _sspaddmm_out_cpu( ); // to avoid a clone - _get_sparse_impl(r)->set_indices_and_values_unsafe(newi, newv); - _get_sparse_impl(r)->set_nnz_and_narrow(p); + get_sparse_impl(r)->set_indices_and_values_unsafe(newi, newv); + get_sparse_impl(r)->set_nnz_and_narrow(p); return r; } diff --git a/aten/src/ATen/native/sparse/SparseUtils.h b/aten/src/ATen/native/sparse/SparseUtils.h deleted file mode 100644 index 2e48fe41238c22..00000000000000 --- a/aten/src/ATen/native/sparse/SparseUtils.h +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include - -#include - -namespace at { namespace native { - -// Just for documentary purposes -using SparseTensor = Tensor; -using LongTensor = Tensor; -using IntTensor = Tensor; -using SparseType = Type; - -namespace { - -// This is an internal utility function for getting at the SparseTensorImpl, -// so that we can write sparse tensor specific accessors for special fields -// in SparseTensor. You should only use this for writing low level -// setters/getters for SparseTensorImpl fields; otherwise, you should use -// the low level setters/getters that were implemented using this. -// -// This may be called repeatedly, so make sure it's pretty cheap. -SparseTensorImpl* _get_sparse_impl(const SparseTensor& self) { - if (!self.is_sparse()) AT_ERROR("_internal_get_SparseTensorImpl: not a sparse tensor"); - return static_cast(self.unsafeGetTensorImpl()); -} - -// Port of the old THCSTensor_(checkGPU), but it doesn't really belong here -// because it is more general -// NB: I dropped kernelP2PEnabled support -// NB: This only works if the tensors are KNOWN to be CUDA. 
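hspmm keeps the sparse row structure and densifies the columns, which is why the output above is resized to sparse_dim == 1, dense_dim == 1. An illustrative call:

```python
import torch

i = torch.tensor([[0, 2],
                  [1, 0]])
v = torch.tensor([1.0, 2.0])
s = torch.sparse_coo_tensor(i, v, (3, 4)).coalesce()   # 3 x 4 sparse matrix
d = torch.randn(4, 5)                                  # 4 x 5 dense matrix

h = torch.hspmm(s, d)                                  # hybrid sparse/dense result
print(h.shape)                          # torch.Size([3, 5])
print(h.sparse_dim(), h.dense_dim())    # 1 1
```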
-// TODO: Generalize it so it works on CPU as well -inline bool _check_device(ArrayRef ts) { - if (ts.empty()) { - return true; - } - const Tensor& ref_t = ts.front(); - int64_t curDevice = current_device(); - for (const Tensor& t : ts) { - if (t.get_device() != curDevice) return false; - } - return true; -} - -// Takes indices and values and directly puts them into the sparse tensor, no -// copy. This used to be called THSTensor_(_move) -inline void _alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) { - _get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values); -} - -// Take indices and values and makes a (data) copy of them to put into the sparse -// indices/values. This used to be called THSTensor_(_set) -inline void _copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) { - _alias_into_sparse(self, self._indices().type().copy(indices, non_blocking), self._values().type().copy(values, non_blocking)); -} - -// Does NOT make copies of indices/values -inline SparseTensor _new_with_dims_and_tensor_sparse( - const SparseType& dtype, - int64_t sparseDims, - int64_t denseDims, - ArrayRef sizes, - const LongTensor& indices, - const Tensor& values) { - SparseTensor self = at::empty({0}, dtype.options()); - _get_sparse_impl(self)->resize_(sparseDims, denseDims, sizes); - _alias_into_sparse(self, indices, values); - return self; -} - -// TODO: put this into the public API -inline bool isSameTensor(const Tensor& lhs, const Tensor& rhs) { - return lhs.unsafeGetTensorImpl() == rhs.unsafeGetTensorImpl(); -} - -inline bool _is_same_density(const SparseTensor& self, const SparseTensor& src) { - return self._sparseDims() == src._sparseDims() && self._denseDims() == src._denseDims(); -} - -// if forceClone is true, the result will forced to be a clone of self. -inline LongTensor _newFlattenedIndices(const SparseTensor& self, bool forceClone) { - LongTensor indices = self._indices(); - int64_t sparseDims = self._sparseDims(); - if (sparseDims == 1) { - if (forceClone) { - return indices.clone(); - } else { - return indices; - } - } else { - // FIXME TH_INDEX_BASE - int64_t factor = 1; - LongTensor indices1D = at::empty({1, self._nnz()}, indices.options()); - indices1D.fill_(TH_INDEX_BASE); - for (int64_t d = sparseDims - 1; d >= 0; d--) { - indices1D.add_(indices.select(0, d), factor); - if (TH_INDEX_BASE != 0) { - indices1D.add_(-TH_INDEX_BASE); - } - factor *= self.size(d); - } - return indices1D; - } -} - -// Give us a new values tensor, with the same dimensionality -// as 'values' but with a new number of non-zero elements. -// TODO: Expose this for real in ATen, some day? -// NB: Doesn't preserve data. -inline Tensor _new_values_with_size_of(const Tensor& values, int64_t nnz) { - std::vector size = values.sizes().vec(); - size[0] = nnz; - return at::empty(size, values.options()); -} - - - -} // anonymous namespace - -}} // namespace at::native diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh b/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh index 44bd3ab8f7060b..3de5ced0edfaf4 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh @@ -38,6 +38,8 @@ __device__ void applyOp3( } } +// Assume both dense and values are contiguous. +// Currently only used in add_out_dense_sparse_cuda: add(dense, sparse, scalar). 
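The helpers deleted from this header move into ATen/SparseTensorUtils.h under the at::sparse namespace (for example, `_newFlattenedIndices` becomes `at::sparse::flatten_indices`). The flattening itself is just a row-major linearization of the `(sparse_dim, nnz)` index matrix; a standalone sketch of that arithmetic (the Python function name is only a stand-in):

```python
import torch

def flatten_indices_sketch(indices, size):
    # Row-major linearization of a (sparse_dim, nnz) index matrix, mirroring
    # what the consolidated at::sparse::flatten_indices helper computes.
    flat = torch.zeros(indices.size(1), dtype=torch.long)
    for d in range(indices.size(0)):
        flat = flat * size[d] + indices[d]
    return flat

i = torch.tensor([[0, 1, 1],
                  [2, 0, 3]])
print(flatten_indices_sketch(i, (2, 4)))   # tensor([2, 4, 7])
```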
template __global__ void sparseElementwiseKernel( Op op, @@ -45,23 +47,26 @@ __global__ void sparseElementwiseKernel( TensorInfo indices, TensorInfo values, const IndexType nnz) { - IndexType indskip = indices.strides[0]; - IndexType valueSize = values.strides[0]; + IndexType ind_skip = indices.strides[0]; + IndexType ind_nnz_skip = indices.strides[1]; + IndexType value_size = values.strides[0]; // numel of each slice in values for (IndexType linearId = blockIdx.x; linearId < nnz; linearId += gridDim.x) { IndexType index = 0; for (IndexType d = 0; d < indices.sizes[0]; d++) { - index = dense.sizes[d] * index + indices.data[d * indskip + linearId]; + index = dense.sizes[d] * index + indices.data[d * ind_skip + linearId * ind_nnz_skip]; } - Real *dst = dense.data + index * valueSize; - Real *src = values.data + linearId * valueSize; - for (IndexType linearId2 = threadIdx.x; linearId2 < valueSize; linearId2 += blockDim.x) { + Real *dst = dense.data + index * value_size; + Real *src = values.data + linearId * value_size; + for (IndexType linearId2 = threadIdx.x; linearId2 < value_size; linearId2 += blockDim.x) { op(dst + linearId2, src + linearId2); } } } +// Assume dense is contiguous. +// Currently only used in add_out_dense_sparse_cuda: add(dense, sparse, scalar). template __global__ void sparseElementwiseKernelScalar( Op op, @@ -69,15 +74,17 @@ __global__ void sparseElementwiseKernelScalar( TensorInfo indices, TensorInfo values, const IndexType nnz) { - IndexType indskip = indices.strides[0]; + IndexType ind_skip = indices.strides[0]; + IndexType ind_nnz_skip = indices.strides[1]; + IndexType value_skip = values.strides[0]; for (IndexType linearId = blockIdx.x * blockDim.x + threadIdx.x; linearId < nnz; linearId += gridDim.x * blockDim.x) { IndexType index = 0; for (IndexType d = 0; d < indices.sizes[0]; d++) { - index = dense.sizes[d] * index + indices.data[d * indskip + linearId]; + index = dense.sizes[d] * index + indices.data[d * ind_skip + linearId * ind_nnz_skip]; } - op(dense.data + index, values.data + linearId); + op(dense.data + index, values.data + linearId * value_skip); } } diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp index ab9fb15c628735..1da0345e3c36f2 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp @@ -1,18 +1,20 @@ #include #include -#include +#include namespace at { namespace native { +using namespace at::sparse; + SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const SparseTensor& mask) { AT_CHECK(mask.is_coalesced(), "sparse_mask: mask is uncoalesced"); AT_CHECK(mask.sizes().equals(t.sizes()), "sparse_mask: operands have incompatible sizes; self has size ", t.sizes(), " but mask has size ", mask.sizes()); - AT_ASSERT(t.is_cuda()); // dispatch argument + AT_ASSERT(t.is_cuda()); // dispatch argument AT_CHECK(mask.is_cuda(), "sparse_mask: expected 'mask' to be CUDA, but got CPU"); AT_CHECK(r.is_cuda(), "sparse_mask: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r, t, mask}), + AT_CHECK(check_device({r, t, mask}), "sparse_mask: arguments are located on different devices; self is on device ", t.get_device(), ", mask is on device ", mask.get_device(), ", out is on device ", r.get_device()); resize_as_sparse_(r, mask); @@ -22,25 +24,24 @@ SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const Spars LongTensor mask_indices = mask._indices(); Tensor mask_values = 
mask._values(); Tensor r_values = at::empty(mask_values.sizes(), r._values().options()); - _alias_into_sparse(r, mask_indices.clone(), r_values); - _get_sparse_impl(r)->set_coalesced(mask.is_coalesced()); - _get_sparse_impl(r)->set_nnz_and_narrow(mask._nnz()); + alias_into_sparse(r, mask_indices.clone(), r_values); + r._coalesced_(mask.is_coalesced()); if (t.numel() == 0) { // if t is an empty tensor, there is no need to mask its elements return r; } LongTensor indices = at::zeros({mask._nnz()}, mask_indices.options()); - for (int64_t d = 0; d < mask._sparseDims(); d++) { + for (int64_t d = 0; d < mask.sparse_dim(); d++) { indices.mul_(mask.size(d)); // This used to use a buffer but I deoptimized it indices.add_(mask_indices.select(0, d)); } - std::vector view_size(1 + mask._denseDims()); + std::vector view_size(1 + mask.dense_dim()); view_size[0] = -1; - for (int64_t d = 0; d < mask._denseDims(); d++) { - view_size[d + 1] = mask.size(mask._sparseDims() + d); + for (int64_t d = 0; d < mask.dense_dim(); d++) { + view_size[d + 1] = mask.size(mask.sparse_dim() + d); } Tensor t_view = t.view(view_size); diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu index 23cc9a944a35c1..401d3833fd4c38 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -24,6 +24,8 @@ namespace at { namespace native { +using namespace at::sparse; + SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { int64_t nnz = self._nnz(); if (self.is_coalesced()) { @@ -33,7 +35,7 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { // we should keep the original tensor intact and do coalesce on a copy of the tensor if (nnz < 2) { SparseTensor dst = self.clone(); - _get_sparse_impl(dst)->set_coalesced(true); + dst._coalesced_(true); return dst; } @@ -45,15 +47,13 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { // For indices, a simple sort + unique suffices // For values, we use a custom kernel for segmented reduction (can't use Thrust due to indirection). - // TODO: I'm not sure if this could ever be non-contiguous - LongTensor values = self._values().contiguous(); + Tensor values = self._values(); - int64_t sparseDims = self._sparseDims(); - int64_t stride = values.stride(0); + int64_t sparse_dim = self.sparse_dim(); // indices will be modified by Thrust, so we have to clone or use new storage // here. - LongTensor indices1D = _newFlattenedIndices(self, true); + LongTensor indices1D = flatten_indices(self._indices(), self.sizes(), true); LongTensor origIndices = at::empty({nnz}, self._indices().options()); LongTensor uniqueOffsets = at::empty({nnz}, self._indices().options()); @@ -88,21 +88,26 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { newValues_size[0] = newNnz; Tensor newValues = at::empty(newValues_size, values.options()); - dim3 grid(THCCeilDiv(newNnz, (int64_t) 4), THCCeilDiv(stride, (int64_t) 128)); - dim3 block(32, 4); - AT_DISPATCH_ALL_TYPES_AND_HALF( - values.type(), "coalesce_sparse_cuda", [&] { - using cuda_accscalar_t = acc_type; - apply::coalesceValuesKernel<<>>( - uniqueOffsets.data(), - origIndices.data(), - values.data(), - newValues.data(), - nnz, - newNnz, - stride - ); - }); + // If there is no values to copy, save running the kernel. 
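For reference, the user-visible contract of the sparse_mask path touched above: the mask must be coalesced, and the result takes the mask's indices with the dense input's values at those positions. Illustrative sketch:

```python
import torch

t = torch.arange(12.0).reshape(3, 4)                    # dense source
mask = torch.sparse_coo_tensor(torch.tensor([[0, 2],
                                             [1, 3]]),
                               torch.zeros(2), (3, 4)).coalesce()

r = t.sparse_mask(mask)        # sparse result: mask's indices, t's values
print(r.coalesce().indices())  # tensor([[0, 2], [1, 3]])
print(r.coalesce().values())   # tensor([ 1., 11.])
```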
+ if (newValues.numel() > 0) { + values = values.contiguous(); + int64_t stride = at::prod_intlist(values.sizes().slice(1)); + dim3 grid(THCCeilDiv(newNnz, (int64_t) 4), THCCeilDiv(stride, (int64_t) 128)); + dim3 block(32, 4); + AT_DISPATCH_ALL_TYPES_AND_HALF( + values.type(), "coalesce_sparse_cuda", [&] { + using cuda_accscalar_t = acc_type; + apply::coalesceValuesKernel<<>>( + uniqueOffsets.data(), + origIndices.data(), + values.data(), + newValues.data(), + nnz, + newNnz, + stride + ); + }); + } // this grid-strided version is slower but probably more flexible // to different sizes @@ -122,14 +127,14 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { //////////////////////////////////////////////////////////// // unflatten indices if necessary LongTensor newIndices; - if (sparseDims == 1) { + if (sparse_dim == 1) { newIndices = indices1D; } else { - newIndices = at::empty({sparseDims, newNnz}, origIndices.options()); + newIndices = at::empty({sparse_dim, newNnz}, origIndices.options()); if (TH_INDEX_BASE != 0) { indices1D.add_(-1); } - for (int64_t d = sparseDims - 1; d >= 0; d--) { + for (int64_t d = sparse_dim - 1; d >= 0; d--) { // NB: Not a select, so I can preserve the outer dimension LongTensor indicesSlice = newIndices.narrow(0, d, 1); // Note for the porting guide: THCTensor_(copy) does NOT do normal @@ -145,8 +150,7 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { } //////////////////////////////////////////////////////////// - SparseTensor dst = ::at::native::sparse_coo_tensor(newIndices, newValues, self.sizes()); - _get_sparse_impl(dst)->set_coalesced(true); + SparseTensor dst = ::at::native::sparse_coo_tensor(newIndices, newValues, self.sizes())._coalesced_(true); THCudaCheck(cudaGetLastError()); return dst; diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu index d655d3e93289ec..eef84007ec3c02 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -18,6 +18,8 @@ namespace at { namespace native { +using namespace at::sparse; + // -------------------------------------------------------------------- // Utility functions // -------------------------------------------------------------------- @@ -45,12 +47,11 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT AT_CHECK(sparse_.is_cuda(), "addmm: expected 'mat1' to be CUDA, but got CPU"); AT_CHECK(dense.is_cuda(), "addmm: expected 'mat2' to be CUDA, but got CPU"); - AT_CHECK(_check_device({sparse_, r_, t, dense})); + AT_CHECK(check_device({sparse_, r_, t, dense})); - // TODO: This error message seems awfully opaque - AT_CHECK(sparse_._sparseDims() == 2, "addmm: 2D tensor expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, "addmm: scalar values expected, got ", sparse_._denseDims(), "D values"); AT_CHECK(dense.dim() == 2, "addmm: 2D tensor expected, got ", dense.dim(), "D tensor"); + AT_CHECK(sparse_.sparse_dim() == 2, "addmm: expected first two dims to be sparse (indices has size 2 at first dim), but got ", sparse_.sparse_dim(), " spase dims"); + // no need to check dense_dim because dense_dim + sparse_dim = dim // mxk * kxn = mxn int64_t m = sparse_.size(0); @@ -87,7 +88,7 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT if (cast_beta == 0) { 
r_.zero_(); } else if (cast_beta == 1) { - if (!isSameTensor(t, r_)) { + if (!is_same_tensor(t, r_)) { r_.copy_(t); } } else { @@ -174,12 +175,12 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse AT_CHECK(r_.is_cuda(), "hspmm: expected 'out' to be CUDA, but got CPU"); AT_CHECK(dense.is_cuda(), "hspmm: expected 'mat2' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r_, sparse_, dense})); + AT_CHECK(check_device({r_, sparse_, dense})); - AT_CHECK(sparse_._sparseDims() == 2, - "hspmm: Argument #2: 2D tensor expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, - "hspmm: Argument #2: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, + "hspmm: Argument #2: 2D tensor expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, + "hspmm: Argument #2: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "hspmm: Argument #3: 2D tensor expected, got ", dense.dim(), "D tensor"); @@ -190,7 +191,7 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse AT_CHECK(dense.size(0) == k, "hspmm: Argument #3: Expected dim 0 size ", k, ", got ", dense.size(0)); - _get_sparse_impl(r_)->resize_and_clear_(1, 1, {m, n}); + get_sparse_impl(r_)->resize_and_clear_(1, 1, {m, n}); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); auto allocator = THCThrustAllocator(globalContext().lazyInitCUDA()); @@ -217,12 +218,12 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse thrust::device_ptr indicesIter(dstIndices.data()); thrust::sequence(policy, indicesIter, indicesIter + nnz); - std::vector new_size = _get_sparse_impl(newSparse)->sizes().vec(); + std::vector new_size = get_sparse_impl(newSparse)->sizes().vec(); new_size[0] = nnz; - _get_sparse_impl(newSparse)->raw_resize_(_get_sparse_impl(newSparse)->sparseDims(), _get_sparse_impl(newSparse)->denseDims(), new_size); + get_sparse_impl(newSparse)->raw_resize_(get_sparse_impl(newSparse)->sparse_dim(), get_sparse_impl(newSparse)->dense_dim(), new_size); s_addmm_out_sparse_dense_cuda(values, values, newSparse, dense, 0, /*alpha*/ 1); - _get_sparse_impl(r_)->set_indices_and_values_unsafe(indices, values); + get_sparse_impl(r_)->set_indices_and_values_unsafe(indices, values); return r_; } @@ -245,7 +246,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR AT_CHECK(sparse.is_cuda(), "add: expected 'other' to be CUDA, but got CPU"); AT_CHECK(r_.is_cuda(), "add: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({sparse, r_, dense})); + AT_CHECK(check_device({sparse, r_, dense})); AT_CHECK(dense.sizes().equals(sparse.sizes()), "add: expected 'self' and 'other' to have same size, but self has size ", dense.sizes(), " while other has size ", sparse.sizes(), " (FYI: dense-sparse addition does not currently support broadcasting)"); @@ -258,7 +259,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR } Tensor r = r_; - if (!isSameTensor(r, dense)) { + if (!is_same_tensor(r, dense)) { r_.resize_as_(dense); r_.copy_(dense); } else { @@ -269,7 +270,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR LongTensor indices = sparse._indices(); Tensor values = sparse._values(); int64_t nDim = dense.dim(); - int64_t nDimI = sparse._sparseDims(); + int64_t nDimI = sparse.sparse_dim(); if (sparse._values().numel() == 0) { return 
r_; @@ -282,7 +283,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR int curDevice = -1; cudaGetDevice(&curDevice); cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); - if (sparse._denseDims() == 0) { + if (sparse.dense_dim() == 0) { AT_CHECK(cuda::getApplyGrid(nnz, grid, curDevice), "add: Argument #0: tensor too large or too many dimensions"); AT_DISPATCH_ALL_TYPES_AND_HALF( @@ -296,6 +297,9 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR } else { AT_CHECK(cuda::getApplyGrid(nnz * block.x, grid, curDevice), "add: Argument #0: tensor too large or too many dimensions"); + // sparseElementwiseKernel needs values to be contiguous too + values = values.contiguous(); + AT_DISPATCH_ALL_TYPES_AND_HALF( values.type(), "add_out_dense_sparse_cuda", [&] { apply::sparseElementwiseKernel, uint64_t, scalar_t> @@ -306,7 +310,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR }); } } else { - LongTensor indices1D = _newFlattenedIndices(sparse, 0).squeeze_(0).narrow(0, 0, nnz); + LongTensor indices1D = flatten_indices(indices, sparse.sizes(), 0); // FIXME: at some point we can wrap the scale into indexAdd // NB: Purposely not inplace! @@ -327,7 +331,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR } Tensor r_view = r.view({view_rows, view_columns}); - values = values.narrow(0, 0, nnz).reshape({nnz, view_columns}); + values = values.reshape({nnz, view_columns}); r_view.index_add_(0, indices1D, values); } THCudaCheck(cudaGetLastError()); @@ -344,7 +348,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const AT_CHECK(src.is_cuda(), "add: expected 'other' to be CUDA, but got CPU"); AT_CHECK(r_.is_cuda(), "add: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r_, t, src})); + AT_CHECK(check_device({r_, t, src})); AT_CHECK(t.sizes().equals(src.sizes()), "add: expected 'self' and 'other' to have same size, but ", t.sizes(), " != ", src.sizes()); if (src._nnz() == 0) { @@ -354,7 +358,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const return mul_out_sparse_scalar(r_, src, value); } - AT_CHECK(_is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t._sparseDims(), " sparse dimensions while 'other' has ", src._sparseDims(), " sparse dimensions"); + AT_CHECK(is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t.sparse_dim(), " sparse dimensions while 'other' has ", src.sparse_dim(), " sparse dimensions"); // We deliberately choose to simply concat the indices and values tensors // rather than merging them. This removes the need to synchronously fetch nnz @@ -375,7 +379,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const LongTensor r_indices_ = at::cat({t_indices_, s_indices_}, 1); Tensor r_values_ = at::cat({t_values_, s_values_}, 0); r_.resize_as_(src); - _alias_into_sparse(r_, r_indices_, r_values_); + alias_into_sparse(r_, r_indices_, r_values_); // FIXME: add some heuristic about when to call coalesce() here, so that // tensors don't totally blow up in size by concatenation; e.g. 
@@ -402,7 +406,7 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons AT_ASSERT(t_.is_cuda()); // dispatch argument AT_CHECK(src_.is_cuda(), "mul: expected 'other' to be CUDA, but got CPU"); AT_CHECK(r_.is_cuda(), "mul: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r_, t_, src_})); + AT_CHECK(check_device({r_, t_, src_})); AT_CHECK(t_.sizes().equals(src_.sizes()), "mul: expected 'self' and 'other' to have same size, but ", t_.sizes(), " != ", src_.sizes()); SparseTensor t = t_.coalesce(); @@ -416,15 +420,15 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons // saving those because they can be overwritten when doing in-place operations int64_t t_nnz = t._nnz(), s_nnz = src._nnz(); int64_t max_nnz = std::min(t_nnz, s_nnz); // multiply by zero is zero, and can be dropped - int64_t sparseDims = src._sparseDims(); + int64_t sparse_dim = src.sparse_dim(); LongTensor t_indices_ = t._indices(); Tensor t_values_ = t._values(); LongTensor s_indices_ = src._indices(); Tensor s_values_ = src._values(); - LongTensor r_indices_ = at::empty({sparseDims, max_nnz}, t_indices_.options()); - Tensor r_values_ = _new_values_with_size_of(t_values_, max_nnz).zero_(); + LongTensor r_indices_ = at::empty({sparse_dim, max_nnz}, t_indices_.options()); + Tensor r_values_ = new_values_with_size_of(t_values_, max_nnz).zero_(); r_.resize_as_(src); - _get_sparse_impl(r_)->set_indices_and_values_unsafe(r_indices_, r_values_); + get_sparse_impl(r_)->set_indices_and_values_unsafe(r_indices_, r_values_); int64_t valueSize = t_values_.stride(0); const dim3 block = dim3(std::min(static_cast(cuda::getApplyBlock().x), valueSize)); @@ -457,10 +461,9 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons // sync! (surely there is a more idiomatic way to do this...) 
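Like its CPU counterpart earlier in this patch, the CUDA sparse-sparse mul kernel intersects the two coalesced index lists, so only positions present in both operands survive (anything multiplied by an implicit zero is dropped). Sketch:

```python
import torch

a = torch.sparse_coo_tensor(torch.tensor([[0, 1, 3]]),
                            torch.tensor([2.0, 3.0, 4.0]), (5,))
b = torch.sparse_coo_tensor(torch.tensor([[1, 3, 4]]),
                            torch.tensor([10.0, 10.0, 10.0]), (5,))

c = (a * b).coalesce()      # only shared indices remain
print(c.indices())          # tensor([[1, 3]])
print(c.values())           # tensor([30., 40.])
```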
LongTensor cpu_resultNnz = at::empty({1}, CPU(kLong)); cpu_resultNnz.copy_(resultNnz); - _get_sparse_impl(r_)->set_nnz_and_narrow(cpu_resultNnz.accessor()[0]); - _get_sparse_impl(r_)->set_coalesced(true); + get_sparse_impl(r_)->set_nnz_and_narrow(cpu_resultNnz.accessor()[0]); - return r_; + return r_._coalesced_(true); } }} // namespace at::native diff --git a/aten/src/ATen/native_parse.py b/aten/src/ATen/native_parse.py index 7f34b48e0228f0..3368a9f10178f0 100644 --- a/aten/src/ATen/native_parse.py +++ b/aten/src/ATen/native_parse.py @@ -129,6 +129,7 @@ def run(paths): output_arguments = [x for x in arguments if x.get('output')] declaration['return'] = return_type if len(output_arguments) == 0 else output_arguments declaration['variants'] = func.get('variants', ['function']) + declaration['requires_tensor'] = func.get('requires_tensor', False) declaration['cpu_half'] = func.get('cpu_half', False) declaration['deprecated'] = func.get('deprecated', False) declaration['device_guard'] = func.get('device_guard', True) diff --git a/aten/src/ATen/templates/TypeDefault.cpp b/aten/src/ATen/templates/TypeDefault.cpp index 76ad9465d6e324..c0fb9ca80bdac8 100644 --- a/aten/src/ATen/templates/TypeDefault.cpp +++ b/aten/src/ATen/templates/TypeDefault.cpp @@ -13,6 +13,7 @@ #include "ATen/Tensor.h" #include "ATen/core/TensorOptions.h" #include "ATen/DeviceGuard.h" +#include "ATen/SparseTensorUtils.h" namespace at { diff --git a/aten/src/TH/generic/THTensorEvenMoreMath.cpp b/aten/src/TH/generic/THTensorEvenMoreMath.cpp index 7efee5ee8d1be2..ad6f995b68e962 100644 --- a/aten/src/TH/generic/THTensorEvenMoreMath.cpp +++ b/aten/src/TH/generic/THTensorEvenMoreMath.cpp @@ -12,9 +12,9 @@ void THTensor_(fill)(THTensor *r_, scalar_t value) TH_TENSOR_APPLY(scalar_t, r_, if (r__stride == 1) { THVector_(fill)(r__data, value, r__size); - r__i = r__size; - r__data += r__stride * r__size; - break; + r__i = r__size; + r__data += r__stride * r__size; + break; } else { *r__data = value; } @@ -181,7 +181,7 @@ void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTens tensor_data = tensor->data(); src_data = src->data(); auto src_size0 = THTensor_sizeLegacyNoScalars(src, 0); - ptrdiff_t rowsize = src_size0 == 0 ? 1: THTensor_(nElement)(src) / src_size0; + ptrdiff_t rowsize = src_size0 == 0 ? 1 : THTensor_(nElement)(src) / src_size0; // check that the indices are within range int64_t max = src_size0 - 1 + TH_INDEX_BASE; @@ -192,14 +192,18 @@ void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTens } } - if (src->dim() <= 1) { - #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i 0) { + if (src->dim() <= 1) { + #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i) + for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) + for (i=0; idim() <= 1) diff --git a/test/common_utils.py b/test/common_utils.py index 414c6dd8aae17e..377522b677fd1e 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -290,6 +290,33 @@ def assertTensorsSlowEqual(self, x, y, prec=None, message=''): max_err = max(max_err, abs(x[index] - y[index])) self.assertLessEqual(max_err, prec, message) + def genSparseTensor(self, size, sparse_dim, nnz, is_uncoalesced, device='cpu'): + # Assert not given impossible combination, where the sparse dims have + # empty numel, but nnz > 0 makes the indices containing values. 
+ assert all(size[d] > 0 for d in range(sparse_dim)) or nnz == 0, 'invalid arguments' + + v_size = [nnz] + list(size[sparse_dim:]) + v = torch.randn(*v_size, device=device) + i = torch.rand(sparse_dim, nnz, device=device) + i.mul_(torch.tensor(size[:sparse_dim]).unsqueeze(1).to(i)) + i = i.to(torch.long) + if is_uncoalesced: + v = torch.cat([v, torch.randn_like(v)], 0) + i = torch.cat([i, i], 1) + + x = torch.sparse_coo_tensor(i, v, torch.Size(size)) + + if not is_uncoalesced: + x = x.coalesce() + else: + # FIXME: `x` is a sparse view of `v`. Currently rebase_history for + # sparse views is not implemented, so this workaround is + # needed for inplace operations done on `x`, e.g., copy_(). + # Remove after implementing something equivalent to CopySlice + # for sparse views. + x = x.detach() + return x, x._indices().clone(), x._values().clone() + def safeToDense(self, t): r = self.safeCoalesce(t) return r.to_dense() diff --git a/test/expect/TestCudaSparse.test_print.expect b/test/expect/TestCudaSparse.test_print.expect index cae534bd16e8d6..dd924a8385cc1c 100644 --- a/test/expect/TestCudaSparse.test_print.expect +++ b/test/expect/TestCudaSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -30,15 +30,13 @@ tensor(indices=tensor([], size=(0, 4)), device='cuda:0', size=(), nnz=4, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], device='cuda:0', dtype=torch.float32, - grad_fn=) +tensor([0., 1.], device='cuda:0', dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -68,15 +66,13 @@ tensor(indices=tensor([], size=(0, 20)), device='cuda:0', size=(0,), nnz=20, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -119,17 +115,15 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(2,), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -172,16 +166,15 @@ tensor(indices=tensor([[0, 1, 2, 0, 1, 2]]), device='cuda:0', size=(100, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 2]], device='cuda:0', grad_fn=) +tensor([[0, 1, 2]], device='cuda:0') # _values tensor([[0.0000, 0.2222, 0.4444], 
[0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -211,15 +204,13 @@ tensor(indices=tensor([], size=(2, 0)), device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -249,15 +240,13 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(10, 0, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -287,8 +276,6 @@ tensor(indices=tensor([], size=(0, 0)), device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/expect/TestCudaUncoalescedSparse.test_print.expect b/test/expect/TestCudaUncoalescedSparse.test_print.expect index e6207c6da3d278..e982b059933b98 100644 --- a/test/expect/TestCudaUncoalescedSparse.test_print.expect +++ b/test/expect/TestCudaUncoalescedSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -30,15 +30,13 @@ tensor(indices=tensor([], size=(0, 4)), device='cuda:0', size=(), nnz=4, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], device='cuda:0', dtype=torch.float32, - grad_fn=) +tensor([0., 1.], device='cuda:0', dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -68,15 +66,13 @@ tensor(indices=tensor([], size=(0, 20)), device='cuda:0', size=(0,), nnz=20, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(10, 0), 
dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -119,17 +115,15 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(2,), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -172,16 +166,15 @@ tensor(indices=tensor([[0, 1, 0, 0, 1, 0]]), device='cuda:0', size=(100, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 0]], device='cuda:0', grad_fn=) +tensor([[0, 1, 0]], device='cuda:0') # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -211,15 +204,13 @@ tensor(indices=tensor([], size=(2, 0)), device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -249,15 +240,13 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(10, 0, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -287,8 +276,6 @@ tensor(indices=tensor([], size=(0, 0)), device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/expect/TestSparse.test_print.expect b/test/expect/TestSparse.test_print.expect index c0223c1483741c..d0501228f9262a 100644 --- a/test/expect/TestSparse.test_print.expect +++ 
b/test/expect/TestSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -28,13 +28,13 @@ tensor(indices=tensor([], size=(0, 2)), size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 2), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], dtype=torch.float32, grad_fn=) +tensor([0., 1.], dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -62,13 +62,13 @@ tensor(indices=tensor([], size=(0, 10)), size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 10), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 10), dtype=torch.int64) # _values -tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) +tensor([], size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -106,15 +106,15 @@ tensor(indices=tensor([], size=(0, 3)), size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + [1.3333, 1.6667]], dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -152,16 +152,15 @@ tensor(indices=tensor([[0, 1, 2]]), size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 2]], grad_fn=) +tensor([[0, 1, 2]]) # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -189,13 +188,13 @@ tensor(indices=tensor([], size=(2, 0)), size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(2, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(2, 0), dtype=torch.int64) # _values -tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -223,13 +222,13 @@ tensor(indices=tensor([], size=(0, 3)), size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values -tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -257,6 +256,6 @@ 
tensor(indices=tensor([], size=(0, 0)), size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 0), dtype=torch.int64) # _values -tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/expect/TestUncoalescedSparse.test_print.expect b/test/expect/TestUncoalescedSparse.test_print.expect index 244442de0cc73d..7a2e3c68b9ed12 100644 --- a/test/expect/TestUncoalescedSparse.test_print.expect +++ b/test/expect/TestUncoalescedSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -28,13 +28,13 @@ tensor(indices=tensor([], size=(0, 2)), size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 2), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], dtype=torch.float32, grad_fn=) +tensor([0., 1.], dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -62,13 +62,13 @@ tensor(indices=tensor([], size=(0, 10)), size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 10), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 10), dtype=torch.int64) # _values -tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) +tensor([], size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -106,15 +106,15 @@ tensor(indices=tensor([], size=(0, 3)), size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + [1.3333, 1.6667]], dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -152,16 +152,15 @@ tensor(indices=tensor([[0, 1, 0]]), size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 0]], grad_fn=) +tensor([[0, 1, 0]]) # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -189,13 +188,13 @@ tensor(indices=tensor([], size=(2, 0)), size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(2, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(2, 0), dtype=torch.int64) # _values -tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ 
-223,13 +222,13 @@ tensor(indices=tensor([], size=(0, 3)), size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values -tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -257,6 +256,6 @@ tensor(indices=tensor([], size=(0, 0)), size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 0), dtype=torch.int64) # _values -tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/test_autograd.py b/test/test_autograd.py index b42ab95993a9ef..fa00280ef55715 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -553,14 +553,14 @@ def backward(self, grad_x): [0, 2, 2], ]) v1 = torch.DoubleTensor([[1, 2], [4, 5], [7, 8]]) - sparse_grad1 = Variable(torch.sparse.DoubleTensor(i1, v1, size)) + sparse_grad1 = torch.sparse.DoubleTensor(i1, v1, size) i2 = torch.LongTensor([ [0, 1, 3, 4], [0, 1, 2, 2], ]) v2 = torch.DoubleTensor([[1, 2], [4, 3], [4, 5], [7, 8]]) - sparse_grad2 = Variable(torch.sparse.DoubleTensor(i2, v2, size)) - dense_grad = Variable(torch.rand(size).double()) + sparse_grad2 = torch.sparse.DoubleTensor(i2, v2, size) + dense_grad = torch.rand(size).double() sparse_fn1 = FixedGradientFunction(sparse_grad1) sparse_fn2 = FixedGradientFunction(sparse_grad2) dense_fn = FixedGradientFunction(dense_grad) @@ -578,6 +578,47 @@ def backward(self, grad_x): (sparse_fn1(x) + sparse_fn2(x)).sum().backward() self.assertEqual(x.grad, sparse_grad1 + sparse_grad2) + @skipIfRocm + def test_sparse_ctor_getter_backward(self): + # See NOTE [ Sparse: autograd and API ] on the expected behavior of this test + def test(size, sparse_dim, nnz, device): + v_size = [nnz] + list(size[sparse_dim:]) + i = torch.rand(sparse_dim, nnz) + i.mul_(torch.tensor(size[:sparse_dim]).unsqueeze(1).to(i)) + i = i.to(torch.long) + + inp = torch.randn(v_size, requires_grad=True) + other = self.genSparseTensor(size, sparse_dim, nnz, is_uncoalesced=True)[0] + other = other.to(device) + + def fn(v): + x = torch.sparse_coo_tensor(i, v, size, device=device) + y = (x + other).coalesce() + yv = y.values() + new_v = yv.tanh() + z = torch.sparse_coo_tensor(y.indices(), new_v, y.size()) + return z.coalesce().values() + + gradcheck(fn, (inp,)) + # FIXME: make gradgradcheck work. 
+ # gradgradcheck(fn, (inp,)) + + # assert that _values is non-differentiable + with self.assertRaisesRegex(RuntimeError, "does not have a grad_fn"): + other.detach().requires_grad_()._values().backward(torch.ones_like(other._values())) + + devices = ['cpu'] + + if torch.cuda.is_available(): + devices.append('cuda') + + for empty_i, empty_v, empty_nnz in product([True, False], repeat=3): + sparse_size = [] if empty_i else [2, 1] + dense_size = [1, 0, 2] if empty_v else [1, 2] + nnz = 0 if empty_nnz else 5 + for device in devices: + test(sparse_size + dense_size, len(sparse_size), nnz, device) + def test_multi_backward(self): x = torch.randn(5, 5, requires_grad=True) y = torch.randn(5, 5, requires_grad=True) diff --git a/test/test_numba_integration.py b/test/test_numba_integration.py index 2894d5daaa54e9..ba14916a8fbc7c 100644 --- a/test/test_numba_integration.py +++ b/test/test_numba_integration.py @@ -67,14 +67,15 @@ def test_cuda_array_interface(self): # Sparse CPU/CUDA tensors do not implement the interface if tp not in (torch.HalfTensor,): - sparse_t = torch.sparse_coo_tensor(cput[None, :], cput) + indices_t = torch.empty(1, cput.size(0), dtype=torch.long).clamp_(min=0) + sparse_t = torch.sparse_coo_tensor(indices_t, cput) self.assertFalse(hasattr(sparse_t, "__cuda_array_interface__")) self.assertRaises( AttributeError, lambda: sparse_t.__cuda_array_interface__ ) - sparse_cuda_t = torch.sparse_coo_tensor(cput[None, :], cput).cuda() + sparse_cuda_t = torch.sparse_coo_tensor(indices_t, cput).cuda() self.assertFalse(hasattr(sparse_cuda_t, "__cuda_array_interface__")) self.assertRaises( diff --git a/test/test_sparse.py b/test/test_sparse.py index d32aabb145a65c..b08f3e79a2bb37 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -44,47 +44,16 @@ def setUp(self): self.SparseTensor = torch.sparse.DoubleTensor super(TestSparse, self).setUp() - def _gen_sparse(self, sparse_dims, nnz, with_size): - # TODO: Consider implementing this in the CUDA case by directly - # performing the operations on the GPU. You won't be able to - # use torch.rand/torch.randn in this case because they are - # CPU-only. If you do this, you can remove the is_cuda branch - # at the end. - # - # If you do this, be sure to update assert_uncoalesced too - + def _gen_sparse(self, sparse_dim, nnz, with_size): if isinstance(with_size, Number): - with_size = [with_size] * sparse_dims + with_size = [with_size] * sparse_dim + + x, i, v = self.genSparseTensor(with_size, sparse_dim, nnz, self.is_uncoalesced, self.device) if self.is_uncoalesced: - # We want to generate a tensor with a lot of uncoalesced - # entries to stress test whether or not we handle this - # (subtle) case correctly - v_size = [nnz * 2] + list(with_size[sparse_dims:]) - v = torch.randn(*v_size) - r = torch.rand(sparse_dims, nnz) - # Repeat the indexes, so every position shows up twice - i = torch.cat([r, r], dim=1) - if nnz > 0: - i *= torch.Tensor(with_size[:sparse_dims]).repeat(nnz * 2, 1).transpose(0, 1) - i = i.type(torch.LongTensor) - x = torch.sparse.DoubleTensor(i, v, torch.Size(with_size)) self.assert_uncoalesced(x) - else: - # Generate a sparse tensor with sparse_dims sparse dimensions; the - # rest the dimensions with_size[sparse_dims:] are dense. 
- v_size = [nnz] + list(with_size[sparse_dims:]) - v = torch.randn(*v_size) - i = torch.rand(sparse_dims, nnz) - if nnz > 0: - i *= torch.Tensor(with_size[:sparse_dims]).repeat(nnz, 1).transpose(0, 1) - i = i.type(torch.LongTensor) - x = torch.sparse.DoubleTensor(i, v, torch.Size(with_size)) - if self.is_cuda: - return x.cuda(), i.cuda(), v.cuda() - else: - return x, i.clone(), v.clone() + return x, i, v def assert_uncoalesced(self, x): """ @@ -109,7 +78,7 @@ def randn(self, *args, **kwargs): @skipIfRocm # ROCm stack doesn't like the x + x call def test_print(self): - shape_sparseDim_nnz = [ + shape_sparse_dim_nnz = [ ((), 0, 2), ((0,), 0, 10), ((2,), 0, 3), @@ -120,18 +89,18 @@ def test_print(self): ] printed = [] - for shape, sparseDim, nnz in shape_sparseDim_nnz: - indices_shape = torch.Size((sparseDim, nnz)) - values_shape = torch.Size((nnz,) + shape[sparseDim:]) + for shape, sparse_dim, nnz in shape_sparse_dim_nnz: + indices_shape = torch.Size((sparse_dim, nnz)) + values_shape = torch.Size((nnz,) + shape[sparse_dim:]) printed.append("# shape: {}".format(torch.Size(shape))) printed.append("# nnz: {}".format(nnz)) - printed.append("# sparseDim: {}".format(sparseDim)) + printed.append("# sparse_dim: {}".format(sparse_dim)) printed.append("# indices shape: {}".format(indices_shape)) printed.append("# values shape: {}".format(values_shape)) indices = torch.arange(indices_shape.numel(), dtype=self.IndexTensor.dtype, device=self.device).view(indices_shape) - for d in range(sparseDim): + for d in range(sparse_dim): indices[d].clamp_(max=(shape[d] - 1)) # make it valid index if self.is_uncoalesced and indices.numel() > 0: indices[:, -1] = indices[:, 0] # make it uncoalesced @@ -174,6 +143,16 @@ def test_shape(sparse_dims, nnz, with_size): self.assertEqual(self.safeCoalesce(x)._nnz(), nnz) self.assertEqual(list(x.size()), with_size) + # Test .indices() and .values() + if self.is_uncoalesced: + with self.assertRaisesRegex(RuntimeError, "Cannot get indices on an uncoalesced tensor"): + x.indices() + with self.assertRaisesRegex(RuntimeError, "Cannot get values on an uncoalesced tensor"): + x.values() + else: + self.assertEqual(x.indices(), x._indices()) + self.assertEqual(x.values(), x._values()) + test_shape(3, 10, 100) test_shape(3, 10, [100, 100, 100]) test_shape(3, 10, [100, 100, 100, 5, 5, 5, 0]) @@ -190,6 +169,16 @@ def test_shape(sparse_dims, nnz, with_size): self.assertEqual(x._indices().numel(), 0) self.assertEqual(x._values().numel(), 0) + @skipIfRocm + def test_coalecce(self): + for empty_i, empty_v, empty_nnz in itertools.product([True, False], repeat=3): + sparse_size = [] if empty_i else [2, 1] + dense_size = [1, 0, 2] if empty_v else [1, 2] + nnz = 0 if empty_nnz else 5 + + t, _, _ = self._gen_sparse(len(sparse_size), nnz, sparse_size + dense_size) + self.safeCoalesce(t) # this tests correctness + def test_ctor_size_checks(self): indices = self.IndexTensor([ [0, 0, 0], @@ -569,11 +558,11 @@ def test_cross_device(x1, x2): def test_cuda_empty(self): def test_tensor(x): y = x.cuda(0) - self.assertEqual(x._sparseDims(), y._sparseDims()) - self.assertEqual(x._denseDims(), y._denseDims()) + self.assertEqual(x.sparse_dim(), y.sparse_dim()) + self.assertEqual(x.dense_dim(), y.dense_dim()) x = y.cpu() - self.assertEqual(y._sparseDims(), x._sparseDims()) - self.assertEqual(y._denseDims(), x._denseDims()) + self.assertEqual(y.sparse_dim(), x.sparse_dim()) + self.assertEqual(y.dense_dim(), x.dense_dim()) x = torch.sparse.FloatTensor(2, 3, 4) test_tensor(x) @@ -609,8 +598,9 @@ def test_shape(di, 
dj, dk, nnz): x_coalesced = x.coalesce() self.assertTrue(x_coalesced.is_coalesced()) - x_coalesced_t = x.t() - self.assertFalse(x_coalesced_t.is_coalesced()) + x_coalesced_t = x_coalesced.t() + # Transpose is `colasced`-preserving if the indices tensor is empty. + self.assertEqual(x_coalesced_t.is_coalesced(), di * nnz == 0) res = torch.mm(x_coalesced_t, y) expected = torch.mm(self.safeToDense(x_coalesced_t), y) @@ -629,8 +619,8 @@ def test_in_place(x): self.assertEqual(torch.Size([shape_original[1], shape_original[0]]), x.size()) self.assertEqual(0, x._indices().numel()) self.assertEqual(0, x._values().numel()) - self.assertEqual(x._sparseDims(), 2) - self.assertEqual(x._denseDims(), 0) + self.assertEqual(x.sparse_dim(), 2) + self.assertEqual(x.dense_dim(), 0) def test_not_in_place(x): shape_original = x.shape @@ -638,8 +628,8 @@ def test_not_in_place(x): self.assertEqual(torch.Size([shape_original[1], shape_original[0]]), y.size()) self.assertEqual(0, y._indices().numel()) self.assertEqual(0, y._values().numel()) - self.assertEqual(x._sparseDims(), 2) - self.assertEqual(x._denseDims(), 0) + self.assertEqual(x.sparse_dim(), 2) + self.assertEqual(x.dense_dim(), 0) x = self.SparseTensor(2, 3) test_in_place(x) @@ -886,15 +876,21 @@ def _test_basic_ops_shape(self, nnz_x1, nnz_x2, shape_i, shape_v=None): expected = torch.zeros(x1.size()) self.assertEqual(self.safeToDense(y), expected) - self.assertFalse(x1.is_coalesced()) + self.assertEqual(x1.is_coalesced(), not self.is_uncoalesced) y = x1.coalesce() z = x1.coalesce() - self.assertFalse(x1.is_coalesced()) + self.assertEqual(x1.is_coalesced(), not self.is_uncoalesced) self.assertTrue(y.is_coalesced()) self.assertEqual(x1, y) - # check that coalesce is out of place y._values().add_(1) - self.assertEqual(z._values() + 1, y._values()) + if not x1.is_coalesced(): + # check that coalesce is out of place if the original tensor is not + # coalesced. + self.assertEqual(z._values() + 1, y._values()) + else: + # check that coalesce is in-place if the original tensor is + # coalesced. 
+ self.assertEqual(z._values(), y._values()) @skipIfRocm def test_basic_ops(self): @@ -1051,8 +1047,8 @@ def _test_zeros(self, nnzs, shape, out_shape_i, out_shape_v=None): self.assertEqual(tuple(out.size()), tuple(shape)) self.assertTrue(out._indices().numel() == out._values().numel() == 0) self.assertEqual(out._nnz(), 0) - self.assertEqual(out._sparseDims(), len(shape)) - self.assertEqual(out._denseDims(), 0) + self.assertEqual(out.sparse_dim(), len(shape)) + self.assertEqual(out.dense_dim(), 0) def test_zeros(self): def test_shape(i_shapes, v_shapes, shape, nnzs): @@ -1075,8 +1071,8 @@ def _test_zeros_like(self, nnzs, template_shape_i, template_shape_v=None): self.assertEqual(tuple(res.size()), tuple(template_shape)) self.assertTrue(res._indices().numel() == res._values().numel() == 0) self.assertEqual(res._nnz(), 0) - self.assertEqual(res._sparseDims(), len(template_shape_i)) - self.assertEqual(res._denseDims(), len(template_shape_v)) + self.assertEqual(res.sparse_dim(), len(template_shape_i)) + self.assertEqual(res.dense_dim(), len(template_shape_v)) def test_zeros_like(self): def test_shape(i_shapes, v_shapes, nnzs): @@ -1119,7 +1115,7 @@ def test_narrow(self): self.assertRaises(RuntimeError, lambda: with_dense.narrow_copy(10, 0, 3)) # dim > sparseDim + denseDim def _test_log1p_tensor(self, input, dense_tensor): - expected_output = torch.tensor(dense_tensor).log1p_() + expected_output = dense_tensor.log1p() self.assertEqual(expected_output, input.log1p().to_dense()) self.assertEqual(expected_output, input.coalesce().log1p_().to_dense()) @@ -1143,7 +1139,7 @@ def test_log1p(self): torch.FloatTensor([3, 4, 5]), torch.Size([3]), device=self.device) - self._test_log1p_tensor(input, [3., 4., 5.]) + self._test_log1p_tensor(input, torch.as_tensor([3., 4., 5.])) # test uncoalesced input input_uncoalesced = torch.sparse_coo_tensor( @@ -1151,7 +1147,7 @@ def test_log1p(self): torch.FloatTensor([2, 3, 4, 1, 1, 1]), torch.Size([3]), device=self.device) - self._test_log1p_tensor(input_uncoalesced, [3., 4., 5.]) + self._test_log1p_tensor(input_uncoalesced, torch.as_tensor([3., 4., 5.])) input = torch.sparse_coo_tensor( torch.zeros([2, 0]), @@ -1309,7 +1305,7 @@ def test_factory_size_check(self): [0, 2]]) values = self.ValueTensor([.5, .5]) sizes = torch.Size([2, 3]) - with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"): + with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"): torch.sparse_coo_tensor(indices, values, sizes) indices.fill_(-1) @@ -1320,14 +1316,14 @@ def test_factory_size_check(self): [0, 2]]) values = self.ValueTensor(2, 1, 0) sizes = torch.Size([2, 3, 1, 0]) - with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"): + with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"): torch.sparse_coo_tensor(indices, values, sizes) indices = self.IndexTensor([[1, 2], [0, 2]]) values = self.ValueTensor(2, 2, 2) sizes = torch.Size([0, 0, 2, 2]) - with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"): + with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"): torch.sparse_coo_tensor(indices, values, sizes) indices = self.IndexTensor([[1, 2], @@ -1370,13 +1366,13 @@ def test_factory_empty_indices(self): self.assertEqual(tensor._indices(), expected_indices) def test_factory_nnz(self): - indices = self.IndexTensor([[0]]) # (sparseDims, nnz): (1, 1) + indices = self.IndexTensor([[0]]) # (sparse_dim, nnz): (1, 1) values = self.ValueTensor([[1, 1], [1, 
1]]) # (nnz, ...): (2, 2) sizes = torch.Size([2, 2]) with self.assertRaisesRegex(RuntimeError, "indices and values must have same nnz"): torch.sparse_coo_tensor(indices, values, sizes) - indices = self.IndexTensor([[0]]) # (sparseDims, nnz): (1, 1) + indices = self.IndexTensor([[0]]) # (sparse_dim, nnz): (1, 1) values = self.ValueTensor(2, 0) # (nnz, ...): (2, 0) sizes = torch.Size([2, 0]) with self.assertRaisesRegex(RuntimeError, "indices and values must have same nnz"): @@ -1402,7 +1398,8 @@ def test_shape(i_shape, v_shape, size, expected_size): test_shape([3, 0], [0, 2, 4, 0], [0, 0, 0, 2, 4, 0], [0, 0, 0, 2, 4, 0]) test_shape([3, 0], [0, 2, 4, 0], [1, 2, 3, 2, 4, 0], [1, 2, 3, 2, 4, 0]) - def test_factory_dense_dims(self): + @skipIfRocm + def test_factory_dense_dim(self): indices = self.IndexTensor([[0]]) values = self.ValueTensor([[[1, 1, 1], [1, 1, 1]]]) sizes = torch.Size([1, 3, 4]) @@ -1563,7 +1560,7 @@ def do_test(t): y = t.new().resize_as_(t).zero_() self.assertEqual(y.shape, t.shape) # Check that y can be added to t. Currently, this requires that - # _sparseDims and _denseDims match. + # sparse_dim and dense_dim match. self.assertEqual(t, t + y) do_test(self.SparseTensor()) @@ -1585,8 +1582,8 @@ def _test_resize_shape(self, x_i, x_v, x_size, y_i, y_v, y_size): x.resize_as_(y) x_dense.resize_as_(y_dense) self.assertEqual(x.shape, y.shape) - self.assertEqual(x._sparseDims(), y._sparseDims()) - self.assertEqual(x._denseDims(), y._denseDims()) + self.assertEqual(x.sparse_dim(), y.sparse_dim()) + self.assertEqual(x.dense_dim(), y.dense_dim()) self.assertEqual(x.shape, x_dense.shape) self.assertEqual(y.shape, y_dense.shape) # Here we make sure that the original data are preserved after resizing diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml index f0e87e85f3c7a6..e0d46634924278 100644 --- a/tools/autograd/derivatives.yaml +++ b/tools/autograd/derivatives.yaml @@ -8,6 +8,12 @@ # Note that a single gradient entry can specify the gradient # formula for multiple input names, by specifying a key # "input1, input2" (see atan2 for an example). +# - Optional entry with key 'output_differentiability' and value a list of the +# same length as the number of outputs from the forward function. The list +# should contain only booleans, specifying whether each of the output Tensor +# is differentiable. +# If None of the output is differentiable, you can also add the function +# name to `gen_variable_type.py`'s `DONT_REQUIRE_DERIVATIVE` list. # # If a function has out-of-place and in-place variants, then the derivative # definition for the in-place variant is optional. It will default to the @@ -23,13 +29,16 @@ # # When a function returns multiple *differentiable* outputs, # you can refer to the gradients of each outputs using 'grads', -# e.g., 'grads[0]', 'grads[1]' +# e.g., 'grads[0]', 'grads[1]'. # # When a function returns *one* differentiable output (the # first output) and some more nondifferentiable outputs, # you MUST refer to the gradient of the differentiable output with # 'grad' (this case is special-cased in our code generation). # +# Note that the number of differentibale outputs can be modified by the +# 'output_differentiability' entry (see above). +# # - Any of the input arguments, tensor or non-tensor, including # argument names that only appear in Declarations.cwrap, e.g. 'output'. 
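To make the intent of the sparse entries added below concrete: `coalesce().values()` is meant to be differentiable back to the tensor the sparse tensor was built from, while the raw `_values()` accessor stays out of autograd's reach. A small, hedged sketch of that behaviour (not part of the patch; exact error text may differ):

    import torch

    i = torch.tensor([[0, 1], [1, 0]])
    v = torch.tensor([1., 2.], requires_grad=True)
    x = torch.sparse_coo_tensor(i, v, (2, 2))

    # Differentiable chain: ctor -> coalesce -> values
    x.coalesce().values().sum().backward()
    print(v.grad)   # tensor([1., 1.])

    # Non-differentiable accessor: no grad_fn is recorded, so this would raise
    # x.coalesce()._values().sum().backward()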
# @@ -189,6 +198,9 @@ - name: clone(Tensor self) self: grad +- name: coalesce(Tensor self) + self: grad + - name: cos(Tensor self) self: grad * -self.sin() @@ -321,6 +333,12 @@ self: gesv_backward_self(grad, self, A) A: gesv_backward_A(grad, self, A, result0) +- name: indices(Tensor self) + output_differentiability: [False] + +- name: _indices(Tensor self) + output_differentiability: [False] + - name: grid_sampler_2d(Tensor input, Tensor grid, int64_t interpolation_mode, int64_t padding_mode) input, grid: grid_sampler_2d_backward(grad, input, grid, interpolation_mode, padding_mode) @@ -785,12 +803,23 @@ self: not_implemented("sparse_mask") mask: not_implemented("sparse_mask") +- name: _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values, TensorOptions options) + values: sparse_constructor_values_backward(grad, indices, values.sizes()) + - name: _standard_gamma(Tensor self, Generator generator) self: grad * _standard_gamma_grad(self, result) - name: _standard_gamma_grad(Tensor self, Tensor output) self: not_implemented("_standard_gamma_grad") +- name: values(Tensor self) + self: at::_sparse_coo_tensor_unsafe(self.indices(), grad, self.sizes())._coalesced_(true); + +# Why is _values() not differentiable? +# See NOTE [ Sparse: autograd and API ] +- name: _values(Tensor self) + output_differentiability: [False] + # NN - name: _trilinear(Tensor i1, Tensor i2, Tensor i3, IntList expand1, IntList expand2, IntList expand3, IntList sumdim, int64_t unroll_dim) i1, i2, i3: _trilinear_backward(grad, i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim, grad_input_mask) @@ -1234,7 +1263,11 @@ save_var: not_implemented("cudnn_batch_norm_backward save_var") input, weight, grad_output: batchnorm_double_backward(input, weight, grads[0], grads[1], grads[2], grad_output, running_mean, running_var, true, epsilon, save_mean, save_var, grad_input_mask) + +# Only frst three of _cudnn_rnn outputs can have gradients. +# _cudnn_rnn outputs: (output, hy, cy, reserve, weight_buf) - name: _cudnn_rnn(Tensor input, TensorList weight, int64_t weight_stride0, Tensor weight_buf, Tensor hx, Tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntList batch_sizes, Tensor dropout_state) + output_differentiability: [True, True, True, False, False] input, hx, cx, weight: "_cudnn_rnn_backward(input, weight, weight_stride0, result4, hx, cx, result0, grads[0], grads[1], grads[2], mode, hidden_size, num_layers, batch_first, dropout, train, bidirectional, batch_sizes, dropout_state, retain_variables ? result3.clone() : result3, grad_input_mask)" # miopen @@ -1274,7 +1307,11 @@ tensors: unbind(grad, dim) # fused RNN kernels + +# Only frst two of _thnn_fused_lstm_cell outputs can have gradients. 
+# _thnn_fused_lstm_cell outputs: (hy, cy, workspace) - name: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor input_bias, Tensor hidden_bias) + output_differentiability: [True, True, False] input_gates, hidden_gates, cx, input_bias, hidden_bias: _thnn_fused_lstm_cell_backward(grads[0], grads[1], cx, result1, result2, input_bias.defined()) - name: _thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor input_bias, Tensor hidden_bias) diff --git a/tools/autograd/gen_autograd.py b/tools/autograd/gen_autograd.py index af62f0ad489036..e09bc810c3f8ad 100644 --- a/tools/autograd/gen_autograd.py +++ b/tools/autograd/gen_autograd.py @@ -14,29 +14,35 @@ from collections import defaultdict from .utils import YamlLoader, split_name_params +# See NOTE [ Autograd View Variables ] in variable.h for details. +# A map: function name => two options: +# 1. name of the argument that all outputs are view of +# 2. map: output idx => name of the argument that this result is view of VIEW_FUNCTIONS = { - 'alias', 'as_strided', 'diagonal', 'expand', 'narrow', 'permute', 'select', 'slice', - 'squeeze', 't', 'transpose', 'unfold', 'unsqueeze', 'view', 'unbind', -} - -# In principle this should live in derivatives.yaml, but I could not -# think of a good syntax for it -HARDCODED_DIFFERENTIABLE_OUTPUTS = { - # Suppose that 'foo' is a function for which outputs 0 and 1 are - # differentiable, and 2 is not. Then you would write: - # 'foo': (0, 1), - '_cudnn_rnn': (0, 1, 2), - # _cudnn_rnn outputs: - # 0 => output - # 1 => hy - # 2 => cy - # 3 => reserve - # 4 => weight_buf - '_thnn_fused_lstm_cell': (0, 1), - # _thnn_fused_lstm_cell outputs: - # 0 => hy - # 1 => cy - # 2 => workspace + 'alias': 'self', + 'as_strided': 'self', + 'diagonal': 'self', + 'expand': 'self', + 'narrow': 'self', + 'permute': 'self', + 'select': 'self', + 'slice': 'self', + 'squeeze': 'self', + 't': 'self', + 'transpose': 'self', + 'unfold': 'self', + 'unsqueeze': 'self', + 'view': 'self', + 'unbind': 'self', + '_indices': 'self', + '_values': 'self', + 'indices': 'self', + 'values': 'self', + # sparse_coo ctor output should really be views of both indices and values, + # but we only supports making as view of a single varible, and indices is + # discrete anyways. + # FIXME: clone indices on construction. 
+ 'sparse_coo_tensor_with_dims_and_tensors': 'values', } diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py index b1746e47bd5113..5376d1b6384907 100644 --- a/tools/autograd/gen_python_functions.py +++ b/tools/autograd/gen_python_functions.py @@ -19,8 +19,7 @@ SKIP_PYTHON_BINDINGS = [ 'alias', 'contiguous', 'is_cuda', 'is_sparse', 'size', 'stride', '.*_backward', '.*_backward_(out|input|weight|bias)', '.*_forward', - '.*_forward_out', '_unsafe_view', 'tensor', - 'sparse_coo_tensor', 'th_sparse_coo_tensor', 'native_sparse_coo_tensor', + '.*_forward_out', '_unsafe_view', 'tensor', '_?sparse_coo_tensor.*', '_arange.*', '_range.*', '_linspace.*', '_logspace.*', '_sparse_add.*', '_sparse_div.*', '_sparse_mul.*', '_sparse_sub.*', 'index', diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index 26a2a9539efbc2..16af5c65d04a95 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -26,7 +26,7 @@ import os import sys from .utils import CodeTemplate, nested_dict, write, uninplace_api_name -from .gen_autograd import VIEW_FUNCTIONS, HARDCODED_DIFFERENTIABLE_OUTPUTS +from .gen_autograd import VIEW_FUNCTIONS from .gen_autograd_functions import uses_single_grad @@ -43,7 +43,9 @@ DONT_RECORD_TRACE = { 'convolution', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d', 'lstm_cell', 'gru_cell', - 'rnn_tanh_cell', 'rnn_relu_cell', 'linear' + 'rnn_tanh_cell', 'rnn_relu_cell', 'linear', + # FIXME: figure out a better way when we support sparse tensors in jit + '_coalesced_', } # These functions have their names recorded under trace renamed, @@ -71,13 +73,13 @@ # tensors that have requires_grad=False. In-place functions listed here will # not examine or modify requires_grad or grad_fn. DONT_REQUIRE_DERIVATIVE = { - # These only depend on the input Tensor's shape and device, not the data + # These only depend on the input Tensor's shape and device, not the data 'ones_like', 'zeros_like', 'rand_like', 'randn_like', - # Tensor constructors - 'sparse_coo_tensor', 'th_sparse_coo_tensor', 'native_sparse_coo_tensor', # These are only implemented on integral types '__and__', '__iand__', '__ilshift__', '__ior__', '__irshift__', '__ixor__', '__lshift__', '__or__', '__rshift__', '__xor__', + # This is an unsafe method that is meant to be out of reach of autograd. + '_coalesced_', } METHOD_DECLARATION = CodeTemplate("""\ @@ -180,7 +182,7 @@ def should_trace(declaration): return False name = declaration['name'] base_name = name[:-1] if declaration['inplace'] else name[:-4] if name.endswith('_out') else name - if base_name in DONT_RECORD_TRACE: + if base_name in DONT_RECORD_TRACE or name in DONT_RECORD_TRACE: return False # We need to disable these because their inner implementations implement # broadcasting, and if we trace them top level we will lose the expand nodes. 
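The `should_trace` tweak above is presumably needed because `base_name` strips the trailing underscore from in-place ops, so `_coalesced_` would become `_coalesced` and miss the new `DONT_RECORD_TRACE` entry; checking the raw `name` as well keeps it excluded. A tiny standalone sketch of that lookup (hypothetical helper, not the codegen itself):

    def should_skip_trace(name, inplace, dont_record=frozenset({'_coalesced_'})):
        # mirrors the base-name handling in gen_variable_type.should_trace
        base_name = name[:-1] if inplace else name[:-4] if name.endswith('_out') else name
        return base_name in dont_record or name in dont_record

    print(should_skip_trace('_coalesced_', inplace=True))  # True, only via the raw-name check
    print(should_skip_trace('add_', inplace=True))         # False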
@@ -313,7 +315,7 @@ def emit_body(declaration): returns_void = len(returns) == 1 and returns[0]['type'] == 'void' base_name = name[:-1] if inplace else name[:-4] if is_out_fn else name - is_view = base_name in VIEW_FUNCTIONS + view_info = VIEW_FUNCTIONS.get(base_name, None) # These exclude things like BoolTensor, int64_t, and Scalar def is_differentiable(arg): @@ -329,18 +331,19 @@ def is_differentiable(arg): differentiable_inputs = list(filter(is_differentiable, inputs)) candidate_differentiable_outputs = list(filter(is_differentiable, returns)) - hardcoded_diff = HARDCODED_DIFFERENTIABLE_OUTPUTS.get(name) - if hardcoded_diff: + if func is not None and func.get('output_differentiability') is not None: differentiable_outputs = [] - for i in hardcoded_diff: - differentiable_outputs.append(candidate_differentiable_outputs[i]) + output_differentiability = func.get('output_differentiability') + for differentiable, output in zip(output_differentiability, returns): + if differentiable: + differentiable_outputs.append(output) elif uses_single_grad(func): differentiable_outputs = candidate_differentiable_outputs[:1] else: differentiable_outputs = candidate_differentiable_outputs requires_derivative = ( - base_name not in DONT_REQUIRE_DERIVATIVE and + base_name not in DONT_REQUIRE_DERIVATIVE and name not in DONT_REQUIRE_DERIVATIVE and len(differentiable_inputs) > 0 and len(differentiable_outputs) > 0 and strategy == 'use_derived') @@ -455,24 +458,77 @@ def declare_returned_variables(): return '\n'.join(names) def wrap_output(call): + # Returns a 2-tuple `(wrapped_call, extra_wrapping_stmts)`, where + # `wrapped_call` is to drop-in replace `call`, and + # `extra_wrapping_stmts` is a list of extra statements to run after + # `call`. if 'Tensor' not in declaration['return_type']: - return call - elif is_view: - return 'as_view(self, {})'.format(call) + return call, [] + elif view_info is not None: + # See NOTE [ Autograd View Variables ] in variable.h for details. + differentiable_output_vars = {r['name'] for r in differentiable_outputs} + tensor_output_vars = {r['name'] for r in returns if 'Tensor' in r['type']} + if not isinstance(view_info, dict): + if len(differentiable_output_vars) == len(tensor_output_vars): + # all outputs are differentiable + return 'as_view({}, {}, true)'.format(view_info, call), [] + elif len(differentiable_output_vars) == 0: + # no output is differentiable + return 'as_view({}, {}, false)'.format(view_info, call), [] + else: + # some of the outputs are differentiable + # need to expand to dict mode, i.e., one entry per output + base_name = view_info + view_info_dict = {} + for i, return_info in enumerate(returns): + if 'Tensor' in return_info['type']: + view_info_dict[i] = base_name + else: + view_info_dict = view_info + + def wrap_view_single(output_var, base_var): + fmt = '{output_var} = as_view({base_var}, {output_var}, {is_differentiable});' + if output_var in differentiable_output_vars: + # If `GradMode::is_enabled()` is False, this is a + # non-differentiable view. Gradients should not flow through. + is_differentiable = 'true' + else: + # This output is non-differentiable, so it is a + # non-differentiable view. Gradients should not flow through. 
+ is_differentiable = 'false' + return fmt.format(output_var=output_var, base_var=base_var, + is_differentiable=is_differentiable) + + extra_wrapping_stmts = [] + for output_idx, return_info in enumerate(returns): + if 'Tensor' not in return_info['type']: + assert output_idx not in view_info_dict, 'Can not wrap non-Tensor output as a view' + continue + output_var = return_info['name'] + if output_idx in view_info_dict: + stmt = wrap_view_single(output_var, view_info_dict[output_idx]) + elif 'Tensor' in return_info['type']: + stmt = '{output_var} = as_variable({output_var});'.format(output_var=output_var) + extra_wrapping_stmts.append(stmt) + return call, extra_wrapping_stmts else: - return 'as_variable({})'.format(call) + return 'as_variable({})'.format(call), [] def emit_call(env): combined = nested_dict(env, declaration) + extra_wrapping_stmts = [] if strategy == 'use_derived': call = CALL_VIA_DERIVED.substitute(combined) if not modifies_arguments: - call = wrap_output(call) + call, extra_wrapping_stmts = wrap_output(call) else: call = CALL_VIA_TYPE.substitute(declaration) if not modifies_arguments and not returns_void: call = '{} = {}'.format(tie_return_values(), call) - return call + ';' + call = call + ';' + for stmt in extra_wrapping_stmts: + call += '\n' + stmt + return call def tie_return_values(): if len(declaration['returns']) == 1: @@ -497,7 +553,7 @@ def get_return_value(): return 'std::make_tuple({})'.format(', '.join(moved)) def emit_history(): - fn = 'rebase' if modifies_arguments and not is_view else 'set' + fn = 'rebase' if modifies_arguments and view_info is None else 'set' output_names = [r['name'] for r in differentiable_outputs] # TODO: flatten allocates a std::vector, which could be expensive outs = CodeTemplate("flatten_tensor_args( ${outs} )").substitute(outs=output_names) @@ -612,7 +668,8 @@ def dispatch_strategy(declaration): get dispatched back to VariableType (which will ensure that they are differentiable.) """ - if (declaration['abstract'] or declaration['derivative'] is not None): + if (declaration['abstract'] or declaration['requires_tensor'] or + declaration['derivative'] is not None): # If the function is abstract (not implemented on at::Type), we must # call the implementation on the derived type with unpacked tensors. @@ -631,6 +688,6 @@ def dispatch_strategy(declaration): else: # If the function is concrete (we don't have to override it) and we # didn't declare it in derivatives.yaml, we'll assume that it is - # actually implemented out of differentiable functions. (This + # actually implemented out of differentiable functions. (This # assumption might not hold, but then you'll see gradcheck fail.) return 'use_type' diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py index ef4ef8d9acbb58..c2ede2d9d7b6d4 100644 --- a/tools/autograd/load_derivatives.py +++ b/tools/autograd/load_derivatives.py @@ -9,7 +9,6 @@ import yaml from .utils import YamlLoader from .utils import IDENT_REGEX, split_name_params -from .gen_autograd import HARDCODED_DIFFERENTIABLE_OUTPUTS def load_derivatives(path, declarations): @@ -30,7 +29,8 @@ def load_derivatives(path, declarations): # How do you feel about pasting declaration inside autograd function... 
-def create_autograd_function(name, derivatives, args_with_gradients, signature, declaration): +def create_autograd_function(name, derivatives, args_with_gradients, signature, + declaration, output_differentiability): op = to_camel_case(name) + 'Backward' op = op.replace('ForwardBackward', 'Backward') return { @@ -42,6 +42,7 @@ def create_autograd_function(name, derivatives, args_with_gradients, signature, 'derivatives': derivatives, 'saved_inputs': all_saved_variables(derivatives, 'saved_inputs'), 'saved_outputs': all_saved_variables(derivatives, 'saved_outputs'), + 'output_differentiability': output_differentiability, } @@ -128,36 +129,6 @@ def check_grad_usage(defn_name, declaration, derivatives): "otherwise, there is a likely error in your derivatives " "declaration.".format(defn_name)) - hardcoded_diff = HARDCODED_DIFFERENTIABLE_OUTPUTS.get(defn_name) - if hardcoded_diff: - if used_grad: - raise RuntimeError("Derivative definition {} has hard-coded differentiable " - "outputs in gen_autograd.py, but used grad (which implies " - "only the first output is differentiable) in its " - "derivative declaration. You likely meant to write " - "grads[i] for some i instead.".format(defn_name)) - if only_used_grads_indices and set(used_grads_indices) != set(hardcoded_diff): - raise RuntimeError("Derivative definition {} has hard-coded differentiable " - "outputs {}, but the used grads in the derivative " - "definitions are only {}. Either your derivatives " - "declaration is wrong, or the value of " - "HARDCODED_DIFFERENTIABLE_OUTPUTS in gen_autograd.py " - "is wrong.".format(defn_name, hardcoded_diff, - used_grads_indices)) - else: - if fully_implemented and not used_grad and \ - used_grads and only_used_grads_indices and \ - set(used_grads_indices) != set(range(len(declaration['returns']))): - raise RuntimeError("Derivative definition of {} in derivatives.yaml does " - "not refer to the gradients of all of its outputs. Either " - "the derivatives declaration is wrong, OR you have some " - "non-differentiable outputs. If you have a single " - "differentiable output, make it the first output in ATen " - "and reference its gradient with 'grad'; otherwise, hard " - "code the list of differentiable outputs in " - "HARDCODED_DIFFERENTIABLE_OUTPUTS in gen_autograd.py." - .format(defn_name)) - def set_up_derivatives(defn_name, defn, declaration): # Determine the set of inputs which have gradients args_with_gradients_set = set() @@ -188,6 +159,9 @@ def unzip(xs): # NB: Removes 'name' from defn dictionary defn_name, params = split_name_params(defn.pop('name')) + # NB: Removes 'output_differentiability' from defn dictionary + # `None` means all differentiable. 
+ output_differentiability = defn.pop('output_differentiability', None) param_types, param_names = unzip([p.split(' ') for p in params if p != '*']) if 'grad_input_mask' in param_names: raise RuntimeError("Signature for {} has an argument named grad_input_mask, " @@ -221,7 +195,8 @@ def unzip(xs): .format(i, defn_name, x, y)) derivatives, args_with_gradients = set_up_derivatives(defn_name, defn, canonical) - return create_autograd_function(defn_name, derivatives, args_with_gradients, signature, canonical) + return create_autograd_function(defn_name, derivatives, args_with_gradients, + signature, canonical, output_differentiability) def ensure_unique_names(autograd_functions): diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp index 68197d7d825151..2752a156b35ecd 100644 --- a/tools/autograd/templates/Functions.cpp +++ b/tools/autograd/templates/Functions.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -874,15 +875,15 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala } -// NOTE [ as_strided Backward ] +// NOTE [ as_strided Backward and layout-aware/agnostic autograd ] // // `storage_offset` is ignored for simplicity in this note. If you just want the // full algorithm without explanation, scroll down to bottom of this note. // // Implementing the backward of as_strided is tricky because you have to deal -// with mappings that maps one memory location to multiple indices, i.e., the -// output tensor indices pointing to **overlapping** memory addresses. This can -// happen in all in all sorts of weird cases. For example, +// with mappings that map one memory location to multiple indices, i.e., the +// output tensor has multiple indices pointing to **overlapping** memory +// addresses. This can happen in all in all sorts of weird cases. For example, // // x = torch.randn(15) // x.as_strided([3, 3], [1, 0]) # "expand" case @@ -891,7 +892,7 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // # res[0, 1] points to 0*3 + 1*6 = 6 // // Here is the general strategy we apply in implementing as_strided backward: -// 0. ??? (optimizaiont step. we will talk about this later) +// 0. ??? (optimization step. we will talk about this later) // 1. Create some underlying flattened tensor as if it is the base tensor // representing the contiguous memory storage for both input and output. // 2. Use the output geometry to scatter (or index_add) the gradients into @@ -902,7 +903,7 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // // In step (2), if the output tensor does't have overlapping memory, we can // safely scatter (`storage.as_strided(output_geometry).copy_(grad)`); -// otherwise, we must use `index_add` as gradient at different indices may need +// otherwise, we must use `index_add` as gradients at different indices may need // to be summed to a single location. 
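The note's concrete overlap example continues below; as a quick runnable companion to the scatter-vs-index_add point just made, here is a sketch (sizes and strides are arbitrary) that reproduces the accumulation by hand and compares it with autograd:

    # Sketch: index_add-style accumulation for an overlapping as_strided output.
    import torch

    x = torch.randn(3, requires_grad=True)
    y = x.as_strided([3, 2], [1, 0])          # column stride 0 -> overlapping reads
    g = torch.ones_like(y)                    # incoming gradient

    storage_grad = torch.zeros(3)             # flat "storage" gradient
    idx = torch.arange(3).unsqueeze(1).expand(3, 2)  # storage index of each y element
    storage_grad.index_add_(0, idx.reshape(-1), g.reshape(-1))  # accumulate overlaps

    y.sum().backward()
    print(storage_grad)                       # tensor([2., 2., 2.])
    print(x.grad)                             # matches: each x[i] is read twice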
// // For example, in this case: @@ -933,10 +934,10 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // x = t.expand(3, 3) # input with overlapping memory // # size [3, 3] // # stride [0, 1] -// y = x.as_strided([3], [1]) # contiguous output -// # size [3] +// y = x.as_strided([1], [1]) # contiguous output +// # size [1] // # stride [1] -// y.backward() # step (1): contiguous storagte tensor `s` of size 3, which +// y.backward() # step (1): contiguous storage tensor `s` of size 3, which // is large enough to be used as underlying storage // for `x` and `y`. // s = [ 0, 0, 0] @@ -969,17 +970,17 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // In the above computation of numerical gradients, they only match the // analytical results because strides and memory locations are considered in the // forward pass, i.e., this op (including both forward and backward) is -// stride-aware. +// layout-aware. // -// However, most (probably all) other ops (forward and backward) are -// stride-agnostic. E.g., +// However, in PyTorch, most (probably all) other ops (forward and backward) are +// layout-agnostic. E.g., // // t = torch.randn(1) // x = t.expand(2) // y = x.sum() // y.backward() // -// Stride-agnostic autograd (as it is currently in PyTorch) will give you +// Layout-agnostic autograd (as it is currently in PyTorch) will give you // // gy = 1 // gx = [ 1, 1] # SumBackward: torch.ones_like(x) @@ -989,28 +990,30 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // (the other will also change by `delta`), `y` will change by `2 * delta`. So // the gradients, if strides are taken into consideration, should be 2. // -// Stride-aware autograd should give you +// Layout-aware autograd should give you // // gy = 1 // gx = [ 2, 2] # Because the backward considers the fact that the input `x` // # is already expanded. -// gt = [ 2] # Stride-aware backward of expand is just a slicing because +// gt = [ 2] # Layout-aware backward of expand is just a slicing because // # the previous backward should have already taken care of // # strides and made sure that gradients are the same along the // # expanded dimension. // // As shown above, these two types are not compatible. Therefore, we must either -// make as_strided stride-agnostic, or make all other ops stride-aware. +// make as_strided layout-agnostic, or make all other ops layout-aware. // -// It is unrealisitc to support stride-aware autograd (at least in the current -// structure), because it would mean +// It is difficult to support layout-aware autograd (at least in the current +// codebase structure), because it would mean // 1. storing tensor geometries of every input tensor for backward // 2. depending on input geometry, the gradient computed from backward change // 3. ideally enforcing gradient of T to always have same strides as T // (although these two methods only differ when it comes to overlapping memory) // -// To formulate `as_strided(input, size, stride)` in a stride-agnostic way, we -// consider `input.stride()` as a separate independent arguement `input_stride`: +// Therefore, we must formulate `as_strided` in a layout-agnostic way, i.e., +// giving the same output regardless of the input layout. We consider +// `input.stride()` as a separate independent fixed argument `input_stride`. +// Then, `as_strided(input, size, stride)` can be thought of as: // 1. 
"Scatter" each value of `input` into a "storage" using storage location // computed from the value's index in `input`, `input.size()` and // `input_stride`, but if N values end up in the same location, the value @@ -1020,7 +1023,9 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // Denote the set of all input indices that pointing to the same storage // location `storage[n]` as `S(n)`, i.e., // -// S(n) = { index : index @ input_stride == n, index is valid given input.size() } +// S(n) = { index : == n, index is valid given input.size() }, +// +// where `` is the dot product between `x` and `y`. // // Then, the process is: // @@ -1028,7 +1033,9 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // // Note that all values in `S(n)` are the same (they point to the same // memory location anyways, so this step doesn't change anything, but -// effectively avoids using `input.stride()`. +// effectively avoids having the denpendency on the layout of `input`. +// I.e., the result holds fixed regardless of the layout of `input`, as +// long as `input_stride` is fixed. // // NOTE: for forward pass, we can equivalently simply selet any one of // `S(n)` as `storage[n]`. However, cosnidering this as an average @@ -1038,7 +1045,7 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // 2. As usual, return the as_strided view of `storage` using required output // `size` and `stride`. // -// To backward through this stride-agnostic version, we simply add the following +// To backward through this layout-agnostic version, we simply add the following // step: // .... (scatter gradients into the storage tensor using output geometry) // 3. For all storage location n, `storage[n] /= |S(n)|`. @@ -1311,13 +1318,13 @@ static inline int64_t _min_storage_size(IntList sizes, IntList strides, int64_t return storage_size; } -// See NOTE [ as_strided Backward ] for explanation +// See NOTE [ as_strided Backward and layout-aware/agnostic autograd ] for explanation Tensor as_strided_backward(Tensor grad, TensorGeometry input_geometry, IntList sizes, IntList strides, int64_t storage_offset) { // For output geometry, // check for size 0 dimensions, // skip size 1 dimensions, // reduce grad on expanded dims (stride=0, size>1) - // Step (0) for the algorithm in NOTE [ as_strided Backward ] + // Step (0) for the algorithm in NOTE [ as_strided Backward and layout-aware/agnostic autograd ] // Step (0)~(1) for the algorithm in NOTE [ Detecting Memory Overlap Within A Strided Tensor ] // on output geometry auto odim = grad.dim(); @@ -1368,7 +1375,7 @@ Tensor as_strided_backward(Tensor grad, TensorGeometry input_geometry, IntList s // Rest of this function implements - // Step (1)~(4) for the algorithm in NOTE [ as_strided Backward ] + // Step (1)~(4) for the algorithm in NOTE [ as_strided Backward and layout-aware/agnostic autograd ] // TODO: Raise if not all output values are visible in input geometry. // Technically speaking, if you treat those values as constants, not // raising is fine, and mathematically correct. 
However, these values @@ -1964,8 +1971,8 @@ std::tuple<Tensor, Tensor, Tensor> batchnorm_double_backward( } std::tuple<Tensor, Tensor, Tensor> _trilinear_backward(const Tensor& grad_out, const Tensor& i1, const Tensor& i2, const Tensor& i3, - IntList expand1, IntList expand2, IntList expand3, - IntList sumdim, int64_t unroll_dim, std::array<bool, 3> grad_mask) { + IntList expand1, IntList expand2, IntList expand3, + IntList sumdim, int64_t unroll_dim, std::array<bool, 3> grad_mask) { Tensor grad_i1, grad_i2, grad_i3; if (grad_mask[0]) grad_i1 = at::_trilinear(grad_out, i2, i3, sumdim, expand2, expand3, expand1); @@ -1987,6 +1994,17 @@ Tensor log1p_backward(const Tensor& grad, const Tensor& self) { return grad / (self + 1); } +Tensor sparse_constructor_values_backward(const Tensor& sparse_grad_out, const Tensor& indices, IntList values_shape) { + // TODO: improve this backward by writing a kernel (maybe) + auto dense_grad = sparse_grad_out.is_sparse() ? sparse_grad_out.to_dense() : sparse_grad_out; + auto full_size = sparse_grad_out.sizes(); + auto flattened_grad_shape = values_shape.vec(); + flattened_grad_shape[0] = at::prod_intlist(full_size.slice(0, indices.size(0))); + auto flattened_dense_grad = dense_grad.view(flattened_grad_shape); + auto flattened_indices = at::sparse::flatten_indices(indices, full_size); + return flattened_dense_grad.index_select(0, flattened_indices); +} + } // anonymous namespace ${autograd_function_definitions} diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py index d5a876e4ca83e6..3e3114841a8e15 100644 --- a/torch/_tensor_docs.py +++ b/torch/_tensor_docs.py @@ -662,6 +662,17 @@ def add_docstr_all(method, docstr): Returns the address of the first element of :attr:`self` tensor. """) +add_docstr_all('dense_dim', + r""" +dense_dim() -> int + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns the number of dense dimensions. Otherwise, this throws an +error. + +See also :meth:`Tensor.sparse_dim`. +""") + add_docstr_all('diag', r""" diag(diagonal=0) -> Tensor @@ -980,9 +991,24 @@ def add_docstr_all(method, docstr): See :func:`torch.gesv` """) +add_docstr_all('indices', + r""" +indices() -> Tensor + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns a view of the contained indices tensor. Otherwise, this throws an +error. + +See also :meth:`Tensor.values`. + +.. note:: + This method can only be called on a coalesced sparse tensor. See + :meth:`Tensor.coalesce` for details. +""") + add_docstr_all('get_device', r""" -get_device(A) -> Device ordinal (Integer) +get_device() -> Device ordinal (Integer) For CUDA tensors, this function returns the device ordinal of the GPU on which the tensor resides. For CPU tensors, an error is thrown. @@ -995,6 +1021,21 @@ def add_docstr_all(method, docstr): >>> x.cpu().get_device() # RuntimeError: get_device is not implemented for type torch.FloatTensor """) +add_docstr_all('values', + r""" +values() -> Tensor + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns a view of the contained values tensor. Otherwise, this throws an +error. + +See also :meth:`Tensor.indices`. + +.. note:: + This method can only be called on a coalesced sparse tensor. See + :meth:`Tensor.coalesce` for details. +""") + add_docstr_all('gt', r""" gt(other) -> Tensor @@ -1462,7 +1503,7 @@ def callable(a, b) -> number Same as :meth:`Tensor.narrow` except returning a copy rather than shared storage. This is primarily for sparse tensors, which do not have a shared-storage narrow method.
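The narrow_copy description continues below; meanwhile, the sparse accessors documented above compose as in the following sketch (the particular indices and values are arbitrary):

    # Sketch: sparse_dim/dense_dim and the indices()/values() views on a
    # coalesced sparse COO tensor.
    import torch

    i = torch.tensor([[0, 1, 1],
                      [2, 0, 2]])                     # sparse_dim x nnz = 2 x 3
    v = torch.tensor([[3., 4.], [5., 6.], [7., 8.]])  # nnz x dense part = 3 x 2
    s = torch.sparse_coo_tensor(i, v, (2, 3, 2)).coalesce()

    print(s.sparse_dim())      # 2
    print(s.dense_dim())       # 1
    print(s.indices().shape)   # torch.Size([2, 3])
    print(s.values().shape)    # torch.Size([3, 2])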
Calling ```narrow_copy`` -with ```dimemsion > self._sparseDims()``` will return a copy with the +with ```dimension > self.sparse_dim()``` will return a copy with the relevant dense dimension narrowed, and ```self.shape``` updated accordingly. """) @@ -2094,6 +2135,17 @@ def callable(a, b) -> number See :func:`torch.sort` """) +add_docstr_all('sparse_dim', + r""" +sparse_dim() -> int + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns the number of sparse dimensions. Otherwise, this throws an +error. + +See also :meth:`Tensor.dense_dim`. +""") + add_docstr_all('sqrt', r""" sqrt() -> Tensor diff --git a/torch/csrc/autograd/VariableTypeUtils.h b/torch/csrc/autograd/VariableTypeUtils.h index d28149e1282571..07cc420e429875 100644 --- a/torch/csrc/autograd/VariableTypeUtils.h +++ b/torch/csrc/autograd/VariableTypeUtils.h @@ -103,21 +103,24 @@ template <typename... Args> inline variable_list flatten_tensor_args(Args&&... args) { return out; // RVO } -inline Tensor as_view(const Tensor & base, Tensor tensor) { +// See NOTE [ Autograd View Variables ] for details. +inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } - return make_variable_view(std::move(base_var), std::move(tensor)); + return make_variable_view(std::move(base_var), std::move(tensor), is_differentiable); } -inline std::vector<Tensor> as_view(const Tensor & base, std::vector<Tensor> tensors) { +// See NOTE [ Autograd View Variables ] for details. +inline std::vector<Tensor> as_view(const Tensor & base, std::vector<Tensor> tensors, + bool is_differentiable = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } for(Tensor &tensor : tensors) { - tensor = make_variable_view(base_var, std::move(tensor)); + tensor = make_variable_view(base_var, std::move(tensor), is_differentiable); } return tensors; } diff --git a/torch/csrc/autograd/functions/tensor.h b/torch/csrc/autograd/functions/tensor.h index 101595368076e9..11c447cb8bae38 100644 --- a/torch/csrc/autograd/functions/tensor.h +++ b/torch/csrc/autograd/functions/tensor.h @@ -22,6 +22,10 @@ struct CopyBackwards : public Function { // Performs grad[idx] = fn(grad[idx]), but out-of-place. The slicing operation // grad[idx] is defined by the relative sizes, strides, and offset of base and // view. +// When an in-place operation is done on a differentiable view, the base's +// grad_fn is updated to become a `CopySlices` wrapping the backward of the +// in-place operation. +// See NOTE [ Autograd View Variables ]. struct CopySlices : public Function { CopySlices( const Variable& base_var, diff --git a/torch/csrc/autograd/variable.cpp b/torch/csrc/autograd/variable.cpp index c36a52a71a279a..74a58f8454bd96 100644 --- a/torch/csrc/autograd/variable.cpp +++ b/torch/csrc/autograd/variable.cpp @@ -115,12 +115,6 @@ std::shared_ptr<Function> Variable::Impl::get_grad_accumulator() { return result; } -Variable Variable::Impl::detach() const { - auto detached = make_variable(data_, /*requires_grad=*/false); - detached.set_version_counter(version_counter_); - return detached; -} - void Variable::Impl::detach_() { if (is_view_) { AT_ERROR("Can't detach views in-place.
Use detach() instead"); } @@ -172,7 +166,7 @@ void Variable::Impl::release_resources() { hooks_.clear(); } -Variable::ViewImpl::ViewImpl(Variable base, at::Tensor data, Edge gradient_edge) +Variable::DifferentiableViewImpl::DifferentiableViewImpl(Variable base, at::Tensor data, Edge gradient_edge) : Variable::Impl(std::move(data), false, std::move(gradient_edge)), base_(std::move(base)) { AT_CHECK(base_.defined(), "base is undefined"); @@ -184,7 +178,7 @@ Variable::ViewImpl::ViewImpl(Variable base, at::Tensor data, Edge gradient_edge) attr_version = version_counter_.current_version(); } -std::shared_ptr<Function>& Variable::ViewImpl::get_grad_fn() { +std::shared_ptr<Function>& Variable::DifferentiableViewImpl::get_grad_fn() { std::lock_guard<std::mutex> lock(mutex_); if (!grad_fn_ && !base_.requires_grad()) { return grad_fn_; @@ -208,7 +202,7 @@ std::shared_ptr<Function>& Variable::ViewImpl::get_grad_fn() { return grad_fn_; } -void Variable::ViewImpl::rebase_history(Edge gradient_edge) { +void Variable::DifferentiableViewImpl::rebase_history(Edge gradient_edge) { AT_ASSERT(gradient_edge.input_nr == 0); AT_ASSERT(gradient_edge.function); AT_CHECK( @@ -221,7 +215,7 @@ void Variable::ViewImpl::rebase_history(Edge gradient_edge) { get_grad_fn(); // trigger an update to the view's grad_fn } -void Variable::ViewImpl::release_resources() { +void Variable::DifferentiableViewImpl::release_resources() { Variable::Impl::release_resources(); base_.reset(); } @@ -229,7 +223,7 @@ void Variable::ViewImpl::release_resources() { void Variable::rebase_history(Edge gradient_edge) { AT_ASSERT(gradient_edge.function != nullptr); if (is_view()) { - auto& impl = static_cast<Variable::ViewImpl&>(*get()); + auto& impl = static_cast<Variable::DifferentiableViewImpl&>(*get()); impl.rebase_history(std::move(gradient_edge)); } else { set_gradient_edge(std::move(gradient_edge)); diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 804be552392d83..48cc918ca288c9 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -61,8 +61,10 @@ struct Function; /// `Variable`. You can determine whether `Variable` is in fact a view by /// probing its `is_view()` method. Note that the *view* semantics are only /// meaningful for `Variable` relations that are relevant to autograd. For -/// example, if you hide your code from autograd using `.data`, the `Variable`s -/// will not be registered as having view relations, even if they share storage. +/// example, if you hide your code from autograd using `.no_grad()`, the +/// `Variable`s will not be registered as having view relations, even if they +/// share storage. +/// See NOTE [ Autograd View Variables ] for more details. /// /// /// Interface @@ -92,9 +94,13 @@ struct TORCH_API Variable : public at::Tensor { /// Creates a `Variable` that is a *view* of another (*base*) variable. /// The `gradient_edge` is an optional (gradient_function, input_number) pair. + /// `is_differentiable` is a bool that specifies whether this view is + /// differentiable, i.e., whether the relation should be tracked by autograd. + /// See NOTE [ Autograd View Variables ] for details. friend Variable make_variable_view( Variable base, at::Tensor data, + bool is_differentiable, Edge gradient_edge); /// Creates a `Variable` from the given `Tensor`. `requires_grad` should be @@ -263,7 +269,7 @@ struct TORCH_API Variable : public at::Tensor { /// and the `get()` method which exposes it shall forever remain private and /// never be exposed to the public interface of this class.
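In user-facing terms, the shared version counter described above is what lets autograd catch writes made through any alias of a tensor, including the non-differentiable view returned by detach(). A small sketch of that behavior (the exact error text may differ):

    # Sketch: a write through a detached (non-differentiable) view bumps the
    # version counter shared with the base, so a stale saved tensor is caught.
    import torch

    base = torch.ones(3, requires_grad=True)
    out = base * base            # mul saves `base` for its backward
    alias = base.detach()        # shares storage and version counter with `base`

    alias.add_(1)                # in-place write through the alias
    try:
        out.sum().backward()
    except RuntimeError as err:
        print(err)               # complains about an in-place modification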
struct Impl; - struct ViewImpl; + struct DifferentiableViewImpl; // Private Methods //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -330,7 +336,6 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { return grad_; } - Variable detach() const; void detach_(); void set_data(Tensor new_data); @@ -372,15 +377,80 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { }; //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -// Variable::ViewImpl +// Variable::DifferentiableViewImpl //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// A Variable that is a view on another Variable. The base and view share the -/// same version_counter. The grad_fn field of the Variable may become stale -/// due to in-place modifications of the shared data. Accesses should go -/// through get_grad_fn(). All other fields are always valid. -struct TORCH_API Variable::ViewImpl : public Variable::Impl { - ViewImpl(Variable base, at::Tensor data, Edge gradient_edge); +/// NOTE [ Autograd View Variables ] +/// +/// Many operations return Variable that shares storage with an input Variable. +/// The returned Variable is called a **view** Variable on the input **base** +/// Variable. +/// +/// In PyTorch, we have two types of views: differentiable views, and +/// non-differentiable views. In either type, to support proper version +/// checking, the base and view Variables must always share the same +/// version_counter. +/// +/// +/// Differentiable Views +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// Differentiable views are the view variables where you want gradients to flow +/// back to the base variables. Out-of-place operations on views are quite +/// straightforward, but in-place ones are very tricky. Even if the base +/// variable may not require grad when we create the view, we still need to +/// track the view relation because future in-place ops may require back-proping +/// through it. For example, we need to support +/// +/// (1) in-place operation on view, e.g., +/// +/// # Have: +/// # base.requires_grad = False +/// # var.requires_grad = True +/// base[1] = var # i.e., base[1].copy_(var) +/// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor +/// +/// (2) in-place operation on base after view is created, e.g., +/// +/// # Have: +/// # base.requires_grad = False +/// # var.requires_grad = True +/// view = base[1] +/// base.copy_(var) +/// torch.autograd.grad(view.sum(), var) <- should return a tensor with +/// var[1] filled with all ones and +/// zeros everywhere else +/// +/// Variable::DifferentiableViewImpl is created to support gradient tracking of +/// such **in-place** operations. In particular, +/// + if an in-place op is done on base, the grad_fn field of the view may +/// become stale. So accesses should always go through get_grad_fn(), which +/// reconstructs an updated grad_fn if the version_counter has incremented. +/// All other fields are always valid. +/// + if an in-place op is done on view, in rebase_history() of view, which is +/// called after every in-place op in VariableType.cpp, the grad_fn of base +/// is updated. +/// +/// +/// Non-Differentiable Views +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// In certain cases, although function outputs share storage with inputs, they +/// will **never** require gradient history tracking. 
Instead of registering the +/// view relation via DifferentiableViewImpl in autograd, the views will be +/// using the usual Variable::Impl and just share the version counters with the base +/// Variables. +/// Such views include: +/// 1. Views created from .detach() +/// 2. Views that are non-differentiable by their nature. +/// E.g., `sparse_tensor.indices()` is an integral view on a (possibly) +/// floating point tensor. +/// See top of `derivatives.yaml` on how to specify that outputs of a +/// function are non-differentiable. +/// These are called non-differentiable views as the gradients do not flow +/// through the view relation. +/// Relevant logic for non-differentiable views is implemented in +/// make_variable_view below, and wrap_output of gen_variable_type.py. +struct TORCH_API Variable::DifferentiableViewImpl : public Variable::Impl { + DifferentiableViewImpl(Variable base, at::Tensor data, Edge gradient_edge); /// Gets the up-to-date grad_fn. If the shared data or base was modified, we /// re-create the grad_fn to express the up-to-date view relationship between @@ -414,13 +484,24 @@ struct TORCH_API Variable::ViewImpl : public Variable::Impl { // Factory Functions //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// See NOTE [ Autograd View Variables ] for details. inline Variable make_variable_view( Variable base, at::Tensor data, + bool is_differentiable = true, Edge gradient_edge = Edge()) { if (data.defined()) { - return Variable(c10::make_intrusive<Variable::ViewImpl>( - std::move(base), std::move(data), std::move(gradient_edge))); + if (is_differentiable) { + /// Differentiable view. Track history with DifferentiableViewImpl. + return Variable(c10::make_intrusive<Variable::DifferentiableViewImpl>( + std::move(base), std::move(data), std::move(gradient_edge))); + } else { + /// Non-differentiable view. Just share version counter.
+ auto var = Variable(c10::make_intrusive( + std::move(data), false, std::move(gradient_edge))); + var.set_version_counter(base.version_counter()); + return var; + } } return Variable(); } @@ -500,7 +581,7 @@ inline std::shared_ptr Variable::grad_accumulator() const { } inline Variable Variable::detach() const { - return get()->detach(); + return make_variable_view(*this, get()->data_, /*is_differentiable=*/false); } inline void Variable::detach_() { diff --git a/torch/csrc/cuda/comm.cpp b/torch/csrc/cuda/comm.cpp index d4259a70baeb75..5ba6c55cfe1af8 100644 --- a/torch/csrc/cuda/comm.cpp +++ b/torch/csrc/cuda/comm.cpp @@ -82,6 +82,7 @@ tensor_list2d broadcast_coalesced(TensorList tensors, IntList devices, size_t bu o.reserve(tensors.size()); unique_type_checker type_checker; + at::DeviceGuard device_guard(devices[0]); for (auto & chunk : utils::take_tensors(tensors, buffer_size)) { auto & type = chunk.type(); type_checker.show(type); @@ -92,7 +93,7 @@ tensor_list2d broadcast_coalesced(TensorList tensors, IntList devices, size_t bu std::vector broadcast_values = broadcast(flat_tuple.second, devices); results.reserve(devices.size()); for (size_t i = 1, num_devices = devices.size(); i < num_devices; ++i) { - at::DeviceGuard device_guard(devices[i]); + device_guard.set_index(devices[i]); auto & device_outputs = outputs[i]; auto & inds = broadcast_indices[i]; auto & vals = broadcast_values[i]; @@ -100,7 +101,6 @@ tensor_list2d broadcast_coalesced(TensorList tensors, IntList devices, size_t bu device_outputs.push_back(std::move(t)); } } else { - at::DeviceGuard device_guard(devices[0]); std::vector results = broadcast(utils::flatten_dense_tensors(chunk.tensors), devices); for (size_t i = 1, num_devices = devices.size(); i < num_devices; ++i) { diff --git a/torch/csrc/utils/tensor_flatten.cpp b/torch/csrc/utils/tensor_flatten.cpp index 29414368c7bd50..be01a66b5996bb 100644 --- a/torch/csrc/utils/tensor_flatten.cpp +++ b/torch/csrc/utils/tensor_flatten.cpp @@ -85,8 +85,11 @@ std::vector unflatten_sparse_tensors( std::vector outputs; outputs.reserve(tensors.size()); - for (size_t i = 0, num_tensors = tensors.size(); i < num_tensors; ++i) - outputs.emplace_back(at::_sparse_coo_tensor_unsafe(indices[i], values[i], tensors[i].sizes())); + for (size_t i = 0, num_tensors = tensors.size(); i < num_tensors; ++i) { + auto &ref_t = tensors[i]; + auto t = at::_sparse_coo_tensor_unsafe(indices[i], values[i], ref_t.sizes()); + outputs.emplace_back(t._coalesced_(ref_t.is_coalesced())); + } return outputs; } diff --git a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp index 628409b18df981..7f2a70827acba3 100644 --- a/torch/csrc/utils/tensor_new.cpp +++ b/torch/csrc/utils/tensor_new.cpp @@ -221,8 +221,8 @@ Tensor internal_new_from_data( *torch::getLayout(type.backend()), type_inference_device_type); const auto& type_to_use = type_inference ? type_inference_type : type; - return copy_variables ? new_with_tensor_copy(type_to_use, var, device_index) : - new_with_type_conversion(type_to_use, var, device_index); + return copy_variables ? 
new_with_tensor_copy(type_to_use, var, device_index) + : new_with_type_conversion(type_to_use, var, device_index); } #ifdef USE_NUMPY @@ -291,12 +291,12 @@ Tensor legacy_sparse_tensor_ctor(const Type& type, PyObject* args, PyObject* kwa auto deviceOptional = r.deviceOptional(2); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1)); } else if (r.idx == 3) { auto deviceOptional = r.deviceOptional(3); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2)); } else if (r.idx == 4) { PyObject* arg = r.pyobject(0); auto deviceOptional = r.deviceOptional(1); @@ -335,14 +335,14 @@ Tensor legacy_sparse_tensor_new(const Type& type, PyObject* args, PyObject* kwar auto deviceOptional = r.deviceOptional(2); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1)); } else if (r.idx == 3) { // Note: this signature doesn't have a dtype, even though it has a device; it probably shouldn't // have a device (we should infer it). auto deviceOptional = r.deviceOptional(3); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2)); } else if (r.idx == 4) { PyObject* arg = r.pyobject(0); auto deviceOptional = r.deviceOptional(1); @@ -464,10 +464,7 @@ Tensor legacy_new_from_data( return internal_new_from_data(type, device, data, false, false, false); } -Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs) { - const auto sparse_backend = type.is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU; - const auto& default_sparse_type = type.toBackend(sparse_backend); - +Tensor sparse_coo_tensor_ctor(const Type& default_type, PyObject* args, PyObject* kwargs) { static PythonArgParser parser({ "sparse_coo_tensor(PyObject* indices, PyObject* values, *, ScalarType dtype=None, Device? device=None, bool requires_grad=False)", "sparse_coo_tensor(PyObject* indices, PyObject* values, IntList size, *, ScalarType dtype=None, Device? device=None, bool requires_grad=False)", @@ -478,29 +475,27 @@ Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs auto r = parser.parse(args, kwargs, parsed_args); if (r.idx == 0) { bool type_inference = r.isNone(2); - const auto& sparse_type = typeWithDefault(r, 2, 3, default_sparse_type); - const auto& dense_type = sparse_type.toBackend(sparse_type.is_cuda() ? Backend::CUDA : Backend::CPU); + const auto& type = typeWithDefault(r, 2, 3, default_type); + const auto& values_type = type.toDense(); at::DeviceGuard device_guard(r.device(3)); - Tensor values = internal_new_from_data(dense_type, r.deviceOptional(3), r.pyobject(1), false, true, type_inference); // if no dtype provided, infer type based on value type. 
- const auto& index_type = values.type().toScalarType(kLong); - Tensor indices = internal_new_from_data(index_type, r.deviceOptional(3), r.pyobject(0), false, true, false); - const auto& sparse_type_to_use = values.type().toBackend(values.type().is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU); - return at::sparse_coo_tensor(indices, values, sparse_type_to_use.options()).set_requires_grad(r.toBool(4)); + Tensor values = internal_new_from_data(values_type, r.deviceOptional(3), r.pyobject(1), false, true, type_inference); + const auto& indices_type = values.type().toScalarType(kLong); + Tensor indices = internal_new_from_data(indices_type, r.deviceOptional(3), r.pyobject(0), false, true, false); + return at::sparse_coo_tensor(indices, values, values.options().layout(at::kSparse)).set_requires_grad(r.toBool(4)); } else if (r.idx == 1) { bool type_inference = r.isNone(3); - const auto& sparse_type = typeWithDefault(r, 3, 4, default_sparse_type); - const auto& dense_type = sparse_type.toBackend(sparse_type.is_cuda() ? Backend::CUDA : Backend::CPU); + const auto& type = typeWithDefault(r, 3, 4, default_type); + const auto& values_type = type.toDense(); at::DeviceGuard device_guard(r.device(4)); - Tensor values = internal_new_from_data(dense_type, r.deviceOptional(4), r.pyobject(1), false, true, type_inference); - const auto& index_type = values.type().toScalarType(kLong); - Tensor indices = internal_new_from_data(index_type, r.deviceOptional(4), r.pyobject(0), false, true, false); - const auto& sparse_type_to_use = values.type().toBackend(values.type().is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU); - return at::sparse_coo_tensor(indices, values, r.intlist(2), sparse_type_to_use.options()).set_requires_grad(r.toBool(5)); + Tensor values = internal_new_from_data(values_type, r.deviceOptional(4), r.pyobject(1), false, true, type_inference); + const auto& indices_type = values.type().toScalarType(kLong); + Tensor indices = internal_new_from_data(indices_type, r.deviceOptional(4), r.pyobject(0), false, true, false); + return at::sparse_coo_tensor(indices, values, r.intlist(2), values.options().layout(at::kSparse)).set_requires_grad(r.toBool(5)); } else if (r.idx == 2) { - const auto& sparse_type_to_use = typeWithDefault(r, 1, 2, default_sparse_type); + const auto& type = typeWithDefault(r, 1, 2, default_type); at::DeviceGuard device_guard(r.device(2)); - return at::sparse_coo_tensor(r.intlist(0), sparse_type_to_use.options()).set_requires_grad(r.toBool(3)); + return at::sparse_coo_tensor(r.intlist(0), type.options().layout(at::kSparse)).set_requires_grad(r.toBool(3)); } throw std::runtime_error("sparse_coo_tensor(): invalid arguments"); }
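Together with sparse_constructor_values_backward in Functions.cpp, the rewritten constructor path is meant to let gradients flow back to the dense values argument while inferring dtype and layout from it. A minimal end-to-end sketch of that intended behavior (each entry contributes once to the sum, hence the all-ones gradient):

    # Sketch: dtype/layout inferred from `values`; gradient flows back through
    # the sparse constructor to the dense values tensor.
    import torch

    i = torch.tensor([[0, 1],
                      [1, 0]])
    v = torch.tensor([2., 3.], requires_grad=True)
    s = torch.sparse_coo_tensor(i, v, (2, 2))

    loss = s.to_dense().sum()
    loss.backward()
    print(v.grad)                # tensor([1., 1.])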