Skip to content

Commit

Permalink
Add Hexagon VTCM and discontiguous allocation support (apache#9525)
Browse files Browse the repository at this point in the history
* WIP Allocation abstraction for VTCM and DDR.

* Add Hexagon VTCM and discontiguous allocation support

* differentiate between dimensions and allocations

* remove change to llvm codegen

* add integration test_add_vtcm to demo vtcm alloc

* remove cmake change

* forcing contiguous allocation in device API, for now

Co-authored-by: Chris Sullivan <[email protected]>
  • Loading branch information
adstraw and csullivan authored Dec 11, 2021
1 parent a28a8bf commit 2b35cfd
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 119 deletions.
239 changes: 183 additions & 56 deletions src/runtime/hexagon/hexagon/hexagon_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,83 +23,151 @@

#include <tvm/runtime/module.h>

#include "hexagon_common.h"

#if defined(__hexagon__)
#include "HAP_compute_res.h"
#endif

#include <string>
#include <utility>

#include "hexagon_common.h"

namespace tvm {
namespace runtime {
namespace hexagon {

/*!
 * \brief Pick the byte alignment to use for elements of the given data type.
 * \param dtype Element type; its `bits` and `lanes` fields determine the natural size.
 * \return The vector element size in bytes, raised to kAllocAlignment when it is smaller.
 */
static size_t GetDataAlignment(const DLDataType dtype) {
  // Whole bytes per lane times the lane count; sub-byte types yield 0 here and
  // therefore fall back to the minimum below.
  const size_t natural = (dtype.bits / 8) * dtype.lanes;
  return (natural < kAllocAlignment) ? kAllocAlignment : natural;
}
// Base record for one block of memory: the pointer, its size in bytes and the
// alignment it was requested with. Copy and move are deleted, so an Allocation
// can only be handled through a pointer or reference.
struct Allocation {
// Stores the requested size and alignment; data_ stays null until a derived
// constructor assigns it.
Allocation(size_t nbytes, size_t alignment) : nbytes_(nbytes), alignment_(alignment) {}
// Virtual so that derived allocations are destroyed correctly through Allocation*.
virtual ~Allocation() {}
Allocation(const Allocation&) = delete;
Allocation& operator=(const Allocation&) = delete;
Allocation(Allocation&&) = delete;
Allocation& operator=(Allocation&&) = delete;

// NOTE(review): the HexagonBuffer constructor below is spliced into the middle of
// this struct; a qualified out-of-class definition cannot appear here. It looks
// like residue from the diff view this text was captured from -- confirm against
// the real file before relying on it.
//
// Computes the flat byte size of a tensor with the given shape/dtype via
// GetDataSize, derives the alignment via GetDataAlignment, and move-assigns a
// flat buffer of that size into *this.
HexagonBuffer::HexagonBuffer(int ndim, const int64_t* shape, DLDataType dtype,
Optional<String> scope) {
// TODO(csullivan): Re-enable check on ndim <= 2 when physical layout support
// in MakePackedAPI is added.
// ICHECK_LE(ndim, 1) << "Hexagon currently only supports flat allocations "
// << "and arrays of flat allocations.";

// Only shape, ndim and dtype are set; the other DLTensor fields are left
// uninitialized (presumably GetDataSize reads only these three -- TODO confirm).
DLTensor t;
t.shape = const_cast<int64_t*>(shape);
t.ndim = ndim;
t.dtype = dtype;
size_t nbytes = GetDataSize(t);
size_t alignment = GetDataAlignment(dtype);
// TODO(csullivan): Extend to support arrays of allocations.
// Move assignment from r-value constructed flat allocation.
*this = HexagonBuffer(nbytes, alignment, scope);
}
// Start of the block; null until a derived constructor fills it in.
void* data_{nullptr};
// Size of the block in bytes, as requested at construction.
size_t nbytes_;
// Alignment in bytes, as requested at construction.
size_t alignment_;
};

HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope) {
void* ptr = nullptr;
int ret = posix_memalign(&ptr, alignment, nbytes);
if (ret != 0) {
throw std::bad_alloc();
// Allocation obtained from the host C runtime's aligned allocator
// (_aligned_malloc on Windows, posix_memalign elsewhere).
struct DDRAllocation : public Allocation {
// Requests `nbytes` bytes aligned to `alignment` and stores the pointer in data_.
// A failed request trips the CHECK / CHECK_EQ on the corresponding platform path.
DDRAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
#ifdef _WIN32
data_ = _aligned_malloc(nbytes, alignment);
CHECK(data_ != nullptr);
#else
int ret = posix_memalign(&data_, alignment, nbytes);
CHECK_EQ(ret, 0);
#endif
}
// NOTE(review): the next two statements sit at class scope and use names (`ptr`,
// `scope`, `allocations_`) that this struct does not declare. They look like
// residue from the diff view this text was captured from -- confirm against the
// real file.
allocations_.push_back(ptr);
SetStorageScope(scope);
// Returns the block with the deallocator that pairs with the allocator used in
// the constructor on the same platform.
~DDRAllocation() {
#ifdef _WIN32
_aligned_free(data_);
#else
free(data_);
#endif
}
};

#if defined(__hexagon__)
/*!
 * \brief Allocation backed by Hexagon VTCM, obtained through the HAP compute
 *        resource API.
 *
 * The constructor requests `nbytes` of VTCM on a single page. On failure it logs
 * an error and leaves `data_` null instead of aborting, so callers must check
 * `data_` before use. `context_id_` is non-zero only while a compute resource is
 * held, which is what makes the destructor safe to run after a failed or
 * partially failed construction.
 *
 * NOTE(review): `alignment` is recorded in the base class but is not forwarded
 * to the HAP API by this code.
 */
struct VTCMAllocation : public Allocation {
  VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
    compute_res_attr_t res_info;
    HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));

    // allocate nbytes of vtcm on a single page
    HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes,
                                                          /*b_single_page = */ 1));
    context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 10000);

    // A zero context id means the acquire failed; nothing is held, nothing to release.
    if (!context_id_) {
      HEXAGON_PRINT(ERROR, "ERROR: Unable to acquire requeisted resource.");
      return;
    }

    data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
    if (!data_) {
      HEXAGON_PRINT(ERROR, "ERROR: Allocated VTCM ptr is null.");
      HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
      // Mark the context as already released so the destructor does not release it again.
      context_id_ = 0;
      return;
    }
  }

  ~VTCMAllocation() {
    // Release only a context that is actually held: the id is zero when the
    // acquire failed or when the constructor already released it.
    if (context_id_) {
      HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
      context_id_ = 0;
    }
    data_ = nullptr;
  }

  /*! \brief Handle returned by HAP_compute_res_acquire; zero when no resource is held. */
  unsigned int context_id_{0};
};
#else
struct VTCMAllocation : public DDRAllocation {
VTCMAllocation(size_t nbytes, size_t alignment) : DDRAllocation(nbytes, alignment) {}
};
#endif

/*!
 * \brief Create an allocation of `nbytes` bytes with the given alignment in the
 *        memory selected by the storage scope `S`.
 *
 * Only the explicit specializations are defined; there is no generic body.
 */
template <HexagonBuffer::StorageScope S>
std::unique_ptr<Allocation> Allocator(size_t nbytes, size_t alignment);

/*! \brief kDDR specialization: builds a DDRAllocation. */
template <>
std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kDDR>(size_t nbytes,
                                                                         size_t alignment) {
  std::unique_ptr<Allocation> ddr = std::make_unique<DDRAllocation>(nbytes, alignment);
  return ddr;
}

HexagonBuffer::HexagonBuffer(void* data, Optional<String> scope) : managed_{false} {
/*! \brief kVTCM specialization: builds a VTCMAllocation of `nbytes` bytes. */
template <>
std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kVTCM>(size_t nbytes,
                                                                          size_t alignment) {
  std::unique_ptr<Allocation> vtcm = std::make_unique<VTCMAllocation>(nbytes, alignment);
  return vtcm;
}

// Builds a buffer that owns one allocation of `nbytes` bytes, aligned to
// `alignment`, in the memory selected by `scope` (nallocs_ is fixed at 1).
HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope)
: nallocs_(1), nbytes_(nbytes) {
SetStorageScope(scope);
// NOTE(review): `data` is not declared in this constructor. This line looks like
// residue from the diff view this text was captured from -- confirm against the
// real file.
allocations_.push_back(data);

// Stays null when the scope is neither kDDR nor kVTCM; the CHECK below rejects that.
std::unique_ptr<Allocation> alloca = nullptr;
if (GetStorageScope() == StorageScope::kDDR) {
alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
} else if (GetStorageScope() == StorageScope::kVTCM) {
alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
}
CHECK(alloca != nullptr);
// Record the raw pointer first, then hand ownership to managed_allocations_;
// alloca is empty after the move.
allocations_.push_back(alloca->data_);
managed_allocations_.push_back(std::move(alloca));
}

HexagonBuffer::~HexagonBuffer() {
if (managed_) {
for (auto& ptr : allocations_) {
free(ptr);
/*!
 * \brief Build a buffer made of `nallocs` separate allocations of `nbytes` bytes each.
 * \param nallocs Number of allocations to create.
 * \param nbytes Size in bytes of each individual allocation.
 * \param alignment Alignment in bytes requested for every allocation.
 * \param scope Storage scope selecting the backing memory for all allocations.
 *
 * The total size recorded in nbytes_ is nallocs * nbytes.
 */
HexagonBuffer::HexagonBuffer(size_t nallocs, size_t nbytes, size_t alignment,
                             Optional<String> scope)
    : nallocs_(nallocs), nbytes_(nallocs * nbytes) {
  SetStorageScope(scope);
  // The scope is fixed for the whole buffer, so resolve it once up front.
  const StorageScope where = GetStorageScope();
  for (size_t remaining = nallocs; remaining != 0; --remaining) {
    std::unique_ptr<Allocation> alloca;
    if (where == StorageScope::kVTCM) {
      alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
    } else if (where == StorageScope::kDDR) {
      alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
    }
    // Any other scope leaves the handle empty and is rejected here.
    CHECK(alloca != nullptr);
    // Keep the raw pointer for GetPointer(), then transfer ownership.
    allocations_.push_back(alloca->data_);
    managed_allocations_.push_back(std::move(alloca));
  }
}

HexagonBuffer::HexagonBuffer(HexagonBuffer&& other)
: allocations_(other.allocations_),
managed_(other.managed_),
storage_scope_(other.storage_scope_) {
other.allocations_.clear();
other.managed_ = false;
other.storage_scope_ = StorageScope::kDDR;
/*!
 * \brief Wrap memory supplied by the caller as a single allocation.
 * \param data Pointer to the external memory; recorded but not added to
 *        managed_allocations_.
 * \param nbytes Size in bytes of the external memory.
 * \param scope Storage scope of the memory; must not resolve to kVTCM.
 */
HexagonBuffer::HexagonBuffer(void* data, size_t nbytes, Optional<String> scope)
    : nallocs_(1), nbytes_(nbytes) {
  // The scope has to be set before it can be validated.
  SetStorageScope(scope);

  // disallow external VTCM allocations
  CHECK(GetStorageScope() != HexagonBuffer::StorageScope::kVTCM);

  allocations_.emplace_back(data);
}

/*!
 * \brief Move assignment implemented as a member-wise exchange with `other`.
 *
 * After the call `other` holds what this buffer held before, so that state is
 * cleaned up whenever `other` is destroyed.
 */
HexagonBuffer& HexagonBuffer::operator=(HexagonBuffer&& other) {
  // The three exchanges are independent of one another.
  std::swap(storage_scope_, other.storage_scope_);
  std::swap(managed_, other.managed_);
  std::swap(allocations_, other.allocations_);
  return *this;
}
/*! \brief Drop every owned allocation; externally supplied memory is not tracked here. */
HexagonBuffer::~HexagonBuffer() {
  // Clearing the container destroys each owning handle it holds.
  managed_allocations_.clear();
}

void* HexagonBuffer::GetPointer() {
// Returns a pointer to the internal array of allocation pointers, or nullptr
// when the buffer holds no allocations.
void** HexagonBuffer::GetPointer() {
if (!allocations_.size()) {
return nullptr;
}
// NOTE(review): the next return yields either the array or its first element,
// which does not match the void** return type, and it makes the final return
// unreachable. It looks like residue from the diff view this text was captured
// from -- confirm against the real file.
return (allocations_.size() > 1) ? allocations_.data() : allocations_[0];
// The array is owned by allocations_; presumably the result is invalidated if
// that container is modified -- verify against callers.
return allocations_.data();
}

/*! \brief Return the storage scope currently recorded for this buffer. */
HexagonBuffer::StorageScope HexagonBuffer::GetStorageScope() const {
  return storage_scope_;
}
Expand All @@ -119,11 +187,70 @@ void HexagonBuffer::SetStorageScope(Optional<String> scope) {
}
}

HexagonBuffer* IsHexagonBuffer(DLTensor* tensor) {
if (TVMDeviceExtType(tensor->device.device_type) == kDLHexagon) {
return static_cast<HexagonBuffer*>(tensor->data);
void HexagonBuffer::CopyTo(void* data, size_t nbytes) {
CHECK(nbytes_ == nbytes);
size_t offset = 0;
for (size_t i = 0; i < nallocs_; ++i) {
CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(data) + offset,
static_cast<const char*>(managed_allocations_[i]->data_),
managed_allocations_[i]->nbytes_);

offset += managed_allocations_[i]->nbytes_;
}
}

void HexagonBuffer::CopyFrom(void* data, size_t nbytes) {
CHECK(nbytes_ == nbytes);
size_t offset = 0;
for (size_t i = 0; i < nallocs_; ++i) {
CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[i]->data_),
static_cast<const char*>(data) + offset, managed_allocations_[i]->nbytes_);

offset += managed_allocations_[i]->nbytes_;
}
}

// Copies the contents of `other` into this buffer. Both buffers must have the
// same total size. Three layouts are handled: equal allocation counts (copied
// pairwise), this buffer having a single allocation (the pieces of `other` are
// gathered back to back), and `other` having a single allocation (its bytes are
// scattered over this buffer's allocations). Any other combination fails.
//
// NOTE(review): every branch indexes managed_allocations_. A buffer built from
// an external pointer only fills allocations_, so for such a buffer these
// accesses would be out of range -- confirm whether that case can reach here.
void HexagonBuffer::CopyFrom(const HexagonBuffer& other) {
CHECK(nbytes_ == other.nbytes_);

if (nallocs_ == other.nallocs_) {
// Same layout: copy allocation i to allocation i.
for (size_t i = 0; i < nallocs_; ++i) {
CHECK(managed_allocations_[i]->nbytes_ == other.managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[i]->data_),
static_cast<const char*>(other.managed_allocations_[i]->data_),
managed_allocations_[i]->nbytes_);
}
} else if (nallocs_ == 1) {
// Gather: append each source allocation at a running offset in the single
// destination allocation.
size_t offset = 0;
for (size_t i = 0; i < other.nallocs_; ++i) {
CHECK(nbytes_ / other.nallocs_ == other.managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[0]->data_) + offset,
static_cast<const char*>(other.managed_allocations_[i]->data_),
other.managed_allocations_[i]->nbytes_);

offset += other.managed_allocations_[i]->nbytes_;
}
} else if (other.nallocs_ == 1) {
// Scatter: read consecutive pieces of the single source allocation into each
// destination allocation.
size_t offset = 0;
for (size_t i = 0; i < nallocs_; ++i) {
CHECK(other.nbytes_ / nallocs_ == managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[i]->data_),
static_cast<const char*>(other.managed_allocations_[0]->data_) + offset,
managed_allocations_[i]->nbytes_);

offset += managed_allocations_[i]->nbytes_;
}
} else {
CHECK(false) << "To copy between Hexagon Buffers they must either have the same number of "
"dimensions or one of the Hexagon Buffers must have a single dimension.";
}
// NOTE(review): this function returns void, so the statement below cannot be
// part of it. It looks like residue from the diff view this text was captured
// from -- confirm against the real file.
return nullptr;
}

} // namespace hexagon
Expand Down
Loading

0 comments on commit 2b35cfd

Please sign in to comment.