Skip to content

Commit

Permalink
import from vin repo
Browse files Browse the repository at this point in the history
  • Loading branch information
VinInn committed May 4, 2022
1 parent 4ac34bc commit f37385a
Show file tree
Hide file tree
Showing 8 changed files with 823 additions and 0 deletions.
216 changes: 216 additions & 0 deletions HeterogeneousCore/CUDAUtilities/interface/SimplePoolAllocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#pragma once

#include<atomic>
#include<array>
#include<memory>
#include<algorithm>
#include<cassert>
#include<mutex>
#include <vector>
#include <cstdint>
#include<iostream>
#include<chrono>

namespace poolDetails {

  /// Index of the smallest power-of-two bucket able to hold `s` bytes,
  /// i.e. the smallest b such that bucketSize(b) >= s.
  ///
  /// Sizes 0 and 1 map to bucket 0: the count-leading-zeros builtin has an
  /// undefined result for a zero argument, so those inputs are handled
  /// explicitly instead of being passed through as (s - 1).
  /// The `ll` flavour of the builtin is used so the full 64-bit value is
  /// inspected even where `long` is only 32 bits wide.
  /// Requests above 2^63 bytes have no representable bucket.
  constexpr int bucket(uint64_t s) {
    return s <= 1 ? 0 : 64 - __builtin_clzll(static_cast<unsigned long long>(s - 1));
  }

  /// Capacity in bytes of bucket `b` (2^b). The shift is done on an unsigned
  /// value so that b == 63 does not overflow a signed integer.
  constexpr uint64_t bucketSize(int b) { return static_cast<uint64_t>(1) << b; }

}  // namespace poolDetails


/**
 * Fixed-capacity pool of memory slots, each sized to a power-of-two bucket.
 *
 * A slot is identified by its integer index. Ownership of a slot is tracked
 * by the atomic flag m_used[i]: whoever flips it from false to true owns the
 * slot until it stores false again. Raw memory is obtained and returned
 * through the doAlloc/doFree hooks implemented by the derived class.
 *
 * The destructor is defaulted: memory still cached in the slots is not handed
 * back to doFree when the pool object is destroyed.
 */
class SimplePoolAllocator {
public:
  using Pointer = void *;

  virtual ~SimplePoolAllocator() = default;

  /// Obtain `size` bytes of raw storage; nullptr signals failure.
  virtual Pointer doAlloc(size_t size) = 0;
  /// Return storage previously obtained from doAlloc.
  virtual void doFree(Pointer ptr) = 0;

  /// Create a pool with room for at most `maxSlots` slots.
  /// Every flag starts as "used" so that a slot becomes visible to the scans
  /// (through size()) while it is already owned by the thread creating it.
  SimplePoolAllocator(int maxSlots) : m_maxSlots(maxSlots) {
    for (auto &flag : m_used)
      flag = true;
  }

  /// Number of slots handed out so far (never exceeds maxSlots).
  int size() const { return m_size; }

  /// Storage currently attached to slot `i` (nullptr if none).
  Pointer pointer(int i) const { return m_slots[i]; }

  /// Give slot `i` back to the pool; its storage stays cached for reuse.
  void free(int i) {
    m_last[i] = -1;
    m_used[i] = false;
  }

  /// Reserve a slot able to hold `s` bytes.
  /// If the first attempt fails, all idle slots are released through
  /// garbageCollect() and the allocation is tried once more.
  /// @return the slot index, or -1 if no slot could be provided.
  int alloc(uint64_t s) {
    auto i = allocImpl(s);

    //test garbage
    // if(totBytes>4507964512) garbageCollect();

    if (i >= 0) {
      assert(m_used[i]);
      if (nullptr == m_slots[i])
        std::cout << "race ??? " << i << ' ' << m_bucket[i] << ' ' << m_last[i] << std::endl;
      assert(m_slots[i]);
      return i;
    }
    garbageCollect();
    i = allocImpl(s);
    if (i >= 0) {
      assert(m_used[i]);
      assert(m_slots[i]);
      assert(m_last[i] >= 0);
    }
    return i;
  }

  /// Single allocation attempt: reuse an idle slot of the right bucket, else
  /// rebuild a garbage-collected slot, else open a brand new slot.
  /// @return the slot index, or -1 on failure.
  int allocImpl(uint64_t s) {
    auto b = poolDetails::bucket(s);
    assert(s <= poolDetails::bucketSize(b));
    int ls = size();
    // look for an existing slot
    for (int i = 0; i < ls; ++i) {
      if (b != m_bucket[i]) continue;
      if (m_used[i]) continue;
      bool exp = false;
      if (!m_used[i].compare_exchange_strong(exp, true)) continue;
      // The slot is ours now. Between the unlocked checks above and the
      // exchange it may have been garbage collected (storage gone) or even
      // rebuilt with a different bucket and freed again, so re-validate both
      // before handing it out; otherwise a too-small buffer could be returned.
      if (nullptr == m_slots[i] || b != m_bucket[i]) {
        m_used[i] = false;
        continue;
      }
      m_last[i] = 0;
      return i;
    }

    // try to create in existing slot (if garbage has been collected)
    ls = useOld(b);
    if (ls >= 0) return ls;

    // try to allocate a new slot: reserve the index with a CAS loop so that
    // m_size can never run past m_maxSlots (an unconditional increment would
    // let concurrent callers push size() beyond the vectors' extent).
    int cur = m_size.load();
    do {
      if (cur >= m_maxSlots) return -1;
    } while (!m_size.compare_exchange_weak(cur, cur + 1));
    ls = cur;
    m_last[ls] = 2;
    return createAt(ls, b);
  }

  /// Attach freshly allocated storage of bucket `b` to slot `ls`.
  /// The caller must already own the slot (m_used[ls] == true).
  /// @return `ls` on success; -1 if doAlloc failed, in which case the slot is
  ///         reset to the empty state and released so it can be retried later
  ///         instead of staying marked as used forever.
  int createAt(int ls, int b) {
    assert(m_used[ls]);
    assert(m_last[ls] > 0);
    m_bucket[ls] = b;
    auto as = poolDetails::bucketSize(b);
    assert(nullptr == m_slots[ls]);
    m_slots[ls] = doAlloc(as);
    if (nullptr == m_slots[ls]) {
      m_bucket[ls] = -1;
      m_last[ls] = -3;
      m_used[ls] = false;  // release last: publishes the reset state
      return -1;
    }
    totBytes += as;
    nAlloc++;
    return ls;
  }

  /// Release the storage of every slot that is currently idle and mark those
  /// slots as empty (bucket -1) so that useOld() can rebuild them.
  void garbageCollect() {
    int ls = size();
    for (int i = 0; i < ls; ++i) {
      if (m_used[i]) continue;
      if (m_bucket[i] < 0) continue;
      bool exp = false;
      if (!m_used[i].compare_exchange_strong(exp, true)) continue;
      assert(m_used[i]);
      if (nullptr != m_slots[i]) {
        assert(m_bucket[i] >= 0);
        doFree(m_slots[i]);
        nFree++;
        totBytes -= poolDetails::bucketSize(m_bucket[i]);
      }
      m_slots[i] = nullptr;
      m_bucket[i] = -1;
      m_last[i] = -3;
      m_used[i] = false;  // here memory fence as well
    }
  }

  /// Find an empty (garbage-collected) slot and allocate bucket `b` in it.
  /// @return the slot index, or -1 if no empty slot was found or the
  ///         allocation failed.
  int useOld(int b) {
    int ls = size();
    for (int i = 0; i < ls; ++i) {
      if (m_bucket[i] >= 0) continue;
      if (m_used[i]) continue;
      bool exp = false;
      if (!m_used[i].compare_exchange_strong(exp, true)) continue;
      if (nullptr != m_slots[i]) {  // ops allocated and freed
        // Somebody rebuilt and freed the slot in the meantime: it is a valid
        // idle slot, not an empty one, so put it back untouched.
        assert(m_bucket[i] >= 0);
        assert(m_last[i] == -1);  // comparison, not assignment
        m_used[i] = false;
        continue;
      }
      assert(m_used[i]);
      m_last[i] = 1;
      return createAt(i, b);
    }
    return -1;
  }

  /// Print slot and byte counters to std::cout.
  void dumpStat() const {
    uint64_t fn = 0;
    uint64_t fs = 0;
    int ls = size();
    for (int i = 0; i < ls; ++i) {
      if (m_used[i]) {
        auto b = m_bucket[i];
        if (b < 0) continue;
        fn++;
        fs += poolDetails::bucketSize(b);
      }
    }
    std::cout << "# slots " << size() << '\n'
              << "# bytes " << totBytes << '\n'
              << "# alloc " << nAlloc << '\n'
              << "# free " << nFree << '\n'
              << "# used " << fn << ' ' << fs << '\n'
              << std::endl;
  }

private:
  const int m_maxSlots;

  // Tag of the last operation applied to each slot:
  //  -2 never touched, -1 freed, -3 garbage collected / empty,
  //   0 reused, 1 rebuilt in an empty slot, 2 created in a new slot.
  std::vector<int> m_last = std::vector<int>(m_maxSlots, -2);

  // Bucket index of each slot, -1 while the slot holds no storage.
  std::vector<int> m_bucket = std::vector<int>(m_maxSlots, -1);
  std::vector<Pointer> m_slots = std::vector<Pointer>(m_maxSlots, nullptr);
  std::vector<std::atomic<bool>> m_used = std::vector<std::atomic<bool>>(m_maxSlots);
  std::atomic<int> m_size = 0;

  std::atomic<uint64_t> totBytes = 0;
  std::atomic<uint64_t> nAlloc = 0;
  std::atomic<uint64_t> nFree = 0;
};


/// Concrete pool whose raw storage is supplied by the static functions
/// Traits::alloc(size) and Traits::free(ptr).
template <typename Traits>
struct SimplePoolAllocatorImpl final : public SimplePoolAllocator {
  SimplePoolAllocatorImpl(int maxSlots) : SimplePoolAllocator(maxSlots) {}

  ~SimplePoolAllocatorImpl() override = default;

  /// Forward the request for `size` bytes to the traits class.
  Pointer doAlloc(size_t size) override {
    return Traits::alloc(size);
  }

  /// Hand `ptr` back to the traits class.
  void doFree(Pointer ptr) override {
    Traits::free(ptr);
  }
};


#include <cstdlib>
/// Allocation traits backed by the C heap (malloc / free).
struct PosixAlloc {
  using Pointer = void *;

  /// Request `size` bytes from the C heap; the result of malloc is returned
  /// unchanged, so it is null when the request cannot be satisfied.
  static Pointer alloc(size_t size) {
    Pointer block = std::malloc(size);
    return block;
  }

  /// Give back a block previously returned by alloc().
  static void free(Pointer ptr) {
    std::free(ptr);
  }
};


86 changes: 86 additions & 0 deletions HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#pragma once
#include "memoryPool.h"
#include <vector>

// only for cudaStream_t
#include<cuda.h>


namespace memoryPool {
namespace cuda {

// Print pool statistics.
// NOTE(review): the implementation is not part of this header; presumably it
// forwards to SimplePoolAllocator::dumpStat for each pool -- confirm.
void dumpStat();

// Return the pool allocator associated with `where`.
// NOTE(review): ownership and lifetime of the returned pointer are not visible
// from this header -- confirm against the implementation.
SimplePoolAllocator * getPool(Where where);

// allocate either on current device or on host
// Returns {pointer, slot index}. The make_unique helpers in this header treat
// a negative slot index as an allocation failure and store the index in the
// deleter so it can later be passed to free().
std::pair<void *,int> alloc(uint64_t size, SimplePoolAllocator & pool);

// schedule free
// `buckets` holds the slot indices (as returned in alloc().second) to give
// back to `pool`.
// NOTE(review): presumably the release is ordered after the work already
// queued on `stream` -- confirm against the implementation.
void free(cudaStream_t stream, std::vector<int> buckets, SimplePoolAllocator & pool);


/// DeleterBase that additionally remembers the CUDA stream handed to
/// memoryPool::cuda::free by the derived deleters.
struct CudaDeleterBase : public DeleterBase {
  /// Bind to an explicitly supplied pool.
  CudaDeleterBase(cudaStream_t const &stream, SimplePoolAllocator *pool)
      : DeleterBase(pool), m_stream(stream) {}

  /// Bind to the pool that getPool() returns for `where`.
  CudaDeleterBase(cudaStream_t const &stream, Where where)
      : DeleterBase(getPool(where)), m_stream(stream) {}

  ~CudaDeleterBase() override = default;

  cudaStream_t m_stream;  ///< stream passed along with every free request
};

/// Deleter that schedules the release of each slot individually, as soon as
/// it is invoked.
struct DeleteOne final : public CudaDeleterBase {
  using CudaDeleterBase::CudaDeleterBase;

  ~DeleteOne() override = default;

  /// Schedule the release of slot `bucket` on the stored stream.
  void operator()(int bucket) override {
    std::vector<int> single{bucket};
    free(m_stream, std::move(single), *pool());
  }
};

/// Deleter that only records the slots it is asked to release and schedules
/// the release of all of them together when it is itself destroyed.
struct BundleDelete final : public CudaDeleterBase {
  using CudaDeleterBase::CudaDeleterBase;

  /// Hand every recorded slot to free() in a single call.
  ~BundleDelete() override { free(m_stream, std::move(m_buckets), *pool()); }

  /// Remember slot `bucket` for the bulk release done in the destructor.
  void operator()(int bucket) override { m_buckets.emplace_back(bucket); }

  std::vector<int> m_buckets;  ///< slots waiting to be released
};

/// Allocate room for `size` objects of type T from the pool referenced by
/// `del` and wrap it in a memoryPool::unique_ptr that releases the slot
/// through `del`.
///
/// @param size number of T elements (not bytes)
/// @param del  deleter bound to the pool to allocate from; its bucket is set
///             to the slot index returned by alloc()
/// @throws std::bad_alloc if the byte count overflows, if `del` reports no
///         pool, or if the pool cannot provide a slot
template <typename T>
unique_ptr<T> make_unique(uint64_t size, Deleter del) {
  // sizeof(T) * size must not wrap around, or a too-small slot would be used
  if (size > ~static_cast<uint64_t>(0) / sizeof(T)) throw std::bad_alloc();
  auto *pool = del.pool();
  if (nullptr == pool) throw std::bad_alloc();
  auto ret = alloc(sizeof(T) * size, *pool);
  if (ret.second < 0) throw std::bad_alloc();
  del.setBucket(ret.second);
  // move the deleter: copying it would only bump the shared_ptr refcount
  return unique_ptr<T>(static_cast<T *>(ret.first), std::move(del));
}

/// Allocate room for `size` objects of type T from the pool selected by
/// `where`; the slot is released individually (DeleteOne) on `stream`.
///
/// @param size number of T elements (not bytes). The element count is
///             forwarded unchanged: the Deleter overload already multiplies
///             by sizeof(T), so scaling here as well would request
///             sizeof(T) times too much memory.
/// @throws std::bad_alloc if the pool cannot provide a slot
template <typename T>
unique_ptr<T> make_unique(uint64_t size, cudaStream_t const &stream, Where where) {
  return make_unique<T>(size, Deleter(std::make_shared<DeleteOne>(stream, getPool(where))));
}

/*
template< class T, class... Args >
memoryPool::unique_ptr<T> make_unique( Args&&... args );
template< class T, class... Args >
memoryPool::unique_ptr<T> make_unique(Deleter del, Args&&... args );
*/

} // cuda
} // memoryPool
50 changes: 50 additions & 0 deletions HeterogeneousCore/CUDAUtilities/interface/memoryPool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once
#include<memory>
#include<new>
#include<utility>


class SimplePoolAllocator;

namespace memoryPool {

  /// Selector passed to memoryPool::cuda::getPool to choose a pool.
  /// NOTE(review): the enumerator names suggest plain CPU memory, device
  /// memory, host memory and unified memory -- confirm against the
  /// implementation of getPool.
  enum Where { onCPU, onDevice, onHost, unified };

  /// Polymorphic release policy bound to one pool.
  /// operator() receives the slot index ("bucket") that has to be released.
  class DeleterBase {
  public:
    explicit DeleterBase(SimplePoolAllocator *pool) : m_pool(pool) {}
    virtual ~DeleterBase() = default;

    /// Release (or record for later release) slot `bucket`.
    virtual void operator()(int bucket) = 0;

    /// Pool this deleter was constructed with.
    SimplePoolAllocator *pool() const { return m_pool; }

  protected:
    SimplePoolAllocator *m_pool;
  };

  /// Deleter type used by memoryPool::unique_ptr: it stores the slot index of
  /// the allocation and forwards the release to a shared DeleterBase.
  class Deleter {
  public:
    /// Deleter without a release policy; one can be attached later with set().
    explicit Deleter(int bucket = -1) : m_bucket(bucket) {}

    /// Deleter forwarding to `del`. The shared_ptr is taken by value and moved
    /// into place, so no extra reference-count update is made.
    Deleter(std::shared_ptr<DeleterBase> del, int bucket = -1) : me(std::move(del)), m_bucket(bucket) {}

    void set(std::shared_ptr<DeleterBase> del) { me = std::move(del); }
    void setBucket(int bucket) { m_bucket = bucket; }

    /// Release the stored slot through the attached policy. The pointer itself
    /// is not used: the slot index identifies the allocation.
    /// @throws std::bad_alloc if no policy is attached (kept for compatibility
    ///         with existing callers).
    void operator()(void *p) {
      (void)p;
      if (!me) throw std::bad_alloc();
      (*me)(m_bucket);
    }

    /// Pool of the attached policy, or nullptr when no policy is attached
    /// (a Deleter built from a bucket only used to dereference a null pointer
    /// here).
    SimplePoolAllocator *pool() const { return me ? me->pool() : nullptr; }

  private:
    std::shared_ptr<DeleterBase> me;
    int m_bucket;
  };

  /// std::unique_ptr whose storage is returned to a pool through Deleter.
  template <typename T>
  using unique_ptr = std::unique_ptr<T, Deleter>;

}  // namespace memoryPool
Loading

0 comments on commit f37385a

Please sign in to comment.