Skip to content

Commit

Permalink
DBString: Dedup strings
Browse files Browse the repository at this point in the history
  • Loading branch information
mateofio committed Aug 17, 2020
1 parent 37d8f39 commit f03e652
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 53 deletions.
148 changes: 113 additions & 35 deletions src/dbstring.cpp
Original file line number Diff line number Diff line change
@@ -1,63 +1,141 @@
#include "lcf/dbstring.h"
#include <unordered_map>
#include <memory>
#include <iostream>

namespace lcf {

constexpr DBString::size_type DBString::npos;
alignas(DBString::size_type) constexpr char DBString::_empty_str[sizeof(size_type)];

// Bytes needed to store a string of `len` characters: a size_type-sized
// prefix, the characters themselves, and one trailing byte.
static constexpr size_t AllocSize(size_t len) {
    return len + sizeof(DBString::size_type) + 1;
}
struct DBStringData {
using size_type = DBString::size_type;

// Obtains raw, uninitialised storage of AllocSize(len) bytes from the global
// operator new and hands it back as a char pointer.
static char* Alloc(size_t len) {
    void* block = ::operator new(AllocSize(len));
    return static_cast<char*>(block);
}
size_type refcnt;
size_type size;

static char* Dup(const char* other, size_t size) {
if (size > 0) {
auto* s = Alloc(size);
std::memcpy(s, other, AllocSize(size));
return s;
// Read-only pointer to the bytes that start immediately after this header
// object (`this + 1`).
const char* data() const {
return reinterpret_cast<const char*>(this + 1);
}
return nullptr;
}

// Returns a block to the global operator delete.
static void Free(void* block) {
    ::operator delete(block);
}

DBString::DBString(const char* s, size_t len)
{
if (len > 0) {
auto* ptr = Alloc(len);
_storage = ptr;

*reinterpret_cast<size_type*>(ptr) = len;
ptr += sizeof(size_type);
// Writable pointer to the bytes that start immediately after this header
// object (`this + 1`).
char* data() {
return reinterpret_cast<char*>(this + 1);
}

std::memcpy(ptr, s, len);
ptr += len;
// Number of bytes to allocate for one string: this header, the characters of
// `str`, and one extra trailing byte.
static size_type alloc_size(StringView str) {
return sizeof(DBStringData) + str.size() + 1;
}

*ptr = '\0';
// Steps back one header from a character pointer to reach the DBStringData
// in front of it. Only meaningful when `s` came from data() of a
// DBStringData, where the header sits directly before the characters.
static DBStringData* from_data(char* s) {
return reinterpret_cast<DBStringData*>(s) - 1;
}
};

// Deleter functor for DBStringData pointers. The call operator is only
// declared here; it is defined out of line further down, where it forwards to
// DBStringAllocator::Free.
struct DBStringDataDeleter {
void operator()(DBStringData* p);
};

// Owning pointer to a DBStringData block, released through DBStringDataDeleter.
using DBStringDataPtr = std::unique_ptr<DBStringData,DBStringDataDeleter>;

// Pool that deduplicates DBString storage.
//
// Every distinct non-empty string is kept in a single DBStringData block
// (header, then the characters, then a terminating '\0') together with a
// reference count. The pool maps string contents to the owning block; the key
// of each entry is a view of the characters owned by that entry's value.
// Nothing in this class synchronizes access to the pool.
class DBStringAllocator {
public:
    using size_type = DBString::size_type;

    // Allocates a block sized by DBStringData::alloc_size(str), constructs the
    // header in place with a reference count of 1, and copies `str` into it
    // followed by a '\0' terminator.
    static DBStringDataPtr Alloc(StringView str) {
        auto* raw = ::operator new(DBStringData::alloc_size(str));
        auto data = DBStringDataPtr(new (raw) DBStringData());
        data->refcnt = 1;
        data->size = str.size();
        std::memcpy(data->data(), str.data(), data->size);
        data->data()[data->size] = '\0';

        return data;
    }

    // Destroys the header and returns the block to the global operator delete.
    // This is the counterpart of Alloc().
    static void Free(DBStringData* data) {
        data->~DBStringData();
        ::operator delete(data);
    }

    // Returns the character pointer for `str`, taking one reference.
    // An empty `str` yields DBString::empty_str() and touches no pool state.
    // Otherwise an existing entry with the same contents is reused (its count
    // is incremented) or a new block is allocated and inserted.
    const char* Acquire(StringView str) {
        if (str.empty()) {
            return DBString::empty_str();
        }

        auto iter = _map.find(str);
        if (iter != _map.end()) {
            iter->second->refcnt += 1;
        } else {
            auto ptr = Alloc(str);
            // The key must view the pool-owned copy, not the caller's buffer.
            auto sv = StringView(ptr->data(), ptr->size);
            // FIXME: Double hash lookup because the key changes..
            iter = _map.insert({ sv, std::move(ptr) }).first;
        }
        return iter->second->data();
    }

    // Takes an additional reference on storage previously handed out by this
    // pool and returns the same pointer. The shared empty string is not
    // counted and is returned unchanged.
    const char* Dup(const char* s) {
        if (s != DBString::empty_str()) {
            auto* data = DBStringData::from_data(const_cast<char*>(s));
            data->refcnt += 1;
        }
        return s;
    }

    // Drops one reference to the entry whose contents equal `str`, erasing the
    // entry (and thereby freeing its block) when the count reaches zero.
    // Empty strings and strings not present in the pool are ignored.
    void Release(StringView str) {
        if (str.empty()) {
            // This is needed, due to global DBStrings which are initialized to null.
            // They may be destroyed *after* DBStringAllocator is destroyed!
            // FIXME: To fix this, use a hash table with constexpr default constructor.
            return;
        }
        auto iter = _map.find(str);
        if (iter != _map.end()) {
            auto& data = iter->second;
            // Check before decrementing: a post-decrement `>= 0` test can never
            // fail for an unsigned counter, so an over-release would wrap
            // around unnoticed.
            assert(data->refcnt > 0);
            data->refcnt -= 1;
            if (data->refcnt == 0) {
                _map.erase(iter);
            }
        }
    }

    // Accessor for the single shared pool, created on first use.
    static DBStringAllocator& instance() {
        static DBStringAllocator alloc;
        return alloc;
    }

private:
    // Construction is restricted to instance().
    DBStringAllocator() = default;

    // Keys are views into the character data owned by the mapped values.
    std::unordered_map<StringView,DBStringDataPtr> _map;
};

void DBStringDataDeleter::operator()(DBStringData* p) {
DBStringAllocator::Free(p);
}

DBString::DBString(StringView s)
: _storage(DBStringAllocator::instance().Acquire(s))
{
}

DBString::DBString(const DBString& o)
: _storage(Dup(o._storage, o.size()))
{ }
: _storage(DBStringAllocator::instance().Dup(o._storage))
{
}

DBString& DBString::operator=(const DBString& o) {
if (this != &o) {
// What if the strings are the same? Could we skip the double lookup?
_reset();
_storage = Dup(o._storage, o.size());
_storage = DBStringAllocator::instance().Dup(o._storage);
}
return *this;
}



void DBString::_reset() noexcept {
Free(_storage);
_storage = nullptr;
assert(_storage != nullptr);
DBStringAllocator::instance().Release(*this);
}

} // namespace lcf
41 changes: 23 additions & 18 deletions src/lcf/dbstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

#include "lcf/string_view.h"


namespace lcf {

// A read-only string class optimized for database storage.
Expand All @@ -36,22 +35,27 @@ class DBString {

static constexpr size_type npos = size_type(-1);

constexpr DBString() = default;
explicit DBString(const std::string& s) : DBString(s.c_str(), s.size()) {}
explicit DBString(StringView s) : DBString(s.data(), s.size()) {}
// FIXME: When the allocator constructor is constexpr, this constructor can be constexpr as well.
DBString() : DBString(StringView()) {}
explicit DBString(StringView s);
explicit DBString(const std::string& s) : DBString(StringView(s)) {}

// Explicit construct for general const char*
explicit DBString(const char* s) : DBString(s, std::strlen(s)) {}
explicit DBString(const char* s) : DBString(StringView(s)) {}
// Implicit constructor to capture string literals
template <size_t N>
DBString(const char(&literal)[N]) : DBString(StringView(literal)) {}
DBString(const char* s, size_t len);
DBString(const char* s, size_t len) : DBString(StringView(s, len)) {}

DBString(const DBString&);
DBString(const DBString& o);
DBString& operator=(const DBString&);
DBString(DBString&&) noexcept;
DBString& operator=(DBString&&) noexcept;

// Exchanges the storage pointers of the two strings; no characters are copied.
void swap(DBString& o) noexcept {
std::swap(_storage, o._storage);
}

~DBString() { _reset(); }

explicit operator std::string() const { return std::string(c_str(), size()); }
Expand All @@ -60,7 +64,7 @@ class DBString {
char operator[](size_t i) const;
char front() const { return (*this)[0]; }
char back() const { return (*this)[size()-1]; }
const char* data() const;
const char* data() const { return _storage; }
const char* c_str() const { return data(); }

iterator begin() const { return data(); }
Expand All @@ -71,10 +75,15 @@ class DBString {

bool empty() const { return size() == 0; }
size_type size() const;

// Pointer used as the storage of an empty string: the address
// sizeof(size_type) bytes into _empty_str.
// NOTE(review): _empty_str is declared with exactly sizeof(size_type)
// elements, so this address is one past the end of the array. Reading a
// character through it (e.g. treating it as a C string) would fall outside
// the array - confirm whether the array should carry one extra '\0' byte.
static constexpr const char* empty_str() {
return _empty_str + sizeof(size_type);
}
private:
void _reset() noexcept;
private:
char* _storage = nullptr;
alignas(size_type) static constexpr char _empty_str[sizeof(size_type)] = {};
const char* _storage = empty_str();
};

// This should be used over the conversion operator, so we can track all dbstr -> str instances
Expand Down Expand Up @@ -116,28 +125,24 @@ template <> struct hash<lcf::DBString> {
namespace lcf {

inline DBString::DBString(DBString&& o) noexcept
: _storage(o._storage)
{
o._storage = nullptr;
std::swap(_storage, o._storage);
}

inline DBString& DBString::operator=(DBString&& o) noexcept {
return operator=(o);
if (this != &o) {
if (_storage) {
if (!empty()) {
_reset();
}
_storage = o._storage;
o._storage = nullptr;
o._storage = empty_str();
}
return *this;
}

inline const char* DBString::data() const {
return _storage ? _storage + sizeof(size_type) : nullptr;
}

inline DBString::size_type DBString::size() const {
return _storage ? *reinterpret_cast<const size_type*>(_storage) : 0;
return *(reinterpret_cast<const size_type*>(_storage) - 1);
}

} // namespace lcf
Expand Down

0 comments on commit f03e652

Please sign in to comment.