Skip to content

Commit

Permalink
Refactor and improve string literal emission (#3492)
Browse files Browse the repository at this point in the history
Fixes #3490 by avoiding unnecessary extra key allocations for the literals cache etc.
Also gets rid of code duplication and improves IRState encapsulation.
  • Loading branch information
looked-at-me authored Jul 10, 2020
1 parent e6a2dd8 commit 1b5b405
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 80 deletions.
5 changes: 5 additions & 0 deletions dmd/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,11 @@ class StringExp : public Expression
assert(sz == 1);
return {len, static_cast<const char *>(string)};
}
// ditto
// Returns a non-owning byte view of the literal's storage: `len * sz` bytes
// starting at `string`, reinterpreted as unsigned chars.
DArray<const unsigned char> peekData() const
{
    const auto *firstByte = static_cast<const unsigned char *>(string);
    const auto byteCount = len * sz;
    return {byteCount, firstByte};
}
#endif
size_t numberOfCodeUnits(int tynto = 0) const;
void writeTo(void* dest, bool zero, int tyto = 0) const;
Expand Down
58 changes: 58 additions & 0 deletions gen/irstate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "dmd/statement.h"
#include "gen/funcgenstate.h"
#include "gen/llvm.h"
#include "gen/llvmhelpers.h"
#include "gen/tollvm.h"
#include "ir/irfunction.h"
#include <cstdarg>
Expand Down Expand Up @@ -188,6 +189,63 @@ void IRState::setStructLiteralConstant(StructLiteralExp *sle,

////////////////////////////////////////////////////////////////////////////////

namespace {
/// Returns the global variable stored in `cache` under `key`.
///
/// On a cache miss, `initFactory()` is invoked to produce the initializer; a
/// new constant global named ".str" with private linkage is then created in
/// `module`, marked as globally unnamed-addr, stored in `cache` under `key`
/// and returned. `initFactory` is not called when the key is already cached.
template <typename F>
LLGlobalVariable *
getCachedStringLiteralImpl(llvm::Module &module,
                           llvm::StringMap<LLGlobalVariable *> &cache,
                           llvm::StringRef key, F initFactory) {
  const auto cached = cache.find(key);
  if (cached == cache.end()) {
    // Miss: build the initializer lazily and wrap it in a fresh global.
    LLConstant *const initializer = initFactory();

    auto *const literalVar = new LLGlobalVariable(
        module, initializer->getType(), /*isConstant=*/true,
        LLGlobalValue::PrivateLinkage, initializer, ".str");
    literalVar->setUnnamedAddr(LLGlobalValue::UnnamedAddr::Global);

    cache[key] = literalVar;
    return literalVar;
  }

  // Hit: reuse the previously emitted global.
  return cached->second;
}
} // anonymous namespace

/// Returns the cached global variable for the string literal `se`, creating
/// it if no literal with the same raw data has been requested before.
///
/// A separate cache is used per code unit size (`se->sz` of 1, 2 or 4); the
/// cache key is the literal's raw byte data as returned by `peekData()`.
/// New globals are initialized via `buildStringLiteralConstant(se, true)`.
LLGlobalVariable *IRState::getCachedStringLiteral(StringExp *se) {
  // Select the cache that matches the literal's code unit size.
  llvm::StringMap<LLGlobalVariable *> *literalCache = nullptr;
  switch (se->sz) {
  case 1:
    literalCache = &cachedStringLiterals;
    break;
  case 2:
    literalCache = &cachedWstringLiterals;
    break;
  case 4:
    literalCache = &cachedDstringLiterals;
    break;
  default:
    llvm_unreachable("Unknown char type");
  }

  // Key on the raw bytes of the literal; the StringRef only borrows them.
  const DArray<const unsigned char> rawBytes = se->peekData();
  const char *const keyBegin = reinterpret_cast<const char *>(rawBytes.ptr);
  const llvm::StringRef cacheKey(keyBegin, rawBytes.length);

  // Only invoked on a cache miss.
  const auto makeInitializer = [se]() {
    return buildStringLiteralConstant(se, true);
  };

  return getCachedStringLiteralImpl(module, *literalCache, cacheKey,
                                    makeInitializer);
}

/// Returns the cached global variable for the string `s`, creating it on
/// first use. Lookups go through `cachedStringLiterals`, and a new global is
/// initialized with `llvm::ConstantDataArray::getString(context(), s, true)`.
LLGlobalVariable *IRState::getCachedStringLiteral(llvm::StringRef s) {
  // Only invoked on a cache miss.
  const auto makeInitializer = [this, &s]() {
    return llvm::ConstantDataArray::getString(context(), s, true);
  };

  return getCachedStringLiteralImpl(module, cachedStringLiterals, s,
                                    makeInitializer);
}

////////////////////////////////////////////////////////////////////////////////

void IRState::addLinkerOption(llvm::ArrayRef<llvm::StringRef> options) {
llvm::SmallVector<llvm::Metadata *, 2> mdStrings;
mdStrings.reserve(options.size());
Expand Down
21 changes: 13 additions & 8 deletions gen/irstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ struct IRState {
// enough to use it directly.]
llvm::DenseMap<void *, llvm::Constant *> structLiteralConstants;

// Global variables bound to string literals. Once created such a variable
// is reused whenever an equivalent string literal is referenced in the
// module, to prevent duplicates.
llvm::StringMap<llvm::GlobalVariable *> cachedStringLiterals;
llvm::StringMap<llvm::GlobalVariable *> cachedWstringLiterals;
llvm::StringMap<llvm::GlobalVariable *> cachedDstringLiterals;

public:
IRState(const char *name, llvm::LLVMContext &context);
~IRState();
Expand Down Expand Up @@ -207,14 +214,6 @@ struct IRState {
/// Whether to emit array bounds checking in the current function.
bool emitArrayBoundsChecks();

// Global variables bound to string literals. Once created such a
// variable is reused whenever the same string literal is
// referenced in the module. Caching them per module prevents the
// duplication of identical literals.
llvm::StringMap<llvm::GlobalVariable *> stringLiteral1ByteCache;
llvm::StringMap<llvm::GlobalVariable *> stringLiteral2ByteCache;
llvm::StringMap<llvm::GlobalVariable *> stringLiteral4ByteCache;

// Sets the initializer for a global LL variable.
// If the types don't match, this entails creating a new helper global
// matching the initializer type and replacing all existing uses of globalVar
Expand All @@ -234,6 +233,12 @@ struct IRState {
void setStructLiteralConstant(StructLiteralExp *sle,
llvm::Constant *constant);

// Returns the global variable for a string literal, constructing it on first
// use. The result is cached based on the literal's raw data (i.e.,
// StringExp::peekData() or the given StringRef), such that any subsequent
// calls with matching data will return the same variable.
llvm::GlobalVariable *getCachedStringLiteral(StringExp *se);
llvm::GlobalVariable *getCachedStringLiteral(llvm::StringRef s);

// List of functions with cpu or features attributes overridden by user
std::vector<IrFunction *> targetCpuOrFeaturesOverridden;

Expand Down
18 changes: 5 additions & 13 deletions gen/llvmhelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1707,21 +1707,13 @@ llvm::Constant *DtoConstSymbolAddress(Loc &loc, Declaration *decl) {
llvm_unreachable("Taking constant address not implemented.");
}

llvm::StringMap<llvm::GlobalVariable *> *
stringLiteralCacheForType(Type *charType) {
switch (charType->size()) {
default:
llvm_unreachable("Unknown char type");
case 1:
return &gIR->stringLiteral1ByteCache;
case 2:
return &gIR->stringLiteral2ByteCache;
case 4:
return &gIR->stringLiteral4ByteCache;
llvm::Constant *buildStringLiteralConstant(StringExp *se, bool zeroTerm) {
if (se->sz == 1) {
const DString data = se->peekString();
return llvm::ConstantDataArray::getString(
gIR->context(), {data.ptr, data.length}, zeroTerm);
}
}

llvm::Constant *buildStringLiteralConstant(StringExp *se, bool zeroTerm) {
Type *dtype = se->type->toBasetype();
Type *cty = dtype->nextOf()->toBasetype();

Expand Down
5 changes: 0 additions & 5 deletions gen/llvmhelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,6 @@ LLConstant *toConstantArray(LLType *ct, LLArrayType *at, T *str, size_t len,
return LLConstantArray::get(at, vals);
}

/// Returns the cache for string literals of the given character type (for the
/// current IRState).
llvm::StringMap<llvm::GlobalVariable *> *
stringLiteralCacheForType(Type *charType);

llvm::Constant *buildStringLiteralConstant(StringExp *se, bool zeroTerm);

/// Tries to declare an LLVM global. If a variable with the same mangled name
Expand Down
20 changes: 2 additions & 18 deletions gen/toconstelem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,29 +169,13 @@ class ToConstElemVisitor : public Visitor {
LOG_SCOPE;

Type *const t = e->type->toBasetype();
Type *const cty = t->nextOf()->toBasetype();

auto _init = buildStringLiteralConstant(e, t->ty != Tsarray);

if (t->ty == Tsarray) {
result = _init;
result = buildStringLiteralConstant(e, false);
return;
}

auto stringLiteralCache = stringLiteralCacheForType(cty);
llvm::StringRef key(e->toChars());
llvm::GlobalVariable *gvar =
(stringLiteralCache->find(key) == stringLiteralCache->end())
? nullptr
: (*stringLiteralCache)[key];
if (gvar == nullptr) {
llvm::GlobalValue::LinkageTypes _linkage =
llvm::GlobalValue::PrivateLinkage;
gvar = new llvm::GlobalVariable(gIR->module, _init->getType(), true,
_linkage, _init, ".str");
gvar->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
(*stringLiteralCache)[key] = gvar;
}
llvm::GlobalVariable *gvar = p->getCachedStringLiteral(e);

llvm::ConstantInt *zero =
LLConstantInt::get(LLType::getInt32Ty(gIR->context()), 0, false);
Expand Down
24 changes: 1 addition & 23 deletions gen/toir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,31 +383,9 @@ class ToElemVisitor : public Visitor {

Type *dtype = e->type->toBasetype();
Type *cty = dtype->nextOf()->toBasetype();

LLType *ct = DtoMemType(cty);

llvm::StringMap<llvm::GlobalVariable *> *stringLiteralCache =
stringLiteralCacheForType(cty);
LLConstant *_init = buildStringLiteralConstant(e, true);
const auto at = _init->getType();

llvm::StringRef key(e->toChars());
llvm::GlobalVariable *gvar =
(stringLiteralCache->find(key) == stringLiteralCache->end())
? nullptr
: (*stringLiteralCache)[key];
if (gvar == nullptr) {
llvm::GlobalValue::LinkageTypes _linkage =
llvm::GlobalValue::PrivateLinkage;
IF_LOG {
Logger::cout() << "type: " << *at << '\n';
Logger::cout() << "init: " << *_init << '\n';
}
gvar = new llvm::GlobalVariable(gIR->module, at, true, _linkage, _init,
".str");
gvar->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
(*stringLiteralCache)[key] = gvar;
}
llvm::GlobalVariable *gvar = p->getCachedStringLiteral(e);

llvm::ConstantInt *zero =
LLConstantInt::get(LLType::getInt32Ty(gIR->context()), 0, false);
Expand Down
14 changes: 1 addition & 13 deletions gen/tollvm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,19 +429,7 @@ LLConstant *DtoConstFP(Type *t, const real_t value) {
LLConstant *DtoConstCString(const char *str) {
llvm::StringRef s(str ? str : "");

const auto it = gIR->stringLiteral1ByteCache.find(s);
llvm::GlobalVariable *gvar =
it == gIR->stringLiteral1ByteCache.end() ? nullptr : it->getValue();

if (gvar == nullptr) {
llvm::Constant *init =
llvm::ConstantDataArray::getString(gIR->context(), s, true);
gvar = new llvm::GlobalVariable(gIR->module, init->getType(), true,
llvm::GlobalValue::PrivateLinkage, init,
".str");
gvar->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
gIR->stringLiteral1ByteCache[s] = gvar;
}
LLGlobalVariable *gvar = gIR->getCachedStringLiteral(s);

LLConstant *idxs[] = {DtoConstUint(0), DtoConstUint(0)};
return llvm::ConstantExpr::getGetElementPtr(gvar->getInitializer()->getType(),
Expand Down

0 comments on commit 1b5b405

Please sign in to comment.