Skip to content

Commit

Permalink
llpc: optimize constant local arrays to constant globals when multiple stores are used
Browse files Browse the repository at this point in the history

For allocas of array type, detect the case where the array is filled by
multiple stores of constant values to constant (and disjoint) indices,
and promote such allocas to constant globals.

Co-authored-by: Nicolai Hähnle <[email protected]>
  • Loading branch information
mariusz-sikora-at-amd and nhaehnle committed Oct 3, 2023
1 parent 1aebd12 commit e25dd51
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 123 deletions.
253 changes: 134 additions & 119 deletions llpc/lower/llpcSpirvLowerConstImmediateStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "SPIRVInternal.h"
#include "llpcContext.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
Expand All @@ -51,164 +52,178 @@ namespace Llpc {
// @param [in/out] module : LLVM module to be run on (empty on entry)
// @param [in/out] analysisManager : Analysis manager to use for this transformation
// NOTE(review): this span looks like a rendered diff without +/- markers, so lines of the older
// run()/runImpl() pair and of the newer single run() appear interleaved. Confirm against the
// repository before treating it as compilable code.
PreservedAnalyses SpirvLowerConstImmediateStore::run(Module &module, ModuleAnalysisManager &analysisManager) {
runImpl(module);
return PreservedAnalyses::none();
}

// =====================================================================================================================
// Executes this SPIR-V lowering pass on the specified LLVM module.
//
// @param [in/out] module : LLVM module to be run on
bool SpirvLowerConstImmediateStore::runImpl(Module &module) {
LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Const-Immediate-Store\n");

SpirvLower::init(&module);

// Process "alloca" instructions to see if they can be optimized to a read-only global
// variable.
for (auto funcIt = module.begin(), funcItEnd = module.end(); funcIt != funcItEnd; ++funcIt) {
if (auto func = dyn_cast<Function>(&*funcIt)) {
if (!func->empty())
processAllocaInsts(func);
// Remember whether any function was modified; the result selects the preserved-analyses set
// returned at the end.
bool changed = false;
for (auto &func : module.functions()) {
// Only non-empty functions are handed to processAllocaInsts.
if (!func.empty()) {
if (processAllocaInsts(&func))
changed = true;
}
}

return true;
// If processAllocaInsts reported a change, only the CFGAnalyses set is reported as preserved;
// otherwise every analysis is reported as preserved.
return changed ? PreservedAnalyses::allInSet<CFGAnalyses>() : PreservedAnalyses::all();
}

// =====================================================================================================================
// Processes "alloca" instructions at the beginning of the given non-empty function to see if they
// can be optimized to a read-only global variable.
//
// NOTE(review): this span looks like a rendered diff without +/- markers; the void-returning and the
// bool-returning versions of this function appear interleaved below. Confirm against the repository.
//
// @param func : Function to process
// @return (bool version) true if tryProcessAlloca returned true for at least one alloca
void SpirvLowerConstImmediateStore::processAllocaInsts(Function *func) {
bool SpirvLowerConstImmediateStore::processAllocaInsts(Function *func) {
// NOTE: We only visit the entry block on the basis that SPIR-V translator puts all "alloca"
// instructions there.
auto entryBlock = &func->front();
for (auto instIt = entryBlock->begin(), instItEnd = entryBlock->end(); instIt != instItEnd; ++instIt) {
auto inst = &*instIt;
if (auto allocaInst = dyn_cast<AllocaInst>(inst)) {
if (allocaInst->getAllocatedType()->isAggregateType()) {
// Got an "alloca" instruction of aggregate type.
auto storeInst = findSingleStore(allocaInst);
if (storeInst && isa<Constant>(storeInst->getValueOperand())) {
// Got an aggregate "alloca" with a single store to the whole type.
// Do the optimization.
convertAllocaToReadOnlyGlobal(storeInst);
}
}
bool changed = false;
// First pass: only collect the aggregate-typed allocas of the entry block. tryProcessAlloca erases
// the alloca (and other instructions) on success, so candidates are gathered before any of them is
// processed -- presumably to avoid modifying the block while it is being iterated.
SmallVector<AllocaInst *> candidates;
for (auto &inst : func->getEntryBlock()) {
if (auto allocaInst = dyn_cast<AllocaInst>(&inst)) {
if (allocaInst->getAllocatedType()->isAggregateType())
candidates.push_back(allocaInst);
}
}
// Second pass: attempt the conversion on every candidate and record whether any succeeded.
for (auto *alloca : candidates) {
if (tryProcessAlloca(alloca))
changed = true;
}
return changed;
}

// =====================================================================================================================
// NOTE(review): this span looks like a rendered diff without +/- markers. Lines of findSingleStore and
// convertAllocaToReadOnlyGlobal appear interleaved with lines of tryProcessAlloca; confirm against the
// repository before treating it as compilable code. The added comments below describe the
// tryProcessAlloca lines unless stated otherwise.
//
// Finds the single "store" instruction storing to this pointer.
//
// Returns nullptr if no "store", multiple "store", or partial "store" instructions (store only part
// of the memory) are found.
//
// NOTE: This is conservative in that it returns nullptr if the pointer escapes by being used in anything
// other than "store" (as the pointer), "load" or "getelementptr" instruction.
// For a single alloca, try to replace it by a constant global variable.
//
// @param allocaInst : The "alloca" instruction to process
StoreInst *SpirvLowerConstImmediateStore::findSingleStore(AllocaInst *allocaInst) {
std::vector<Instruction *> pointers;
bool isOuterPointer = true;
StoreInst *storeInstFound = nullptr;
Instruction *pointer = allocaInst;
while (true) {
for (auto useIt = pointer->use_begin(), useItEnd = pointer->use_end(); useIt != useItEnd; ++useIt) {
auto user = cast<Instruction>(useIt->getUser());
// @return true if the alloca was replaced
bool SpirvLowerConstImmediateStore::tryProcessAlloca(AllocaInst *allocaInst) {
auto *allocatedTy = allocaInst->getAllocatedType();
// Null when the allocated type is not an array; per-element stores are only accepted for arrays.
auto *arrayTy = dyn_cast<ArrayType>(allocatedTy);

// At most one store of the whole allocated type, or a table of per-element stores (one slot per
// array element, nullptr where no store was seen). The two forms are mutually exclusive.
StoreInst *aggregateStore = nullptr;
std::vector<StoreInst *> elementStores;

// Assume-like intrinsic users to erase on success, and every GEP reached from the alloca (they are
// retyped to the global's pointer type in step 3).
SmallVector<Instruction *> toErase;
SmallVector<GetElementPtrInst *> geps;

// Step 1: Determine if the alloca can be converted and find the relevant store(s)
// Worklist of (pointer, element index) pairs. The index is std::nullopt when the pointer was reached
// through a GEP whose element index is not tracked.
SmallVector<std::pair<Value *, std::optional<uint64_t>>> pointers;
pointers.emplace_back(allocaInst, 0);
do {
auto [pointer, index] = pointers.pop_back_val();
for (Use &use : pointer->uses()) {
auto user = cast<Instruction>(use.getUser());
if (auto storeInst = dyn_cast<StoreInst>(user)) {
if (pointer == storeInst->getValueOperand() || storeInstFound || !isOuterPointer) {
// Pointer escapes by being stored, or we have already found a "store"
// instruction, or this is a partial "store" instruction.
return nullptr;
if (pointer == storeInst->getValueOperand() || !index.has_value()) {
// Pointer escapes by being stored, or we store to a dynamically indexed (or otherwise complex) pointer.
return false;
}
storeInstFound = storeInst;
} else if (auto getElemPtrInst = dyn_cast<GetElementPtrInst>(user))
pointers.push_back(getElemPtrInst);
else if (!isa<LoadInst>(user) && !isAssumeLikeIntrinsic(user)) {
// Pointer escapes by being used in some way other than "load/store/getelementptr".
return nullptr;
}
}

if (pointers.empty())
break;
// Only stores of constant values can be folded into a global initializer.
Value *storeValue = storeInst->getValueOperand();
if (!isa<Constant>(storeValue))
return false;

pointer = pointers.back();
pointers.pop_back();
isOuterPointer = false;
}
// Case A: the stored value has the whole allocated type. It must be the only store seen so far
// and must target index 0.
if (storeValue->getType() == allocatedTy) {
if (aggregateStore || !elementStores.empty() || index.value() != 0)
return false;
aggregateStore = storeInst;
continue;
}

return storeInstFound;
}
// Case B: the alloca is an array and the stored value has the array's element type. It cannot be
// combined with a whole-aggregate store, and the index must be within the array bounds.
if (arrayTy && storeValue->getType() == arrayTy->getArrayElementType()) {
if (aggregateStore)
return false;
if (index.value() >= arrayTy->getArrayNumElements())
return false;

// =====================================================================================================================
// Converts an "alloca" instruction with a single constant store into a read-only global variable.
//
// NOTE: This erases the "store" instruction (so it will not be lowered by a later lowering pass
// any more) but not the "alloca" or replaced "getelementptr" instruction (they will be removed
// later by DCE pass).
//
// @param storeInst : The single constant store into the "alloca"
void SpirvLowerConstImmediateStore::convertAllocaToReadOnlyGlobal(StoreInst *storeInst) {
auto allocaInst = cast<AllocaInst>(storeInst->getPointerOperand());
auto globalType = allocaInst->getAllocatedType();
auto initVal = cast<Constant>(storeInst->getValueOperand());
// The per-element table is sized lazily, on the first element store.
if (elementStores.empty())
elementStores.resize(arrayTy->getArrayNumElements());

if (globalType != initVal->getType())
return;
// Check if we are not trying to add the same idx. This may happen when the
// same place in table is initialized twice or more.
if (elementStores[index.value()])
return false;
elementStores[index.value()] = storeInst;
continue;
}

auto global = new GlobalVariable(*m_module, globalType,
true, // isConstant
GlobalValue::InternalLinkage, initVal, "", nullptr, GlobalValue::NotThreadLocal,
SPIRAS_Constant);
global->takeName(allocaInst);
// Change all uses of allocaInst to use global. We need to do it manually, as there is a change
// of address space, and we also need to recreate "getelementptr"s.
std::vector<std::pair<Instruction *, Value *>> allocaToGlobalMap;
allocaToGlobalMap.push_back(std::pair<Instruction *, Value *>(allocaInst, global));
do {
auto allocaInst = allocaToGlobalMap.back().first;
auto global = allocaToGlobalMap.back().second;
allocaToGlobalMap.pop_back();
while (!allocaInst->use_empty()) {
auto useIt = allocaInst->use_begin();
if (auto origGetElemPtrInst = dyn_cast<GetElementPtrInst>(useIt->getUser())) {
// This use is a "getelementptr" instruction. Create a replacement one with the new address space.
SmallVector<Value *, 4> indices;
for (auto idxIt = origGetElemPtrInst->idx_begin(), idxItEnd = origGetElemPtrInst->idx_end(); idxIt != idxItEnd;
++idxIt)
indices.push_back(*idxIt);
auto newGetElemPtrInst = GetElementPtrInst::Create(globalType, global, indices, "", origGetElemPtrInst);
newGetElemPtrInst->takeName(origGetElemPtrInst);
newGetElemPtrInst->setIsInBounds(origGetElemPtrInst->isInBounds());
newGetElemPtrInst->copyMetadata(*origGetElemPtrInst);
// Remember that we need to replace the uses of the original "getelementptr" with the new one.
allocaToGlobalMap.push_back(std::pair<Instruction *, Value *>(origGetElemPtrInst, newGetElemPtrInst));
// Remove the use from the original "getelementptr".
*useIt = PoisonValue::get(allocaInst->getType());
continue;
return false;
}

if (auto *intrinsic = dyn_cast<IntrinsicInst>(useIt->getUser())) {
switch (intrinsic->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
// Lifetime markers are only useful for allocas, not for globals, and if we did not erase them we would have
// to change their name mangling because of the change of address space.
intrinsic->eraseFromParent();
continue;
if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
// Every GEP is recorded so that its result type can be changed in step 3.
geps.push_back(gep);

// A GEP whose source element type is the allocated array type and that has exactly two constant
// indices keeps a known element index: the non-negative value of operand 2 is added to the running
// index. NOTE(review): operand 1 (the first index) is not inspected here -- presumably it is always
// zero; confirm.
if (index.has_value() && arrayTy && gep->getSourceElementType() == allocatedTy &&
gep->hasAllConstantIndices() && gep->getNumIndices() == 2) {
int64_t gepIdx = cast<ConstantInt>(gep->getOperand(2))->getSExtValue();
if (gepIdx >= 0) {
pointers.emplace_back(gep, index.value() + gepIdx);
continue;
}
}

// Any other GEP is still followed, but with an unknown index; a store through such a pointer makes
// the whole transformation bail out (see the !index.has_value() check above).
pointers.emplace_back(gep, std::nullopt);
continue;
}

// Loads do not prevent the transformation.
if (isa<LoadInst>(user))
continue;

// Assume-like intrinsic users do not block the transformation; they are queued and erased in step 3.
if (isAssumeLikeIntrinsic(user)) {
toErase.push_back(user);
continue;
}

*useIt = global;
// Pointer escapes by being used in some way other than "load/store/getelementptr".
return false;
}
// Visit next map pair.
} while (!allocaToGlobalMap.empty());
storeInst->eraseFromParent();
} while (!pointers.empty());

// Step 2: Extract or build the initializer constant
Constant *initializer = nullptr;

if (aggregateStore) {
// A single whole-aggregate store: its constant operand is the initializer.
initializer = cast<Constant>(aggregateStore->getValueOperand());
} else if (!elementStores.empty()) {
// Per-element stores: assemble a constant array, using poison for elements that were never stored.
std::vector<Constant *> elements;
elements.resize(arrayTy->getArrayNumElements());
for (auto [element, storeInst] : llvm::zip(elements, elementStores)) {
if (storeInst)
element = cast<Constant>(storeInst->getValueOperand());
else
element = PoisonValue::get(arrayTy->getElementType());
}

initializer = ConstantArray::get(arrayTy, elements);
} else {
// No store was found at all: the whole initializer is poison.
initializer = PoisonValue::get(allocatedTy);
}

// Step 3: Create the global variable and replace the alloca
auto global = new GlobalVariable(*m_module, allocatedTy,
true, // isConstant
GlobalValue::InternalLinkage, initializer, "", nullptr, GlobalValue::NotThreadLocal,
SPIRAS_Constant);
global->takeName(allocaInst);

// Redirect every direct use of the alloca to the global; the early-inc range keeps the iteration
// valid while uses are being rewritten.
for (Use &use : llvm::make_early_inc_range(allocaInst->uses()))
use.set(global);

// The global is created in SPIRAS_Constant, so the recorded GEPs are retyped in place to the
// global's pointer type rather than being recreated.
for (auto *gep : geps)
gep->mutateType(global->getType());

// Finally remove the queued intrinsics, the folded store(s) and the alloca itself.
for (auto *inst : toErase)
inst->eraseFromParent();
if (aggregateStore)
aggregateStore->eraseFromParent();
for (auto *store : elementStores) {
if (store)
store->eraseFromParent();
}
allocaInst->eraseFromParent();

return true;
}

} // namespace Llpc
6 changes: 2 additions & 4 deletions llpc/lower/llpcSpirvLowerConstImmediateStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,12 @@ namespace Llpc {
// Pass class for the "Lower SPIR-V constant immediate store" lowering (name as returned by name()).
//
// NOTE(review): this span looks like a rendered diff without +/- markers, so older and newer member
// declarations appear side by side (e.g. processAllocaInsts is listed with both a void and a bool
// return type). Confirm against the repository before treating it as compilable code.
class SpirvLowerConstImmediateStore : public SpirvLower, public llvm::PassInfoMixin<SpirvLowerConstImmediateStore> {
public:
// Pass entry point; takes the module to process and the module analysis manager and returns the set
// of preserved analyses.
llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager);
bool runImpl(llvm::Module &module);

// Human-readable name of the pass.
static llvm::StringRef name() { return "Lower SPIR-V constant immediate store"; }

private:
void processAllocaInsts(llvm::Function *func);
llvm::StoreInst *findSingleStore(llvm::AllocaInst *allocaInst);
void convertAllocaToReadOnlyGlobal(llvm::StoreInst *storeInst);
// Processes the "alloca" instructions at the beginning of a function; the bool result reports whether
// any alloca was replaced.
bool processAllocaInsts(llvm::Function *func);
// Tries to replace a single alloca by a constant global variable; returns true if it was replaced.
bool tryProcessAlloca(llvm::AllocaInst *allocaInst);
};

} // namespace Llpc
Loading

0 comments on commit e25dd51

Please sign in to comment.