Skip to content

Commit

Permalink
[MLIR][OpenMP] Fix MLIR->LLVM value matching in privatization logic (l…
Browse files Browse the repository at this point in the history
…lvm#103718)

Fixes llvm#102935

Updates matching logic for finding the LLVM value that corresponds to an
MLIR value. We need that matching to find the delayed privatizer for an
LLVM value being privatized.

The issue occures when there is an "indirect" correspondence between
MLIR and LLVM values: in some cases the values we are trying to match
stem from a pair of load/store ops that point to the same memref. This
PR adds such matching logic.
  • Loading branch information
ergawy authored Aug 18, 2024
1 parent f499a3f commit c4c9f39
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 28 deletions.
128 changes: 100 additions & 28 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -690,14 +690,14 @@ inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,
: &builder.GetInsertBlock()->back();
if (potentialTerminator && potentialTerminator->isTerminator())
builder.SetInsertPoint(potentialTerminator);
llvm::Value *prviateVarValue =
llvm::Value *privateVarValue =
shouldLoadCleanupRegionArg
? builder.CreateLoad(
moduleTranslation.convertType(entry.getArgument(0).getType()),
privateVariables[i])
: privateVariables[i];

moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue);
moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);

if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
moduleTranslation)))
Expand Down Expand Up @@ -1424,35 +1424,106 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
}
};

SmallVector<omp::PrivateClauseOp> privatizerClones;
SmallVector<llvm::Value *> privateVariables;
SmallVector<omp::PrivateClauseOp> mlirPrivatizerClones;
SmallVector<llvm::Value *> llvmPrivateVars;

// TODO: Perform appropriate actions according to the data-sharing
// attribute (shared, private, firstprivate, ...) of variables.
// Currently shared and private are supported.
auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
llvm::Value &, llvm::Value &vPtr,
llvm::Value *&replacementValue) -> InsertPointTy {
replacementValue = &vPtr;
llvm::Value &, llvm::Value &llvmOmpRegionInput,
llvm::Value *&llvmReplacementValue) -> InsertPointTy {
llvmReplacementValue = &llvmOmpRegionInput;

// If this is a private value, this lambda will return the corresponding
// mlir value and its `PrivateClauseOp`. Otherwise, empty values are
// returned.
auto [privVar, privatizerClone] =
auto [mlirPrivVar, mlirPrivatizerClone] =
[&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
if (!opInst.getPrivateVars().empty()) {
auto privateVars = opInst.getPrivateVars();
auto privateSyms = opInst.getPrivateSyms();
auto mlirPrivVars = opInst.getPrivateVars();
auto mlirPrivSyms = opInst.getPrivateSyms();

for (auto [privVar, privatizerAttr] :
llvm::zip_equal(privateVars, *privateSyms)) {
// Try to find a privatizer that corresponds to the LLVM value being
// privatized.
for (auto [mlirPrivVar, mlirPrivatizerAttr] :
llvm::zip_equal(mlirPrivVars, *mlirPrivSyms)) {
// Find the MLIR private variable corresponding to the LLVM value
// being privatized.
llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
if (llvmPrivVar != &vPtr)
llvm::Value *mlirToLLVMPrivVar =
moduleTranslation.lookupValue(mlirPrivVar);

// Check if the LLVM value being privatized matches the LLVM value
// mapped to privVar. In some cases, this is not trivial ...
auto isMatch = [&]() {
if (mlirToLLVMPrivVar == nullptr)
return false;

// If both values are trivially equal, we found a match.
if (mlirToLLVMPrivVar == &llvmOmpRegionInput)
return true;

// Otherwise, we check if both llvmOmpRegionInputPtr and
// mlirToLLVMPrivVar refer to the same memory (through a load/store
// pair). This happens if a struct (i.e. multi-field value) is being
// privatized.
//
// For example, if the privatized value is defined by:
// ```
// %priv_val = alloca { ptr, i64 }, align 8
// ```
//
// The initialization of this value (outside the omp region) will be
// something like this:
//
// clang-format off
// ```
// %partially_init_priv_val = insertvalue { ptr, i64 } undef,
// ptr %some_ptr, 0
// %fully_init_priv_val = insertvalue { ptr, i64 } %partially_init_priv_val,
// i64 %some_i64, 1
// ...
// store { ptr, i64 } %fully_init_priv_val, ptr %priv_val, align 8
// ```
// clang-format on
//
// Now, we enter the OMP region, in order to access this privatized
// value, we need to load from the allocated memory:
// ```
// omp.par.entry:
// %priv_val_load = load { ptr, i64 }, ptr %priv_val, align 8
// ```
//
// The 2 LLVM values tracked here map as follows:
// - `mlirToLLVMPrivVar` -> `%fully_init_priv_val`
// - `llvmOmpRegionInputPtr` -> `%priv_val_load`
//
// Even though they eventually refer to the same memory reference
// (the memory being privatized), they are 2 distinct LLVM values.
// Therefore, we need to discover their correspondence by finding
// out if they store into and load from the same mem ref.
auto *llvmOmpRegionInputPtrLoad =
llvm::dyn_cast_if_present<llvm::LoadInst>(&llvmOmpRegionInput);

if (llvmOmpRegionInputPtrLoad == nullptr)
return false;

for (auto &use : mlirToLLVMPrivVar->uses()) {
auto *mlirToLLVMPrivVarStore =
llvm::dyn_cast_if_present<llvm::StoreInst>(use.getUser());
if (mlirToLLVMPrivVarStore &&
(llvmOmpRegionInputPtrLoad->getPointerOperand() ==
mlirToLLVMPrivVarStore->getPointerOperand()))
return true;
}

return false;
};

if (!isMatch())
continue;

SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(mlirPrivatizerAttr);
omp::PrivateClauseOp privatizer =
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
opInst, privSym);
Expand Down Expand Up @@ -1480,24 +1551,24 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
counter);

clone.setSymName(cloneName);
return {privVar, clone};
return {mlirPrivVar, clone};
}
}

return {mlir::Value(), omp::PrivateClauseOp()};
}();

if (privVar) {
Region &allocRegion = privatizerClone.getAllocRegion();
if (mlirPrivVar) {
Region &allocRegion = mlirPrivatizerClone.getAllocRegion();

// If this is a `firstprivate` clause, prepare the `omp.private` op by:
if (privatizerClone.getDataSharingType() ==
if (mlirPrivatizerClone.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate) {
auto oldAllocBackBlock = std::prev(allocRegion.end());
omp::YieldOp oldAllocYieldOp =
llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());

Region &copyRegion = privatizerClone.getCopyRegion();
Region &copyRegion = mlirPrivatizerClone.getCopyRegion();

mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
// 1. Cloning the `copy` region to the end of the `alloc` region.
Expand All @@ -1524,7 +1595,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// This way, the body of the privatizer will be changed from using the
// region/block argument to the value being privatized.
auto allocRegionArg = allocRegion.getArgument(0);
replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
replaceAllUsesInRegionWith(allocRegionArg, mlirPrivVar, allocRegion);

auto oldIP = builder.saveIP();
builder.restoreIP(allocaIP);
Expand All @@ -1535,15 +1606,15 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
opInst.emitError("failed to inline `alloc` region of an `omp.private` "
"op in the parallel region");
bodyGenStatus = failure();
privatizerClone.erase();
mlirPrivatizerClone.erase();
} else {
assert(yieldedValues.size() == 1);
replacementValue = yieldedValues.front();
llvmReplacementValue = yieldedValues.front();

// Keep the LLVM replacement value and the op clone in case we need to
// emit cleanup (i.e. deallocation) logic.
privateVariables.push_back(replacementValue);
privatizerClones.push_back(privatizerClone);
llvmPrivateVars.push_back(llvmReplacementValue);
mlirPrivatizerClones.push_back(mlirPrivatizerClone);
}

builder.restoreIP(oldIP);
Expand Down Expand Up @@ -1571,13 +1642,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
bodyGenStatus = failure();

SmallVector<Region *> privateCleanupRegions;
llvm::transform(privatizerClones, std::back_inserter(privateCleanupRegions),
llvm::transform(mlirPrivatizerClones,
std::back_inserter(privateCleanupRegions),
[](omp::PrivateClauseOp privatizer) {
return &privatizer.getDeallocRegion();
});

if (failed(inlineOmpRegionCleanup(
privateCleanupRegions, privateVariables, moduleTranslation, builder,
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
bodyGenStatus = failure();

Expand All @@ -1604,7 +1676,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
ifCond, numThreads, pbKind, isCancellable));

for (mlir::omp::PrivateClauseOp privatizerClone : privatizerClones)
for (mlir::omp::PrivateClauseOp privatizerClone : mlirPrivatizerClones)
privatizerClone.erase();

return bodyGenStatus;
Expand Down
42 changes: 42 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,45 @@ omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
llvm.store %arg2, %arg3 : f32, !llvm.ptr
omp.yield(%arg3 : !llvm.ptr)
}

// -----

// Verifies fix for https://github.com/llvm/llvm-project/issues/102935.
//
// The issue happens since we previously failed to match MLIR values to their
// corresponding LLVM values in some cases (e.g. char strings with non-const
// length).
llvm.func @non_const_len_char_test(%n: !llvm.ptr {fir.bindc_name = "n"}) {
%n_val = llvm.load %n : !llvm.ptr -> i64
%orig_alloc = llvm.alloca %n_val x i8 {bindc_name = "str"} : (i64) -> !llvm.ptr
%orig_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
%orig_val_init = llvm.insertvalue %orig_alloc, %orig_val[0] : !llvm.struct<(ptr, i64)>
omp.parallel private(@non_const_len_char %orig_val_init -> %priv_arg : !llvm.struct<(ptr, i64)>) {
%dummy = llvm.extractvalue %priv_arg[0] : !llvm.struct<(ptr, i64)>
omp.terminator
}
llvm.return
}

omp.private {type = firstprivate} @non_const_len_char : !llvm.struct<(ptr, i64)> alloc {
^bb0(%orig_val: !llvm.struct<(ptr, i64)>):
%str_len = llvm.extractvalue %orig_val[1] : !llvm.struct<(ptr, i64)>
%priv_alloc = llvm.alloca %str_len x i8 {bindc_name = "str", pinned} : (i64) -> !llvm.ptr
%priv_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
%priv_val_init = llvm.insertvalue %priv_alloc, %priv_val[0] : !llvm.struct<(ptr, i64)>
omp.yield(%priv_val_init : !llvm.struct<(ptr, i64)>)
} copy {
^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %priv_val: !llvm.struct<(ptr, i64)>):
llvm.call @foo() : () -> ()
omp.yield(%priv_val : !llvm.struct<(ptr, i64)>)
}

llvm.func @foo()

// CHECK-LABEL: @non_const_len_char_test..omp_par({{.*}})
// CHECK-NEXT: omp.par.entry:
// Verify that we found the privatizer by checking that we properly inlined the
// bodies of the alloc and copy regions.
// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
// CHECK: call void @foo()

0 comments on commit c4c9f39

Please sign in to comment.