Skip to content

Commit

Permalink
Parallel tune: transformations for Roberta with BS=1 (onnx#2840)
Browse files Browse the repository at this point in the history
Signed-off-by: Alexandre Eichenberger <[email protected]>
Co-authored-by: Tung D. Le <[email protected]>
  • Loading branch information
AlexandreEichenberger and tungld authored Jun 12, 2024
1 parent 383e805 commit 7f4f510
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 19 deletions.
1 change: 0 additions & 1 deletion src/Conversion/ONNXToKrnl/Math/Reduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,6 @@ struct ONNXReductionOpLowering : public OpConversionPattern<ONNXReductionOp> {
int64_t parId;
if (findSuitableParallelDimension(lbs3, ubs3, 0, 1, parId,
/*min iter for going parallel*/ 4)) {

create.krnl.parallel(loop3Def[0]);
onnxToKrnlParallelReport(
op, true, 0, lbs3[0], ubs3[0], "reduction scalar mean");
Expand Down
20 changes: 13 additions & 7 deletions src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -617,17 +617,23 @@ bool hasNonIdentityLayout(ValueRange operands) {
// Support functions for parallel region.
//===----------------------------------------------------------------------===//

// Return the outermost loop within [firstDim, lastDim) for which (ub-lb) >
// minSize. Runtime dimensions are assumed to satisfy the size requirement by
// definition. If found one, it is parDim and the function returns true.
// Return the outermost loop within [firstInclusiveDim, lastExclusiveDim) for
// which (ub-lb) >= minSize. Runtime dimensions are assumed to satisfy the size
// requirement by definition. If one is found, it is stored in parDim and the
// function returns true.

bool findSuitableParallelDimension(llvm::SmallVectorImpl<IndexExpr> &lb,
llvm::SmallVectorImpl<IndexExpr> &ub, int64_t firstDim, int64_t lastDim,
int64_t &parDim, int64_t minSize) {
for (int64_t i = firstDim; i < lastDim; ++i) {
llvm::SmallVectorImpl<IndexExpr> &ub, int64_t firstInclusiveDim,
int64_t lastExclusiveDim, int64_t &parDim, int64_t minSize) {
assert(lb.size() == ub.size() && "expected identical ranks for lb/ub");
if (firstInclusiveDim < 0)
firstInclusiveDim = 0;
if (lastExclusiveDim > (int64_t)lb.size())
lastExclusiveDim = lb.size();
for (int64_t i = firstInclusiveDim; i < lastExclusiveDim; ++i) {
IndexExpr tripCount = ub[i] - lb[i];
if (!tripCount.isLiteral() || tripCount.getLiteral() >= minSize) {
// Got one
// Got one.
parDim = i;
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,8 +592,8 @@ bool hasNonIdentityLayout(mlir::ValueRange operands);
// minSize. Runtime dimensions are assumed to satisfy the size requirement by
// definition. If one is found, it is stored in parDim and the function returns
// true.
bool findSuitableParallelDimension(llvm::SmallVectorImpl<IndexExpr> &lb,
llvm::SmallVectorImpl<IndexExpr> &ub, int64_t firstDim /*inclusive*/,
int64_t lastDim /*exclusive*/, int64_t &parDim, int64_t minSize = 4);
llvm::SmallVectorImpl<IndexExpr> &ub, int64_t firstInclusiveDim,
int64_t lastExclusiveDim, int64_t &parDim, int64_t minSize = 4);

//===----------------------------------------------------------------------===//
// Support functions for reporting.
Expand Down
17 changes: 8 additions & 9 deletions src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,10 @@ struct ONNXTransposeOpLowering : public OpConversionPattern<ONNXTransposeOp> {
if (enableParallel) {
int64_t parId;
// TODO: consider flattening the outer dims, or along inner dims.
if (findSuitableParallelDimension(lbs, ubs, 0, 1, parId, 8)) {
assert(parId == 0 && "only outermost at this time");
create->krnl.parallel(loopDef[0]);
if (findSuitableParallelDimension(lbs, ubs, 0, 2, parId, 8)) {
create->krnl.parallel(loopDef[parId]);
onnxToKrnlParallelReport(
op, true, 0, lbs[0], ubs[0], "scalar transpose");
op, true, parId, lbs[parId], ubs[parId], "scalar transpose");
} else {
onnxToKrnlParallelReport(
op, false, -1, -1, "no dim with enough work in scalar transpose");
Expand Down Expand Up @@ -224,12 +223,12 @@ struct ONNXTransposeOpLowering : public OpConversionPattern<ONNXTransposeOp> {
SmallVector<IndexExpr, 4> lbs(outerRank, LiteralIndexExpr(0));
if (enableParallel) {
int64_t parId;
// TODO: consider flattening the outer dims, or along inner dims.
if (findSuitableParallelDimension(lbs, inUBs, 0, 1, parId, 8)) {
assert(parId == 0 && "only outermost at this time");
create->krnl.parallel(loopDef[0]);
// Note that if there is only 1 dim, lastExclusiveDim is automatically
// reduced to 1 in the findSuitableParallelDimension call.
if (findSuitableParallelDimension(lbs, inUBs, 0, 2, parId, 8)) {
create->krnl.parallel(loopDef[parId]);
onnxToKrnlParallelReport(
op, true, 0, lbs[0], inUBs[0], "block transpose");
op, true, parId, lbs[parId], inUBs[parId], "block transpose");
} else {
onnxToKrnlParallelReport(
op, false, -1, -1, "no dim with enough work in block transpose");
Expand Down

0 comments on commit 7f4f510

Please sign in to comment.