Refine the lowering of onnx.concat #1778

Merged 7 commits on Oct 13, 2022
Changes from 4 commits
24 changes: 19 additions & 5 deletions src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
@@ -40,6 +40,7 @@ struct ONNXConcatOpLowering : public ConversionPattern {
assert(succeeded(shapecomputed) && "Could not compute output shape");

auto axis = concatOp.axis();
assert(axis >= 0 && "negative axis is supposed to have been normalized");
unsigned int inputNum = operands.size();

// Convert the output type to MemRefType.
@@ -57,16 +58,28 @@
MultiDialectBuilder<KrnlBuilder> create(rewriter, loc);

// Creates loops, one for each input.
// Since each input should have the same size for each dimension (except
// axis), we try to make the loop upper bounds the same for further
// optimization. Differences may come from constant vs. dynamic dims, or
// from dynamic dims of different inputs.
KrnlBuilder createKrnl(rewriter, loc);
SmallVector<IndexExpr, 4> commonUB(shapeHelper.dimsForOutput());
// IndexExprScope IEScope(&rewriter, loc);
IndexExpr accumulatedOffset = LiteralIndexExpr(0);
for (unsigned int i = 0; i < inputNum; ++i) {
// Since accumulatedOffsetValue will be used in a nested IndexExprScope,
// we get the Value of this IndexExpr and pass it as a symbol.
Value accumulatedOffsetValue = accumulatedOffset.getValue();
OpBuilder::InsertionGuard insertGuard(rewriter);
// Create loop.
ValueRange loopDef = createKrnl.defineLoops(rank);
SmallVector<IndexExpr, 4> lbs(rank, LiteralIndexExpr(0));
MemRefBoundsIndexCapture bounds(operands[i]);
SmallVector<IndexExpr, 4> ubs;
bounds.getDimList(ubs);
createKrnl.iterateIE(loopDef, loopDef, lbs, ubs,
// For each input, only the dimension 'axis' is different
commonUB[axis] = ubs[axis];
createKrnl.iterateIE(loopDef, loopDef, lbs, commonUB,
[&](KrnlBuilder &createKrnl, ValueRange loopInd) {
// Indices for the read and write.
SmallVector<Value, 4> readIndices, writeIndices;
@@ -76,17 +89,18 @@
else {
IndexExprScope IEScope(&rewriter, loc);
IndexExpr writeOffset = DimIndexExpr(loopInd[r]);
for (unsigned int j = 0; j < i; j++) {
MemRefBoundsIndexCapture operandJBounds(operands[j]);
writeOffset = writeOffset + operandJBounds.getDim(r);
}
IndexExpr accumulatedOffsetIE =
SymbolIndexExpr(accumulatedOffsetValue);
writeOffset = writeOffset + accumulatedOffsetIE;
writeIndices.emplace_back(writeOffset.getValue());
}
}
// Insert copy.
Value loadData = createKrnl.load(operands[i], loopInd);
createKrnl.store(loadData, alloc, writeIndices);
});
MemRefBoundsIndexCapture operandJBounds(operands[i]);
accumulatedOffset = accumulatedOffset + operandJBounds.getDim(axis);
}
rewriter.replaceOp(op, alloc);
return success();
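The main change in this lowering is that the write offset along the concatenation axis is now carried in a single accumulatedOffset IndexExpr that is advanced once per input, instead of being recomputed inside the element loop by summing the axis dims of all preceding inputs; the loops also share a common upper bound (commonUB) to ease later optimization. A minimal standalone C++ sketch of the same indexing scheme, using plain row-major arrays rather than onnx-mlir builders (Matrix and concatAxis0 are illustrative names, not part of the project):

// Conceptual sketch only: concatenate 2-D row-major matrices along axis 0,
// carrying a running write offset per input instead of re-summing the sizes
// of all preceding inputs for every element. Assumes at least one input and
// that all inputs share the same number of columns.
#include <cstdint>
#include <vector>

struct Matrix {
  int64_t rows, cols;      // all inputs share 'cols' (the non-axis dim)
  std::vector<float> data; // row-major storage
};

Matrix concatAxis0(const std::vector<Matrix> &inputs) {
  // Shape inference: the axis dim accumulates, the other dim is shared.
  int64_t totalRows = 0;
  for (const Matrix &m : inputs)
    totalRows += m.rows;
  Matrix out{totalRows, inputs[0].cols,
             std::vector<float>(totalRows * inputs[0].cols)};

  int64_t accumulatedOffset = 0; // advanced once per input, not per element
  for (const Matrix &m : inputs) {
    for (int64_t r = 0; r < m.rows; ++r)
      for (int64_t c = 0; c < m.cols; ++c)
        out.data[(accumulatedOffset + r) * out.cols + c] =
            m.data[r * m.cols + c];
    accumulatedOffset += m.rows; // skip past this input's axis extent
  }
  return out;
}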
33 changes: 25 additions & 8 deletions src/Dialect/ONNX/ShapeInference/Concat.cpp
@@ -37,19 +37,36 @@ LogicalResult ONNXConcatOpShapeHelper::computeShape(
if (axisIndex < 0)
axisIndex += commonRank;

// For the Concat op, the size of each dimension of the inputs should be the
// same, except for the concatenated dimension. To simplify the result, a
// constant size is used if there is one. Otherwise, the dimension of the
// last input tensor (implementation dependent) is used for the output tensor.
DimsExpr outputDims(commonRank);
IndexExpr cumulativeAxisSize = LiteralIndexExpr(0);
SmallVector<bool, 4> isConstant(commonRank, false);
for (unsigned i = 0; i < numInputs; ++i) {
Value currentInput = operandAdaptor.inputs()[i];
MemRefBoundsIndexCapture currInputBounds(currentInput);
DimIndexExpr currentSize(currInputBounds.getDim(axisIndex));
cumulativeAxisSize = cumulativeAxisSize + currentSize;
for (unsigned dim = 0; dim < commonRank; dim++) {
if (dim == axisIndex) {
DimIndexExpr currentSize(currInputBounds.getDim(axisIndex));
cumulativeAxisSize = cumulativeAxisSize + currentSize;
} else {
if (!isConstant[dim]) {
if (currInputBounds.getDim(dim).isLiteral()) {
// The size of the current dimension of the current input is a constant.
outputDims[dim] = currInputBounds.getDim(dim);
isConstant[dim] = true;
} else if (i == numInputs - 1) {
// If no constant dimension found for all the inputs, use the
// dynamic size of the last input.
outputDims[dim] = currInputBounds.getDim(dim);
}
}
}
}
Collaborator:

Is it easier to initialize outputDims with the first input's dims? Then accumulate the dim along the axis and replace the other dims if there is a constant. That way, we don't need isConstant.

Collaborator (Author):

Updated.

}

DimsExpr outputDims(commonRank);
MemRefBoundsIndexCapture firstInputBounds(firstInput);
for (unsigned i = 0; i < commonRank; i++)
outputDims[i] =
(i == axisIndex) ? cumulativeAxisSize : firstInputBounds.getDim(i);
outputDims[axisIndex] = cumulativeAxisSize;

setOutputDims(outputDims);
return success();
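The suggestion above was adopted in a later commit that falls outside this 4-commit view; it amounts to seeding outputDims with the first input's dims, accumulating the size along the axis, and overwriting a non-axis dim only when some input supplies a constant size, which makes the isConstant vector unnecessary. A hedged sketch of that rule in plain C++, with -1 standing in for a dynamic dimension (concatOutputShape and the -1 convention are illustrative assumptions, not onnx-mlir APIs):

// Sketch of the suggested shape rule, not the actual onnx-mlir code.
// -1 marks a dynamic dimension; real IndexExpr-based code would keep a
// symbolic expression for the axis size instead of collapsing it to -1.
#include <cstdint>
#include <vector>

std::vector<int64_t> concatOutputShape(
    const std::vector<std::vector<int64_t>> &inputShapes, size_t axis) {
  std::vector<int64_t> outputDims = inputShapes[0]; // seed with first input
  for (size_t i = 1; i < inputShapes.size(); ++i) {
    for (size_t d = 0; d < outputDims.size(); ++d) {
      if (d == axis) {
        // Axis dim accumulates; any dynamic operand makes it dynamic.
        if (outputDims[d] == -1 || inputShapes[i][d] == -1)
          outputDims[d] = -1;
        else
          outputDims[d] += inputShapes[i][d];
      } else if (outputDims[d] == -1 && inputShapes[i][d] != -1) {
        // Non-axis dim: prefer a constant size when any input provides one.
        outputDims[d] = inputShapes[i][d];
      }
    }
  }
  return outputDims;
}

For example, concatOutputShape({{2, -1, 4}, {3, 5, 4}}, 0) returns {5, 5, 4}: the axis dim accumulates 2 + 3, and the dynamic middle dim is filled by the constant 5 taken from the second input.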