From c7ebbbdbba9dbfcc84b3a7cc0b1491fc232294d4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 22 Jul 2017 09:16:29 -0700 Subject: [PATCH] JIT: Fix value type box optimization Boxing a value type produces a non-null result. If the result of the box is only used to feed a compare against null, the jit tries to optimize the box away entirely since the result of the comparison is known. Such idiomatic expressions arise fairly often in generics instantiated over value types. In the current implementation the box expands into two parts: an upstream statement to allocate heap space, and then an expression tree containing an encapsulated copy from the value being boxed to the payload of the new heap object. Wrapping around that is a reference to the new object, which is the result of the box. When the optimization fires, the upstream allocation is removed, and the current implementation also removes the entire box expression tree. Howver this tree can include important side effects from the evaluation of the value being boxed that must be preserved. For instance the value might come from an array, in which case and the box expression tree would contain the array null check and bounds check. So removing the entire tree can alter behavior. This fix attempts to carefull preserve the important side effects by moving the copy into a second statement upstream from the box. The box itself is then just a trivial side-effect-free reference to the box temp local. When the optimization fires the jit removes the upstream heap allocation as before, as well as the now-trivial box tree. It analyzes the source side of the upstream copy. If it is side effect free the copy is removed entirely. If not, the jit modifies the copy into a minimal load of the boxed value, and this load should reproduce the necessary side effects. Fixes #12949. --- src/jit/gentree.cpp | 129 +++++++++++++++++++++++++++++++++++++++---- src/jit/gentree.h | 13 ++++- src/jit/importer.cpp | 12 ++-- 3 files changed, 135 insertions(+), 19 deletions(-) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 9503206874ce..15f791d92400 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -7441,7 +7441,8 @@ GenTreePtr Compiler::gtCloneExpr( case GT_BOX: copy = new (this, GT_BOX) - GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue); + GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue, + tree->gtBox.gtCopyStmtWhenInlinedBoxValue); break; case GT_INTRINSIC: @@ -12019,32 +12020,130 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) switch (oper) { - case GT_EQ: case GT_NE: + case GT_GT: // Optimize boxed value classes; these are always false. This IL is // generated when a generic value is tested against null: // ... foo(T x) { ... if ((object)x == null) ... if (val == 0 && op->IsBoxedValue()) { - // Change the assignment node so we don't generate any code for it. + // The tree under the box must be side effect free + // since we drop it if we optimize the compare. + assert(!gtTreeHasSideEffects(op->gtBox.gtOp.gtOp1, GTF_SIDE_EFFECT)); + // grab related parts for the optimization GenTreePtr asgStmt = op->gtBox.gtAsgStmtWhenInlinedBoxValue; assert(asgStmt->gtOper == GT_STMT); - GenTreePtr asg = asgStmt->gtStmt.gtStmtExpr; - assert(asg->gtOper == GT_ASG); + GenTreePtr copyStmt = op->gtBox.gtCopyStmtWhenInlinedBoxValue; + assert(copyStmt->gtOper == GT_STMT); #ifdef DEBUG if (verbose) { - printf("Bashing "); - printTreeID(asg); - printf(" to NOP as part of dead box operation\n"); + printf("\nAttempting to optimize BOX(valueType) %s null\n", GenTree::OpName(oper)); gtDispTree(tree); + printf("\nWith assign\n"); + gtDispTree(asgStmt); + printf("\nAnd copy\n"); + gtDispTree(copyStmt); } #endif + + // If we don't recognize the form of the assign, bail. + GenTreePtr asg = asgStmt->gtStmt.gtStmtExpr; + if (asg->gtOper != GT_ASG) + { + JITDUMP(" bailing; unexpected assignment op %s\n", GenTree::OpName(asg->gtOper)); + goto FAIL; + } + + // If we don't recognize the form of the copy, bail. + GenTree* copy = copyStmt->gtStmt.gtStmtExpr; + if (copy->gtOper != GT_ASG) + { + // GT_RET_EXPR is a tolerable temporary failure. + // The jit will revisit this optimization after + // inlining is done. + if (copy->gtOper == GT_RET_EXPR) + { + JITDUMP(" bailing; must wait for replacement of %s\n", GenTree::OpName(copy->gtOper)); + } + else + { + // Anything else is a missed case we should + // figure out how to handle. One known case + // is GT_COMMAs enclosing the GT_ASG we are + // looking for. + JITDUMP(" bailing; unexpected copy op %s\n", GenTree::OpName(copy->gtOper)); + } + goto FAIL; + } + + // If the copy is a struct copy, make sure we know how to isolate + // any source side effects. + GenTreePtr copySrc = copy->gtOp.gtOp2; + bool hasSrcSideEffect = false; + bool isStructCopy = false; + + if (gtTreeHasSideEffects(copySrc, GTF_SIDE_EFFECT)) + { + hasSrcSideEffect = true; + + if (copySrc->gtType == TYP_STRUCT) + { + isStructCopy = true; + + if ((copySrc->gtOper != GT_OBJ) && (copySrc->gtOper != GT_IND) && (copySrc->gtOper != GT_FIELD)) + { + // We don't know how to handle other cases, yet. + JITDUMP(" bailing; unexpected copy source struct op with side effect %s\n", + GenTree::OpName(copySrc->gtOper)); + goto FAIL; + } + } + } + + // Proceed with the optimization + // + // Change the assignment expression to a NOP. + JITDUMP("\nBashing NEWOBJ [%06u] to NOP\n", dspTreeID(asg)); asg->gtBashToNOP(); - op = gtNewIconNode(oper == GT_NE); + // Change the copy expression so it preserves key + // source side effects. + JITDUMP("\nBashing COPY [%06u]", dspTreeID(copy)); + + if (!hasSrcSideEffect) + { + // If there were no copy source side effects just bash + // the copy to a NOP. + copy->gtBashToNOP(); + JITDUMP(" to NOP\n"); + } + else if (!isStructCopy) + { + // For scalar types, go ahead and produce the + // value as the copy is fairly cheap and likely + // the optimizer can trim things down to just the + // minimal side effect parts. + copyStmt->gtStmt.gtStmtExpr = copySrc; + JITDUMP(" to scalar read via [%06u]\n", dspTreeID(copySrc)); + } + else + { + // For struct types read the first byte of the + // source struct; there's no need to read the + // entire thing, and no place to put it. + assert(copySrc->gtOper == GT_OBJ || copySrc->gtOper == GT_IND || copySrc->gtOper == GT_FIELD); + + copySrc->ChangeOper(GT_IND); + copySrc->gtType = TYP_BYTE; + copyStmt->gtStmt.gtStmtExpr = copySrc; + JITDUMP(" to read first byte of struct via modified [%06u]\n", dspTreeID(copySrc)); + } + + // Set up the result of the compare. + op = gtNewIconNode(oper != GT_EQ); if (fgGlobalMorph) { if (!fgIsInlining()) @@ -12057,9 +12156,15 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) op->gtNext = tree->gtNext; op->gtPrev = tree->gtPrev; } - fgSetStmtSeq(asgStmt); + + if (fgStmtListThreaded) + { + fgSetStmtSeq(asgStmt); + fgSetStmtSeq(copyStmt); + } return op; } + break; case GT_ADD: @@ -12227,7 +12332,9 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) break; } - /* The node is not foldable */ +/* The node is not foldable */ + +FAIL: return tree; diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 08154367cb31..0293fb84e9f8 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -2960,9 +2960,16 @@ struct GenTreeBox : public GenTreeUnOp // This is the statement that contains the assignment tree when the node is an inlined GT_BOX on a value // type GenTreePtr gtAsgStmtWhenInlinedBoxValue; - - GenTreeBox(var_types type, GenTreePtr boxOp, GenTreePtr asgStmtWhenInlinedBoxValue) - : GenTreeUnOp(GT_BOX, type, boxOp), gtAsgStmtWhenInlinedBoxValue(asgStmtWhenInlinedBoxValue) + // And this is the statement that copies from the value being boxed to the box payload + GenTreePtr gtCopyStmtWhenInlinedBoxValue; + + GenTreeBox(var_types type, + GenTreePtr boxOp, + GenTreePtr asgStmtWhenInlinedBoxValue, + GenTreePtr copyStmtWhenInlinedBoxValue) + : GenTreeUnOp(GT_BOX, type, boxOp) + , gtAsgStmtWhenInlinedBoxValue(asgStmtWhenInlinedBoxValue) + , gtCopyStmtWhenInlinedBoxValue(copyStmtWhenInlinedBoxValue) { } #if DEBUGGABLE_GENTREE diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 47f51063550e..b69550c482d9 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -5260,7 +5260,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) gtNewArgList(op2)); } - /* Remember that this basic block contains 'new' of an array */ + /* Remember that this basic block contains 'new' of an object */ compCurBB->bbFlags |= BBF_HAS_NEWOBJ; GenTreePtr asg = gtNewTempAssign(impBoxTemp, op1); @@ -5302,11 +5302,13 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) op1 = gtNewAssignNode(gtNewOperNode(GT_IND, lclTyp, op1), exprToBox); } - op2 = gtNewLclvNode(impBoxTemp, TYP_REF); - op1 = gtNewOperNode(GT_COMMA, TYP_REF, op1, op2); + // Set up this copy as a second assignment. + GenTreePtr copyStmt = impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs); - // Record that this is a "box" node. - op1 = new (this, GT_BOX) GenTreeBox(TYP_REF, op1, asgStmt); + op1 = gtNewLclvNode(impBoxTemp, TYP_REF); + + // Record that this is a "box" node and keep track of the matching parts. + op1 = new (this, GT_BOX) GenTreeBox(TYP_REF, op1, asgStmt, copyStmt); // If it is a value class, mark the "box" node. We can use this information // to optimise several cases: