-
Notifications
You must be signed in to change notification settings - Fork 2.7k
JIT: Fix value type box optimization #13016
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7454,7 +7454,8 @@ GenTreePtr Compiler::gtCloneExpr( | |
|
||
case GT_BOX: | ||
copy = new (this, GT_BOX) | ||
GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue); | ||
GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue, | ||
tree->gtBox.gtCopyStmtWhenInlinedBoxValue); | ||
break; | ||
|
||
case GT_INTRINSIC: | ||
|
@@ -12032,32 +12033,168 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) | |
|
||
switch (oper) | ||
{ | ||
|
||
case GT_EQ: | ||
case GT_NE: | ||
case GT_GT: | ||
// Optimize boxed value classes; these are always false. This IL is | ||
// generated when a generic value is tested against null: | ||
// <T> ... foo(T x) { ... if ((object)x == null) ... | ||
if (val == 0 && op->IsBoxedValue()) | ||
{ | ||
// Change the assignment node so we don't generate any code for it. | ||
// The tree under the box must be side effect free | ||
// since we drop it if we optimize the compare. | ||
assert(!gtTreeHasSideEffects(op->gtBox.gtOp.gtOp1, GTF_SIDE_EFFECT)); | ||
|
||
// grab related parts for the optimization | ||
GenTreePtr asgStmt = op->gtBox.gtAsgStmtWhenInlinedBoxValue; | ||
assert(asgStmt->gtOper == GT_STMT); | ||
GenTreePtr asg = asgStmt->gtStmt.gtStmtExpr; | ||
assert(asg->gtOper == GT_ASG); | ||
GenTreePtr copyStmt = op->gtBox.gtCopyStmtWhenInlinedBoxValue; | ||
assert(copyStmt->gtOper == GT_STMT); | ||
#ifdef DEBUG | ||
if (verbose) | ||
{ | ||
printf("Bashing "); | ||
printTreeID(asg); | ||
printf(" to NOP as part of dead box operation\n"); | ||
printf("\nAttempting to optimize BOX(valueType) %s null\n", GenTree::OpName(oper)); | ||
gtDispTree(tree); | ||
printf("\nWith assign\n"); | ||
gtDispTree(asgStmt); | ||
printf("\nAnd copy\n"); | ||
gtDispTree(copyStmt); | ||
} | ||
#endif | ||
|
||
// We don't expect GT_GT with signed compares, and we | ||
// can't predict the result if we do see it, since the | ||
// boxed object addr could have its high bit set. | ||
if ((oper == GT_GT) && !tree->IsUnsigned()) | ||
{ | ||
JITDUMP(" bailing; unexpected signed compare via GT_GT\n"); | ||
goto FAIL; | ||
} | ||
|
||
// If we don't recognize the form of the assign, bail. | ||
GenTreePtr asg = asgStmt->gtStmt.gtStmtExpr; | ||
if (asg->gtOper != GT_ASG) | ||
{ | ||
JITDUMP(" bailing; unexpected assignment op %s\n", GenTree::OpName(asg->gtOper)); | ||
goto FAIL; | ||
} | ||
|
||
// If we don't recognize the form of the copy, bail. | ||
GenTree* copy = copyStmt->gtStmt.gtStmtExpr; | ||
if (copy->gtOper != GT_ASG) | ||
{ | ||
// GT_RET_EXPR is a tolerable temporary failure. | ||
// The jit will revisit this optimization after | ||
// inlining is done. | ||
if (copy->gtOper == GT_RET_EXPR) | ||
{ | ||
JITDUMP(" bailing; must wait for replacement of copy %s\n", GenTree::OpName(copy->gtOper)); | ||
} | ||
else | ||
{ | ||
// Anything else is a missed case we should | ||
// figure out how to handle. One known case | ||
// is GT_COMMAs enclosing the GT_ASG we are | ||
// looking for. | ||
JITDUMP(" bailing; unexpected copy op %s\n", GenTree::OpName(copy->gtOper)); | ||
} | ||
goto FAIL; | ||
} | ||
|
||
// If the copy is a struct copy, make sure we know how to isolate | ||
// any source side effects. | ||
GenTreePtr copySrc = copy->gtOp.gtOp2; | ||
|
||
// If the copy source is from a pending inline, wait for it to resolve. | ||
if (copySrc->gtOper == GT_RET_EXPR) | ||
{ | ||
JITDUMP(" bailing; must wait for replacement of copy source %s\n", | ||
GenTree::OpName(copySrc->gtOper)); | ||
goto FAIL; | ||
} | ||
|
||
bool hasSrcSideEffect = false; | ||
bool isStructCopy = false; | ||
|
||
if (gtTreeHasSideEffects(copySrc, GTF_SIDE_EFFECT)) | ||
{ | ||
hasSrcSideEffect = true; | ||
|
||
if (copySrc->gtType == TYP_STRUCT) | ||
{ | ||
isStructCopy = true; | ||
|
||
if ((copySrc->gtOper != GT_OBJ) && (copySrc->gtOper != GT_IND) && (copySrc->gtOper != GT_FIELD)) | ||
{ | ||
// We don't know how to handle other cases, yet. | ||
JITDUMP(" bailing; unexpected copy source struct op with side effect %s\n", | ||
GenTree::OpName(copySrc->gtOper)); | ||
goto FAIL; | ||
} | ||
} | ||
} | ||
|
||
// Proceed with the optimization | ||
// | ||
// Change the assignment expression to a NOP. | ||
JITDUMP("\nBashing NEWOBJ [%06u] to NOP\n", dspTreeID(asg)); | ||
asg->gtBashToNOP(); | ||
|
||
op = gtNewIconNode(oper == GT_NE); | ||
// Change the copy expression so it preserves key | ||
// source side effects. | ||
JITDUMP("\nBashing COPY [%06u]", dspTreeID(copy)); | ||
|
||
if (!hasSrcSideEffect) | ||
{ | ||
// If there were no copy source side effects just bash | ||
// the copy to a NOP. | ||
copy->gtBashToNOP(); | ||
JITDUMP(" to NOP\n"); | ||
} | ||
else if (!isStructCopy) | ||
{ | ||
// For scalar types, go ahead and produce the | ||
// value as the copy is fairly cheap and likely | ||
// the optimizer can trim things down to just the | ||
// minimal side effect parts. | ||
copyStmt->gtStmt.gtStmtExpr = copySrc; | ||
JITDUMP(" to scalar read via [%06u]\n", dspTreeID(copySrc)); | ||
} | ||
else | ||
{ | ||
// For struct types read the first byte of the | ||
// source struct; there's no need to read the | ||
// entire thing, and no place to put it. | ||
assert(copySrc->gtOper == GT_OBJ || copySrc->gtOper == GT_IND || copySrc->gtOper == GT_FIELD); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if you take the logic you're using for scalars and apply it to structs as well? Does it produce something incorrect, or just inefficient? And if inefficient, is it common? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suspect that doing this on x86 will have a negative impact on register allocation, we need to generate something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Presumably we are just reading and throwing away the result here, and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as treating structs like scalars goes -- because we're removing the destination of the copy entirely (since it was the box payload in the newobj which we've zapped) we either need to suppress/alter the copy to something the jit can handle as a standalone source, or need to add a suitable destination to ensure the copy ends up somewhere. GT_OBJ can't be at the root of a tree, there is code in morph (and possibly elsewhere) that assumes it has a parent. So for that case at least we'd need to allocate a new temp to be the LHS. Not sure about GT_IND and GT_FIELD cases -- perhaps if they are at the root of a tree the jit will be able to create the right kind of temp to hold the result. Copying the entire struct just to cause the side effects from address formation and null checking is indeed inefficient, though as @mikedn notes this evidently is what JIT64 does. The struct case is fairly common. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Am going to change the struct case to read a 32 bit unsigned value, so we end up with slightly smaller encodings on x86/x64; should also mitigate any worries about byteable dests. 458B11 mov r10d, dword ptr [r9] instead of 4D0FBE11 movsx r10, byte ptr [r9] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You still will have to handle the smaller struct case (such as a three byte struct) differently. Since you are not allowed to read beyond the end of the struct (as that may be an unmapped page) |
||
copySrc->ChangeOper(GT_IND); | ||
copySrc->gtType = TYP_BYTE; | ||
copyStmt->gtStmt.gtStmtExpr = copySrc; | ||
JITDUMP(" to read first byte of struct via modified [%06u]\n", dspTreeID(copySrc)); | ||
} | ||
|
||
// Set up the result of the compare. | ||
int compareResult = 0; | ||
if (oper == GT_GT) | ||
{ | ||
// GT_GT(null, box) == false | ||
// GT_GT(box, null) == true | ||
compareResult = (op1 == op); | ||
} | ||
else if (oper == GT_EQ) | ||
{ | ||
// GT_EQ(box, null) == false | ||
// GT_EQ(null, box) == false | ||
compareResult = 0; | ||
} | ||
else | ||
{ | ||
assert(oper == GT_NE); | ||
// GT_NE(box, null) == true | ||
// GT_NE(null, box) == true | ||
compareResult = 1; | ||
} | ||
op = gtNewIconNode(compareResult); | ||
|
||
if (fgGlobalMorph) | ||
{ | ||
if (!fgIsInlining()) | ||
|
@@ -12070,9 +12207,15 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) | |
op->gtNext = tree->gtNext; | ||
op->gtPrev = tree->gtPrev; | ||
} | ||
fgSetStmtSeq(asgStmt); | ||
|
||
if (fgStmtListThreaded) | ||
{ | ||
fgSetStmtSeq(asgStmt); | ||
fgSetStmtSeq(copyStmt); | ||
} | ||
return op; | ||
} | ||
|
||
break; | ||
|
||
case GT_ADD: | ||
|
@@ -12240,7 +12383,9 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) | |
break; | ||
} | ||
|
||
/* The node is not foldable */ | ||
/* The node is not foldable */ | ||
|
||
FAIL: | ||
|
||
return tree; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
|
||
public struct S<K> | ||
{ | ||
public int x; | ||
public int y; | ||
public K val; | ||
} | ||
|
||
public class X<K,V> | ||
{ | ||
public X(K k) | ||
{ | ||
a = new S<K>[2]; | ||
a[1].val = k; | ||
a[1].x = 3; | ||
a[1].y = 4; | ||
} | ||
|
||
public void Assert(bool b) | ||
{ | ||
if (!b) throw new Exception("bad!"); | ||
} | ||
|
||
public int Test() | ||
{ | ||
int r = 0; | ||
for (int i = 0; i < a.Length; i++) | ||
{ | ||
Assert(a[i].val != null); | ||
r += a[i].val.GetHashCode(); | ||
} | ||
return r; | ||
} | ||
|
||
S<K>[] a; | ||
} | ||
|
||
class B | ||
{ | ||
public static int Main() | ||
{ | ||
var a = new X<int, string>(11); | ||
int z = a.Test(); | ||
return (z == 11 ? 100 : 0); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> | ||
<PropertyGroup> | ||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | ||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> | ||
<AssemblyName>$(MSBuildProjectName)</AssemblyName> | ||
<SchemaVersion>2.0</SchemaVersion> | ||
<ProjectGuid>{7B521917-193E-48BB-86C6-FE013F3DFF35}</ProjectGuid> | ||
<OutputType>Exe</OutputType> | ||
<AppDesignerFolder>Properties</AppDesignerFolder> | ||
<FileAlignment>512</FileAlignment> | ||
<ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> | ||
<ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT\11.0\UITestExtensionPackages</ReferencePath> | ||
<SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir> | ||
|
||
<NuGetPackageImportStamp>7a9bfb7d</NuGetPackageImportStamp> | ||
</PropertyGroup> | ||
<!-- Default configurations to help VS understand the configurations --> | ||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> | ||
</PropertyGroup> | ||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> | ||
</PropertyGroup> | ||
<ItemGroup> | ||
<CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies"> | ||
<Visible>False</Visible> | ||
</CodeAnalysisDependentAssemblyPaths> | ||
</ItemGroup> | ||
<PropertyGroup> | ||
<DebugType></DebugType> | ||
<Optimize>True</Optimize> | ||
<AllowUnsafeBlocks>True</AllowUnsafeBlocks> | ||
</PropertyGroup> | ||
<ItemGroup> | ||
<Compile Include="$(MSBuildProjectName).cs" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" /> | ||
</ItemGroup> | ||
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> | ||
<PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "> | ||
</PropertyGroup> | ||
</Project> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where's the check for GTF_UNSIGNED?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also: need to account for operand order...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't get CSC to emit the ldnull first, but it could happen in IL. And we shouldn't see signed compares but to be on the safe side I'll add a bail out for that too.