Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: fix self-conflicting HFA arg prolog handling for arm64 #92355

Merged
merged 2 commits into from
Sep 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 96 additions & 2 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2919,6 +2919,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
bool writeThru; // true if the argument gets homed to both stack and register
bool processed; // true after we've processed the argument (and it is in its final location)
bool circular; // true if this register participates in a circular dependency loop.
bool hfaConflict; // arg is part of an HFA that will end up in the same register
// but in a different slot (eg arg in s3 = v3.s[0], needs to end up in v3.s[3])
} regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};

unsigned varNum;
Expand Down Expand Up @@ -3284,7 +3286,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
* A circular dependency is a set of registers R1, R2, ..., Rn
* such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */

bool change = true;
bool change = true;
bool hasHfaConflict = false;
if (regArgMaskLive)
{
/* Possible circular dependencies still exist; the previous pass was not enough
Expand Down Expand Up @@ -3337,10 +3340,32 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
{
// This must be a SIMD type that's fully enregistered, but is passed as an HFA.
// Each field will be inserted into the same destination register.
//
assert(varTypeIsSIMD(varDsc));
assert(regArgTab[argNum].slot <= (int)varDsc->lvHfaSlots());
assert(argNum > 0);
assert(regArgTab[argNum - 1].varNum == varNum);

// If the field is passed in the same register as the destination,
// but is in the wrong part of the register, mark it specially so later
// we make sure to move it to the right spot before "freeing" the destination.
//
destRegNum = varDsc->GetRegNum();
if (regNum == destRegNum)
{
// We only get here if the HFA part is not already in the right slot in
// the destination. That is, it is not slot-1.
//
const int slot = regArgTab[argNum].slot;
assert(slot != 1);
JITDUMP("HFA conflict; arg num %u needs to move from %s[%u] to %s[%u]\n", argNum,
getRegName(regNum), 0, getRegName(destRegNum), slot - 1);
regArgTab[argNum].hfaConflict = true;

// We'll need to do a special pass later to resolve these
//
hasHfaConflict = true;
}
regArgMaskLive &= ~genRegMask(regNum);
regArgTab[argNum].circular = false;
change = true;
Expand Down Expand Up @@ -3736,13 +3761,13 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
{
size = EA_4BYTE;
}
// HVA types...?

/* move the dest reg (begReg) in the extra reg */

assert(xtraReg != REG_NA);

regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);

GetEmitter()->emitIns_Mov(insCopy, size, xtraReg, begRegNum, /* canSkip */ false);

regSet.verifyRegUsed(xtraReg);
Expand Down Expand Up @@ -3823,6 +3848,75 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
}
}

#if defined(TARGET_ARM64) && defined(FEATURE_SIMD)
// If we saw any hfa conflicts, handle those now.
//
if (hasHfaConflict)
{
// Up above we noticed that there was at least one non-slot-1 HFA arg whose
// destination register was the same as the arg register.
//
// For example, say an HFA was passed as s0-s3 and the destination was v3.
// s3 is in the right register, but not in the right slot in the register.
//
// We handle this by first moving the conflicting part to the right slot
// in the destination (via pass 0 below), and then moving the remaining parts
// to their respective slots (via pass 1).
//
// Note the slot index in the register is one less than value of
// regArgTab[argNum].slot, so a slot-1 hfa arg goes into slot 0 of the destination).
//
// So for the above example, we'd first move the "slot-4" s3 (== v3.s[0]) to v3.s[3].
// Then we can insert s0 to v3.s[0]) and so on.
//
// We can exempt slot-1 cases as the conflicting part is already in the
// right slot, and code lower down correctly handles populating the remaining slots.
//
for (argNum = 0; argNum < argMax; argNum++)
{
if (!regArgTab[argNum].hfaConflict)
{
continue;
}

varNum = regArgTab[argNum].varNum;
varDsc = compiler->lvaGetDesc(varNum);
const regNumber destRegNum = varDsc->GetRegNum();
const var_types regType = regArgTab[argNum].type;
const unsigned firstArgNum = argNum - (regArgTab[argNum].slot - 1);
const unsigned lastArgNum = firstArgNum + varDsc->lvHfaSlots() - 1;

assert(varDsc->lvIsHfa());
assert((argNum >= firstArgNum) && (argNum <= lastArgNum));
assert(destRegNum == genMapRegArgNumToRegNum(argNum, regType));

// Pass 0: move the conflicting part; Pass1: insert everything else
//
for (int pass = 0; pass <= 1; pass++)
{
for (unsigned currentArgNum = firstArgNum; currentArgNum <= lastArgNum; currentArgNum++)
{
const regNumber regNum = genMapRegArgNumToRegNum(currentArgNum, regType);
bool insertArg =
((pass == 0) && (currentArgNum == argNum)) || ((pass == 1) && (currentArgNum != argNum));

if (insertArg)
{
assert(!regArgTab[currentArgNum].processed);

// EA_4BYTE is probably wrong here (and below)
// todo -- suppress self move
GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, regNum,
regArgTab[currentArgNum].slot - 1, 0);
regArgTab[currentArgNum].processed = true;
regArgMaskLive &= ~genRegMask(regNum);
}
}
}
}
}
#endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD)

/* Finally take care of the remaining arguments that must be enregistered */
while (regArgMaskLive)
{
Expand Down
4 changes: 0 additions & 4 deletions src/libraries/tests.proj
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,6 @@
</ItemGroup>

<ItemGroup Condition="'$(TestNativeAot)' == 'true' and '$(RunDisabledNativeAotTests)' != 'true'">
<!-- https://github.com/dotnet/runtime/issues/83167 -->
<ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Numerics.Vectors\tests\System.Numerics.Vectors.Tests.csproj"
Condition="'$(TargetArchitecture)' == 'arm64'" />

<!-- https://github.com/dotnet/runtime/issues/72908 -->
<ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Reflection.MetadataLoadContext\tests\System.Reflection.MetadataLoadContext.Tests.csproj" />

Expand Down
27 changes: 27 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_83167/Runtime_83167.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using Xunit;

public class Runtime_83167
{
[MethodImpl(MethodImplOptions.NoOptimization)]
[Fact]
public static int Problem()
{
Plane p = new Plane (new Vector3(2.0f, 3.0f, 4.0f), 1.0f);
int pH = p.GetHashCode();
EqualityComparer<Plane> c = EqualityComparer<Plane>.Default;
int cH = c.GetHashCode(p);
if (pH != cH)
{
Console.WriteLine($"Failed: {pH:X8} != {cH:X8}");
return 101;
}
return 100;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
<DebugType>None</DebugType>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
Loading