Skip to content

Commit

Permalink
Implement volatile barrier APIs (#107843)
Browse files Browse the repository at this point in the history
* Initial commit

* Follow-up commit

Now that I'm on the correct branch

* jit-format

* Implement Feedback

- And fix missed file from jit-format

* Fix typos & use appropriate barrier type in mono

* Use optimised dmb on arm64 where possible on mini-mono runtime

- And fix up some FIXMEs & comments re cpobj and cpblk

* Update Memory-model.md

* Address feedback

* Fix compile error

* Move BarrierKind into compiler.h

* Fix build & jit-format

* Update jiteeversionguid.h

* Update jiteeversionguid.h
  • Loading branch information
hamarb123 authored Oct 17, 2024
1 parent bf23102 commit 830ce3a
Show file tree
Hide file tree
Showing 23 changed files with 129 additions and 84 deletions.
6 changes: 4 additions & 2 deletions docs/design/specs/Memory-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,16 @@ The effects of ordinary reads and writes can be reordered as long as that preser
- IL load instructions with `volatile.` prefix when instruction supports such prefix
- `System.Threading.Volatile.Read`
- `System.Thread.VolatileRead`
- Acquiring a lock (`System.Threading.Monitor.Enter` or entering a synchronized method)
- `System.Threading.Volatile.ReadBarrier` (applies to all prior reads)
- Acquiring a lock (`System.Threading.Monitor.Enter` or entering a synchronized method, applies to all prior reads)

* **Volatile writes** have "release semantics" - the effects of a volatile write will not be observable before effects of all previous, in program order, reads and writes become observable.
Operations with release semantics:
- IL store instructions with `volatile.` prefix when such prefix is supported
- `System.Threading.Volatile.Write`
- `System.Thread.VolatileWrite`
- Releasing a lock (`System.Threading.Monitor.Exit` or leaving a synchronized method)
- `System.Threading.Volatile.WriteBarrier` (applies to all following writes)
- Releasing a lock (`System.Threading.Monitor.Exit` or leaving a synchronized method, applies to all following writes)

* **volatile. initblk** has "release semantics" - the effects of `.volatile initblk` will not be observable earlier than the effects of preceeding reads and writes.

Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 7a8070f8-2951-4394-b97b-3d7c9bd8c91f */
0x7a8070f8,
0x2951,
0x4394,
{0xb9, 0x7b, 0x3d, 0x7c, 0x9b, 0xd8, 0xc9, 0x1f}
constexpr GUID JITEEVersionIdentifier = { /* ac04f79d-8d06-4a15-9692-1b4f59265825 */
0xac04f79d,
0x8d06,
0x4a15,
{0x96, 0x92, 0x1b, 0x4f, 0x59, 0x26, 0x58, 0x25}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 0 additions & 6 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1627,12 +1627,6 @@ class CodeGen final : public CodeGenInterface

void instGen_Return(unsigned stkArgSize);

enum BarrierKind
{
BARRIER_FULL, // full barrier
BARRIER_LOAD_ONLY, // load barier
};

void instGen_MemoryBarrier(BarrierKind barrierKind = BARRIER_FULL);

void instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags = INS_FLAGS_DONT_CARE);
Expand Down
8 changes: 6 additions & 2 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3704,8 +3704,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)

if (cpObjNode->IsVolatile())
{
// issue a full memory barrier before a volatile CpObj operation
instGen_MemoryBarrier();
// issue a store barrier before a volatile CpObj operation
instGen_MemoryBarrier(BARRIER_STORE_ONLY);
}

emitter* emit = GetEmitter();
Expand Down Expand Up @@ -5755,6 +5755,10 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
}
#endif // DEBUG

// We cannot emit BARRIER_STORE_ONLY better than BARRIER_FULL on arm64 today
if (barrierKind == BARRIER_STORE_ONLY)
barrierKind = BARRIER_FULL;

// Avoid emitting redundant memory barriers on arm64 if they belong to the same IG
// and there were no memory accesses in-between them
emitter::instrDesc* lastMemBarrier = GetEmitter()->emitLastMemBarrier;
Expand Down
14 changes: 8 additions & 6 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_MEMORYBARRIER:
{
CodeGen::BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;
BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD
? BARRIER_LOAD_ONLY
: (treeNode->gtFlags & GTF_MEMORYBARRIER_STORE ? BARRIER_STORE_ONLY : BARRIER_FULL);

instGen_MemoryBarrier(barrierKind);
break;
Expand Down Expand Up @@ -2800,8 +2802,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)

if (node->IsVolatile())
{
// issue a full memory barrier before a volatile CpBlk operation
instGen_MemoryBarrier();
// issue a store barrier before a volatile CpBlk operation
instGen_MemoryBarrier(BARRIER_STORE_ONLY);
}

emitter* emit = GetEmitter();
Expand Down Expand Up @@ -3225,8 +3227,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)

if (initBlkNode->IsVolatile())
{
// issue a full memory barrier before a volatile initBlock Operation
instGen_MemoryBarrier();
// issue a store barrier before a volatile initBlock Operation
instGen_MemoryBarrier(BARRIER_STORE_ONLY);
}

// str xzr, [dstReg]
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4434,8 +4434,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_MEMORYBARRIER:
{
CodeGen::BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;
BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD
? BARRIER_LOAD_ONLY
: (treeNode->gtFlags & GTF_MEMORYBARRIER_STORE ? BARRIER_STORE_ONLY : BARRIER_FULL);

instGen_MemoryBarrier(barrierKind);
break;
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4491,8 +4491,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_MEMORYBARRIER:
{
CodeGen::BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;
BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD
? BARRIER_LOAD_ONLY
: (treeNode->gtFlags & GTF_MEMORYBARRIER_STORE ? BARRIER_STORE_ONLY : BARRIER_FULL);

instGen_MemoryBarrier(barrierKind);
break;
Expand Down
8 changes: 5 additions & 3 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2144,8 +2144,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_MEMORYBARRIER:
{
CodeGen::BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;
BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD
? BARRIER_LOAD_ONLY
: (treeNode->gtFlags & GTF_MEMORYBARRIER_STORE ? BARRIER_STORE_ONLY : BARRIER_FULL);

instGen_MemoryBarrier(barrierKind);
break;
Expand Down Expand Up @@ -11075,7 +11077,7 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
// Arguments:
// barrierKind - kind of barrier to emit (Load-only is no-op on xarch)
// barrierKind - kind of barrier to emit (Load-only and Store-only are no-ops on xarch)
//
// Notes:
// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1
Expand Down
9 changes: 8 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,13 @@ struct VarScopeDsc
#endif
};

enum BarrierKind
{
BARRIER_FULL, // full barrier
BARRIER_LOAD_ONLY, // load barrier
BARRIER_STORE_ONLY, // store barrier
};

// This class stores information associated with a LclVar SSA definition.
class LclSsaVarDsc
{
Expand Down Expand Up @@ -3455,7 +3462,7 @@ class Compiler
#endif
#endif // FEATURE_HW_INTRINSICS

GenTree* gtNewMemoryBarrier(bool loadOnly = false);
GenTree* gtNewMemoryBarrier(BarrierKind barrierKind);

GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd);

Expand Down
10 changes: 7 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8630,19 +8630,23 @@ GenTreeBlk* Compiler::gtNewBlkIndir(ClassLayout* layout, GenTree* addr, GenTreeF
// gtNewMemoryBarrier: Create a memory barrier node
//
// Arguments:
// loadOnly - relaxes the full memory barrier to be load-only
// barrierKind - the kind of barrer we are creating
//
// Return Value:
// The created GT_MEMORYBARRIER node.
//
GenTree* Compiler::gtNewMemoryBarrier(bool loadOnly)
GenTree* Compiler::gtNewMemoryBarrier(BarrierKind barrierKind)
{
GenTree* tree = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
tree->gtFlags |= GTF_GLOB_REF | GTF_ASG;
if (loadOnly)
if (barrierKind == BARRIER_LOAD_ONLY)
{
tree->gtFlags |= GTF_MEMORYBARRIER_LOAD;
}
else if (barrierKind == BARRIER_STORE_ONLY)
{
tree->gtFlags |= GTF_MEMORYBARRIER_STORE;
}
return tree;
}

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ enum GenTreeFlags : unsigned int
GTF_TLS_GET_ADDR = 0x01000000, // GT_CALL -- call is tls_get_addr

GTF_MEMORYBARRIER_LOAD = 0x40000000, // GT_MEMORYBARRIER -- Load barrier
GTF_MEMORYBARRIER_STORE = 0x80000000, // GT_MEMORYBARRIER -- Store barrier

GTF_FLD_TLS = 0x80000000, // GT_FIELD_ADDR -- field address is a Windows x86 TLS reference
GTF_FLD_DEREFERENCED = 0x40000000, // GT_FIELD_ADDR -- used to preserve previous behavior
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10644,10 +10644,10 @@ void Compiler::impImportBlockCode(BasicBlock* block)

if (isVolatile)
{
// Wrap with memory barriers: full-barrier + call + load-barrier
impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI);
// Wrap with memory barriers: store-barrier + call + load-barrier
impAppendTree(gtNewMemoryBarrier(BARRIER_STORE_ONLY), CHECK_SPILL_ALL, impCurStmtDI);
impAppendTree(call, CHECK_SPILL_ALL, impCurStmtDI);
op1 = gtNewMemoryBarrier(true);
op1 = gtNewMemoryBarrier(BARRIER_LOAD_ONLY);
}
else
{
Expand Down
37 changes: 29 additions & 8 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3381,9 +3381,11 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
case NI_System_Threading_Interlocked_Exchange:
case NI_System_Threading_Interlocked_ExchangeAdd:
case NI_System_Threading_Interlocked_MemoryBarrier:
case NI_System_Threading_Interlocked_ReadMemoryBarrier:
case NI_System_Threading_Volatile_Read:
case NI_System_Threading_Volatile_Write:
case NI_System_Threading_Volatile_ReadBarrier:
case NI_System_Threading_Volatile_WriteBarrier:

betterToExpand = true;
break;

Expand Down Expand Up @@ -4207,12 +4209,27 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64)

case NI_System_Threading_Interlocked_MemoryBarrier:
case NI_System_Threading_Interlocked_ReadMemoryBarrier:
{
assert(sig->numArgs == 0);
// On XARCH `NI_System_Threading_Interlocked_ReadMemoryBarrier` fences need not be emitted.
retNode = gtNewMemoryBarrier(BARRIER_FULL);
break;
}

case NI_System_Threading_Volatile_ReadBarrier:
{
assert(sig->numArgs == 0);
// On XARCH `NI_System_Threading_Volatile_ReadBarrier` fences need not be emitted.
// However, we still need to capture the effect on reordering.
retNode = gtNewMemoryBarrier(BARRIER_LOAD_ONLY);
break;
}

case NI_System_Threading_Volatile_WriteBarrier:
{
assert(sig->numArgs == 0);
// On XARCH `NI_System_Threading_Volatile_WriteBarrier` fences need not be emitted.
// However, we still need to capture the effect on reordering.
retNode = gtNewMemoryBarrier(ni == NI_System_Threading_Interlocked_ReadMemoryBarrier);
retNode = gtNewMemoryBarrier(BARRIER_STORE_ONLY);
break;
}

Expand Down Expand Up @@ -10835,10 +10852,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
{
result = NI_System_Threading_Interlocked_MemoryBarrier;
}
else if (strcmp(methodName, "ReadMemoryBarrier") == 0)
{
result = NI_System_Threading_Interlocked_ReadMemoryBarrier;
}
}
else if (strcmp(className, "Thread") == 0)
{
Expand All @@ -10861,6 +10874,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
{
result = NI_System_Threading_Volatile_Write;
}
else if (strcmp(methodName, "ReadBarrier") == 0)
{
result = NI_System_Threading_Volatile_ReadBarrier;
}
else if (strcmp(methodName, "WriteBarrier") == 0)
{
result = NI_System_Threading_Volatile_WriteBarrier;
}
}
}
}
Expand Down
10 changes: 4 additions & 6 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8747,19 +8747,17 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode)
BlockRange().Remove(dataPlaceholder);
}

// Wrap with memory barriers on weak memory models
// if the block store was volatile
#ifndef TARGET_XARCH
// Wrap with memory barriers if the block store was volatile
// Note: on XARCH these half-barriers only have optimization inhibiting effects, and do not emit anything
if (isVolatile)
{
GenTree* firstBarrier = comp->gtNewMemoryBarrier();
GenTree* secondBarrier = comp->gtNewMemoryBarrier(/*loadOnly*/ true);
GenTree* firstBarrier = comp->gtNewMemoryBarrier(BARRIER_STORE_ONLY);
GenTree* secondBarrier = comp->gtNewMemoryBarrier(BARRIER_LOAD_ONLY);
BlockRange().InsertBefore(call, firstBarrier);
BlockRange().InsertAfter(call, secondBarrier);
LowerNode(firstBarrier);
LowerNode(secondBarrier);
}
#endif
}

//------------------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/namedintrinsiclist.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ enum NamedIntrinsic : unsigned short
NI_System_Threading_Thread_get_ManagedThreadId,
NI_System_Threading_Volatile_Read,
NI_System_Threading_Volatile_Write,
NI_System_Threading_Volatile_ReadBarrier,
NI_System_Threading_Volatile_WriteBarrier,
NI_System_Type_get_IsEnum,
NI_System_Type_GetEnumUnderlyingType,
NI_System_Type_get_IsValueType,
Expand Down Expand Up @@ -144,7 +146,6 @@ enum NamedIntrinsic : unsigned short
NI_System_Threading_Interlocked_Exchange,
NI_System_Threading_Interlocked_ExchangeAdd,
NI_System_Threading_Interlocked_MemoryBarrier,
NI_System_Threading_Interlocked_ReadMemoryBarrier,

// These two are special marker IDs so that we still get the inlining profitability boost
NI_System_Numerics_Intrinsic,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ internal static CastResult TryGet(int[] table, nuint source, nuint target)
if (entrySource == source)
{
// we do ordinary reads of the entry parts and
// Interlocked.ReadMemoryBarrier() before reading the version
// Volatile.ReadBarrier() before reading the version
nuint entryTargetAndResult = pEntry._targetAndResult;
// target never has its lower bit set.
// a matching entryTargetAndResult would the have same bits, except for the lowest one, which is the result.
Expand All @@ -177,7 +177,7 @@ internal static CastResult TryGet(int[] table, nuint source, nuint target)
// - use acquires for both _source and _targetAndResults or
// - issue a load barrier before reading _version
// benchmarks on available hardware (Jan 2020) show that use of a read barrier is cheaper.
Interlocked.ReadMemoryBarrier();
Volatile.ReadBarrier();

if (version != pEntry._version)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ internal bool TryGet(TKey key, out TValue? value)
value = pEntry._value;

// make sure the second read of 'version' happens after reading '_value'
Interlocked.ReadMemoryBarrier();
Volatile.ReadBarrier();

// mask the lower version bit to make it even.
// This way we can check if version is odd or changing in just one compare.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -716,14 +716,6 @@ public static ulong Or(ref ulong location1, ulong value) =>
/// </summary>
[Intrinsic]
public static void MemoryBarrier() => MemoryBarrier();

/// <summary>
/// Synchronizes memory access as follows:
/// The processor that executes the current thread cannot reorder instructions in such a way that memory reads before
/// the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses that follow the call to <see cref="ReadMemoryBarrier"/>.
/// </summary>
[Intrinsic]
internal static void ReadMemoryBarrier() => ReadMemoryBarrier();
#endregion
}
}
Loading

0 comments on commit 830ce3a

Please sign in to comment.