From b659701cef454ea0cd78e12f72ed2b4c3b8036c5 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Wed, 21 Aug 2024 22:24:08 +0000 Subject: [PATCH] ARM64EC: AVX register save/restore support under wine Requires a corresponding wine patch to function: https://github.com/bylaws/wine/commit/7d411bd74bd03ba90cf202623560733cb229cc7d Since AVX still works in almost all cases without this, I chose not to e.g. detect the patch and disable AVX support when it is missing. --- Source/Windows/ARM64EC/Module.cpp | 15 +++++++++++++-- Source/Windows/include/winnt.h | 17 +++++++++++++++++ Source/Windows/include/winternl.h | 2 ++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index f2d8ed10a4..fdb73b7be6 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -291,7 +291,13 @@ static void LoadStateFromECContext(FEXCore::Core::InternalThreadState* Thread, C if ((Context.ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) { // Floating-point register state - CTX->SetXMMRegistersFromState(Thread, reinterpret_cast(Context.FltSave.XmmRegisters), nullptr); + if ((Context.ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { + const auto* Ymm = RtlLocateExtendedFeature(reinterpret_cast(&Context + 1), XSTATE_AVX, nullptr); + CTX->SetXMMRegistersFromState(Thread, reinterpret_cast(Context.FltSave.XmmRegisters), + reinterpret_cast(Ymm)); + } else { + CTX->SetXMMRegistersFromState(Thread, reinterpret_cast(Context.FltSave.XmmRegisters), nullptr); + } memcpy(State.mm, Context.FltSave.FloatRegisters, sizeof(State.mm)); State.FCW = Context.FltSave.ControlWord; @@ -330,6 +336,11 @@ static ARM64_NT_CONTEXT StoreStateToPackedECContext(FEXCore::Core::InternalThrea ARM64_NT_CONTEXT ECContext {}; ECContext.ContextFlags = CONTEXT_ARM64_FULL; + if (CPUFeatures->IsFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) { + // This is a FEX extension and requires corresponding wine-side 
patches to be of use, however it is harmless to set + // even if those patches are not used. + ECContext.ContextFlags |= CONTEXT_ARM64_FEX_YMMSTATE; + } auto& State = Thread->CurrentFrame->State; @@ -352,7 +363,7 @@ static ARM64_NT_CONTEXT StoreStateToPackedECContext(FEXCore::Core::InternalThrea ECContext.Pc = State.rip; - CTX->ReconstructXMMRegisters(Thread, reinterpret_cast<__uint128_t*>(&ECContext.V[0]), nullptr); + CTX->ReconstructXMMRegisters(Thread, reinterpret_cast<__uint128_t*>(&ECContext.V[0]), reinterpret_cast<__uint128_t*>(&ECContext.V[16])); ECContext.Lr = State.mm[0][0]; ECContext.X6 = State.mm[1][0]; diff --git a/Source/Windows/include/winnt.h b/Source/Windows/include/winnt.h index ade1de9888..46d4163d53 100644 --- a/Source/Windows/include/winnt.h +++ b/Source/Windows/include/winnt.h @@ -122,8 +122,25 @@ typedef struct _IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT { ULONG EntryPoint; } IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT; +typedef struct _CONTEXT_CHUNK { + LONG Offset; + ULONG Length; +} CONTEXT_CHUNK, *PCONTEXT_CHUNK; + +typedef struct _CONTEXT_EX { + CONTEXT_CHUNK All; + CONTEXT_CHUNK Legacy; + CONTEXT_CHUNK XState; +#ifdef _WIN64 + ULONG64 align; +#endif +} CONTEXT_EX, *PCONTEXT_EX; + NTSYSAPI DWORD WINAPI RtlRunOnceExecuteOnce(PRTL_RUN_ONCE, PRTL_RUN_ONCE_INIT_FN, PVOID, PVOID*); +// This is a FEX extension, and requires corresponding wine patches +#define CONTEXT_ARM64_FEX_YMMSTATE (CONTEXT_ARM64 | 0x00000040) + #ifdef __cplusplus } #endif diff --git a/Source/Windows/include/winternl.h b/Source/Windows/include/winternl.h index c2b347f37e..4566f864c0 100644 --- a/Source/Windows/include/winternl.h +++ b/Source/Windows/include/winternl.h @@ -4,6 +4,7 @@ #pragma once #include_next +#include #ifdef __cplusplus extern "C" { @@ -468,6 +469,7 @@ void WINAPI RtlInitializeConditionVariable(RTL_CONDITION_VARIABLE*); NTSTATUS WINAPI RtlInitializeCriticalSection(RTL_CRITICAL_SECTION*); void WINAPI RtlInitializeSRWLock(RTL_SRWLOCK*); NTSTATUS WINAPI 
RtlLeaveCriticalSection(RTL_CRITICAL_SECTION*); +void* WINAPI RtlLocateExtendedFeature(CONTEXT_EX*, ULONG, ULONG*); NTSTATUS WINAPI RtlMultiByteToUnicodeN(LPWSTR, DWORD, LPDWORD, LPCSTR, DWORD); NTSTATUS WINAPI RtlMultiByteToUnicodeSize(DWORD*, LPCSTR, ULONG); BOOL WINAPI RtlQueryPerformanceCounter(LARGE_INTEGER*);