From 582b3997b8eb99d490cf409c19c03c1878d4faeb Mon Sep 17 00:00:00 2001 From: Ruihan Yin Date: Fri, 10 Feb 2023 15:18:33 -0800 Subject: [PATCH 1/3] Optimize LinearScan::buildPhysRegRecords by skipping the upper (AVX512-only) registers XMM16-XMM31 when AVX512 is not available. --- src/coreclr/jit/lsrabuild.cpp | 12 ++++++++++++ src/coreclr/jit/targetamd64.h | 9 ++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 882008acc9921..44384801d8455 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1899,6 +1899,18 @@ void LinearScan::buildPhysRegRecords() RegRecord* curr = &physRegs[reg]; curr->regOrder = (unsigned char)i; } +#if defined(TARGET_AMD64) + if (compiler->DoJitStressEvexEncoding()) + { + for (unsigned int i = 0; i < lsraRegOrderFltSize; i++) + { + const regNumber lsraRegOrderFltUpper[] = {REG_VAR_ORDER_FLT_UPPER}; + regNumber reg = lsraRegOrderFltUpper[i]; + RegRecord* curr = &physRegs[reg]; + curr->regOrder = (unsigned char)(i + lsraRegOrderFltSize); + } + } +#endif // TARGET_AMD64 } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 408bbd99c3e5c..bf5b0d57c8d81 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -227,9 +227,12 @@ #endif #define REG_VAR_ORDER_FLT REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3,REG_XMM4,REG_XMM5,REG_XMM6,REG_XMM7, \ - REG_XMM8,REG_XMM9,REG_XMM10,REG_XMM11,REG_XMM12,REG_XMM13,REG_XMM14,REG_XMM15, \ - REG_XMM16,REG_XMM17,REG_XMM18,REG_XMM19,REG_XMM20,REG_XMM21,REG_XMM22,REG_XMM23, \ - REG_XMM24,REG_XMM25,REG_XMM26,REG_XMM27,REG_XMM28,REG_XMM29,REG_XMM30,REG_XMM31 + REG_XMM8,REG_XMM9,REG_XMM10,REG_XMM11,REG_XMM12,REG_XMM13,REG_XMM14,REG_XMM15 +#if defined(TARGET_AMD64) + #define REG_VAR_ORDER_FLT_UPPER REG_XMM16,REG_XMM17,REG_XMM18,REG_XMM19,REG_XMM20,REG_XMM21,REG_XMM22,REG_XMM23, \ 
REG_XMM24,REG_XMM25,REG_XMM26,REG_XMM27,REG_XMM28,REG_XMM29,REG_XMM30,REG_XMM31 +#endif // TARGET_AMD64 + #ifdef UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT) From c0f07126ddcc314714a181fcf727217af6607b3d Mon Sep 17 00:00:00 2001 From: Yin Date: Mon, 27 Mar 2023 12:16:48 -0700 Subject: [PATCH 2/3] code changes based on the reviews. --- src/coreclr/jit/lsrabuild.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 44384801d8455..ee0d59c89b470 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1900,14 +1900,15 @@ void LinearScan::buildPhysRegRecords() curr->regOrder = (unsigned char)i; } #if defined(TARGET_AMD64) - if (compiler->DoJitStressEvexEncoding()) + if (compiler->canUseEvexEncoding()) { - for (unsigned int i = 0; i < lsraRegOrderFltSize; i++) + const regNumber lsraRegOrderFltUpper[] = {REG_VAR_ORDER_FLT_UPPER}; + const unsigned lsraRegOrderUpperFltSize = ArrLen(lsraRegOrderFltUpper); + for (unsigned int i = 0; i < lsraRegOrderUpperFltSize; i++) { - const regNumber lsraRegOrderFltUpper[] = {REG_VAR_ORDER_FLT_UPPER}; - regNumber reg = lsraRegOrderFltUpper[i]; - RegRecord* curr = &physRegs[reg]; - curr->regOrder = (unsigned char)(i + lsraRegOrderFltSize); + regNumber reg = lsraRegOrderFltUpper[i]; + RegRecord* curr = &physRegs[reg]; + curr->regOrder = (unsigned char)(i + lsraRegOrderUpperFltSize); } } #endif // TARGET_AMD64 From 42ae0c09c6b9969d222fff75a7cc52ee0c9829d9 Mon Sep 17 00:00:00 2001 From: Yin Date: Tue, 28 Mar 2023 09:46:45 -0700 Subject: [PATCH 3/3] Put the upper register group declaration at global scope. Fix the offset value used when allocating upper registers: it should be the length of the lower register group. 
--- src/coreclr/jit/lsrabuild.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index ee0d59c89b470..f5f2ad7f409d9 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1876,6 +1876,10 @@ const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); // TODO-XARCH-AVX512 we might want to move this to be configured with the rbm variables too static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); +#if defined(TARGET_AMD64) +static const regNumber lsraRegOrderFltUpper[] = {REG_VAR_ORDER_FLT_UPPER}; +const unsigned lsraRegOrderUpperFltSize = ArrLen(lsraRegOrderFltUpper); +#endif // TARGET_AMD64 //------------------------------------------------------------------------ // buildPhysRegRecords: Make an interval for each physical register @@ -1902,13 +1906,11 @@ void LinearScan::buildPhysRegRecords() #if defined(TARGET_AMD64) if (compiler->canUseEvexEncoding()) { - const regNumber lsraRegOrderFltUpper[] = {REG_VAR_ORDER_FLT_UPPER}; - const unsigned lsraRegOrderUpperFltSize = ArrLen(lsraRegOrderFltUpper); for (unsigned int i = 0; i < lsraRegOrderUpperFltSize; i++) { regNumber reg = lsraRegOrderFltUpper[i]; RegRecord* curr = &physRegs[reg]; - curr->regOrder = (unsigned char)(i + lsraRegOrderUpperFltSize); + curr->regOrder = (unsigned char)(i + lsraRegOrderFltSize); } } #endif // TARGET_AMD64