Skip to content

Commit

Permalink
Generate SVE for 80bit stores when possible
Browse files Browse the repository at this point in the history
Fixes #4126
  • Loading branch information
pmatos committed Nov 22, 2024
1 parent 7ad9f7a commit 214f04a
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 13 deletions.
11 changes: 11 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1536,6 +1536,17 @@ DEF_OP(StoreMem) {
str(Src.D(), MemSrc);
break;
}
case IR::OpSize::f80Bit: {
// 80-bit store path: writes a 10-byte value from the source vector register
// with a single predicated SVE byte store instead of splitting the store.
// Assert-style guard: this path is only valid when the host reports SVE128.
LOGMAN_THROW_A_FMT(HostSupportsSVE128, "80-bit stores are only supported with SVE128 support");
// Note: this declaration shadows the MemSrc used by the sibling cases of this
// switch; here the operand is built by GenerateSVEMemOperand from the op's
// offset, offset type and offset scale.
const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
// We need to generate the predicate register to copy the 10-byte value
// FIXME: don't hardcode p5
// NOTE(review): nothing visible in this hunk reserves p5 for this use — confirm
// it cannot hold a live predicate when this op is emitted.
ARMEmitter::PRegister preg = ARMEmitter::PReg::p5;
// TMP1 holds the byte count (10 bytes == 80 bits) used as the upper bound below.
mov(TMP1, 10);
// Builds the predicate from the range [zr, TMP1) at byte granularity;
// presumably this activates exactly the first 10 byte lanes — verify against
// the SVE WHILELT definition.
whilelt(ARMEmitter::SubRegSize::i8Bit, preg, ARMEmitter::XReg::zr, TMP1);
// Byte-granular store of Src governed by preg, so only the predicated lanes
// are expected to be written to memory.
st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), preg, MemSrc);
break;
}
case IR::OpSize::i128Bit: {
str(Src.Q(), MemSrc);
break;
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/IR/PassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx) {
FEX_CONFIG_OPT(DisablePasses, O0);

if (!DisablePasses()) {
InsertPass(CreateX87StackOptimizationPass());
InsertPass(CreateX87StackOptimizationPass(ctx->HostFeatures));
InsertPass(CreateConstProp(ctx->HostFeatures.SupportsTSOImm9, &ctx->CPUID));
InsertPass(CreateDeadFlagCalculationEliminination());
}
Expand Down
3 changes: 2 additions & 1 deletion FEXCore/Source/Interface/IR/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

namespace FEXCore {
class CPUIDEmu;
struct HostFeatures;
}

namespace FEXCore::Utils {
Expand All @@ -19,7 +20,7 @@ class RegisterAllocationData;
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool SupportsTSOImm9, const FEXCore::CPUIDEmu* CPUID);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass();
fextl::unique_ptr<FEXCore::IR::Pass> CreateX87StackOptimizationPass();
fextl::unique_ptr<FEXCore::IR::Pass> CreateX87StackOptimizationPass(const FEXCore::HostFeatures&);

namespace Validation {
fextl::unique_ptr<FEXCore::IR::Pass> CreateIRValidation();
Expand Down
29 changes: 18 additions & 11 deletions FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/deque.h>
#include "FEXCore/IR/IR.h"
#include "FEXCore/Utils/Profiler.h"
#include "FEXCore/fextl/deque.h"
#include "FEXCore/Core/HostFeatures.h"

#include <array>
#include <cstddef>
Expand Down Expand Up @@ -146,13 +147,15 @@ class FixedSizeStack {

class X87StackOptimization final : public Pass {
public:
X87StackOptimization() {
X87StackOptimization(const FEXCore::HostFeatures& Features)
: Features(Features) {
FEX_CONFIG_OPT(ReducedPrecision, X87REDUCEDPRECISION);
ReducedPrecisionMode = ReducedPrecision;
}
void Run(IREmitter* Emit) override;

private:
const FEXCore::HostFeatures& Features;
bool ReducedPrecisionMode;

// Helpers
Expand Down Expand Up @@ -820,11 +823,15 @@ void X87StackOptimization::Run(IREmitter* Emit) {
StackNode = IREmit->_F80CVT(Op->StoreSize, StackNode);
}
if (Op->StoreSize == OpSize::f80Bit) { // Part of code from StoreResult_WithOpSize()
// For X87 extended doubles, split before storing
IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode);
auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1);
auto DestAddr = IREmit->_Add(OpSize::i64Bit, AddrNode, GetConstant(8));
IREmit->_StoreMem(GPRClass, OpSize::i16Bit, DestAddr, Upper, OpSize::i64Bit);
if (Features.SupportsSVE128) {
IREmit->_StoreMem(FPRClass, OpSize::f80Bit, AddrNode, StackNode);
} else {
// For X87 extended doubles, split before storing
IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode);
auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1);
auto DestAddr = IREmit->_Add(OpSize::i64Bit, AddrNode, GetConstant(8));
IREmit->_StoreMem(GPRClass, OpSize::i16Bit, DestAddr, Upper, OpSize::i64Bit);
}
} else {
IREmit->_StoreMem(FPRClass, Op->StoreSize, AddrNode, StackNode);
}
Expand Down Expand Up @@ -1025,7 +1032,7 @@ void X87StackOptimization::Run(IREmitter* Emit) {
return;
}

fextl::unique_ptr<Pass> CreateX87StackOptimizationPass() {
return fextl::make_unique<X87StackOptimization>();
fextl::unique_ptr<Pass> CreateX87StackOptimizationPass(const FEXCore::HostFeatures& Features) {
return fextl::make_unique<X87StackOptimization>(Features);
}
} // namespace FEXCore::IR

0 comments on commit 214f04a

Please sign in to comment.