FEX-Emu · lioncash · Dec 10, 2024 · Dec 5, 2024 · Dec 5, 2024 · Dec 5, 2024
diff --git a/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h b/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h
@@ -6,17 +6,20 @@
 #include "Interface/IR/IR.h"
 
 namespace FEXCore::CPU {
-FEXCORE_PRESERVE_ALL_ATTR static softfloat_state SoftFloatStateFromFCW(uint16_t FCW) {
+FEXCORE_PRESERVE_ALL_ATTR static softfloat_state SoftFloatStateFromFCW(uint16_t FCW, bool Force80BitPrecision = false) {
   softfloat_state State {};
   State.detectTininess = softfloat_tininess_afterRounding;
   State.exceptionFlags = 0;
-
-  auto PC = (FCW >> 8) & 3;
-  switch (PC) {
-  case 0: State.roundingPrecision = 32; break;
-  case 2: State.roundingPrecision = 64; break;
-  case 3: State.roundingPrecision = 80; break;
-  case 1: LOGMAN_MSG_A_FMT("Invalid x87 precision mode, {}", PC);
+  State.roundingPrecision = 80;
+
+  if (!Force80BitPrecision) {
+    auto PC = (FCW >> 8) & 3;
+    switch (PC) {
+    case 0: State.roundingPrecision = 32; break;
+    case 2: State.roundingPrecision = 64; break;
+    case 3: State.roundingPrecision = 80; break;
+    case 1: LOGMAN_MSG_A_FMT("Invalid x87 precision mode, {}", PC);
+    }
   }
 
   auto RC = (FCW >> 10) & 3;
@@ -132,23 +135,23 @@ struct OpHandlers<IR::OP_F80CVTTOINT> {
 template<>
 struct OpHandlers<IR::OP_F80ROUND> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FRNDINT(&State, Src1);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80F2XM1> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::F2XM1(&State, Src1);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80TAN> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FTAN(&State, Src1);
   }
 };
@@ -164,15 +167,15 @@ struct OpHandlers<IR::OP_F80SQRT> {
 template<>
 struct OpHandlers<IR::OP_F80SIN> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FSIN(&State, Src1);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80COS> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FCOS(&State, Src1);
   }
 };
@@ -226,39 +229,39 @@ struct OpHandlers<IR::OP_F80DIV> {
 template<>
 struct OpHandlers<IR::OP_F80FYL2X> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1, X80SoftFloat Src2) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FYL2X(&State, Src1, Src2);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80ATAN> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1, X80SoftFloat Src2) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FATAN(&State, Src1, Src2);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80FPREM1> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1, X80SoftFloat Src2) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FREM1(&State, Src1, Src2);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80FPREM> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1, X80SoftFloat Src2) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FREM(&State, Src1, Src2);
   }
 };
 
 template<>
 struct OpHandlers<IR::OP_F80SCALE> {
   FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat handle(uint16_t FCW, X80SoftFloat Src1, X80SoftFloat Src2) {
-    softfloat_state State = SoftFloatStateFromFCW(FCW);
+    softfloat_state State = SoftFloatStateFromFCW(FCW, true); // true = Force80BitPrecision: roundingPrecision stays 80, FCW precision control is ignored
     return X80SoftFloat::FSCALE(&State, Src1, Src2);
   }
 };

diff --git a/unittests/ASM/Disabled_Tests_Simulator b/unittests/ASM/Disabled_Tests_Simulator
@@ -1,3 +1,15 @@
+# Simulator on x86 doesn't pass these tests due to not using float128
+Test_X87/precision_test_fcos.asm
+Test_X87/precision_test_fsin.asm
+Test_X87/precision_test_ftan.asm
+Test_X87/precision_test_fatan.asm
+Test_X87/precision_test_fyl2xp1.asm
+Test_X87/precision_test_neg_fcos.asm
+Test_X87/precision_test_neg_fsin.asm
+Test_X87/precision_test_neg_ftan.asm
+Test_X87/precision_test_neg_fatan.asm
+Test_X87/precision_test_neg_fyl2xp1.asm
+
 # AES unsupported in simulator
 Test_H0F38/66_DB.asm
 Test_H0F38/66_DC.asm

diff --git a/unittests/ASM/Includes/x87cw.mac b/unittests/ASM/Includes/x87cw.mac
@@ -0,0 +1,34 @@
+%ifndef X87_CW_INC
+%define X87_CW_INC
+
+; Sets the precision (bits 8-9) and rounding (bits 10-11) fields of the x87 control word.
+; Uses 2 bytes of stack below rsp and clobbers rax; all other control-word bits are preserved.
+; Args: %1 = precision constant (x87_prec_*), %2 = rounding constant (x87_round_*)
+%macro set_cw_precision_rounding 2
+  sub rsp, 2               ; scratch slot for the 16-bit control word
+  fnstcw [rsp]             ; store the current control word into the slot
+  movzx eax, word [rsp]    ; load all 16 bits; a sizeless `movzx ax, [mem]` would only load the low byte
+
+  ; Precision: clear the 2-bit field, then insert the requested value
+  and eax, ~(3 << 8)
+  or eax, (%1) << 8
+
+  ; Rounding: clear the 2-bit field, then insert the requested value
+  and eax, ~(3 << 10)
+  or eax, (%2) << 10
+
+  mov [rsp], ax            ; write the modified control word back to the slot
+  fldcw [rsp]              ; and make it the active control word
+  add rsp, 2               ; release the scratch slot
+%endmacro
+
+x87_prec_32 equ 00b ; precision-field values; set_cw_precision_rounding shifts them into control-word bits 8-9
+x87_prec_64 equ 10b ; (01b is intentionally absent: FEX's SoftFloatStateFromFCW reports it as an invalid mode)
+x87_prec_80 equ 11b
+
+x87_round_nearest equ 00b ; rounding-field values; set_cw_precision_rounding shifts them into control-word bits 10-11
+x87_round_down equ 01b
+x87_round_up equ 10b
+x87_round_towards_zero equ 11b
+
+%endif
diff --git a/unittests/ASM/X87/precision_test_fabs.asm b/unittests/ASM/X87/precision_test_fabs.asm
@@ -0,0 +1,165 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "XMM0":  ["0x8111111111111111", "0x3fff"],
+    "XMM1":  ["0x8111111111111111", "0x3fff"],
+    "XMM2":  ["0x8111111111111111", "0x3fff"],
+    "XMM3":  ["0x8111111111111111", "0x3fff"],
+    "XMM4":  ["0x8111111111111111", "0x3fff"],
+    "XMM5":  ["0x8111111111111111", "0x3fff"],
+    "XMM6":  ["0x8111111111111111", "0x3fff"],
+    "XMM7":  ["0x8111111111111111", "0x3fff"],
+    "XMM8":  ["0x8111111111111111", "0x3fff"],
+    "XMM9":  ["0x8111111111111111", "0x3fff"],
+    "XMM10":  ["0x8111111111111111", "0x3fff"],
+    "XMM11":  ["0x8111111111111111", "0x3fff"]
+  }
+}
+%endif
+
+%include "x87cw.mac"
+; Runs fabs on the same operand under all 12 precision/rounding combinations; CONFIG expects the operand back unchanged each time.
+mov rsp, 0xe000_1000 ; stack pointer; set_cw_precision_rounding needs 2 bytes of scratch below it
+
+finit ; enters x87 state
+
+; Case 1: 80-bit precision, round-nearest -> .result_1 (XMM0)
+set_cw_precision_rounding x87_prec_80, x87_round_nearest
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_1]
+
+; Case 2: 64-bit precision, round-nearest -> .result_2 (XMM1)
+set_cw_precision_rounding x87_prec_64, x87_round_nearest
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_2]
+
+; Case 3: 32-bit precision, round-nearest -> .result_3 (XMM2)
+set_cw_precision_rounding x87_prec_32, x87_round_nearest
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_3]
+
+; Case 4: 80-bit precision, round-down -> .result_4 (XMM3)
+set_cw_precision_rounding x87_prec_80, x87_round_down
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_4]
+
+; Case 5: 64-bit precision, round-down -> .result_5 (XMM4)
+set_cw_precision_rounding x87_prec_64, x87_round_down
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_5]
+
+; Case 6: 32-bit precision, round-down -> .result_6 (XMM5)
+set_cw_precision_rounding x87_prec_32, x87_round_down
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_6]
+
+; Case 7: 80-bit precision, round-up -> .result_7 (XMM6)
+set_cw_precision_rounding x87_prec_80, x87_round_up
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_7]
+
+; Case 8: 64-bit precision, round-up -> .result_8 (XMM7)
+set_cw_precision_rounding x87_prec_64, x87_round_up
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_8]
+
+; Case 9: 32-bit precision, round-up -> .result_9 (XMM8)
+set_cw_precision_rounding x87_prec_32, x87_round_up
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_9]
+
+; Case 10: 80-bit precision, round-towards-zero -> .result_10 (XMM9)
+set_cw_precision_rounding x87_prec_80, x87_round_towards_zero
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_10]
+
+; Case 11: 64-bit precision, round-towards-zero -> .result_11 (XMM10)
+set_cw_precision_rounding x87_prec_64, x87_round_towards_zero
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_11]
+
+; Case 12: 32-bit precision, round-towards-zero -> .result_12 (XMM11)
+set_cw_precision_rounding x87_prec_32, x87_round_towards_zero
+fld tword [rel .source_1]
+fabs
+fstp tword [rel .result_12]
+
+; Load each 16-byte result slot into the XMM register that the CONFIG block checks
+movups xmm0, [rel .result_1]
+movups xmm1, [rel .result_2]
+movups xmm2, [rel .result_3]
+movups xmm3, [rel .result_4]
+movups xmm4, [rel .result_5]
+movups xmm5, [rel .result_6]
+movups xmm6, [rel .result_7]
+movups xmm7, [rel .result_8]
+movups xmm8, [rel .result_9]
+movups xmm9, [rel .result_10]
+movups xmm10, [rel .result_11]
+movups xmm11, [rel .result_12]
+
+hlt
+
+; Positive 10-byte source operand, read by every `fld tword [rel .source_1]`
+.source_1:
+dq 0x8111_1111_1111_1111 ; low 8 bytes of the tword; matches the first element of each expected XMM value
+dw 0x3fff ; top 2 bytes of the tword; matches the second element of each expected XMM value
+; Result slots: 16 zeroed bytes each so movups can load them whole; fstp tword overwrites the low 10 bytes
+.result_1:
+dq 0
+dq 0
+
+.result_2:
+dq 0
+dq 0
+
+.result_3:
+dq 0
+dq 0
+
+.result_4:
+dq 0
+dq 0
+
+.result_5:
+dq 0
+dq 0
+
+.result_6:
+dq 0
+dq 0
+
+.result_7:
+dq 0
+dq 0
+
+.result_8:
+dq 0
+dq 0
+
+.result_9:
+dq 0
+dq 0
+
+.result_10:
+dq 0
+dq 0
+
+.result_11:
+dq 0
+dq 0
+
+.result_12:
+dq 0
+dq 0