Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gcc target tests #704

Merged
merged 3 commits into from
Jan 26, 2021
Merged

Add gcc target tests #704

merged 3 commits into from
Jan 26, 2021

Conversation

skmp
Copy link
Contributor

@skmp skmp commented Jan 25, 2021

Overview

This adds gcc's "gcc.target" tests for i386 and amd64. We mostly pass these; however, there are a few failures that should be investigated (#713). The 32-bit tests don't work on CI but work for me locally, so that also needs to be investigated (#714).

There are test cases all the way up to avx512, though I haven't included any of the avx* ones.

@skmp skmp force-pushed the skmp/add-gcc-target-tests branch 2 times, most recently from 2ad061f to d0aa661 Compare January 25, 2021 12:57
@skmp
Copy link
Contributor Author

skmp commented Jan 25, 2021

@Sonicadvance1 can you investigate why it fails on the runners? I suspect missing 32-bit libs from the rootfs?

@skmp skmp force-pushed the skmp/add-gcc-target-tests branch from d0aa661 to abcd338 Compare January 25, 2021 13:32
@skmp skmp force-pushed the skmp/add-gcc-target-tests branch from abcd338 to 29a8fd0 Compare January 26, 2021 10:00
@skmp skmp force-pushed the skmp/add-gcc-target-tests branch from 0fdb887 to b453ede Compare January 26, 2021 12:55
@Sonicadvance1 Sonicadvance1 merged commit 536be23 into main Jan 26, 2021
@Sonicadvance1 Sonicadvance1 deleted the skmp/add-gcc-target-tests branch January 26, 2021 23:45
Sonicadvance1 added a commit to Sonicadvance1/FEX that referenced this pull request Jul 7, 2023
Only return the particular flags that are being requested in the moment
since compacting them all when requested is fairly slow.

x87 fcmov in particular was requesting all the flags when it only needs
a couple.
This reduces a `fcmovb` instruction count blowup from 103x to 48x. Still
more room to go but this one stood out as being particularly bad.

Old:
```asm
0x0000000265a002bc  10ffffe0    adr x0, #-0x4 (addr 0x265a002b8)
0x0000000265a002c0  f9005f80    str x0, [x28, #184]
0x0000000265a002c4  d2800014    mov x20, #0x0
0x0000000265a002c8  d2800035    mov x21, #0x1
0x0000000265a002cc  d2800056    mov x22, #0x2
0x0000000265a002d0  394b0397    ldrb w23, [x28, #704]
0x0000000265a002d4  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002d8  aa1702d6    orr x22, x22, x23
0x0000000265a002dc  394b0b97    ldrb w23, [x28, #706]
0x0000000265a002e0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002e4  531e76f7    lsl w23, w23, #2
0x0000000265a002e8  aa1702d6    orr x22, x22, x23
0x0000000265a002ec  394b1397    ldrb w23, [x28, #708]
0x0000000265a002f0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002f4  531c6ef7    lsl w23, w23, #4
0x0000000265a002f8  aa1702d6    orr x22, x22, x23
0x0000000265a002fc  394b1b97    ldrb w23, [x28, #710]
0x0000000265a00300  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00304  531a66f7    lsl w23, w23, #6
0x0000000265a00308  aa1702d6    orr x22, x22, x23
0x0000000265a0030c  394b1f97    ldrb w23, [x28, #711]
0x0000000265a00310  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00314  531962f7    lsl w23, w23, #7
0x0000000265a00318  aa1702d6    orr x22, x22, x23
0x0000000265a0031c  394b2397    ldrb w23, [x28, #712]
0x0000000265a00320  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00324  53185ef7    lsl w23, w23, #8
0x0000000265a00328  aa1702d6    orr x22, x22, x23
0x0000000265a0032c  394b2797    ldrb w23, [x28, #713]
0x0000000265a00330  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00334  53175af7    lsl w23, w23, #9
0x0000000265a00338  aa1702d6    orr x22, x22, x23
0x0000000265a0033c  394b2b97    ldrb w23, [x28, #714]
0x0000000265a00340  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00344  531656f7    lsl w23, w23, #10
0x0000000265a00348  aa1702d6    orr x22, x22, x23
0x0000000265a0034c  394b2f97    ldrb w23, [x28, #715]
0x0000000265a00350  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00354  531552f7    lsl w23, w23, #11
0x0000000265a00358  aa1702d6    orr x22, x22, x23
0x0000000265a0035c  394b3397    ldrb w23, [x28, #716]
0x0000000265a00360  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00364  53144ef7    lsl w23, w23, #12
0x0000000265a00368  aa1702d6    orr x22, x22, x23
0x0000000265a0036c  394b3b97    ldrb w23, [x28, #718]
0x0000000265a00370  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00374  531246f7    lsl w23, w23, #14
0x0000000265a00378  aa1702d6    orr x22, x22, x23
0x0000000265a0037c  394b4397    ldrb w23, [x28, #720]
0x0000000265a00380  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00384  53103ef7    lsl w23, w23, #16
0x0000000265a00388  aa1702d6    orr x22, x22, x23
0x0000000265a0038c  394b4797    ldrb w23, [x28, #721]
0x0000000265a00390  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00394  530f3af7    lsl w23, w23, #17
0x0000000265a00398  aa1702d6    orr x22, x22, x23
0x0000000265a0039c  394b4b97    ldrb w23, [x28, #722]
0x0000000265a003a0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003a4  530e36f7    lsl w23, w23, #18
0x0000000265a003a8  aa1702d6    orr x22, x22, x23
0x0000000265a003ac  394b4f97    ldrb w23, [x28, #723]
0x0000000265a003b0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003b4  530d32f7    lsl w23, w23, #19
0x0000000265a003b8  aa1702d6    orr x22, x22, x23
0x0000000265a003bc  394b5397    ldrb w23, [x28, #724]
0x0000000265a003c0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003c4  530c2ef7    lsl w23, w23, #20
0x0000000265a003c8  aa1702d6    orr x22, x22, x23
0x0000000265a003cc  394b5797    ldrb w23, [x28, #725]
0x0000000265a003d0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003d4  530b2af7    lsl w23, w23, #21
0x0000000265a003d8  aa1702d6    orr x22, x22, x23
0x0000000265a003dc  924002d6    and x22, x22, #0x1
0x0000000265a003e0  93400294    sbfx x20, x20, #0, #1
0x0000000265a003e4  934002b5    sbfx x21, x21, #0, #1
0x0000000265a003e8  f10002df    cmp x22, #0x0 (0)
0x0000000265a003ec  9a950294    csel x20, x20, x21, eq
0x0000000265a003f0  4e080e84    dup v4.2d, x20
0x0000000265a003f4  394baf94    ldrb w20, [x28, #747]
0x0000000265a003f8  91000695    add x21, x20, #0x1 (1)
0x0000000265a003fc  92400ab5    and x21, x21, #0x7
0x0000000265a00400  d2800200    mov x0, #0x10
0x0000000265a00404  9b007e80    mul x0, x20, x0
0x0000000265a00408  8b000380    add x0, x28, x0
0x0000000265a0040c  3dc0bc05    ldr q5, [x0, #752]
0x0000000265a00410  d2800200    mov x0, #0x10
0x0000000265a00414  9b007ea0    mul x0, x21, x0
0x0000000265a00418  8b000380    add x0, x28, x0
0x0000000265a0041c  3dc0bc06    ldr q6, [x0, #752]
0x0000000265a00420  4ea41c80    mov v0.16b, v4.16b
0x0000000265a00424  6e651cc0    bsl v0.16b, v6.16b, v5.16b
0x0000000265a00428  4ea01c04    mov v4.16b, v0.16b
0x0000000265a0042c  d2800200    mov x0, #0x10
0x0000000265a00430  9b007e80    mul x0, x20, x0
0x0000000265a00434  8b000380    add x0, x28, x0
0x0000000265a00438  3d80bc04    str q4, [x0, #752]
0x0000000265a0043c  58000040    ldr x0, pc+8 (addr 0x265a00444)
0x0000000265a00440  d63f0000    blr x0
```

New:
```asm
0x0000000265a002bc  10ffffe0    adr x0, #-0x4 (addr 0x265a002b8)
0x0000000265a002c0  f9005f80    str x0, [x28, #184]
0x0000000265a002c4  d2800014    mov x20, #0x0
0x0000000265a002c8  d2800035    mov x21, #0x1
0x0000000265a002cc  d2800056    mov x22, #0x2
0x0000000265a002d0  394b1f97    ldrb w23, [x28, #711]
0x0000000265a002d4  331902f6    bfi w22, w23, #7, #1
0x0000000265a002d8  394b2797    ldrb w23, [x28, #713]
0x0000000265a002dc  331702f6    bfi w22, w23, #9, #1
0x0000000265a002e0  394b2f97    ldrb w23, [x28, #715]
0x0000000265a002e4  331502f6    bfi w22, w23, #11, #1
0x0000000265a002e8  394b4797    ldrb w23, [x28, #721]
0x0000000265a002ec  330f02f6    bfi w22, w23, #17, #1
0x0000000265a002f0  394b4f97    ldrb w23, [x28, #723]
0x0000000265a002f4  330d02f6    bfi w22, w23, #19, #1
0x0000000265a002f8  394b5797    ldrb w23, [x28, #725]
0x0000000265a002fc  330b02f6    bfi w22, w23, #21, #1
0x0000000265a00300  924002d6    and x22, x22, #0x1
0x0000000265a00304  93400294    sbfx x20, x20, #0, #1
0x0000000265a00308  934002b5    sbfx x21, x21, #0, #1
0x0000000265a0030c  f10002df    cmp x22, #0x0 (0)
0x0000000265a00310  9a950294    csel x20, x20, x21, eq
0x0000000265a00314  4e080e84    dup v4.2d, x20
0x0000000265a00318  394baf94    ldrb w20, [x28, #747]
0x0000000265a0031c  91000695    add x21, x20, #0x1 (1)
0x0000000265a00320  92400ab5    and x21, x21, #0x7
0x0000000265a00324  d2800200    mov x0, #0x10
0x0000000265a00328  9b007e80    mul x0, x20, x0
0x0000000265a0032c  8b000380    add x0, x28, x0
0x0000000265a00330  3dc0bc05    ldr q5, [x0, #752]
0x0000000265a00334  d2800200    mov x0, #0x10
0x0000000265a00338  9b007ea0    mul x0, x21, x0
0x0000000265a0033c  8b000380    add x0, x28, x0
0x0000000265a00340  3dc0bc06    ldr q6, [x0, #752]
0x0000000265a00344  4ea41c80    mov v0.16b, v4.16b
0x0000000265a00348  6e651cc0    bsl v0.16b, v6.16b, v5.16b
0x0000000265a0034c  4ea01c04    mov v4.16b, v0.16b
0x0000000265a00350  d2800200    mov x0, #0x10
0x0000000265a00354  9b007e80    mul x0, x20, x0
0x0000000265a00358  8b000380    add x0, x28, x0
0x0000000265a0035c  3d80bc04    str q4, [x0, #752]
0x0000000265a00360  58000040    ldr x0, pc+8 (addr 0x265a00368)
0x0000000265a00364  d63f0000    blr x0
```
Sonicadvance1 added a commit to Sonicadvance1/FEX that referenced this pull request Jul 7, 2023
Only return the particular flags that are being requested in the moment
since compacting them all when requested is fairly slow.

x87 fcmov in particular was requesting all the flags when it only needs
a couple.
This reduces a `fcmovb` instruction count blowup from 103x to 38x. Still
more room to go but this one stood out as being particularly bad.

Old:
```asm
0x0000000265a002bc  10ffffe0    adr x0, #-0x4 (addr 0x265a002b8)
0x0000000265a002c0  f9005f80    str x0, [x28, #184]
0x0000000265a002c4  d2800014    mov x20, #0x0
0x0000000265a002c8  d2800035    mov x21, #0x1
0x0000000265a002cc  d2800056    mov x22, #0x2
0x0000000265a002d0  394b0397    ldrb w23, [x28, #704]
0x0000000265a002d4  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002d8  aa1702d6    orr x22, x22, x23
0x0000000265a002dc  394b0b97    ldrb w23, [x28, #706]
0x0000000265a002e0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002e4  531e76f7    lsl w23, w23, #2
0x0000000265a002e8  aa1702d6    orr x22, x22, x23
0x0000000265a002ec  394b1397    ldrb w23, [x28, #708]
0x0000000265a002f0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002f4  531c6ef7    lsl w23, w23, #4
0x0000000265a002f8  aa1702d6    orr x22, x22, x23
0x0000000265a002fc  394b1b97    ldrb w23, [x28, #710]
0x0000000265a00300  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00304  531a66f7    lsl w23, w23, #6
0x0000000265a00308  aa1702d6    orr x22, x22, x23
0x0000000265a0030c  394b1f97    ldrb w23, [x28, #711]
0x0000000265a00310  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00314  531962f7    lsl w23, w23, #7
0x0000000265a00318  aa1702d6    orr x22, x22, x23
0x0000000265a0031c  394b2397    ldrb w23, [x28, #712]
0x0000000265a00320  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00324  53185ef7    lsl w23, w23, #8
0x0000000265a00328  aa1702d6    orr x22, x22, x23
0x0000000265a0032c  394b2797    ldrb w23, [x28, #713]
0x0000000265a00330  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00334  53175af7    lsl w23, w23, #9
0x0000000265a00338  aa1702d6    orr x22, x22, x23
0x0000000265a0033c  394b2b97    ldrb w23, [x28, #714]
0x0000000265a00340  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00344  531656f7    lsl w23, w23, #10
0x0000000265a00348  aa1702d6    orr x22, x22, x23
0x0000000265a0034c  394b2f97    ldrb w23, [x28, #715]
0x0000000265a00350  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00354  531552f7    lsl w23, w23, #11
0x0000000265a00358  aa1702d6    orr x22, x22, x23
0x0000000265a0035c  394b3397    ldrb w23, [x28, #716]
0x0000000265a00360  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00364  53144ef7    lsl w23, w23, #12
0x0000000265a00368  aa1702d6    orr x22, x22, x23
0x0000000265a0036c  394b3b97    ldrb w23, [x28, #718]
0x0000000265a00370  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00374  531246f7    lsl w23, w23, #14
0x0000000265a00378  aa1702d6    orr x22, x22, x23
0x0000000265a0037c  394b4397    ldrb w23, [x28, #720]
0x0000000265a00380  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00384  53103ef7    lsl w23, w23, #16
0x0000000265a00388  aa1702d6    orr x22, x22, x23
0x0000000265a0038c  394b4797    ldrb w23, [x28, #721]
0x0000000265a00390  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00394  530f3af7    lsl w23, w23, #17
0x0000000265a00398  aa1702d6    orr x22, x22, x23
0x0000000265a0039c  394b4b97    ldrb w23, [x28, #722]
0x0000000265a003a0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003a4  530e36f7    lsl w23, w23, #18
0x0000000265a003a8  aa1702d6    orr x22, x22, x23
0x0000000265a003ac  394b4f97    ldrb w23, [x28, #723]
0x0000000265a003b0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003b4  530d32f7    lsl w23, w23, #19
0x0000000265a003b8  aa1702d6    orr x22, x22, x23
0x0000000265a003bc  394b5397    ldrb w23, [x28, #724]
0x0000000265a003c0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003c4  530c2ef7    lsl w23, w23, #20
0x0000000265a003c8  aa1702d6    orr x22, x22, x23
0x0000000265a003cc  394b5797    ldrb w23, [x28, #725]
0x0000000265a003d0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003d4  530b2af7    lsl w23, w23, #21
0x0000000265a003d8  aa1702d6    orr x22, x22, x23
0x0000000265a003dc  924002d6    and x22, x22, #0x1
0x0000000265a003e0  93400294    sbfx x20, x20, #0, #1
0x0000000265a003e4  934002b5    sbfx x21, x21, #0, #1
0x0000000265a003e8  f10002df    cmp x22, #0x0 (0)
0x0000000265a003ec  9a950294    csel x20, x20, x21, eq
0x0000000265a003f0  4e080e84    dup v4.2d, x20
0x0000000265a003f4  394baf94    ldrb w20, [x28, #747]
0x0000000265a003f8  91000695    add x21, x20, #0x1 (1)
0x0000000265a003fc  92400ab5    and x21, x21, #0x7
0x0000000265a00400  d2800200    mov x0, #0x10
0x0000000265a00404  9b007e80    mul x0, x20, x0
0x0000000265a00408  8b000380    add x0, x28, x0
0x0000000265a0040c  3dc0bc05    ldr q5, [x0, #752]
0x0000000265a00410  d2800200    mov x0, #0x10
0x0000000265a00414  9b007ea0    mul x0, x21, x0
0x0000000265a00418  8b000380    add x0, x28, x0
0x0000000265a0041c  3dc0bc06    ldr q6, [x0, #752]
0x0000000265a00420  4ea41c80    mov v0.16b, v4.16b
0x0000000265a00424  6e651cc0    bsl v0.16b, v6.16b, v5.16b
0x0000000265a00428  4ea01c04    mov v4.16b, v0.16b
0x0000000265a0042c  d2800200    mov x0, #0x10
0x0000000265a00430  9b007e80    mul x0, x20, x0
0x0000000265a00434  8b000380    add x0, x28, x0
0x0000000265a00438  3d80bc04    str q4, [x0, #752]
0x0000000265a0043c  58000040    ldr x0, pc+8 (addr 0x265a00444)
0x0000000265a00440  d63f0000    blr x0
```

New:
```asm
0x0000000265a002bc  10ffffe0    adr x0, #-0x4 (addr 0x265a002b8)
0x0000000265a002c0  f9005f80    str x0, [x28, #184]
0x0000000265a002c4  d2800014    mov x20, #0x0
0x0000000265a002c8  d2800035    mov x21, #0x1
0x0000000265a002cc  d2800056    mov x22, #0x2
0x0000000265a002d0  394b0397    ldrb w23, [x28, #704]
0x0000000265a002d4  330002f6    bfxil w22, w23, #0, #1
0x0000000265a002d8  924002d6    and x22, x22, #0x1
0x0000000265a002dc  93400294    sbfx x20, x20, #0, #1
0x0000000265a002e0  934002b5    sbfx x21, x21, #0, #1
0x0000000265a002e4  f10002df    cmp x22, #0x0 (0)
0x0000000265a002e8  9a950294    csel x20, x20, x21, eq
0x0000000265a002ec  4e080e84    dup v4.2d, x20
0x0000000265a002f0  394baf94    ldrb w20, [x28, #747]
0x0000000265a002f4  91000695    add x21, x20, #0x1 (1)
0x0000000265a002f8  92400ab5    and x21, x21, #0x7
0x0000000265a002fc  d2800200    mov x0, #0x10
0x0000000265a00300  9b007e80    mul x0, x20, x0
0x0000000265a00304  8b000380    add x0, x28, x0
0x0000000265a00308  3dc0bc05    ldr q5, [x0, #752]
0x0000000265a0030c  d2800200    mov x0, #0x10
0x0000000265a00310  9b007ea0    mul x0, x21, x0
0x0000000265a00314  8b000380    add x0, x28, x0
0x0000000265a00318  3dc0bc06    ldr q6, [x0, #752]
0x0000000265a0031c  4ea41c80    mov v0.16b, v4.16b
0x0000000265a00320  6e651cc0    bsl v0.16b, v6.16b, v5.16b
0x0000000265a00324  4ea01c04    mov v4.16b, v0.16b
0x0000000265a00328  d2800200    mov x0, #0x10
0x0000000265a0032c  9b007e80    mul x0, x20, x0
0x0000000265a00330  8b000380    add x0, x28, x0
0x0000000265a00334  3d80bc04    str q4, [x0, #752]
0x0000000265a00338  58000040    ldr x0, pc+8 (addr 0x265a00340)
0x0000000265a0033c  d63f0000    blr x0
```
Sonicadvance1 added a commit to Sonicadvance1/FEX that referenced this pull request Jul 8, 2023
Only return the particular flags that are being requested in the moment
since compacting them all when requested is fairly slow.

x87 fcmov in particular was requesting all the flags when it only needs
a couple.
This reduces a `fcmovb` instruction count blowup from 103x to 38x. Still
more room to go but this one stood out as being particularly bad.

Old:
```asm
0x0000000265a002bc  10ffffe0    adr x0, #-0x4 (addr 0x265a002b8)
0x0000000265a002c0  f9005f80    str x0, [x28, #184]
0x0000000265a002c4  d2800014    mov x20, #0x0
0x0000000265a002c8  d2800035    mov x21, #0x1
0x0000000265a002cc  d2800056    mov x22, #0x2
0x0000000265a002d0  394b0397    ldrb w23, [x28, #704]
0x0000000265a002d4  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002d8  aa1702d6    orr x22, x22, x23
0x0000000265a002dc  394b0b97    ldrb w23, [x28, #706]
0x0000000265a002e0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002e4  531e76f7    lsl w23, w23, #2
0x0000000265a002e8  aa1702d6    orr x22, x22, x23
0x0000000265a002ec  394b1397    ldrb w23, [x28, #708]
0x0000000265a002f0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a002f4  531c6ef7    lsl w23, w23, #4
0x0000000265a002f8  aa1702d6    orr x22, x22, x23
0x0000000265a002fc  394b1b97    ldrb w23, [x28, #710]
0x0000000265a00300  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00304  531a66f7    lsl w23, w23, #6
0x0000000265a00308  aa1702d6    orr x22, x22, x23
0x0000000265a0030c  394b1f97    ldrb w23, [x28, #711]
0x0000000265a00310  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00314  531962f7    lsl w23, w23, #7
0x0000000265a00318  aa1702d6    orr x22, x22, x23
0x0000000265a0031c  394b2397    ldrb w23, [x28, #712]
0x0000000265a00320  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00324  53185ef7    lsl w23, w23, #8
0x0000000265a00328  aa1702d6    orr x22, x22, x23
0x0000000265a0032c  394b2797    ldrb w23, [x28, #713]
0x0000000265a00330  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00334  53175af7    lsl w23, w23, #9
0x0000000265a00338  aa1702d6    orr x22, x22, x23
0x0000000265a0033c  394b2b97    ldrb w23, [x28, #714]
0x0000000265a00340  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00344  531656f7    lsl w23, w23, #10
0x0000000265a00348  aa1702d6    orr x22, x22, x23
0x0000000265a0034c  394b2f97    ldrb w23, [x28, #715]
0x0000000265a00350  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00354  531552f7    lsl w23, w23, #11
0x0000000265a00358  aa1702d6    orr x22, x22, x23
0x0000000265a0035c  394b3397    ldrb w23, [x28, #716]
0x0000000265a00360  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00364  53144ef7    lsl w23, w23, #12
0x0000000265a00368  aa1702d6    orr x22, x22, x23
0x0000000265a0036c  394b3b97    ldrb w23, [x28, #718]
0x0000000265a00370  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00374  531246f7    lsl w23, w23, #14
0x0000000265a00378  aa1702d6    orr x22, x22, x23
0x0000000265a0037c  394b4397    ldrb w23, [x28, #720]
0x0000000265a00380  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00384  53103ef7    lsl w23, w23, #16
0x0000000265a00388  aa1702d6    orr x22, x22, x23
0x0000000265a0038c  394b4797    ldrb w23, [x28, #721]
0x0000000265a00390  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a00394  530f3af7    lsl w23, w23, #17
0x0000000265a00398  aa1702d6    orr x22, x22, x23
0x0000000265a0039c  394b4b97    ldrb w23, [x28, #722]
0x0000000265a003a0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003a4  530e36f7    lsl w23, w23, #18
0x0000000265a003a8  aa1702d6    orr x22, x22, x23
0x0000000265a003ac  394b4f97    ldrb w23, [x28, #723]
0x0000000265a003b0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003b4  530d32f7    lsl w23, w23, #19
0x0000000265a003b8  aa1702d6    orr x22, x22, x23
0x0000000265a003bc  394b5397    ldrb w23, [x28, #724]
0x0000000265a003c0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003c4  530c2ef7    lsl w23, w23, #20
0x0000000265a003c8  aa1702d6    orr x22, x22, x23
0x0000000265a003cc  394b5797    ldrb w23, [x28, #725]
0x0000000265a003d0  d3407ef7    ubfx x23, x23, #0, #32
0x0000000265a003d4  530b2af7    lsl w23, w23, #21
0x0000000265a003d8  aa1702d6    orr x22, x22, x23
0x0000000265a003dc  924002d6    and x22, x22, #0x1
0x0000000265a003e0  93400294    sbfx x20, x20, #0, #1
0x0000000265a003e4  934002b5    sbfx x21, x21, #0, #1
0x0000000265a003e8  f10002df    cmp x22, #0x0 (0)
0x0000000265a003ec  9a950294    csel x20, x20, x21, eq
0x0000000265a003f0  4e080e84    dup v4.2d, x20
0x0000000265a003f4  394baf94    ldrb w20, [x28, #747]
0x0000000265a003f8  91000695    add x21, x20, #0x1 (1)
0x0000000265a003fc  92400ab5    and x21, x21, #0x7
0x0000000265a00400  d2800200    mov x0, #0x10
0x0000000265a00404  9b007e80    mul x0, x20, x0
0x0000000265a00408  8b000380    add x0, x28, x0
0x0000000265a0040c  3dc0bc05    ldr q5, [x0, #752]
0x0000000265a00410  d2800200    mov x0, #0x10
0x0000000265a00414  9b007ea0    mul x0, x21, x0
0x0000000265a00418  8b000380    add x0, x28, x0
0x0000000265a0041c  3dc0bc06    ldr q6, [x0, #752]
0x0000000265a00420  4ea41c80    mov v0.16b, v4.16b
0x0000000265a00424  6e651cc0    bsl v0.16b, v6.16b, v5.16b
0x0000000265a00428  4ea01c04    mov v4.16b, v0.16b
0x0000000265a0042c  d2800200    mov x0, #0x10
0x0000000265a00430  9b007e80    mul x0, x20, x0
0x0000000265a00434  8b000380    add x0, x28, x0
0x0000000265a00438  3d80bc04    str q4, [x0, #752]
0x0000000265a0043c  58000040    ldr x0, pc+8 (addr 0x265a00444)
0x0000000265a00440  d63f0000    blr x0
```

New:
```asm
0x0000000265a002bc  10ffffe0    adr x0, #-0x4 (addr 0x265a002b8)
0x0000000265a002c0  f9005f80    str x0, [x28, #184]
0x0000000265a002c4  d2800014    mov x20, #0x0
0x0000000265a002c8  d2800035    mov x21, #0x1
0x0000000265a002cc  d2800056    mov x22, #0x2
0x0000000265a002d0  394b0397    ldrb w23, [x28, #704]
0x0000000265a002d4  330002f6    bfxil w22, w23, #0, #1
0x0000000265a002d8  924002d6    and x22, x22, #0x1
0x0000000265a002dc  93400294    sbfx x20, x20, #0, #1
0x0000000265a002e0  934002b5    sbfx x21, x21, #0, #1
0x0000000265a002e4  f10002df    cmp x22, #0x0 (0)
0x0000000265a002e8  9a950294    csel x20, x20, x21, eq
0x0000000265a002ec  4e080e84    dup v4.2d, x20
0x0000000265a002f0  394baf94    ldrb w20, [x28, #747]
0x0000000265a002f4  91000695    add x21, x20, #0x1 (1)
0x0000000265a002f8  92400ab5    and x21, x21, #0x7
0x0000000265a002fc  d2800200    mov x0, #0x10
0x0000000265a00300  9b007e80    mul x0, x20, x0
0x0000000265a00304  8b000380    add x0, x28, x0
0x0000000265a00308  3dc0bc05    ldr q5, [x0, #752]
0x0000000265a0030c  d2800200    mov x0, #0x10
0x0000000265a00310  9b007ea0    mul x0, x21, x0
0x0000000265a00314  8b000380    add x0, x28, x0
0x0000000265a00318  3dc0bc06    ldr q6, [x0, #752]
0x0000000265a0031c  4ea41c80    mov v0.16b, v4.16b
0x0000000265a00320  6e651cc0    bsl v0.16b, v6.16b, v5.16b
0x0000000265a00324  4ea01c04    mov v4.16b, v0.16b
0x0000000265a00328  d2800200    mov x0, #0x10
0x0000000265a0032c  9b007e80    mul x0, x20, x0
0x0000000265a00330  8b000380    add x0, x28, x0
0x0000000265a00334  3d80bc04    str q4, [x0, #752]
0x0000000265a00338  58000040    ldr x0, pc+8 (addr 0x265a00340)
0x0000000265a0033c  d63f0000    blr x0
```
Sonicadvance1 added a commit to Sonicadvance1/FEX that referenced this pull request Jul 16, 2023
The BFI cascades in this particular instruction weren't optimal.
Biggest improvement is the 8-bit version, while the 16-bit version gets
a minor improvement.

8-bit instruction count reduced from 38 to 29.
16-bit instruction count reduced from 34 to 28.

RCL can have a similar optimization done to it.
```asm
Before 16-bit:
0x0000ffff80a801e0  10ffffe0    adr x0, #-0x4 (addr 0xffff80a801dc)
0x0000ffff80a801e4  f9005f80    str x0, [x28, #184]
0x0000ffff80a801e8  d3403cb4    uxth x20, w5
0x0000ffff80a801ec  d3403cf5    uxth x21, w7
0x0000ffff80a801f0  394b0396    ldrb w22, [x28, #704]
0x0000ffff80a801f4  12001294    and w20, w20, #0x1f
0x0000ffff80a801f8  d2800017    mov x23, #0x0
0x0000ffff80a801fc  b3403eb7    bfxil x23, x21, #0, #16
0x0000ffff80a80200  b37002d7    bfi x23, x22, #16, #1
0x0000ffff80a80204  b36f3eb7    bfi x23, x21, #17, #16
0x0000ffff80a80208  b35f02d7    bfi x23, x22, #33, #1
0x0000ffff80a8020c  aa1703e0    mov x0, x23
0x0000ffff80a80210  b35e3ea0    bfi x0, x21, #34, #16
0x0000ffff80a80214  aa0003f5    mov x21, x0
0x0000ffff80a80218  b34e02d5    bfi x21, x22, #50, #1
0x0000ffff80a8021c  9ad426b7    lsr x23, x21, x20
0x0000ffff80a80220  b3403ee7    bfxil x7, x23, #0, #16
0x0000ffff80a80224  51000698    sub w24, w20, #0x1 (1)
0x0000ffff80a80228  9ad826b5    lsr x21, x21, x24
0x0000ffff80a8022c  d34002b5    ubfx x21, x21, #0, #1
0x0000ffff80a80230  7100069f    cmp w20, #0x1 (1)
0x0000ffff80a80234  9a9622b4    csel x20, x21, x22, hs
0x0000ffff80a80238  390b0394    strb w20, [x28, #704]
0x0000ffff80a8023c  d34f3ef4    ubfx x20, x23, #15, #1
0x0000ffff80a80240  d34e3af5    ubfx x21, x23, #14, #1
0x0000ffff80a80244  ca150294    eor x20, x20, x21
0x0000ffff80a80248  390b2f94    strb w20, [x28, #715]
0x0000ffff80a8024c  58000040    ldr x0, pc+8 (addr 0xffff80a80254)
0x0000ffff80a80250  d63f0000    blr x0
0x0000ffff80a80254  967da128    bl #-0x6097b60 (addr 0xffff7a9e86f4)
0x0000ffff80a80258  0000ffff    udf #0xffff
0x0000ffff80a8025c  00010023    unallocated (Unallocated)
0x0000ffff80a80260  00000000    udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 34
[DEBUG] Blow-up Amt: 34x

After 16-bit:
0x0000ffffa7c801e0  10ffffe0            adr x0, #-0x4 (addr 0xffffa7c801dc)
0x0000ffffa7c801e4  f9005f80            str x0, [x28, #184]
0x0000ffffa7c801e8  d3403cb4            uxth x20, w5
0x0000ffffa7c801ec  d3403cf5            uxth x21, w7
0x0000ffffa7c801f0  394b0396            ldrb w22, [x28, #704]
0x0000ffffa7c801f4  12001294            and w20, w20, #0x1f
0x0000ffffa7c801f8  b37002d5            bfi x21, x22, #16, #1
0x0000ffffa7c801fc  b36f42b5            bfi x21, x21, #17, #17
0x0000ffffa7c80200  b35e42b5            bfi x21, x21, #34, #17
0x0000ffffa7c80204  9ad426b7            lsr x23, x21, x20
0x0000ffffa7c80208  b3403ee7            bfxil x7, x23, #0, #16
0x0000ffffa7c8020c  51000698            sub w24, w20, #0x1 (1)
0x0000ffffa7c80210  9ad826b5            lsr x21, x21, x24
0x0000ffffa7c80214  d34002b5            ubfx x21, x21, #0, #1
0x0000ffffa7c80218  7100069f            cmp w20, #0x1 (1)
0x0000ffffa7c8021c  9a9622b4            csel x20, x21, x22, hs
0x0000ffffa7c80220  390b0394            strb w20, [x28, #704]
0x0000ffffa7c80224  d34f3ef4            ubfx x20, x23, #15, #1
0x0000ffffa7c80228  d34e3af5            ubfx x21, x23, #14, #1
0x0000ffffa7c8022c  ca150294            eor x20, x20, x21
0x0000ffffa7c80230  390b2f94            strb w20, [x28, #715]
0x0000ffffa7c80234  58000040            ldr x0, pc+8 (addr 0xffffa7c8023c)
0x0000ffffa7c80238  d63f0000            blr x0
0x0000ffffa7c8023c  bd9cc128            unallocated (Unallocated)
0x0000ffffa7c80240  0000ffff            udf #0xffff
0x0000ffffa7c80244  00010023            unallocated (Unallocated)
0x0000ffffa7c80248  00000000            udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 28
[DEBUG] Blow-up Amt: 28x

Before 8-bit:
0x0000ffffa92801e0  10ffffe0            adr x0, #-0x4 (addr 0xffffa92801dc)
0x0000ffffa92801e4  f9005f80            str x0, [x28, #184]
0x0000ffffa92801e8  d3401cb4            uxtb x20, w5
0x0000ffffa92801ec  d3401cf5            uxtb x21, w7
0x0000ffffa92801f0  394b0396            ldrb w22, [x28, #704]
0x0000ffffa92801f4  12001294            and w20, w20, #0x1f
0x0000ffffa92801f8  d2800017            mov x23, #0x0
0x0000ffffa92801fc  b3401eb7            bfxil x23, x21, #0, #8
0x0000ffffa9280200  b37802d7            bfi x23, x22, #8, #1
0x0000ffffa9280204  b3771eb7            bfi x23, x21, #9, #8
0x0000ffffa9280208  b36f02d7            bfi x23, x22, #17, #1
0x0000ffffa928020c  b36e1eb7            bfi x23, x21, #18, #8
0x0000ffffa9280210  b36602d7            bfi x23, x22, #26, #1
0x0000ffffa9280214  b3651eb7            bfi x23, x21, #27, #8
0x0000ffffa9280218  b35d02d7            bfi x23, x22, #35, #1
0x0000ffffa928021c  aa1703e0            mov x0, x23
0x0000ffffa9280220  b35c1ea0            bfi x0, x21, #36, #8
0x0000ffffa9280224  aa0003f5            mov x21, x0
0x0000ffffa9280228  b35402d5            bfi x21, x22, #44, #1
0x0000ffffa928022c  9ad426b7            lsr x23, x21, x20
0x0000ffffa9280230  b3401ee7            bfxil x7, x23, #0, #8
0x0000ffffa9280234  51000698            sub w24, w20, #0x1 (1)
0x0000ffffa9280238  9ad826b5            lsr x21, x21, x24
0x0000ffffa928023c  d34002b5            ubfx x21, x21, #0, #1
0x0000ffffa9280240  7100069f            cmp w20, #0x1 (1)
0x0000ffffa9280244  9a9622b4            csel x20, x21, x22, hs
0x0000ffffa9280248  390b0394            strb w20, [x28, #704]
0x0000ffffa928024c  d3471ef4            ubfx x20, x23, #7, #1
0x0000ffffa9280250  d3461af5            ubfx x21, x23, #6, #1
0x0000ffffa9280254  ca150294            eor x20, x20, x21
0x0000ffffa9280258  390b2f94            strb w20, [x28, #715]
0x0000ffffa928025c  58000040            ldr x0, pc+8 (addr 0xffffa9280264)
0x0000ffffa9280260  d63f0000            blr x0
0x0000ffffa9280264  bf062128            unallocated (Unallocated)
0x0000ffffa9280268  0000ffff            udf #0xffff
0x0000ffffa928026c  00010022            unallocated (Unallocated)
0x0000ffffa9280270  00000000            udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 38
[DEBUG] Blow-up Amt: 38x

After 8-bit:
0x0000ffff9cc801e0  10ffffe0    adr x0, #-0x4 (addr 0xffff9cc801dc)
0x0000ffff9cc801e4  f9005f80    str x0, [x28, #184]
0x0000ffff9cc801e8  d3401cb4    uxtb x20, w5
0x0000ffff9cc801ec  d3401cf5    uxtb x21, w7
0x0000ffff9cc801f0  394b0396    ldrb w22, [x28, #704]
0x0000ffff9cc801f4  12001294    and w20, w20, #0x1f
0x0000ffff9cc801f8  b37802d5    bfi x21, x22, #8, #1
0x0000ffff9cc801fc  b37722b5    bfi x21, x21, #9, #9
0x0000ffff9cc80200  b36e46b5    bfi x21, x21, #18, #18
0x0000ffff9cc80204  b3778eb5    bfi x21, x21, #9, #36
0x0000ffff9cc80208  9ad426b7    lsr x23, x21, x20
0x0000ffff9cc8020c  b3401ee7    bfxil x7, x23, #0, #8
0x0000ffff9cc80210  51000698    sub w24, w20, #0x1 (1)
0x0000ffff9cc80214  9ad826b5    lsr x21, x21, x24
0x0000ffff9cc80218  d34002b5    ubfx x21, x21, #0, #1
0x0000ffff9cc8021c  7100069f    cmp w20, #0x1 (1)
0x0000ffff9cc80220  9a9622b4    csel x20, x21, x22, hs
0x0000ffff9cc80224  390b0394    strb w20, [x28, #704]
0x0000ffff9cc80228  d3471ef4    ubfx x20, x23, #7, #1
0x0000ffff9cc8022c  d3461af5    ubfx x21, x23, #6, #1
0x0000ffff9cc80230  ca150294    eor x20, x20, x21
0x0000ffff9cc80234  390b2f94    strb w20, [x28, #715]
0x0000ffff9cc80238  58000040    ldr x0, pc+8 (addr 0xffff9cc80240)
0x0000ffff9cc8023c  d63f0000    blr x0
0x0000ffff9cc80240  b2a75128    unallocated (Unallocated)
0x0000ffff9cc80244  0000ffff    udf #0xffff
0x0000ffff9cc80248  00010022    unallocated (Unallocated)
0x0000ffff9cc8024c  00000000    udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 29
[DEBUG] Blow-up Amt: 29x
```
Sonicadvance1 added a commit to Sonicadvance1/FEX that referenced this pull request Jul 18, 2023
The BFI cascades in this particular instruction weren't optimal.
Biggest improvement is the 8-bit version, while the 16-bit version gets
a minor improvement.

8-bit instruction count reduced from 38 to 29.
16-bit instruction count reduced from 34 to 28.

RCL can have a similar optimization done to it.
```asm
Before 16-bit:
0x0000ffff80a801e0  10ffffe0    adr x0, #-0x4 (addr 0xffff80a801dc)
0x0000ffff80a801e4  f9005f80    str x0, [x28, #184]
0x0000ffff80a801e8  d3403cb4    uxth x20, w5
0x0000ffff80a801ec  d3403cf5    uxth x21, w7
0x0000ffff80a801f0  394b0396    ldrb w22, [x28, #704]
0x0000ffff80a801f4  12001294    and w20, w20, #0x1f
0x0000ffff80a801f8  d2800017    mov x23, #0x0
0x0000ffff80a801fc  b3403eb7    bfxil x23, x21, #0, #16
0x0000ffff80a80200  b37002d7    bfi x23, x22, #16, #1
0x0000ffff80a80204  b36f3eb7    bfi x23, x21, #17, #16
0x0000ffff80a80208  b35f02d7    bfi x23, x22, #33, #1
0x0000ffff80a8020c  aa1703e0    mov x0, x23
0x0000ffff80a80210  b35e3ea0    bfi x0, x21, #34, #16
0x0000ffff80a80214  aa0003f5    mov x21, x0
0x0000ffff80a80218  b34e02d5    bfi x21, x22, #50, #1
0x0000ffff80a8021c  9ad426b7    lsr x23, x21, x20
0x0000ffff80a80220  b3403ee7    bfxil x7, x23, #0, #16
0x0000ffff80a80224  51000698    sub w24, w20, #0x1 (1)
0x0000ffff80a80228  9ad826b5    lsr x21, x21, x24
0x0000ffff80a8022c  d34002b5    ubfx x21, x21, #0, #1
0x0000ffff80a80230  7100069f    cmp w20, #0x1 (1)
0x0000ffff80a80234  9a9622b4    csel x20, x21, x22, hs
0x0000ffff80a80238  390b0394    strb w20, [x28, #704]
0x0000ffff80a8023c  d34f3ef4    ubfx x20, x23, #15, #1
0x0000ffff80a80240  d34e3af5    ubfx x21, x23, #14, #1
0x0000ffff80a80244  ca150294    eor x20, x20, x21
0x0000ffff80a80248  390b2f94    strb w20, [x28, #715]
0x0000ffff80a8024c  58000040    ldr x0, pc+8 (addr 0xffff80a80254)
0x0000ffff80a80250  d63f0000    blr x0
0x0000ffff80a80254  967da128    bl #-0x6097b60 (addr 0xffff7a9e86f4)
0x0000ffff80a80258  0000ffff    udf #0xffff
0x0000ffff80a8025c  00010023    unallocated (Unallocated)
0x0000ffff80a80260  00000000    udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 34
[DEBUG] Blow-up Amt: 34x

After 16-bit:
0x0000ffffa7c801e0  10ffffe0            adr x0, #-0x4 (addr 0xffffa7c801dc)
0x0000ffffa7c801e4  f9005f80            str x0, [x28, #184]
0x0000ffffa7c801e8  d3403cb4            uxth x20, w5
0x0000ffffa7c801ec  d3403cf5            uxth x21, w7
0x0000ffffa7c801f0  394b0396            ldrb w22, [x28, #704]
0x0000ffffa7c801f4  12001294            and w20, w20, #0x1f
0x0000ffffa7c801f8  b37002d5            bfi x21, x22, #16, #1
0x0000ffffa7c801fc  b36f42b5            bfi x21, x21, #17, #17
0x0000ffffa7c80200  b35e42b5            bfi x21, x21, #34, #17
0x0000ffffa7c80204  9ad426b7            lsr x23, x21, x20
0x0000ffffa7c80208  b3403ee7            bfxil x7, x23, #0, #16
0x0000ffffa7c8020c  51000698            sub w24, w20, #0x1 (1)
0x0000ffffa7c80210  9ad826b5            lsr x21, x21, x24
0x0000ffffa7c80214  d34002b5            ubfx x21, x21, #0, #1
0x0000ffffa7c80218  7100069f            cmp w20, #0x1 (1)
0x0000ffffa7c8021c  9a9622b4            csel x20, x21, x22, hs
0x0000ffffa7c80220  390b0394            strb w20, [x28, #704]
0x0000ffffa7c80224  d34f3ef4            ubfx x20, x23, #15, #1
0x0000ffffa7c80228  d34e3af5            ubfx x21, x23, #14, #1
0x0000ffffa7c8022c  ca150294            eor x20, x20, x21
0x0000ffffa7c80230  390b2f94            strb w20, [x28, #715]
0x0000ffffa7c80234  58000040            ldr x0, pc+8 (addr 0xffffa7c8023c)
0x0000ffffa7c80238  d63f0000            blr x0
0x0000ffffa7c8023c  bd9cc128            unallocated (Unallocated)
0x0000ffffa7c80240  0000ffff            udf #0xffff
0x0000ffffa7c80244  00010023            unallocated (Unallocated)
0x0000ffffa7c80248  00000000            udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 28
[DEBUG] Blow-up Amt: 28x

Before 8-bit:
0x0000ffffa92801e0  10ffffe0            adr x0, #-0x4 (addr 0xffffa92801dc)
0x0000ffffa92801e4  f9005f80            str x0, [x28, #184]
0x0000ffffa92801e8  d3401cb4            uxtb x20, w5
0x0000ffffa92801ec  d3401cf5            uxtb x21, w7
0x0000ffffa92801f0  394b0396            ldrb w22, [x28, #704]
0x0000ffffa92801f4  12001294            and w20, w20, #0x1f
0x0000ffffa92801f8  d2800017            mov x23, #0x0
0x0000ffffa92801fc  b3401eb7            bfxil x23, x21, #0, #8
0x0000ffffa9280200  b37802d7            bfi x23, x22, #8, #1
0x0000ffffa9280204  b3771eb7            bfi x23, x21, #9, #8
0x0000ffffa9280208  b36f02d7            bfi x23, x22, #17, #1
0x0000ffffa928020c  b36e1eb7            bfi x23, x21, #18, #8
0x0000ffffa9280210  b36602d7            bfi x23, x22, #26, #1
0x0000ffffa9280214  b3651eb7            bfi x23, x21, #27, #8
0x0000ffffa9280218  b35d02d7            bfi x23, x22, #35, #1
0x0000ffffa928021c  aa1703e0            mov x0, x23
0x0000ffffa9280220  b35c1ea0            bfi x0, x21, #36, #8
0x0000ffffa9280224  aa0003f5            mov x21, x0
0x0000ffffa9280228  b35402d5            bfi x21, x22, #44, #1
0x0000ffffa928022c  9ad426b7            lsr x23, x21, x20
0x0000ffffa9280230  b3401ee7            bfxil x7, x23, #0, #8
0x0000ffffa9280234  51000698            sub w24, w20, #0x1 (1)
0x0000ffffa9280238  9ad826b5            lsr x21, x21, x24
0x0000ffffa928023c  d34002b5            ubfx x21, x21, #0, #1
0x0000ffffa9280240  7100069f            cmp w20, #0x1 (1)
0x0000ffffa9280244  9a9622b4            csel x20, x21, x22, hs
0x0000ffffa9280248  390b0394            strb w20, [x28, #704]
0x0000ffffa928024c  d3471ef4            ubfx x20, x23, #7, #1
0x0000ffffa9280250  d3461af5            ubfx x21, x23, #6, #1
0x0000ffffa9280254  ca150294            eor x20, x20, x21
0x0000ffffa9280258  390b2f94            strb w20, [x28, #715]
0x0000ffffa928025c  58000040            ldr x0, pc+8 (addr 0xffffa9280264)
0x0000ffffa9280260  d63f0000            blr x0
0x0000ffffa9280264  bf062128            unallocated (Unallocated)
0x0000ffffa9280268  0000ffff            udf #0xffff
0x0000ffffa928026c  00010022            unallocated (Unallocated)
0x0000ffffa9280270  00000000            udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 38
[DEBUG] Blow-up Amt: 38x

After 8-bit:
0x0000ffff9cc801e0  10ffffe0    adr x0, #-0x4 (addr 0xffff9cc801dc)
0x0000ffff9cc801e4  f9005f80    str x0, [x28, #184]
0x0000ffff9cc801e8  d3401cb4    uxtb x20, w5
0x0000ffff9cc801ec  d3401cf5    uxtb x21, w7
0x0000ffff9cc801f0  394b0396    ldrb w22, [x28, #704]
0x0000ffff9cc801f4  12001294    and w20, w20, #0x1f
0x0000ffff9cc801f8  b37802d5    bfi x21, x22, #8, #1
0x0000ffff9cc801fc  b37722b5    bfi x21, x21, #9, #9
0x0000ffff9cc80200  b36e46b5    bfi x21, x21, #18, #18
0x0000ffff9cc80204  b3778eb5    bfi x21, x21, #9, #36
0x0000ffff9cc80208  9ad426b7    lsr x23, x21, x20
0x0000ffff9cc8020c  b3401ee7    bfxil x7, x23, #0, #8
0x0000ffff9cc80210  51000698    sub w24, w20, #0x1 (1)
0x0000ffff9cc80214  9ad826b5    lsr x21, x21, x24
0x0000ffff9cc80218  d34002b5    ubfx x21, x21, #0, #1
0x0000ffff9cc8021c  7100069f    cmp w20, #0x1 (1)
0x0000ffff9cc80220  9a9622b4    csel x20, x21, x22, hs
0x0000ffff9cc80224  390b0394    strb w20, [x28, #704]
0x0000ffff9cc80228  d3471ef4    ubfx x20, x23, #7, #1
0x0000ffff9cc8022c  d3461af5    ubfx x21, x23, #6, #1
0x0000ffff9cc80230  ca150294    eor x20, x20, x21
0x0000ffff9cc80234  390b2f94    strb w20, [x28, #715]
0x0000ffff9cc80238  58000040    ldr x0, pc+8 (addr 0xffff9cc80240)
0x0000ffff9cc8023c  d63f0000    blr x0
0x0000ffff9cc80240  b2a75128    unallocated (Unallocated)
0x0000ffff9cc80244  0000ffff    udf #0xffff
0x0000ffff9cc80248  00010022    unallocated (Unallocated)
0x0000ffff9cc8024c  00000000    udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 29
[DEBUG] Blow-up Amt: 29x
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants