Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OpcodeDispatcher: Optimize 8/16-bit RCR
The BFI cascades in this particular instruction weren't optimal. The biggest improvement is in the 8-bit version, while the 16-bit version gets a minor improvement.

The 8-bit instruction count is reduced from 38 to 29.
The 16-bit instruction count is reduced from 34 to 28.

RCL can have a similar optimization done to it.

```asm
Before 16-bit:
0x0000ffff80a801e0 10ffffe0 adr x0, #-0x4 (addr 0xffff80a801dc)
0x0000ffff80a801e4 f9005f80 str x0, [x28, #184]
0x0000ffff80a801e8 d3403cb4 uxth x20, w5
0x0000ffff80a801ec d3403cf5 uxth x21, w7
0x0000ffff80a801f0 394b0396 ldrb w22, [x28, #704]
0x0000ffff80a801f4 12001294 and w20, w20, #0x1f
0x0000ffff80a801f8 d2800017 mov x23, #0x0
0x0000ffff80a801fc b3403eb7 bfxil x23, x21, #0, #16
0x0000ffff80a80200 b37002d7 bfi x23, x22, #16, #1
0x0000ffff80a80204 b36f3eb7 bfi x23, x21, #17, #16
0x0000ffff80a80208 b35f02d7 bfi x23, x22, #33, #1
0x0000ffff80a8020c aa1703e0 mov x0, x23
0x0000ffff80a80210 b35e3ea0 bfi x0, x21, #34, #16
0x0000ffff80a80214 aa0003f5 mov x21, x0
0x0000ffff80a80218 b34e02d5 bfi x21, x22, #50, #1
0x0000ffff80a8021c 9ad426b7 lsr x23, x21, x20
0x0000ffff80a80220 b3403ee7 bfxil x7, x23, #0, #16
0x0000ffff80a80224 51000698 sub w24, w20, #0x1 (1)
0x0000ffff80a80228 9ad826b5 lsr x21, x21, x24
0x0000ffff80a8022c d34002b5 ubfx x21, x21, #0, #1
0x0000ffff80a80230 7100069f cmp w20, #0x1 (1)
0x0000ffff80a80234 9a9622b4 csel x20, x21, x22, hs
0x0000ffff80a80238 390b0394 strb w20, [x28, #704]
0x0000ffff80a8023c d34f3ef4 ubfx x20, x23, #15, #1
0x0000ffff80a80240 d34e3af5 ubfx x21, x23, #14, #1
0x0000ffff80a80244 ca150294 eor x20, x20, x21
0x0000ffff80a80248 390b2f94 strb w20, [x28, #715]
0x0000ffff80a8024c 58000040 ldr x0, pc+8 (addr 0xffff80a80254)
0x0000ffff80a80250 d63f0000 blr x0
0x0000ffff80a80254 967da128 bl #-0x6097b60 (addr 0xffff7a9e86f4)
0x0000ffff80a80258 0000ffff udf #0xffff
0x0000ffff80a8025c 00010023 unallocated (Unallocated)
0x0000ffff80a80260 00000000 udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 34
[DEBUG] Blow-up Amt: 34x

After 16-bit:
0x0000ffffa7c801e0 10ffffe0 adr x0, #-0x4 (addr 0xffffa7c801dc)
0x0000ffffa7c801e4 f9005f80 str x0, [x28, #184]
0x0000ffffa7c801e8 d3403cb4 uxth x20, w5
0x0000ffffa7c801ec d3403cf5 uxth x21, w7
0x0000ffffa7c801f0 394b0396 ldrb w22, [x28, #704]
0x0000ffffa7c801f4 12001294 and w20, w20, #0x1f
0x0000ffffa7c801f8 b37002d5 bfi x21, x22, #16, #1
0x0000ffffa7c801fc b36f42b5 bfi x21, x21, #17, #17
0x0000ffffa7c80200 b35e42b5 bfi x21, x21, #34, #17
0x0000ffffa7c80204 9ad426b7 lsr x23, x21, x20
0x0000ffffa7c80208 b3403ee7 bfxil x7, x23, #0, #16
0x0000ffffa7c8020c 51000698 sub w24, w20, #0x1 (1)
0x0000ffffa7c80210 9ad826b5 lsr x21, x21, x24
0x0000ffffa7c80214 d34002b5 ubfx x21, x21, #0, #1
0x0000ffffa7c80218 7100069f cmp w20, #0x1 (1)
0x0000ffffa7c8021c 9a9622b4 csel x20, x21, x22, hs
0x0000ffffa7c80220 390b0394 strb w20, [x28, #704]
0x0000ffffa7c80224 d34f3ef4 ubfx x20, x23, #15, #1
0x0000ffffa7c80228 d34e3af5 ubfx x21, x23, #14, #1
0x0000ffffa7c8022c ca150294 eor x20, x20, x21
0x0000ffffa7c80230 390b2f94 strb w20, [x28, #715]
0x0000ffffa7c80234 58000040 ldr x0, pc+8 (addr 0xffffa7c8023c)
0x0000ffffa7c80238 d63f0000 blr x0
0x0000ffffa7c8023c bd9cc128 unallocated (Unallocated)
0x0000ffffa7c80240 0000ffff udf #0xffff
0x0000ffffa7c80244 00010023 unallocated (Unallocated)
0x0000ffffa7c80248 00000000 udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 28
[DEBUG] Blow-up Amt: 28x

Before 8-bit:
0x0000ffffa92801e0 10ffffe0 adr x0, #-0x4 (addr 0xffffa92801dc)
0x0000ffffa92801e4 f9005f80 str x0, [x28, #184]
0x0000ffffa92801e8 d3401cb4 uxtb x20, w5
0x0000ffffa92801ec d3401cf5 uxtb x21, w7
0x0000ffffa92801f0 394b0396 ldrb w22, [x28, #704]
0x0000ffffa92801f4 12001294 and w20, w20, #0x1f
0x0000ffffa92801f8 d2800017 mov x23, #0x0
0x0000ffffa92801fc b3401eb7 bfxil x23, x21, #0, #8
0x0000ffffa9280200 b37802d7 bfi x23, x22, #8, #1
0x0000ffffa9280204 b3771eb7 bfi x23, x21, #9, #8
0x0000ffffa9280208 b36f02d7 bfi x23, x22, #17, #1
0x0000ffffa928020c b36e1eb7 bfi x23, x21, #18, #8
0x0000ffffa9280210 b36602d7 bfi x23, x22, #26, #1
0x0000ffffa9280214 b3651eb7 bfi x23, x21, #27, #8
0x0000ffffa9280218 b35d02d7 bfi x23, x22, #35, #1
0x0000ffffa928021c aa1703e0 mov x0, x23
0x0000ffffa9280220 b35c1ea0 bfi x0, x21, #36, #8
0x0000ffffa9280224 aa0003f5 mov x21, x0
0x0000ffffa9280228 b35402d5 bfi x21, x22, #44, #1
0x0000ffffa928022c 9ad426b7 lsr x23, x21, x20
0x0000ffffa9280230 b3401ee7 bfxil x7, x23, #0, #8
0x0000ffffa9280234 51000698 sub w24, w20, #0x1 (1)
0x0000ffffa9280238 9ad826b5 lsr x21, x21, x24
0x0000ffffa928023c d34002b5 ubfx x21, x21, #0, #1
0x0000ffffa9280240 7100069f cmp w20, #0x1 (1)
0x0000ffffa9280244 9a9622b4 csel x20, x21, x22, hs
0x0000ffffa9280248 390b0394 strb w20, [x28, #704]
0x0000ffffa928024c d3471ef4 ubfx x20, x23, #7, #1
0x0000ffffa9280250 d3461af5 ubfx x21, x23, #6, #1
0x0000ffffa9280254 ca150294 eor x20, x20, x21
0x0000ffffa9280258 390b2f94 strb w20, [x28, #715]
0x0000ffffa928025c 58000040 ldr x0, pc+8 (addr 0xffffa9280264)
0x0000ffffa9280260 d63f0000 blr x0
0x0000ffffa9280264 bf062128 unallocated (Unallocated)
0x0000ffffa9280268 0000ffff udf #0xffff
0x0000ffffa928026c 00010022 unallocated (Unallocated)
0x0000ffffa9280270 00000000 udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 38
[DEBUG] Blow-up Amt: 38x

After 8-bit:
0x0000ffff9cc801e0 10ffffe0 adr x0, #-0x4 (addr 0xffff9cc801dc)
0x0000ffff9cc801e4 f9005f80 str x0, [x28, #184]
0x0000ffff9cc801e8 d3401cb4 uxtb x20, w5
0x0000ffff9cc801ec d3401cf5 uxtb x21, w7
0x0000ffff9cc801f0 394b0396 ldrb w22, [x28, #704]
0x0000ffff9cc801f4 12001294 and w20, w20, #0x1f
0x0000ffff9cc801f8 b37802d5 bfi x21, x22, #8, #1
0x0000ffff9cc801fc b37722b5 bfi x21, x21, #9, #9
0x0000ffff9cc80200 b36e46b5 bfi x21, x21, #18, #18
0x0000ffff9cc80204 b3778eb5 bfi x21, x21, #9, #36
0x0000ffff9cc80208 9ad426b7 lsr x23, x21, x20
0x0000ffff9cc8020c b3401ee7 bfxil x7, x23, #0, #8
0x0000ffff9cc80210 51000698 sub w24, w20, #0x1 (1)
0x0000ffff9cc80214 9ad826b5 lsr x21, x21, x24
0x0000ffff9cc80218 d34002b5 ubfx x21, x21, #0, #1
0x0000ffff9cc8021c 7100069f cmp w20, #0x1 (1)
0x0000ffff9cc80220 9a9622b4 csel x20, x21, x22, hs
0x0000ffff9cc80224 390b0394 strb w20, [x28, #704]
0x0000ffff9cc80228 d3471ef4 ubfx x20, x23, #7, #1
0x0000ffff9cc8022c d3461af5 ubfx x21, x23, #6, #1
0x0000ffff9cc80230 ca150294 eor x20, x20, x21
0x0000ffff9cc80234 390b2f94 strb w20, [x28, #715]
0x0000ffff9cc80238 58000040 ldr x0, pc+8 (addr 0xffff9cc80240)
0x0000ffff9cc8023c d63f0000 blr x0
0x0000ffff9cc80240 b2a75128 unallocated (Unallocated)
0x0000ffff9cc80244 0000ffff udf #0xffff
0x0000ffff9cc80248 00010022 unallocated (Unallocated)
0x0000ffff9cc8024c 00000000 udf #0x0
[DEBUG] RIP: 0x10020
[DEBUG] Guest Code instructions: 1
[DEBUG] Host Code instructions: 29
[DEBUG] Blow-up Amt: 29x
```
- Loading branch information