diff --git a/unittests/ASM/Known_Failures_host b/unittests/ASM/Known_Failures_host new file mode 100644 index 0000000000..322f758bca --- /dev/null +++ b/unittests/ASM/Known_Failures_host @@ -0,0 +1 @@ +Test_X87/FXAM_Simple.asm diff --git a/unittests/ASM/X87/FXAM_Push.asm b/unittests/ASM/X87/FXAM_Push.asm index a26e039bc8..d94511f3ca 100644 --- a/unittests/ASM/X87/FXAM_Push.asm +++ b/unittests/ASM/X87/FXAM_Push.asm @@ -11,7 +11,7 @@ mov rdx, 0xe0000000 ; This behaviour was seen around Wine 32-bit libraries ; Anything doing a call to a double application would spin ; the x87 stack on to the stack looking for fxam to return empty -; Empty in this case is that C0 and C3 is set whiel C2 is not +; Empty in this case is that C0 and C3 is set while C2 is not fninit ; Fill the x87 stack diff --git a/unittests/ASM/X87/FXAM_Push_2.asm b/unittests/ASM/X87/FXAM_Push_2.asm index c6ef4ff3fd..78edde24d3 100644 --- a/unittests/ASM/X87/FXAM_Push_2.asm +++ b/unittests/ASM/X87/FXAM_Push_2.asm @@ -11,7 +11,7 @@ mov rdx, 0xe0000000 ; This behaviour was seen around Wine 32-bit libraries ; Anything doing a call to a double application would spin ; the x87 stack on to the stack looking for fxam to return empty -; Empty in this case is that C0 and C3 is set whiel C2 is not +; Empty in this case is that C0 and C3 is set while C2 is not fninit ; Empty stack to make sure we don't push anything diff --git a/unittests/ASM/X87/FXAM_Push_Simple.asm b/unittests/ASM/X87/FXAM_Push_Simple.asm new file mode 100644 index 0000000000..de7ab5086f --- /dev/null +++ b/unittests/ASM/X87/FXAM_Push_Simple.asm @@ -0,0 +1,40 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "8" + } +} +%endif + +fninit +fld1 +fld1 +fld1 +fld1 +fld1 +fld1 +fld1 +fld1 + +mov ebx, 0 ; ebx counts how many values get popped in the loop below + +.ExamineStack: +; Examine st(0) +fxam +fwait +; Get the results in to AX +fnstsw ax +and ax, 0x4500 ; keep only the C3, C2 and C0 condition bits +; Check for empty (C3 and C0 set, C2 clear) +cmp ax, 0x4100 +je .Done + +; Now pop the x87 stack value +; We know it isn't empty +fstp st0 +fwait +inc ebx +jmp .ExamineStack 
+.Done: +mov eax, ebx +hlt diff --git a/unittests/ASM/X87/FXAM_Push_Simple_2.asm b/unittests/ASM/X87/FXAM_Push_Simple_2.asm new file mode 100644 index 0000000000..429a0ae30c --- /dev/null +++ b/unittests/ASM/X87/FXAM_Push_Simple_2.asm @@ -0,0 +1,51 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "8" + } +} +%endif + +mov rdx, 0xe0000000 + +; This behaviour was seen around Wine 32-bit libraries +; Anything doing a call to a double application would spin +; the x87 stack on to the stack looking for fxam to return empty +; Empty in this case is that C0 and C3 is set while C2 is not + +fninit +; Fill the x87 stack +fldz +fldz +fldz +fldz +fldz +fldz +fldz +fldz + +mov eax, 0 +mov ecx, 0 ; ecx counts the values stored and is also the store index + +.ExamineStack: +; Examine st(0) +fxam +fwait +; Get the results in to AX +fnstsw ax +and ax, 0x4500 ; keep only the C3, C2 and C0 condition bits +; Check for empty (C3 and C0 set, C2 clear) +cmp ax, 0x4100 +je .Done + +; Now store the x87 stack value to memory and pop it +; We know it isn't empty +fstp qword [rdx + rcx * 8] +fwait +inc ecx +jmp .ExamineStack + +.Done: +; Save how many we stored +mov eax, ecx +hlt diff --git a/unittests/ASM/X87/FXAM_Simple.asm b/unittests/ASM/X87/FXAM_Simple.asm new file mode 100644 index 0000000000..7e76fbaaa3 --- /dev/null +++ b/unittests/ASM/X87/FXAM_Simple.asm @@ -0,0 +1,49 @@ +;; Simpler versions of the FXAM_Push* tests. +;; This test fails under the host runner because the zero classification is not supported yet +;; (see the fldz case below). On the host, RCX contains 0x4000 instead of the 0x0400 expected here. +%ifdef CONFIG +{ + "RegData": { + "RAX": "0x6", + "RBX": "0x0400", + "RCX": "0x0400", + "RDX": "0x4100" + } +} +%endif + +mov rdx, 0xe0000000 ; NOTE(review): looks unused in this test, rdx is overwritten by mov edx, eax below - confirm + +fninit +;; Before adding anything to the stack, let's examine it. +;; The result should be empty. +fxam +fwait + +fnstsw ax +and ax, 0x4500 ; should be 0x4100 for empty +mov edx, eax + +fldz +fxam +fwait + +fnstsw ax +and ax, 0x4500 ; should be 0x4000 for zero, but there's no support for it at the moment, so it'll return 0x0400 as it does for a normal number. 
+mov ecx, eax + +fld1 +fxam +fwait + +fnstsw ax +mov ebx, eax ; work on a copy, eax keeps the full status word for the TOP check below +and ebx, 0x4500 ; should be 0x0400 for normal + +;; Top should be 6 +;; Shift the status word right by 11 and mask with 0x7. +shr eax, 11 +and eax, 0x7 + + +hlt diff --git a/unittests/ASM/X87/Memcopy.asm b/unittests/ASM/X87/Memcopy.asm new file mode 100644 index 0000000000..47d299a203 --- /dev/null +++ b/unittests/ASM/X87/Memcopy.asm @@ -0,0 +1,21 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "0x3ff8000000000000", + "RBX": "0x3ff8000000000000" + }, + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +mov rdx, 0x100000000 +mov rax, 0x3ff8000000000000 ; 1.5 +mov [rdx], rax + +fld qword [rdx] ; load the double onto the x87 stack +fstp qword [rdx + 8] ; store it back as a double and pop + +mov rbx, [rdx + 8] ; the copy is expected to have the same bit pattern as rax +hlt diff --git a/unittests/ASM/x87_stack.asm b/unittests/ASM/x87_stack.asm new file mode 100644 index 0000000000..f032ca9c60 --- /dev/null +++ b/unittests/ASM/x87_stack.asm @@ -0,0 +1,26 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "0x4142434445464748", + "RBX": "0" + } +} +%endif + +lea rax, [rel .data] +lea rbx, [rel .data_mov] + +fld qword [rax] ; load the first qword of .data as a double +fstp qword [rbx] ; store it to the first qword of .data_mov and pop + +mov rax, [rbx] ; the copied qword, expected to match the first qword of .data +mov rbx, [rbx + 8] ; the second qword of .data_mov, expected to still be 0 +hlt + +.data: +dq 0x4142434445464748 +dq 0x5152535455565758 + +.data_mov: +dq 0 +dq 0 diff --git a/unittests/FEXLinuxTests/Known_Failures_Host b/unittests/FEXLinuxTests/Known_Failures_Host index e69de29bb2..81acd73a95 100644 --- a/unittests/FEXLinuxTests/Known_Failures_Host +++ b/unittests/FEXLinuxTests/Known_Failures_Host @@ -0,0 +1,2 @@ +## Unable to support zero flag +FXAM_Simple.asm diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index 51bb0bddf3..7db80f383e 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -17853,6 +17853,498 @@ "strb w20, [x28, #1019]", "strb w21, [x28, #1298]" ] + }, + "memcpy4_32": { + "ExpectedInstructionCount": 231, + "x86Insts": [ + "fld dword [rax]", + "fstp dword [rdx]", + "fld dword [rax + 4]", + "fstp dword [rdx + 4]", + "fld 
dword [rax + 8]", + "fstp dword [rdx + 8]", + "fld dword [rax + 12]", + "fstp dword [rdx + 12]" + ], + "ExpectedArm64ASM": [ + "ldr s2, [x4]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x6]", + "ldr s2, [x4, #4]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", 
+ "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x4 (4)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x20]", + "ldr s2, [x4, #8]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x8 (8)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, 
[x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x20]", + "ldr s2, [x4, #12]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0xc (12)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], 
#32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] + }, + "memcpy4_64": { + "ExpectedInstructionCount": 231, + "x86Insts": [ + "fld qword [rax]", + "fstp qword [rdx]", + "fld qword [rax + 8]", + "fstp qword [rdx + 8]", + "fld qword [rax + 16]", + "fstp qword [rdx + 16]", + "fld qword [rax + 32]", + "fstp qword [rdx + 32]" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 
{v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x6]", + "ldr d2, [x4, #8]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x8 (8)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x20]", + "ldr d2, [x4, #16]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, 
#1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x10 (16)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x20]", + "ldr d2, [x4, #32]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x20 (32)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, 
#280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] } } } diff --git a/unittests/InstructionCountCI/FlagM/x87_f64.json b/unittests/InstructionCountCI/FlagM/x87_f64.json index 84fe07d0c7..4acbd9ee91 100644 --- a/unittests/InstructionCountCI/FlagM/x87_f64.json +++ b/unittests/InstructionCountCI/FlagM/x87_f64.json @@ -10274,6 +10274,82 @@ "strb w20, [x28, #1019]", "strb w21, [x28, #1298]" ] + }, + "memcpy4_32": { + "ExpectedInstructionCount": 27, + "x86Insts": [ + "fld dword [rax]", + "fstp dword [rdx]", + "fld dword [rax + 4]", + "fstp dword [rdx + 4]", + "fld dword [rax + 8]", + "fstp dword [rdx + 8]", + "fld dword [rax + 12]", + "fstp dword [rdx + 12]" + ], + "ExpectedArm64ASM": [ + "ldr s2, [x4]", + "fcvt d2, s2", + "fcvt s2, d2", + "str s2, [x6]", + "ldr s2, [x4, #4]", + "fcvt d2, s2", + "add x20, x6, #0x4 (4)", + "fcvt s2, d2", + "str s2, [x20]", + "ldr s2, [x4, #8]", + "fcvt d2, s2", + "add x20, x6, #0x8 (8)", + "fcvt s2, d2", + "str s2, [x20]", + "ldr s2, [x4, #12]", + "fcvt d2, s2", + "add x20, x6, #0xc (12)", + "fcvt s2, d2", + "str s2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov 
w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] + }, + "memcpy4_64": { + "ExpectedInstructionCount": 19, + "x86Insts": [ + "fld qword [rax]", + "fstp qword [rdx]", + "fld qword [rax + 8]", + "fstp qword [rdx + 8]", + "fld qword [rax + 16]", + "fstp qword [rdx + 16]", + "fld qword [rax + 32]", + "fstp qword [rdx + 32]" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "str d2, [x6]", + "ldr d2, [x4, #8]", + "add x20, x6, #0x8 (8)", + "str d2, [x20]", + "ldr d2, [x4, #16]", + "add x20, x6, #0x10 (16)", + "str d2, [x20]", + "ldr d2, [x4, #32]", + "add x20, x6, #0x20 (32)", + "str d2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] } } } diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index 8d20b60613..6680bb2a1c 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -17884,6 +17884,498 @@ "msr nzcv, x22", "strb w21, [x28, #1298]" ] + }, + "memcpy4_32": { + "ExpectedInstructionCount": 231, + "x86Insts": [ + "fld dword [rax]", + "fstp dword [rdx]", + "fld dword [rax + 4]", + "fstp dword [rdx + 4]", + "fld dword [rax + 8]", + "fstp dword [rdx + 8]", + "fld dword [rax + 12]", + "fstp dword [rdx + 12]" + ], + "ExpectedArm64ASM": [ + "ldr s2, [x4]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", 
+ "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x6]", + "ldr s2, [x4, #4]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x4 (4)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + 
"ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x20]", + "ldr s2, [x4, #8]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x8 (8)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x20]", + "ldr s2, [x4, #12]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, 
#280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "fmov s0, s2", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1424]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0xc (12)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1440]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "fmov s2, s0", + "str s2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] + }, + "memcpy4_64": { + "ExpectedInstructionCount": 231, + "x86Insts": [ + "fld qword [rax]", + "fstp qword [rdx]", + "fld qword [rax + 8]", + "fstp qword [rdx + 8]", + "fld qword [rax + 16]", + "fstp qword [rdx + 16]", + "fld qword [rax + 32]", + "fstp qword [rdx + 32]" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, 
[x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x6]", + "ldr d2, [x4, #8]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, 
v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x8 (8)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x20]", + "ldr d2, [x4, #16]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x10 (16)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + 
"str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x20]", + "ldr d2, [x4, #32]", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "mov v0.8b, v2.8b", + "ldrh w0, [x28, #1296]", + "ldr x1, [x28, #1432]", + "blr x1", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v2.16b, v2.16b, v2.16b", + "mov v2.d[0], x0", + "mov v2.h[4], w1", + "add x20, x6, #0x20 (32)", + "mrs x0, nzcv", + "str w0, [x28, #1000]", + "stp x4, x5, [x28, #280]", + "stp x6, x7, [x28, #296]", + "str x8, [x28, #312]", + "stp x16, x17, [x28, #376]", + "sub sp, sp, #0x70 (112)", + "mov x0, sp", + "st1 {v2.2d, v3.2d}, [x0], #32", + "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", + "str x30, [x0], #16", + "ldrh w0, [x28, #1296]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "ldr x3, [x28, #1448]", + "blr x3", + "ldr w4, [x28, #1000]", + "msr nzcv, x4", + "ldp x4, x5, [x28, #280]", + "ldp x6, x7, [x28, #296]", + "ldr x8, [x28, #312]", + "ldp x16, x17, [x28, #376]", + "ld1 {v2.2d, v3.2d}, [sp], #32", + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "mov v2.8b, v0.8b", + "str d2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, 
#0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] } } } diff --git a/unittests/InstructionCountCI/x87_f64.json b/unittests/InstructionCountCI/x87_f64.json index f0b5d9010b..81ebdb93cc 100644 --- a/unittests/InstructionCountCI/x87_f64.json +++ b/unittests/InstructionCountCI/x87_f64.json @@ -10508,6 +10508,82 @@ "msr nzcv, x22", "strb w21, [x28, #1298]" ] + }, + "memcpy4_32": { + "ExpectedInstructionCount": 27, + "x86Insts": [ + "fld dword [rax]", + "fstp dword [rdx]", + "fld dword [rax + 4]", + "fstp dword [rdx + 4]", + "fld dword [rax + 8]", + "fstp dword [rdx + 8]", + "fld dword [rax + 12]", + "fstp dword [rdx + 12]" + ], + "ExpectedArm64ASM": [ + "ldr s2, [x4]", + "fcvt d2, s2", + "fcvt s2, d2", + "str s2, [x6]", + "ldr s2, [x4, #4]", + "fcvt d2, s2", + "add x20, x6, #0x4 (4)", + "fcvt s2, d2", + "str s2, [x20]", + "ldr s2, [x4, #8]", + "fcvt d2, s2", + "add x20, x6, #0x8 (8)", + "fcvt s2, d2", + "str s2, [x20]", + "ldr s2, [x4, #12]", + "fcvt d2, s2", + "add x20, x6, #0xc (12)", + "fcvt s2, d2", + "str s2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] + }, + "memcpy4_64": { + "ExpectedInstructionCount": 19, + "x86Insts": [ + "fld qword [rax]", + "fstp qword [rdx]", + "fld qword [rax + 8]", + "fstp qword [rdx + 8]", + "fld qword [rax + 16]", + "fstp qword [rdx + 16]", + "fld qword [rax + 32]", + "fstp qword [rdx + 32]" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "str d2, [x6]", + "ldr d2, [x4, #8]", + "add x20, x6, #0x8 (8)", + "str d2, [x20]", + "ldr d2, [x4, #16]", + "add x20, x6, #0x10 (16)", + "str d2, [x20]", + "ldr d2, [x4, #32]", + "add x20, x6, #0x20 (32)", + "str d2, [x20]", + "ldrb w20, [x28, #1019]", + "add w20, w20, #0x7 (7)", + "and w20, w20, #0x7", + "ldrb w21, [x28, 
#1298]", + "mov w22, #0x1", + "lsl w20, w22, w20", + "bic w20, w21, w20", + "strb w20, [x28, #1298]" + ] } } }