From 18885d93b718016661741781fc7d28dd91573aec Mon Sep 17 00:00:00 2001
From: Alex Crichton
Date: Thu, 31 Aug 2023 22:18:25 +0200
Subject: [PATCH] aarch64: Use `RegScaled*` addressing modes (#6945)

This commit adds a few cases to `amode` construction on AArch64 for
using the `RegScaled*` variants of `AMode`. This won't affect wasm due
to this only matching the sign-extension happening before the shift,
but it should otherwise help non-wasm Cranelift use cases.

Closes #6742
---
 cranelift/codegen/src/isa/aarch64/inst.isle   |  26 +-
 cranelift/codegen/src/prelude.isle            |   1 +
 .../filetests/isa/aarch64/amodes.clif         | 266 ++++++++++++++++++
 3 files changed, 291 insertions(+), 2 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 806387d9183a..edfa31432ecf 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -3122,15 +3122,37 @@
 (rule 5 (amode ty (iadd (sextend x @ (value_type $I32)) y) offset)
   (AMode.RegExtended (amode_add y offset) x (ExtendOp.SXTW)))
 
+;; `RegScaled*` rules where this matches an addition of an "index register" to a
+;; base register. The index register is shifted by the size of the type loaded
+;; in bytes to enable this mode matching.
+;;
+;; Note that this can additionally bundle an extending operation but the
+;; extension must happen before the shift. This will pattern-match the shift
+;; first and then if that succeeds afterwards try to find an extend.
+(rule 6 (amode ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset)
+  (if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
+  (amode_reg_scaled (amode_add x offset) y ty))
+(rule 7 (amode ty (iadd (ishl y (iconst (u64_from_imm64 n))) x) offset)
+  (if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
+  (amode_reg_scaled (amode_add x offset) y ty))
+
+(decl amode_reg_scaled (Reg Value Type) AMode)
+(rule 0 (amode_reg_scaled base index ty)
+  (AMode.RegScaled base index ty))
+(rule 1 (amode_reg_scaled base (uextend index @ (value_type $I32)) ty)
+  (AMode.RegScaledExtended base index ty (ExtendOp.UXTW)))
+(rule 2 (amode_reg_scaled base (sextend index @ (value_type $I32)) ty)
+  (AMode.RegScaledExtended base index ty (ExtendOp.SXTW)))
+
 ;; Small optimizations where constants found in `iadd` are folded into the
 ;; `offset` immediate.
 ;;
 ;; NB: this should probably be done by mid-end optimizations rather than here
 ;; in the backend, but currently Cranelift doesn't do that.
-(rule 6 (amode ty (iadd x (iconst (simm32 y))) offset)
+(rule 8 (amode ty (iadd x (iconst (simm32 y))) offset)
   (if-let new_offset (s32_add_fallible y offset))
   (amode ty x new_offset))
-(rule 7 (amode ty (iadd (iconst (simm32 x)) y) offset)
+(rule 9 (amode ty (iadd (iconst (simm32 x)) y) offset)
   (if-let new_offset (s32_add_fallible x offset))
   (amode ty y new_offset))

diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index d83d4b518e86..2f4b720b1063 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -87,6 +87,7 @@
 (decl pure u16_as_u64 (u16) u64)
 (extern constructor u16_as_u64 u16_as_u64)
+(convert u16 u64 u16_as_u64)
 
 (decl pure u32_as_u64 (u32) u64)
 (extern constructor u32_as_u64 u32_as_u64)

diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
index 1bb7e0b2333e..176f70da7bd3 100644
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -519,3 +519,269 @@ block0(v0: i64, v1: i32):
 ; stp x0, x1, [x6]
 ; ret
 
+function %load_scaled8(i64, i64) -> i8 {
+block0(v0: i64, v1: i64):
+    v2 = ishl_imm v1, 0
+    v3 = iadd v0, v2
+    v4 = load.i8 v3
+    return v4
+}
+
+; VCode:
+; block0:
+;   ldrb w0, [x0, x1, LSL #0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrb w0, [x0, x1, lsl #0]
+;   ret
+
+function %load_scaled16(i64, i64) -> i16 {
+block0(v0: i64, v1: i64):
+    v2 = ishl_imm v1, 1
+    v3 = iadd v0, v2
+    v4 = load.i16 v3
+    return v4
+}
+
+; VCode:
+; block0:
+;   ldrh w0, [x0, x1, LSL #1]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrh w0, [x0, x1, lsl #1]
+;   ret
+
+function %load_scaled32(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = ishl_imm v1, 2
+    v3 = iadd v0, v2
+    v4 = load.i32 v3
+    return v4
+}
+
+; VCode:
+; block0:
+;   ldr w0, [x0, x1, LSL #2]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr w0, [x0, x1, lsl #2]
+;   ret
+
+function %load_scaled64(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = ishl_imm v1, 3
+    v3 = iadd v0, v2
+    v4 = load.i64 v3
+    return v4
+}
+
+; VCode:
+; block0:
+;   ldr x0, [x0, x1, LSL #3]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr x0, [x0, x1, lsl #3]
+;   ret
+
+function %load_not_scaled64(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = ishl_imm v1, 2
+    v3 = iadd v0, v2
+    v4 = load.i64 v3
+    return v4
+}
+
+; VCode:
+; block0:
+;   lsl x4, x1, #2
+;   ldr x0, [x0, x4]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   lsl x4, x1, #2
+;   ldr x0, [x0, x4]
+;   ret
+
+function %load_uextend_scaled8(i64, i32) -> i8 {
+block0(v0: i64, v1: i32):
+    v2 = uextend.i64 v1
+    v3 = ishl_imm v2, 0
+    v4 = iadd v0, v3
+    v5 = load.i8 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldrb w0, [x0, w1, UXTW #0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrb w0, [x0, w1, uxtw #0]
+;   ret
+
+function %load_uextend_scaled16(i64, i32) -> i16 {
+block0(v0: i64, v1: i32):
+    v2 = uextend.i64 v1
+    v3 = ishl_imm v2, 1
+    v4 = iadd v0, v3
+    v5 = load.i16 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldrh w0, [x0, w1, UXTW #1]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrh w0, [x0, w1, uxtw #1]
+;   ret
+
+function %load_uextend_scaled32(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+    v2 = uextend.i64 v1
+    v3 = ishl_imm v2, 2
+    v4 = iadd v0, v3
+    v5 = load.i32 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldr w0, [x0, w1, UXTW #2]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr w0, [x0, w1, uxtw #2]
+;   ret
+
+function %load_uextend_scaled64(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = uextend.i64 v1
+    v3 = ishl_imm v2, 3
+    v4 = iadd v0, v3
+    v5 = load.i64 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldr x0, [x0, w1, UXTW #3]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr x0, [x0, w1, uxtw #3]
+;   ret
+
+function %load_not_extend_scaled64(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = ishl_imm v1, 3
+    v3 = uextend.i64 v2
+    v4 = iadd v0, v3
+    v5 = load.i64 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   lsl w4, w1, #3
+;   ldr x0, [x0, w4, UXTW]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   lsl w4, w1, #3
+;   ldr x0, [x0, w4, uxtw]
+;   ret
+
+function %load_sextend_scaled8(i64, i32) -> i8 {
+block0(v0: i64, v1: i32):
+    v2 = sextend.i64 v1
+    v3 = ishl_imm v2, 0
+    v4 = iadd v0, v3
+    v5 = load.i8 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldrb w0, [x0, w1, SXTW #0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrb w0, [x0, w1, sxtw #0]
+;   ret
+
+function %load_sextend_scaled16(i64, i32) -> i16 {
+block0(v0: i64, v1: i32):
+    v2 = sextend.i64 v1
+    v3 = ishl_imm v2, 1
+    v4 = iadd v0, v3
+    v5 = load.i16 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldrh w0, [x0, w1, SXTW #1]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrh w0, [x0, w1, sxtw #1]
+;   ret
+
+function %load_sextend_scaled32(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+    v2 = sextend.i64 v1
+    v3 = ishl_imm v2, 2
+    v4 = iadd v0, v3
+    v5 = load.i32 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldr w0, [x0, w1, SXTW #2]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr w0, [x0, w1, sxtw #2]
+;   ret
+
+function %load_sextend_scaled64(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = sextend.i64 v1
+    v3 = ishl_imm v2, 3
+    v4 = iadd v0, v3
+    v5 = load.i64 v4
+    return v5
+}
+
+; VCode:
+; block0:
+;   ldr x0, [x0, w1, SXTW #3]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr x0, [x0, w1, sxtw #3]
+;   ret
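
One shape accepted by the new rules is not exercised by the tests above: rule 7
matches the commuted `iadd`, where the shifted index register is the first
operand rather than the second. The sketch below is a hypothetical filetest
input for that case (the function name is illustrative, and no generated VCode
is shown since none was produced here):

function %load_scaled64_commuted(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    ; the shifted index is the *first* operand of the `iadd` (rule 7)
    v2 = ishl_imm v1, 3
    v3 = iadd v2, v0
    v4 = load.i64 v3
    return v4
}

Per rule 7 this should select the same `RegScaled` addressing mode as
%load_scaled64 above, e.g. `ldr x0, [x0, x1, LSL #3]`, rather than a separate
`lsl` followed by a register-register load.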