aarch64: Use RegScaled* addressing modes (bytecodealliance#6945)
This commit adds a few cases to `amode` construction on AArch64 for
using the `RegScaled*` variants of `AMode`. This won't affect wasm, since
this only matches when the sign-extension happens before the shift, but it
should otherwise help non-wasm Cranelift use cases.
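
For example (mirroring the filetests added below), a load whose address adds a
base register to an index register shifted left by the byte width of the
loaded type:

function %load_scaled64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 3
v3 = iadd v0, v2
v4 = load.i64 v3
return v4
}

now lowers to a single `ldr x0, [x0, x1, LSL #3]` instead of a separate `lsl`
followed by a register-register `ldr`.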

Closes bytecodealliance#6742
alexcrichton authored and eduardomourar committed Sep 6, 2023
1 parent 334f90c commit 18885d9
Showing 3 changed files with 291 additions and 2 deletions.
26 changes: 24 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst.isle
@@ -3122,15 +3122,37 @@
(rule 5 (amode ty (iadd (sextend x @ (value_type $I32)) y) offset)
(AMode.RegExtended (amode_add y offset) x (ExtendOp.SXTW)))

;; `RegScaled*` rules, matching an addition of an "index register" to a base
;; register. The index register must be shifted left by log2 of the byte width
;; of the loaded type for these modes to match.
;;
;; Note that these rules can additionally bundle an extending operation, but
;; the extension must happen before the shift. The shift is pattern-matched
;; first and, only if that succeeds, an extend is then looked for.
(rule 6 (amode ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset)
(if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
(amode_reg_scaled (amode_add x offset) y ty))
(rule 7 (amode ty (iadd (ishl y (iconst (u64_from_imm64 n))) x) offset)
(if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
(amode_reg_scaled (amode_add x offset) y ty))

;; Helper for constructing a `RegScaled*` `AMode`: `RegScaled` by default, or
;; `RegScaledExtended` when the index is a 32-to-64-bit zero- or sign-extend
;; that can be folded into the addressing mode.
(decl amode_reg_scaled (Reg Value Type) AMode)
(rule 0 (amode_reg_scaled base index ty)
(AMode.RegScaled base index ty))
(rule 1 (amode_reg_scaled base (uextend index @ (value_type $I32)) ty)
(AMode.RegScaledExtended base index ty (ExtendOp.UXTW)))
(rule 2 (amode_reg_scaled base (sextend index @ (value_type $I32)) ty)
(AMode.RegScaledExtended base index ty (ExtendOp.SXTW)))

;; Small optimizations where constants found in `iadd` are folded into the
;; `offset` immediate.
;;
;; NB: this should probably be done by mid-end optimizations rather than here
;; in the backend, but currently Cranelift doesn't do that.
(rule 6 (amode ty (iadd x (iconst (simm32 y))) offset)
(rule 8 (amode ty (iadd x (iconst (simm32 y))) offset)
(if-let new_offset (s32_add_fallible y offset))
(amode ty x new_offset))
(rule 7 (amode ty (iadd (iconst (simm32 x)) y) offset)
(rule 9 (amode ty (iadd (iconst (simm32 x)) y) offset)
(if-let new_offset (s32_add_fallible x offset))
(amode ty y new_offset))

1 change: 1 addition & 0 deletions cranelift/codegen/src/prelude.isle
@@ -87,6 +87,7 @@

(decl pure u16_as_u64 (u16) u64)
(extern constructor u16_as_u64 u16_as_u64)
(convert u16 u64 u16_as_u64)

(decl pure u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64)
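The new `(convert u16 u64 u16_as_u64)` converter is presumably what lets the
`u16` result of a helper like `ty_bytes` flow into the `u64_eq` comparison in
the new `amode` rules above without an explicit cast (assuming `ty_bytes`
yields a `u16`); once the conversion is registered, ISLE applies `u16_as_u64`
implicitly, as in rule 6 from `inst.isle`:

(rule 6 (amode ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset)
(if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
(amode_reg_scaled (amode_add x offset) y ty))
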
266 changes: 266 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -519,3 +519,269 @@ block0(v0: i64, v1: i32):
; stp x0, x1, [x6]
; ret

function %load_scaled8(i64, i64) -> i8 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 0
v3 = iadd v0, v2
v4 = load.i8 v3
return v4
}

; VCode:
; block0:
; ldrb w0, [x0, x1, LSL #0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0, x1, lsl #0]
; ret

function %load_scaled16(i64, i64) -> i16 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 1
v3 = iadd v0, v2
v4 = load.i16 v3
return v4
}

; VCode:
; block0:
; ldrh w0, [x0, x1, LSL #1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0, x1, lsl #1]
; ret

function %load_scaled32(i64, i64) -> i32 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 2
v3 = iadd v0, v2
v4 = load.i32 v3
return v4
}

; VCode:
; block0:
; ldr w0, [x0, x1, LSL #2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0, x1, lsl #2]
; ret

function %load_scaled64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 3
v3 = iadd v0, v2
v4 = load.i64 v3
return v4
}

; VCode:
; block0:
; ldr x0, [x0, x1, LSL #3]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr x0, [x0, x1, lsl #3]
; ret

function %load_not_scaled64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 2
v3 = iadd v0, v2
v4 = load.i64 v3
return v4
}

; VCode:
; block0:
; lsl x4, x1, #2
; ldr x0, [x0, x4]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lsl x4, x1, #2
; ldr x0, [x0, x4]
; ret

function %load_uextend_scaled8(i64, i32) -> i8 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 0
v4 = iadd v0, v3
v5 = load.i8 v4
return v5
}

; VCode:
; block0:
; ldrb w0, [x0, w1, UXTW #0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0, w1, uxtw #0]
; ret

function %load_uextend_scaled16(i64, i32) -> i16 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 1
v4 = iadd v0, v3
v5 = load.i16 v4
return v5
}

; VCode:
; block0:
; ldrh w0, [x0, w1, UXTW #1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0, w1, uxtw #1]
; ret

function %load_uextend_scaled32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 2
v4 = iadd v0, v3
v5 = load.i32 v4
return v5
}

; VCode:
; block0:
; ldr w0, [x0, w1, UXTW #2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0, w1, uxtw #2]
; ret


function %load_uextend_scaled64(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 3
v4 = iadd v0, v3
v5 = load.i64 v4
return v5
}

; VCode:
; block0:
; ldr x0, [x0, w1, UXTW #3]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr x0, [x0, w1, uxtw #3]
; ret

function %load_not_extend_scaled64(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = ishl_imm v1, 3
v3 = uextend.i64 v2
v4 = iadd v0, v3
v5 = load.i64 v4
return v5
}

; VCode:
; block0:
; lsl w4, w1, #3
; ldr x0, [x0, w4, UXTW]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lsl w4, w1, #3
; ldr x0, [x0, w4, uxtw]
; ret

function %load_sextend_scaled8(i64, i32) -> i8 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 0
v4 = iadd v0, v3
v5 = load.i8 v4
return v5
}

; VCode:
; block0:
; ldrb w0, [x0, w1, SXTW #0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0, w1, sxtw #0]
; ret

function %load_sextend_scaled16(i64, i32) -> i16 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 1
v4 = iadd v0, v3
v5 = load.i16 v4
return v5
}

; VCode:
; block0:
; ldrh w0, [x0, w1, SXTW #1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0, w1, sxtw #1]
; ret

function %load_sextend_scaled32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 2
v4 = iadd v0, v3
v5 = load.i32 v4
return v5
}

; VCode:
; block0:
; ldr w0, [x0, w1, SXTW #2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0, w1, sxtw #2]
; ret

function %load_sextend_scaled64(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 3
v4 = iadd v0, v3
v5 = load.i64 v4
return v5
}

; VCode:
; block0:
; ldr x0, [x0, w1, SXTW #3]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr x0, [x0, w1, sxtw #3]
; ret
