-
Notifications
You must be signed in to change notification settings - Fork 751
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Use INDEX for constant Neon step vectors (#113424)
When compiling for an SVE target we can use INDEX to generate constant fixed-length step vectors, e.g.:

```
uint32x4_t foo() { return (uint32x4_t){0, 1, 2, 3}; }
```

Currently:

```
foo():
        adrp x8, .LCPI1_0
        ldr q0, [x8, :lo12:.LCPI1_0]
        ret
```

With INDEX:

```
foo():
        index z0.s, #0, #1
        ret
```

The logic for this was already in `LowerBUILD_VECTOR`, though it was hidden under a check for `!Subtarget->isNeonAvailable()`. This patch refactors this to enable the corresponding code path unconditionally for constant step vectors (as long as we can use SVE for them).
- Loading branch information
Showing
3 changed files
with
146 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
135 changes: 135 additions & 0 deletions
135
llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s | ||
|
||
; 128-bit vectors | ||
|
||
; Returns the constant step vector <0, 1, ..., 15> as 16 x i8 (128 bits). | ||
; The checks expect one byte-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from q0. | ||
define <16 x i8> @v16i8() #0 { | ||
; CHECK-LABEL: v16i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.b, #0, #1 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15> | ||
} | ||
|
||
; Returns the constant step vector <0, 1, ..., 7> as 8 x i16 (128 bits). | ||
; The checks expect one halfword-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from q0. | ||
define <8 x i16> @v8i16() #0 { | ||
; CHECK-LABEL: v8i16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.h, #0, #1 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> | ||
} | ||
|
||
; Returns the constant step vector <0, 1, 2, 3> as 4 x i32 (128 bits). | ||
; The checks expect one word-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from q0. | ||
define <4 x i32> @v4i32() #0 { | ||
; CHECK-LABEL: v4i32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.s, #0, #1 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 0, i32 1, i32 2, i32 3> | ||
} | ||
|
||
; Returns the constant step vector <0, 1> as 2 x i64 (128 bits). | ||
; The checks expect one doubleword-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from q0. | ||
define <2 x i64> @v2i64() #0 { | ||
; CHECK-LABEL: v2i64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.d, #0, #1 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <2 x i64> <i64 0, i64 1> | ||
} | ||
|
||
; 64-bit vectors | ||
|
||
; Returns the constant step vector <0, 1, ..., 7> as 8 x i8 (64 bits). | ||
; The checks expect one byte-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from d0. | ||
define <8 x i8> @v8i8() #0 { | ||
; CHECK-LABEL: v8i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.b, #0, #1 | ||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7> | ||
} | ||
|
||
; Returns the constant step vector <0, 1, 2, 3> as 4 x i16 (64 bits). | ||
; The checks expect one halfword-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from d0. | ||
define <4 x i16> @v4i16() #0 { | ||
; CHECK-LABEL: v4i16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.h, #0, #1 | ||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i16> <i16 0, i16 1, i16 2, i16 3> | ||
} | ||
|
||
; Returns the constant step vector <0, 1> as 2 x i32 (64 bits). | ||
; The checks expect one word-element INDEX (start #0, step #1) directly | ||
; followed by the return, with the result taken from d0. | ||
define <2 x i32> @v2i32() #0 { | ||
; CHECK-LABEL: v2i32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.s, #0, #1 | ||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <2 x i32> <i32 0, i32 1> | ||
} | ||
|
||
; Positive test, non-zero start and non-unitary step: <1, 3, 5, 7>. | ||
; Note: This should be INDEX z0.s, #1, #2 (without the ORR). | ||
; The checks below record the current output instead: INDEX builds | ||
; <0, 2, 4, 6> (start #0, step #2) and the ORR with #0x1 sets the low bit | ||
; of each element to produce the odd values. | ||
define <4 x i32> @v4i32_non_zero_non_one() #0 { | ||
; CHECK-LABEL: v4i32_non_zero_non_one: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.s, #0, #2 | ||
; CHECK-NEXT: orr z0.s, z0.s, #0x1 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||
} | ||
|
||
; Positive test, non-zero start and non-unitary step with negative | ||
; immediates: <-1, -3, -5, -7>. | ||
; The checks expect a single INDEX taking both values as immediates | ||
; (start #-1, step #-2), with no follow-up instruction. | ||
define <4 x i32> @v4i32_neg_immediates() #0 { | ||
; CHECK-LABEL: v4i32_neg_immediates: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.s, #-1, #-2 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7> | ||
} | ||
|
||
; Positive test, out of imm range start: <16, 17, 18, 19>. | ||
; The start value 16 is not encoded as an INDEX immediate; the checks | ||
; expect INDEX to build <0, 1, 2, 3> and a vector ADD of #16 to apply the | ||
; start offset afterwards. | ||
define <4 x i32> @v4i32_out_range_start() #0 { | ||
; CHECK-LABEL: v4i32_out_range_start: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: index z0.s, #0, #1 | ||
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 16, i32 17, i32 18, i32 19> | ||
} | ||
|
||
; Positive test, out of imm range step: <0, 16, 32, 48>. | ||
; The step value 16 is not encoded as an INDEX immediate; the checks | ||
; expect it to be materialised in w8 and passed to INDEX as a register | ||
; operand, while the start stays the immediate #0. | ||
define <4 x i32> @v4i32_out_range_step() #0 { | ||
; CHECK-LABEL: v4i32_out_range_step: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w8, #16 // =0x10 | ||
; CHECK-NEXT: index z0.s, #0, w8 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 0, i32 16, i32 32, i32 48> | ||
} | ||
|
||
; Positive test, out of imm range start and step: <16, 32, 48, 64>. | ||
; Neither value is encoded as an INDEX immediate; the checks expect the | ||
; step (16) to be materialised in w8 for INDEX, which builds | ||
; <0, 16, 32, 48>, and a vector ADD of #16 to apply the start offset. | ||
define <4 x i32> @v4i32_out_range_start_step() #0 { | ||
; CHECK-LABEL: v4i32_out_range_start_step: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w8, #16 // =0x10 | ||
; CHECK-NEXT: index z0.s, #0, w8 | ||
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10 | ||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 16, i32 32, i32 48, i32 64> | ||
} | ||
|
||
; Negative test, non sequential: <0, 2, 2, 3> has no constant step, so no | ||
; INDEX is expected. The checks expect the vector to be loaded from the | ||
; constant pool entry .LCPI12_0 via ADRP + LDR into q0. | ||
define <4 x i32> @v4i32_non_sequential() #0 { | ||
; CHECK-LABEL: v4i32_non_sequential: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: adrp x8, .LCPI12_0 | ||
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] | ||
; CHECK-NEXT: ret | ||
ret <4 x i32> <i32 0, i32 2, i32 2, i32 3> | ||
} | ||