mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-16 00:16:50 +00:00
[AArch64] Improve codegen of store lane instructions by avoiding GPR usage.
We used to generate code similar to:

    umov.b w8, v0[2]
    strb w8, [x0, x1]

because the STR*ro* patterns were preferred to ST1*.
Instead, we can avoid going through GPRs, and generate:

    add x8, x0, x1
    st1.b { v0 }[2], [x8]

This patch increases the ST1* AddedComplexity to achieve that.

rdar://16372710

Differential Revision: http://reviews.llvm.org/D6202

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225183 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c52cd839b9
commit
3c9fb6e1ad
@ -4856,7 +4856,7 @@ defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
|
||||
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
|
||||
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
|
||||
|
||||
let AddedComplexity = 15 in
|
||||
let AddedComplexity = 19 in
|
||||
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
|
||||
ValueType VTy, ValueType STy, Instruction ST1>
|
||||
: Pat<(scalar_store
|
||||
@ -4872,7 +4872,7 @@ def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
|
||||
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
|
||||
def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
|
||||
|
||||
let AddedComplexity = 15 in
|
||||
let AddedComplexity = 19 in
|
||||
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
|
||||
ValueType VTy, ValueType STy, Instruction ST1>
|
||||
: Pat<(scalar_store
|
||||
|
@ -8,10 +8,20 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_16b
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i8* %D, i64 %offset
|
||||
%tmp = extractelement <16 x i8> %A, i32 1
|
||||
store i8 %tmp, i8* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_16b
|
||||
; CHECK: umov.b w[[WREG:[0-9]+]], v0[0]
|
||||
; CHECK: strb w[[WREG]], [x0, x1]
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
|
||||
%ptr = getelementptr i8* %D, i64 %offset
|
||||
%tmp = extractelement <16 x i8> %A, i32 0
|
||||
store i8 %tmp, i8* %ptr
|
||||
@ -26,6 +36,16 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_8h
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i16* %D, i64 %offset
|
||||
%tmp = extractelement <8 x i16> %A, i32 1
|
||||
store i16 %tmp, i16* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_8h
|
||||
; CHECK: str h0, [x0, x1, lsl #1]
|
||||
@ -43,6 +63,16 @@ define void @st1lane_4s(<4 x i32> %A, i32* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_4s
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i32* %D, i64 %offset
|
||||
%tmp = extractelement <4 x i32> %A, i32 1
|
||||
store i32 %tmp, i32* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_4s
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
@ -60,6 +90,16 @@ define void @st1lane_4s_float(<4 x float> %A, float* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_4s_float
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr float* %D, i64 %offset
|
||||
%tmp = extractelement <4 x float> %A, i32 1
|
||||
store float %tmp, float* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_4s_float
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
@ -77,6 +117,16 @@ define void @st1lane_2d(<2 x i64> %A, i64* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_2d
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i64* %D, i64 %offset
|
||||
%tmp = extractelement <2 x i64> %A, i32 1
|
||||
store i64 %tmp, i64* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2d
|
||||
; CHECK: str d0, [x0, x1, lsl #3]
|
||||
@ -94,6 +144,16 @@ define void @st1lane_2d_double(<2 x double> %A, double* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_2d_double
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr double* %D, i64 %offset
|
||||
%tmp = extractelement <2 x double> %A, i32 1
|
||||
store double %tmp, double* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2d_double
|
||||
; CHECK: str d0, [x0, x1, lsl #3]
|
||||
@ -111,10 +171,20 @@ define void @st1lane_8b(<8 x i8> %A, i8* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_8b
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i8* %D, i64 %offset
|
||||
%tmp = extractelement <8 x i8> %A, i32 1
|
||||
store i8 %tmp, i8* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_8b
|
||||
; CHECK: umov.b w[[WREG:[0-9]+]], v0[0]
|
||||
; CHECK: strb w[[WREG]], [x0, x1]
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
|
||||
%ptr = getelementptr i8* %D, i64 %offset
|
||||
%tmp = extractelement <8 x i8> %A, i32 0
|
||||
store i8 %tmp, i8* %ptr
|
||||
@ -129,6 +199,16 @@ define void @st1lane_4h(<4 x i16> %A, i16* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_4h
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i16* %D, i64 %offset
|
||||
%tmp = extractelement <4 x i16> %A, i32 1
|
||||
store i16 %tmp, i16* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_4h
|
||||
; CHECK: str h0, [x0, x1, lsl #1]
|
||||
@ -146,6 +226,16 @@ define void @st1lane_2s(<2 x i32> %A, i32* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_2s
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr i32* %D, i64 %offset
|
||||
%tmp = extractelement <2 x i32> %A, i32 1
|
||||
store i32 %tmp, i32* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2s
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
@ -163,6 +253,16 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane_ro_2s_float
|
||||
; CHECK: add x[[XREG:[0-9]+]], x0, x1
|
||||
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
|
||||
%ptr = getelementptr float* %D, i64 %offset
|
||||
%tmp = extractelement <2 x float> %A, i32 1
|
||||
store float %tmp, float* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2s_float
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
|
Loading…
Reference in New Issue
Block a user