mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-23 19:17:17 +00:00
Add float patterns for Neon vld1-lane/dup and vst1-lane operations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121583 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
20d5515aa5
commit
746fa17d59
@ -546,6 +546,13 @@ def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
|
||||
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
|
||||
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
|
||||
|
||||
def : Pat<(vector_insert (v2f32 DPR:$src),
|
||||
(f32 (load addrmode6:$addr)), imm:$lane),
|
||||
(VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
|
||||
def : Pat<(vector_insert (v4f32 QPR:$src),
|
||||
(f32 (load addrmode6:$addr)), imm:$lane),
|
||||
(VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
|
||||
|
||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||
|
||||
// ...with address register writeback:
|
||||
@ -813,6 +820,11 @@ def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
|
||||
def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
|
||||
def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
|
||||
|
||||
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
|
||||
(VLD1DUPd32 addrmode6:$addr)>;
|
||||
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
|
||||
(VLD1DUPq32Pseudo addrmode6:$addr)>;
|
||||
|
||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||
|
||||
class VLD1QDUP<bits<4> op7_4, string Dt>
|
||||
@ -1365,6 +1377,11 @@ def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
|
||||
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
|
||||
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
|
||||
|
||||
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
|
||||
(VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
|
||||
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
|
||||
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
|
||||
|
||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||
|
||||
// ...with address register writeback:
|
||||
|
@ -162,24 +162,6 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
|
||||
;CHECK: v_shuffledupfloat2:
|
||||
;CHECK: vdup.32
|
||||
%tmp0 = load float* %A
|
||||
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
|
||||
;CHECK: v_shuffledupQfloat2:
|
||||
;CHECK: vdup.32
|
||||
%tmp0 = load float* %A
|
||||
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
|
||||
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
|
||||
;CHECK: vduplane8:
|
||||
;CHECK: vdup.8
|
||||
|
@ -30,6 +30,15 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <2 x float> @vld1dupf(float* %A) nounwind {
|
||||
;CHECK: vld1dupf:
|
||||
;CHECK: vld1.32 {d16[]}, [r0]
|
||||
%tmp0 = load float* %A
|
||||
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
|
||||
;CHECK: vld1dupQi8:
|
||||
;Check the (default) alignment value.
|
||||
@ -40,6 +49,15 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <4 x float> @vld1dupQf(float* %A) nounwind {
|
||||
;CHECK: vld1dupQf:
|
||||
;CHECK: vld1.32 {d16[], d17[]}, [r0]
|
||||
%tmp0 = load float* %A
|
||||
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
|
||||
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
|
||||
%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> }
|
||||
%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }
|
||||
|
@ -30,6 +30,15 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
|
||||
;CHECK: vld1lanef:
|
||||
;CHECK: vld1.32 {d16[1]}, [r0]
|
||||
%tmp1 = load <2 x float>* %B
|
||||
%tmp2 = load float* %A, align 4
|
||||
%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
|
||||
ret <2 x float> %tmp3
|
||||
}
|
||||
|
||||
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
|
||||
;CHECK: vld1laneQi8:
|
||||
;CHECK: vld1.8 {d17[1]}, [r0]
|
||||
@ -57,6 +66,15 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
|
||||
ret <4 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
|
||||
;CHECK: vld1laneQf:
|
||||
;CHECK: vld1.32 {d16[0]}, [r0]
|
||||
%tmp1 = load <4 x float>* %B
|
||||
%tmp2 = load float* %A
|
||||
%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
|
||||
ret <4 x float> %tmp3
|
||||
}
|
||||
|
||||
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
|
||||
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
|
||||
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
|
||||
|
@ -30,6 +30,15 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
|
||||
;CHECK: vst1lanef:
|
||||
;CHECK: vst1.32 {d16[1]}, [r0]
|
||||
%tmp1 = load <2 x float>* %B
|
||||
%tmp2 = extractelement <2 x float> %tmp1, i32 1
|
||||
store float %tmp2, float* %A
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
|
||||
;CHECK: vst1laneQi8:
|
||||
;CHECK: vst1.8 {d17[1]}, [r0]
|
||||
@ -57,6 +66,15 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
|
||||
;CHECK: vst1laneQf:
|
||||
;CHECK: vst1.32 {d17[1]}, [r0]
|
||||
%tmp1 = load <4 x float>* %B
|
||||
%tmp2 = extractelement <4 x float> %tmp1, i32 3
|
||||
store float %tmp2, float* %A
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
|
||||
;CHECK: vst2lanei8:
|
||||
;Check the alignment value. Max for this instruction is 16 bits:
|
||||
|
Loading…
x
Reference in New Issue
Block a user