mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-13 17:00:01 +00:00
[ARM] FP16: vector vmov and vdup support
This adds codegen support for the vmov_n_f16 and vdup_n_f16 variants. Differential Revision: https://reviews.llvm.org/D50329 llvm-svn: 339238
This commit is contained in:
parent
b327cfbb72
commit
27bba1453d
@ -6336,6 +6336,9 @@ def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
|
||||
let Inst{19} = lane{0};
|
||||
}
|
||||
|
||||
// Duplicate one f16 lane of a 64-bit (D) vector into all four result lanes.
// v4f16 elements are 16 bits wide and $lane ranges over 0-3, so the 16-bit
// lane form VDUPLN16d must be selected. The 32-bit form only addresses
// lanes 0-1 of a D register and would replicate a pair of halves instead of
// a single element.
def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;
|
||||
|
||||
// f32 lane duplicate within a D register: selects VDUPLN32d with the same
// source register and lane operand as the matched node.
def : Pat<
  (v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
  (VDUPLN32d DPR:$Vm, imm:$lane)
>;
|
||||
|
||||
@ -6350,6 +6353,10 @@ def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
|
||||
(v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
|
||||
(DSubReg_i16_reg imm:$lane))),
|
||||
(SubReg_i16_lane imm:$lane)))>;
|
||||
// f16 lane duplicate from a Q register into all eight result lanes.
// VDUPLN16q reads its source lane from a D register, so the D sub-register
// index is derived from $lane via DSubReg_i16_reg and the lane operand passed
// to the instruction is derived from $lane via SubReg_i16_lane.
def : Pat<
  (v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
  (v8f16 (VDUPLN16q
            (v4f16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane)))
>;
|
||||
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
|
||||
(v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
|
||||
(DSubReg_i32_reg imm:$lane))),
|
||||
@ -6359,12 +6366,18 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
|
||||
(DSubReg_i32_reg imm:$lane))),
|
||||
(SubReg_i32_lane imm:$lane)))>;
|
||||
|
||||
// Splat a scalar f16 (HPR) into a v4f16: the scalar is inserted at ssub_0 of
// an IMPLICIT_DEF D-register value, then lane 0 is duplicated with VDUPLN16d.
def : Pat<
  (v4f16 (NEONvdup HPR:$src)),
  (v4f16 (VDUPLN16d
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0),
            (i32 0)))
>;
|
||||
// Splat a scalar f32 (SPR) into a v2f32: the scalar is inserted at ssub_0 of
// an IMPLICIT_DEF D-register value, then lane 0 is duplicated with VDUPLN32d.
def : Pat<
  (v2f32 (NEONvdup (f32 SPR:$src))),
  (v2f32 (VDUPLN32d
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0),
            (i32 0)))
>;
|
||||
// Splat a scalar f32 (SPR) into a v4f32: the scalar is inserted at ssub_0 of
// an IMPLICIT_DEF v2f32 value, and VDUPLN32q duplicates lane 0 of that value
// across the four result lanes.
def : Pat<
  (v4f32 (NEONvdup (f32 SPR:$src))),
  (v4f32 (VDUPLN32q
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0),
            (i32 0)))
>;
|
||||
// Splat a scalar f16 (HPR) into a v8f16: the scalar is inserted at ssub_0 of
// an IMPLICIT_DEF v4f16 value, and VDUPLN16q duplicates lane 0 of that value
// across the eight result lanes.
def : Pat<
  (v8f16 (NEONvdup HPR:$src)),
  (v8f16 (VDUPLN16q
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0),
            (i32 0)))
>;
|
||||
|
||||
// VMOVN : Vector Narrowing Move
|
||||
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
|
||||
|
@ -1120,58 +1120,78 @@ entry:
|
||||
; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
|
||||
; ret %struct.float16x8x2_t %.fca.0.1.insert
|
||||
;}
|
||||
;
|
||||
;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
|
||||
;entry:
|
||||
; %0 = bitcast float %a.coerce to i32
|
||||
; %tmp.0.extract.trunc = trunc i32 %0 to i16
|
||||
; %1 = bitcast i16 %tmp.0.extract.trunc to half
|
||||
; %vecinit = insertelement <4 x half> undef, half %1, i32 0
|
||||
; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
|
||||
; ret <4 x half> %vecinit4
|
||||
;}
|
||||
;
|
||||
;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
|
||||
;entry:
|
||||
; %0 = bitcast float %a.coerce to i32
|
||||
; %tmp.0.extract.trunc = trunc i32 %0 to i16
|
||||
; %1 = bitcast i16 %tmp.0.extract.trunc to half
|
||||
; %vecinit = insertelement <8 x half> undef, half %1, i32 0
|
||||
; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
|
||||
; ret <8 x half> %vecinit8
|
||||
;}
|
||||
;
|
||||
;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
|
||||
;entry:
|
||||
; %0 = bitcast float %a.coerce to i32
|
||||
; %tmp.0.extract.trunc = trunc i32 %0 to i16
|
||||
; %1 = bitcast i16 %tmp.0.extract.trunc to half
|
||||
; %vecinit = insertelement <4 x half> undef, half %1, i32 0
|
||||
; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
|
||||
; ret <4 x half> %vecinit4
|
||||
;}
|
||||
;
|
||||
;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
|
||||
;entry:
|
||||
; %0 = bitcast float %a.coerce to i32
|
||||
; %tmp.0.extract.trunc = trunc i32 %0 to i16
|
||||
; %1 = bitcast i16 %tmp.0.extract.trunc to half
|
||||
; %vecinit = insertelement <8 x half> undef, half %1, i32 0
|
||||
; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
|
||||
; ret <8 x half> %vecinit8
|
||||
;}
|
||||
;
|
||||
;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
|
||||
;entry:
|
||||
; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
; ret <4 x half> %shuffle
|
||||
;}
|
||||
;
|
||||
;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
|
||||
;entry:
|
||||
; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; ret <8 x half> %shuffle
|
||||
;}
|
||||
|
||||
define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
; The half value is taken from the low 16 bits of the float argument and
; splatted to <4 x half>; a single lane-0 vdup.16 on d0 is expected.
; CHECK-LABEL: test_vmov_n_f16:
; CHECK: vdup.16 d0, d0[0]
; CHECK-NEXT: bx lr
entry:
  %bits = bitcast float %a.coerce to i32
  %lo16 = trunc i32 %bits to i16
  %scalar = bitcast i16 %lo16 to half
  %ins = insertelement <4 x half> undef, half %scalar, i32 0
  %splat = shufflevector <4 x half> %ins, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}
|
||||
|
||||
define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
; The half value is taken from the low 16 bits of the float argument and
; splatted to <8 x half>; a single lane-0 vdup.16 into q0 is expected.
; CHECK-LABEL: test_vmovq_n_f16:
; CHECK: vdup.16 q0, d0[0]
; CHECK-NEXT: bx lr
entry:
  %bits = bitcast float %a.coerce to i32
  %lo16 = trunc i32 %bits to i16
  %scalar = bitcast i16 %lo16 to half
  %ins = insertelement <8 x half> undef, half %scalar, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}
|
||||
|
||||
define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
; Same IR shape as the vmov_n variant: extract the half from the low 16 bits
; of the float argument and splat it to <4 x half> with one vdup.16.
; CHECK-LABEL: test_vdup_n_f16:
; CHECK: vdup.16 d0, d0[0]
; CHECK-NEXT: bx lr
entry:
  %bits = bitcast float %a.coerce to i32
  %lo16 = trunc i32 %bits to i16
  %scalar = bitcast i16 %lo16 to half
  %ins = insertelement <4 x half> undef, half %scalar, i32 0
  %splat = shufflevector <4 x half> %ins, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}
|
||||
|
||||
define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
; Same IR shape as the vmovq_n variant: extract the half from the low 16 bits
; of the float argument and splat it to <8 x half> with one vdup.16.
; CHECK-LABEL: test_vdupq_n_f16:
; CHECK: vdup.16 q0, d0[0]
; CHECK-NEXT: bx lr
entry:
  %bits = bitcast float %a.coerce to i32
  %lo16 = trunc i32 %bits to i16
  %scalar = bitcast i16 %lo16 to half
  %ins = insertelement <8 x half> undef, half %scalar, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}
|
||||
|
||||
define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
; Broadcast f16 lane 3 of a D register to all four lanes. The elements are
; 16 bits wide, so the expected instruction is vdup.16. A 32-bit vdup on a D
; register only has lanes 0 and 1, so "vdup.32 d0, d0[3]" is neither a valid
; encoding nor an f16 lane broadcast.
; CHECK-LABEL: test_vdup_lane_f16:
; CHECK: vdup.16 d0, d0[3]
; CHECK-NEXT: bx lr
entry:
  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x half> %shuffle
}
|
||||
|
||||
define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
; Broadcast f16 lane 3 of a 64-bit input vector into all eight lanes of a
; 128-bit result; a single vdup.16 from d0[3] into q0 is expected.
; CHECK-LABEL: test_vdupq_lane_f16:
; CHECK: vdup.16 q0, d0[3]
; CHECK-NEXT: bx lr
entry:
  %splat = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x half> %splat
}
|
||||
|
||||
; FIXME (PR38404)
|
||||
;
|
||||
;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
|
||||
;entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user