[AArch64] Remove q and non-q intrinsic definitions in the NEON scalar reduce
pairwise implementation, using an overloaded definition instead.

llvm-svn: 196831
This commit is contained in:
Chad Rosier 2013-12-09 22:47:31 +00:00
parent a917cd15ae
commit 850366132e
3 changed files with 61 additions and 76 deletions

View File

@ -107,9 +107,6 @@ def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
class Neon_Across_Intrinsic class Neon_Across_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class Neon_2Arg_Across_Float_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_aarch64_neon_saddlv : Neon_Across_Intrinsic; def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic; def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_smaxv : Neon_Across_Intrinsic; def int_aarch64_neon_smaxv : Neon_Across_Intrinsic;
@ -233,29 +230,19 @@ def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vpadd : def int_aarch64_neon_vpadd :
Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>; Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
def int_aarch64_neon_vpfadd : def int_aarch64_neon_vpfadd :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfaddq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Scalar Reduce Pairwise Floating Point Max/Min. // Scalar Reduce Pairwise Floating Point Max/Min.
def int_aarch64_neon_vpmax : def int_aarch64_neon_vpmax :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmaxq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmin : def int_aarch64_neon_vpmin :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpminq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Scalar Reduce Pairwise Floating Point Maxnm/Minnm. // Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
def int_aarch64_neon_vpfmaxnm : def int_aarch64_neon_vpfmaxnm :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfmaxnmq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnm : def int_aarch64_neon_vpfminnm :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnmq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Scalar Signed Integer Convert To Floating-point // Scalar Signed Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_s32 : def int_aarch64_neon_vcvtf32_s32 :

View File

@ -5307,35 +5307,34 @@ defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
// Scalar Reduce minNum Pairwise (Floating Point) // Scalar Reduce minNum Pairwise (Floating Point)
defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS, multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
SDPatternOperator opnodeD,
Instruction INSTS, Instruction INSTS,
Instruction INSTD> { Instruction INSTD> {
def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), def : Pat<(v1f32 (opnode (v2f32 VPR64:$Rn))),
(INSTS VPR64:$Rn)>; (INSTS VPR64:$Rn)>;
def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), def : Pat<(v1f64 (opnode (v2f64 VPR128:$Rn))),
(INSTD VPR128:$Rn)>; (INSTD VPR128:$Rn)>;
} }
// Patterns to match llvm.aarch64.* intrinsic for // Patterns to match llvm.aarch64.* intrinsic for
// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) // Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>; FADDPvv_S_2S, FADDPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>; FMAXPvv_S_2S, FMAXPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>; FMINPvv_S_2S, FMINPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vaddv, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vaddv,
int_aarch64_neon_vaddv, FADDPvv_S_2S, FADDPvv_D_2D>; FADDPvv_S_2S, FADDPvv_D_2D>;
def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))), def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))),
(FADDPvv_S_2S (v2f32 (FADDPvv_S_2S (v2f32
@ -5344,16 +5343,16 @@ def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))),
sub_64)))>; sub_64)))>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxv, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxv,
int_aarch64_neon_vmaxv, FMAXPvv_S_2S, FMAXPvv_D_2D>; FMAXPvv_S_2S, FMAXPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminv, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminv,
int_aarch64_neon_vminv, FMINPvv_S_2S, FMINPvv_D_2D>; FMINPvv_S_2S, FMINPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxnmv, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxnmv,
int_aarch64_neon_vmaxnmv, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminnmv, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminnmv,
int_aarch64_neon_vminnmv, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
// Scalar by element Arithmetic // Scalar by element Arithmetic

View File

@ -4,101 +4,100 @@ declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
; CHECK: test_addp_v1i64: ; CHECK: test_addp_v1i64:
%val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) ; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
; CHECK: addp d0, v0.2d %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
ret <1 x i64> %val ret <1 x i64> %val
} }
declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) declare <1 x float> @llvm.aarch64.neon.vpfadd.v1f32.v2f32(<2 x float>)
define <1 x float> @test_faddp_v1f32(<2 x float> %a) { define <1 x float> @test_faddp_v1f32(<2 x float> %a) {
; CHECK: test_faddp_v1f32: ; CHECK: test_faddp_v1f32:
%val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) ; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
; CHECK: faddp s0, v0.2s %val = call <1 x float> @llvm.aarch64.neon.vpfadd.v1f32.v2f32(<2 x float> %a)
ret <1 x float> %val ret <1 x float> %val
} }
declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) declare <1 x double> @llvm.aarch64.neon.vpfadd.v1f64.v2f64(<2 x double>)
define <1 x double> @test_faddp_v1f64(<2 x double> %a) { define <1 x double> @test_faddp_v1f64(<2 x double> %a) {
; CHECK: test_faddp_v1f64: ; CHECK: test_faddp_v1f64:
%val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) ; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
; CHECK: faddp d0, v0.2d %val = call <1 x double> @llvm.aarch64.neon.vpfadd.v1f64.v2f64(<2 x double> %a)
ret <1 x double> %val ret <1 x double> %val
} }
declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) declare <1 x float> @llvm.aarch64.neon.vpmax.v1f32.v2f32(<2 x float>)
define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) { define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) {
; CHECK: test_fmaxp_v1f32: ; CHECK: test_fmaxp_v1f32:
%val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a) ; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
; CHECK: fmaxp s0, v0.2s %val = call <1 x float> @llvm.aarch64.neon.vpmax.v1f32.v2f32(<2 x float> %a)
ret <1 x float> %val ret <1 x float> %val
} }
declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) declare <1 x double> @llvm.aarch64.neon.vpmax.v1f64.v2f64(<2 x double>)
define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) {
; CHECK: test_fmaxp_v1f64: ; CHECK: test_fmaxp_v1f64:
%val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) ; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
; CHECK: fmaxp d0, v0.2d %val = call <1 x double> @llvm.aarch64.neon.vpmax.v1f64.v2f64(<2 x double> %a)
ret <1 x double> %val ret <1 x double> %val
} }
declare <1 x float> @llvm.aarch64.neon.vpmin.v1f32.v2f32(<2 x float>)
declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>)
define <1 x float> @test_fminp_v1f32(<2 x float> %a) { define <1 x float> @test_fminp_v1f32(<2 x float> %a) {
; CHECK: test_fminp_v1f32: ; CHECK: test_fminp_v1f32:
%val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) ; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
; CHECK: fminp s0, v0.2s %val = call <1 x float> @llvm.aarch64.neon.vpmin.v1f32.v2f32(<2 x float> %a)
ret <1 x float> %val ret <1 x float> %val
} }
declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) declare <1 x double> @llvm.aarch64.neon.vpmin.v1f64.v2f64(<2 x double>)
define <1 x double> @test_fminp_v1f64(<2 x double> %a) { define <1 x double> @test_fminp_v1f64(<2 x double> %a) {
; CHECK: test_fminp_v1f64: ; CHECK: test_fminp_v1f64:
%val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) ; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
; CHECK: fminp d0, v0.2d %val = call <1 x double> @llvm.aarch64.neon.vpmin.v1f64.v2f64(<2 x double> %a)
ret <1 x double> %val ret <1 x double> %val
} }
declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) declare <1 x float> @llvm.aarch64.neon.vpfmaxnm.v1f32.v2f32(<2 x float>)
define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) {
; CHECK: test_fmaxnmp_v1f32: ; CHECK: test_fmaxnmp_v1f32:
%val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) ; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
; CHECK: fmaxnmp s0, v0.2s %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm.v1f32.v2f32(<2 x float> %a)
ret <1 x float> %val ret <1 x float> %val
} }
declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) declare <1 x double> @llvm.aarch64.neon.vpfmaxnm.v1f64.v2f64(<2 x double>)
define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) {
; CHECK: test_fmaxnmp_v1f64: ; CHECK: test_fmaxnmp_v1f64:
%val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) ; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
; CHECK: fmaxnmp d0, v0.2d %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnm.v1f64.v2f64(<2 x double> %a)
ret <1 x double> %val ret <1 x double> %val
} }
declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>) declare <1 x float> @llvm.aarch64.neon.vpfminnm.v1f32.v2f32(<2 x float>)
define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) { define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) {
; CHECK: test_fminnmp_v1f32: ; CHECK: test_fminnmp_v1f32:
%val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) ; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
; CHECK: fminnmp s0, v0.2s %val = call <1 x float> @llvm.aarch64.neon.vpfminnm.v1f32.v2f32(<2 x float> %a)
ret <1 x float> %val ret <1 x float> %val
} }
declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) declare <1 x double> @llvm.aarch64.neon.vpfminnm.v1f64.v2f64(<2 x double>)
define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) {
; CHECK: test_fminnmp_v1f64: ; CHECK: test_fminnmp_v1f64:
%val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) ; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
; CHECK: fminnmp d0, v0.2d %val = call <1 x double> @llvm.aarch64.neon.vpfminnm.v1f64.v2f64(<2 x double> %a)
ret <1 x double> %val ret <1 x double> %val
} }
define float @test_vaddv_f32(<2 x float> %a) { define float @test_vaddv_f32(<2 x float> %a) {