Mirror of https://github.com/RPCS3/llvm.git
Synced 2025-01-19 08:24:12 +00:00
Implemented Neon scalar by element intrinsics.
Intrinsics implemented: the vqdmull_lane, vqdmulh_lane, vqrdmulh_lane, vqdmlal_lane, and vqdmlsl_lane scalar Neon intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195327 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 42a9da35b8
commit 1c93766aa5
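For context, here is a minimal sketch (not taken from this commit) of the IR shape the new by-element patterns are meant to match: the lane is pulled out of a vector with extractelement, rebuilt as a one-element vector, and passed to the existing llvm.arm.neon.vqdmull intrinsic, which the SQDMULLdsv_2S pattern added below should then select to a lane-indexed sqdmull. The function name and the expected instruction are assumptions, not part of the patch.

declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)

define i64 @test_vqdmulls_lane_s32(i32 %a, <2 x i32> %v) {
entry:
  ; build the plain scalar operand as a <1 x i32>
  %lhs = insertelement <1 x i32> undef, i32 %a, i32 0
  ; extract lane 1 and rebuild it as a <1 x i32>
  %lane = extractelement <2 x i32> %v, i32 1
  %rhs = insertelement <1 x i32> undef, i32 %lane, i32 0
  ; expected to select to a lane-indexed form such as sqdmull d0, s0, v1.s[1]
  %prod = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %lhs, <1 x i32> %rhs)
  %out = extractelement <1 x i64> %prod, i32 0
  ret i64 %out
}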
@@ -196,9 +196,6 @@ def int_aarch64_neon_vaddds :
def int_aarch64_neon_vadddu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Add (Signed, Unsigned)
def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;

// Scalar Sub
def int_aarch64_neon_vsubds :
@@ -206,9 +203,6 @@ def int_aarch64_neon_vsubds :
def int_aarch64_neon_vsubdu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Sub (Signed, Unsigned)
def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;

// Scalar Shift
// Scalar Shift Left
@@ -324,9 +318,6 @@ def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
// Signed Saturating Doubling Multiply-Subtract Long
def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;

// Signed Saturating Doubling Multiply Long
def int_aarch64_neon_vqdmull : Neon_2Arg_Long_Intrinsic;

class Neon_2Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -4958,22 +4958,16 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
  SQADDhhh, SQADDsss, SQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
  UQADDhhh, UQADDsss, UQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
  SQSUBhhh, SQSUBsss, SQSUBddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
  UQSUBhhh, UQSUBsss, UQSUBddd>;

// Scalar Integer Saturating Doubling Multiply Half High
@@ -5093,7 +5087,7 @@ defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,

// Signed Saturating Doubling Multiply Long
defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
  SQDMULLshh, SQDMULLdss>;

// Scalar Signed Integer Convert To Floating-point
@@ -5564,7 +5558,8 @@ multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  OpNImm:$Imm))>;
}

// Scalar Floating Point fused multiply-add and multiply-subtract (scalar, by element)
// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
@@ -5572,6 +5567,70 @@ defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Scalar Signed saturating doubling multiply long (scalar, by element)
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}

multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
    (OpVTy (scalar_to_vector
      (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
    (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def : Pat<(ResTy (opnode
    (OpVTy (scalar_to_vector
      (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
    (OpVTy FPRC:$Rn))),
    (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
}

// Patterns for Scalar Signed saturating doubling
// multiply long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
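The second Pat in the multiclass above is needed because the multiply commutes: the front end may place the extracted lane in either operand position. A hedged sketch (assumed, not one of the commit's tests) of that swapped form for the v4i16 case:

declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)

define <1 x i32> @test_vqdmull_lane_swapped(i16 %a, <4 x i16> %v) {
entry:
  ; lane operand built first, plain scalar operand second
  %lane = extractelement <4 x i16> %v, i32 3
  %lhs = insertelement <1 x i16> undef, i16 %lane, i32 0
  %rhs = insertelement <1 x i16> undef, i16 %a, i32 0
  ; the "swapped operands" pattern should still select SQDMULLshv_4H
  %res = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %lhs, <1 x i16> %rhs)
  ret <1 x i32> %res
}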
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
@@ -5629,34 +5688,64 @@ def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  let Inst{20-16} = MRm;
}

// Scalar Signed saturating doubling multiply long (scalar, by element)
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  Instruction INST,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpTy,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode
    (ResTy ResFPRC:$Ra),
    (ResTy (coreopnode (OpTy FPRC:$Rn),
      (OpTy (scalar_to_vector
        (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
    (ResTy (INST (ResTy ResFPRC:$Ra),
      (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def : Pat<(ResTy (opnode
    (ResTy ResFPRC:$Ra),
    (ResTy (coreopnode
      (OpTy (scalar_to_vector
        (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
      (OpTy FPRC:$Rn))))),
    (ResTy (INST (ResTy ResFPRC:$Ra),
      (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
}

// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
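Note that the multiply-accumulate forms above are matched as a composition of two existing intrinsics rather than as a dedicated one: a saturating add (or subtract) whose second operand is a saturating doubling multiply of a scalar by an extracted lane. A rough sketch of IR that, under that assumption, should collapse into a single sqdmlal (hypothetical example, not one of the commit's tests):

declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)

define <1 x i64> @test_vqdmlal_lane(<1 x i64> %acc, i32 %a, <2 x i32> %v) {
entry:
  %lhs = insertelement <1 x i32> undef, i32 %a, i32 0
  %lane = extractelement <2 x i32> %v, i32 0
  %rhs = insertelement <1 x i32> undef, i32 %lane, i32 0
  ; inner doubling multiply of the scalar by the lane
  %mul = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %lhs, <1 x i32> %rhs)
  ; outer saturating accumulate; the MLAL pattern is meant to fold both
  ; calls into one lane-indexed sqdmlal
  %res = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %acc, <1 x i64> %mul)
  ret <1 x i64> %res
}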
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
@@ -5686,6 +5775,21 @@ def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  let Inst{20-16} = MRm;
}

// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
@@ -5715,6 +5819,18 @@ def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  let Inst{20-16} = MRm;
}

defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;
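The returning-high-half instructions reuse the same Neon_ScalarXIndexedElem_MUL_Patterns multiclass; the only difference from the long forms is that the result stays the same width as the operands. A sketch of the IR shape for the rounding variant (the function name and expected selection are assumptions):

declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_vqrdmulhs_lane_s32(i32 %a, <4 x i32> %v) {
entry:
  %lhs = insertelement <1 x i32> undef, i32 %a, i32 0
  %lane = extractelement <4 x i32> %v, i32 2
  %rhs = insertelement <1 x i32> undef, i32 %lane, i32 0
  ; expected to be selected by SQRDMULHssv_4S as a lane-indexed sqrdmulh
  %res = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
  ret <1 x i32> %res
}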
// Scalar Copy - DUP element to scalar
class NeonI_Scalar_DUP<string asmop, string asmlane,
@@ -123,7 +123,7 @@ define i32 @test_vqdmullh_s16(i16 %a, i16 %b) {
entry:
%vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0
%vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0
%vqdmull2.i = call <1 x i32> @llvm.aarch64.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
%vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
%0 = extractelement <1 x i32> %vqdmull2.i, i32 0
ret i32 %0
}
@@ -134,10 +134,10 @@ define i64 @test_vqdmulls_s32(i32 %a, i32 %b) {
entry:
%vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0
%vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0
%vqdmull2.i = call <1 x i64> @llvm.aarch64.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
%vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
%0 = extractelement <1 x i64> %vqdmull2.i, i32 0
ret i64 %0
}

declare <1 x i32> @llvm.aarch64.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
declare <1 x i64> @llvm.aarch64.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)
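A regression test for one of the new by-element selections could follow the same style as the functions above; the sketch below is illustrative only, and the function, the lane choice, and the exact CHECK line are assumptions rather than part of this commit.

; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>)

define i16 @test_vqdmulhh_lane_s16(i16 %a, <4 x i16> %v) {
; CHECK: test_vqdmulhh_lane_s16:
entry:
  %lhs = insertelement <1 x i16> undef, i16 %a, i32 0
  %lane = extractelement <4 x i16> %v, i32 1
  %rhs = insertelement <1 x i16> undef, i16 %lane, i32 0
  %prod = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[1]
  %out = extractelement <1 x i16> %prod, i32 0
  ret i16 %out
}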
@@ -1,171 +1,138 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqadd d0, d0, d1
ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqadd d0, d0, d1
ret <1 x i64> %tmp1
}

declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqsub d0, d0, d1
ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqsub d0, d0, d1
ret <1 x i64> %tmp1
}

declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)

define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqadd_v1i8_aarch64:
%tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
%tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
ret <1 x i8> %tmp1
}

define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqadd_v1i8_aarch64:
%tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
%tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
ret <1 x i8> %tmp1
}

declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)

define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqsub_v1i8_aarch64:
%tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
%tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
ret <1 x i8> %tmp1
}

define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqsub_v1i8_aarch64:
%tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
%tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
ret <1 x i8> %tmp1
}

declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)

define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqadd_v1i16_aarch64:
%tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
%tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
ret <1 x i16> %tmp1
}

define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqadd_v1i16_aarch64:
%tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
%tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
ret <1 x i16> %tmp1
}

declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)

define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqsub_v1i16_aarch64:
%tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
%tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
ret <1 x i16> %tmp1
}

define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqsub_v1i16_aarch64:
%tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
%tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
ret <1 x i16> %tmp1
}

declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqadd_v1i32_aarch64:
%tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
%tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
ret <1 x i32> %tmp1
}

define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqadd_v1i32_aarch64:
%tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
%tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
ret <1 x i32> %tmp1
}

declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqsub_v1i32_aarch64:
%tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
%tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
ret <1 x i32> %tmp1
}

define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqsub_v1i32_aarch64:
%tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
%tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
ret <1 x i32> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64_aarch64:
%tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
%tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64_aarch64:
%tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
%tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
ret <1 x i64> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64_aarch64:
%tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64_aarch64:
%tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
ret <1 x i64> %tmp1
}