diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 8c550b7c859..6fbcfe8b669 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -133,6 +133,10 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
                 [IntrNoMem]>;
+  class AdvSIMD_2VectorArg_Lane_Intrinsic
+    : Intrinsic<[llvm_anyint_ty],
+                [LLVMMatchType<0>, llvm_anyint_ty, llvm_i32_ty],
+                [IntrNoMem]>;
 
   class AdvSIMD_3VectorArg_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -207,9 +211,13 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
 
   // Vector Saturating Doubling Multiply High
   def int_aarch64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
+  def int_aarch64_neon_sqdmulh_lane : AdvSIMD_2VectorArg_Lane_Intrinsic;
+  def int_aarch64_neon_sqdmulh_laneq : AdvSIMD_2VectorArg_Lane_Intrinsic;
 
   // Vector Saturating Rounding Doubling Multiply High
   def int_aarch64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
+  def int_aarch64_neon_sqrdmulh_lane : AdvSIMD_2VectorArg_Lane_Intrinsic;
+  def int_aarch64_neon_sqrdmulh_laneq : AdvSIMD_2VectorArg_Lane_Intrinsic;
 
   // Vector Polynominal Multiply
   def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d91ef35afec..db27a53963d 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -360,6 +360,9 @@ def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
 def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
 def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
 
+def UImmS1XForm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
 def UImmS2XForm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() / 2, SDLoc(N), MVT::i64);
 }]>;
@@ -7968,6 +7971,64 @@ multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
   }
 }
 
+multiclass SIMDIndexedHSPatterns<SDPatternOperator OpNodeLane,
+                                 SDPatternOperator OpNodeLaneQ> {
+
+  def : Pat<(v4i16 (OpNodeLane
+                     (v4i16 V64:$Rn), (v4i16 V64_lo:$Rm),
+                     VectorIndexS32b:$idx)),
+            (!cast<Instruction>(NAME # v4i16_indexed) $Rn,
+              (SUBREG_TO_REG (i32 0), (v4i16 V64_lo:$Rm), dsub),
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v4i16 (OpNodeLaneQ
+                     (v4i16 V64:$Rn), (v8i16 V128_lo:$Rm),
+                     VectorIndexH32b:$idx)),
+            (!cast<Instruction>(NAME # v4i16_indexed) $Rn, $Rm,
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v8i16 (OpNodeLane
+                     (v8i16 V128:$Rn), (v4i16 V64_lo:$Rm),
+                     VectorIndexS32b:$idx)),
+            (!cast<Instruction>(NAME # v8i16_indexed) $Rn,
+              (SUBREG_TO_REG (i32 0), $Rm, dsub),
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v8i16 (OpNodeLaneQ
+                     (v8i16 V128:$Rn), (v8i16 V128_lo:$Rm),
+                     VectorIndexH32b:$idx)),
+            (!cast<Instruction>(NAME # v8i16_indexed) $Rn, $Rm,
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v2i32 (OpNodeLane
+                     (v2i32 V64:$Rn), (v2i32 V64:$Rm),
+                     VectorIndexD32b:$idx)),
+            (!cast<Instruction>(NAME # v2i32_indexed) $Rn,
+              (SUBREG_TO_REG (i32 0), (v2i32 V64_lo:$Rm), dsub),
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v2i32 (OpNodeLaneQ
+                     (v2i32 V64:$Rn), (v4i32 V128:$Rm),
+                     VectorIndexS32b:$idx)),
+            (!cast<Instruction>(NAME # v2i32_indexed) $Rn, $Rm,
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v4i32 (OpNodeLane
+                     (v4i32 V128:$Rn), (v2i32 V64:$Rm),
+                     VectorIndexD32b:$idx)),
+            (!cast<Instruction>(NAME # v4i32_indexed) $Rn,
+              (SUBREG_TO_REG (i32 0), $Rm, dsub),
+              (UImmS1XForm $idx))>;
+
+  def : Pat<(v4i32 (OpNodeLaneQ
+                     (v4i32 V128:$Rn),
+                     (v4i32 V128:$Rm),
+                     VectorIndexS32b:$idx)),
+            (!cast<Instruction>(NAME # v4i32_indexed) $Rn, $Rm,
+              (UImmS1XForm $idx))>;
+
+}
+
 multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
                          SDPatternOperator OpNode> {
   def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index c2853da050f..9cf1a5166fe 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5631,6 +5631,11 @@ def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
 
 defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
+                                     int_aarch64_neon_sqdmulh_laneq>;
+defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
+                                      int_aarch64_neon_sqrdmulh_laneq>;
+
 // Generated by MachineCombine
 defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
 defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 40efac261fd..58308378401 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -230,6 +230,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::FPR16RegClassID:
   case AArch64::FPR32RegClassID:
   case AArch64::FPR64RegClassID:
+  case AArch64::FPR64_loRegClassID:
   case AArch64::FPR128RegClassID:
   case AArch64::FPR128_loRegClassID:
   case AArch64::DDRegClassID:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index cdfbc0f5f69..61834874a86 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -596,6 +596,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
     return 32;
 
   case AArch64::FPR128_loRegClassID:
+  case AArch64::FPR64_loRegClassID:
     return 16;
   }
 }
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index f52feab0395..4d8939195b5 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -429,6 +429,10 @@ def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
 def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
                                       v1i64, v4f16],
                                      64, (sequence "D%u", 0, 31)>;
+def FPR64_lo : RegisterClass<"AArch64",
+                             [v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64],
+                             64, (trunc FPR64, 16)>;
+
 // We don't (yet) have an f128 legal type, so don't use that here. We
 // normalize 128-bit vectors to v2f64 for arg passing and such, so use
 // that here.
@@ -503,6 +507,9 @@ def VectorRegLoAsmOperand : AsmOperandClass {
   let Name = "VectorRegLo";
   let PredicateMethod = "isNeonVectorRegLo";
 }
+def V64_lo : RegisterOperand<FPR64_lo, "printVRegOperand"> {
+  let ParserMatchClass = VectorRegLoAsmOperand;
+}
 def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
   let ParserMatchClass = VectorRegLoAsmOperand;
 }
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 5c7697a8bbc..fb09cc247a1 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1033,8 +1033,10 @@ public:
 
   bool isNeonVectorRegLo() const {
     return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
-           AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
-               Reg.RegNum);
+           (AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
+                Reg.RegNum) ||
+            AArch64MCRegisterClasses[AArch64::FPR64_loRegClassID].contains(
+                Reg.RegNum));
   }
 
   template <RegKind VectorKind> bool isSVEVectorReg() const {
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll
index 26826789107..eee0d77d98e 100644
--- a/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -9,20 +9,36 @@ declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
 declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
 
 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32)
 
 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32)
 
 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32)
 
 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32)
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32)
 
 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32)
+declare <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32)
 
 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32)
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32)
 
 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32)
+declare <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32)
 
 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32)
+declare <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32)
 
 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
 
@@ -1515,6 +1531,37 @@ entry:
   ret <4 x i16> %vqdmulh2.i
 }
 
+define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3)
+  ret <4 x i16> %vqdmulh2.i
+}
+
+define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3)
+  ret <4 x i16> %vqdmulh2.i
+}
+
+define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[7]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7)
+  ret <4 x i16> %vqdmulh2.i
+}
+
 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
@@ -1527,6 +1574,37 @@ entry:
   ret <8 x i16> %vqdmulh2.i
 }
 
+define <8 x i16> @test_vqdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s16_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16> %a, <4 x i16> %v, i32 3)
+  ret <8 x i16> %vqdmulh2.i
+}
+
+define <8 x i16> @test_vqdmulhq_laneq_s16_intrinsic_lo(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_laneq_s16_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 3)
+  ret <8 x i16> %vqdmulh2.i
+}
+
+define <8 x i16> @test_vqdmulhq_laneq_s16_intrinsic_hi(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_laneq_s16_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[7]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 7)
+  ret <8 x i16> %vqdmulh2.i
+}
+
 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
@@ -1539,6 +1617,37 @@ entry:
   ret <2 x i32> %vqdmulh2.i
 }
 
+define <2 x i32> @test_vqdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s32_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32> %a, <2 x i32> %v, i32 1)
+  ret <2 x i32> %vqdmulh2.i
+}
+
+define <2 x i32> @test_vqdmulh_laneq_s32_intrinsic_lo(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_laneq_s32_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 1)
+  ret <2 x i32> %vqdmulh2.i
+}
+
+define <2 x i32> @test_vqdmulh_laneq_s32_intrinsic_hi(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_laneq_s32_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 3)
+  ret <2 x i32> %vqdmulh2.i
+}
+
 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
@@ -1551,6 +1660,37 @@ entry:
   ret <4 x i32> %vqdmulh2.i
 }
 
+define <4 x i32> @test_vqdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s32_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32> %a, <2 x i32> %v, i32 1)
+  ret <4 x i32> %vqdmulh2.i
+}
+
+define <4 x i32> @test_vqdmulhq_laneq_s32_intrinsic_lo(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_laneq_s32_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 1)
+  ret <4 x i32> %vqdmulh2.i
+}
+
+define <4 x i32> @test_vqdmulhq_laneq_s32_intrinsic_hi(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_laneq_s32_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 3)
+  ret <4 x i32> %vqdmulh2.i
+}
+
 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
@@ -1563,6 +1703,37 @@ entry:
   ret <4 x i16> %vqrdmulh2.i
 }
 
+define <4 x i16> @test_vqrdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s16_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3)
+  ret <4 x i16> %vqrdmulh2.i
+}
+
+define <4 x i16> @test_vqrdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_laneq_s16_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3)
+  ret <4 x i16> %vqrdmulh2.i
+}
+
+define <4 x i16> @test_vqrdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_laneq_s16_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[7]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7)
+  ret <4 x i16> %vqrdmulh2.i
+}
+
 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
@@ -1575,6 +1746,37 @@ entry:
   ret <8 x i16> %vqrdmulh2.i
 }
 
+define <8 x i16> @test_vqrdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s16_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16> %a, <4 x i16> %v, i32 3)
+  ret <8 x i16> %vqrdmulh2.i
+}
+
+define <8 x i16> @test_vqrdmulhq_laneq_s16_intrinsic_lo(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_laneq_s16_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 3)
+  ret <8 x i16> %vqrdmulh2.i
+}
+
+define <8 x i16> @test_vqrdmulhq_laneq_s16_intrinsic_hi(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_laneq_s16_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[7]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> %a, <8 x i16> %v, i32 7)
+  ret <8 x i16> %vqrdmulh2.i
+}
+
 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
@@ -1587,6 +1789,37 @@ entry:
   ret <2 x i32> %vqrdmulh2.i
 }
 
+define <2 x i32> @test_vqrdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s32_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32> %a, <2 x i32> %v, i32 1)
+  ret <2 x i32> %vqrdmulh2.i
+}
+
+define <2 x i32> @test_vqrdmulh_laneq_s32_intrinsic_lo(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_laneq_s32_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 1)
+  ret <2 x i32> %vqrdmulh2.i
+}
+
+define <2 x i32> @test_vqrdmulh_laneq_s32_intrinsic_hi(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_laneq_s32_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> %a, <4 x i32> %v, i32 3)
+  ret <2 x i32> %vqrdmulh2.i
+}
+
 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
@@ -1599,6 +1832,37 @@ entry:
   ret <4 x i32> %vqrdmulh2.i
 }
 
+define <4 x i32> @test_vqrdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s32_intrinsic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32> %a, <2 x i32> %v, i32 1)
+  ret <4 x i32> %vqrdmulh2.i
+}
+
+define <4 x i32> @test_vqrdmulhq_laneq_s32_intrinsic_lo(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_laneq_s32_intrinsic_lo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 1)
+  ret <4 x i32> %vqrdmulh2.i
+}
+
+define <4 x i32> @test_vqrdmulhq_laneq_s32_intrinsic_hi(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_laneq_s32_intrinsic_hi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[3]
+; CHECK-NEXT:    ret
+entry:
+  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> %a, <4 x i32> %v, i32 3)
+  ret <4 x i32> %vqrdmulh2.i
+}
+
 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32:
 ; CHECK:       // %bb.0: // %entry
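
For reference, the ACLE-level usage these IR intrinsics are meant to back looks like the sketch below. It is illustrative only and not part of the patch: vqdmulh_laneq_s16 and vqrdmulhq_lane_s32 are standard arm_neon.h intrinsics, but lowering them to the new llvm.aarch64.neon.sq(r)dmulh.lane/.laneq intrinsics (rather than a dup feeding the plain intrinsic) is assumed to come from a matching front-end change.

    /* C usage sketch; function names are illustrative, not from the patch. */
    #include <arm_neon.h>

    int16x4_t demo_sqdmulh_laneq(int16x4_t a, int16x8_t v) {
      /* With lane-aware lowering this should select:
         sqdmulh v0.4h, v0.4h, v1.h[7] */
      return vqdmulh_laneq_s16(a, v, 7);
    }

    int32x4_t demo_sqrdmulh_lane(int32x4_t a, int32x2_t v) {
      /* Expected selection: sqrdmulh v0.4s, v0.4s, v1.s[1] */
      return vqrdmulhq_lane_s32(a, v, 1);
    }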