diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index fef39a61c5b..2df268eea2d 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -453,13 +453,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_sse2_psll_dq : - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_sse2_psrl_dq : - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_i32_ty], [IntrNoMem]>; } // Conversion ops @@ -1580,13 +1573,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psll_dq : - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrl_dq : - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index aefe69d2e60..0da77844752 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -163,6 +163,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.vbroadcast.ss" || Name == "x86.avx.vbroadcast.ss.256" || Name == "x86.avx.vbroadcast.sd.256" || + Name == "x86.sse2.psll.dq" || + Name == "x86.sse2.psrl.dq" || + Name == "x86.avx2.psll.dq" || + Name == "x86.avx2.psrl.dq" || Name == "x86.sse2.psll.dq.bs" || Name == "x86.sse2.psrl.dq.bs" || Name == "x86.avx2.psll.dq.bs" || @@ -372,6 +376,80 @@ static MetadataAsValue *getExpression(Value *VarOperand, Function *F) { return MetadataAsValue::get(F->getContext(), Expr); } +// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them +// to byte shuffles. +static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, + Value *Op, unsigned NumLanes, + unsigned Shift) { + // Each lane is 16 bytes. + unsigned NumElts = NumLanes * 16; + + // Bitcast from a 64-bit element type to a byte element type. + Op = Builder.CreateBitCast(Op, + VectorType::get(Type::getInt8Ty(C), NumElts), + "cast"); + // We'll be shuffling in zeroes. + Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); + + // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, + // we'll just return the zero vector. + if (Shift < 16) { + SmallVector Idxs; + // 256-bit version is split into two 16-byte lanes. + for (unsigned l = 0; l != NumElts; l += 16) + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = NumElts + i - Shift; + if (Idx < NumElts) + Idx -= NumElts - 16; // end of lane, switch operand. + Idxs.push_back(Builder.getInt32(Idx + l)); + } + + Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs)); + } + + // Bitcast back to a 64-bit element type. + return Builder.CreateBitCast(Res, + VectorType::get(Type::getInt64Ty(C), 2*NumLanes), + "cast"); +} + +// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them +// to byte shuffles. +static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, + Value *Op, unsigned NumLanes, + unsigned Shift) { + // Each lane is 16 bytes. + unsigned NumElts = NumLanes * 16; + + // Bitcast from a 64-bit element type to a byte element type. + Op = Builder.CreateBitCast(Op, + VectorType::get(Type::getInt8Ty(C), NumElts), + "cast"); + // We'll be shuffling in zeroes. + Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); + + // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, + // we'll just return the zero vector. + if (Shift < 16) { + SmallVector Idxs; + // 256-bit version is split into two 16-byte lanes. + for (unsigned l = 0; l != NumElts; l += 16) + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = i + Shift; + if (Idx >= 16) + Idx += NumElts - 16; // end of lane, switch operand. + Idxs.push_back(Builder.getInt32(Idx + l)); + } + + Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs)); + } + + // Bitcast back to a 64-bit element type. + return Builder.CreateBitCast(Res, + VectorType::get(Type::getInt64Ty(C), 2*NumLanes), + "cast"); +} + // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the // upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. @@ -491,89 +569,46 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { for (unsigned I = 0; I < EltNum; ++I) Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I)); + } else if (Name == "llvm.x86.sse2.psll.dq") { + // 128-bit shift left specified in bits. + unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); + Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, + Shift / 8); // Shift is in bits. + } else if (Name == "llvm.x86.sse2.psrl.dq") { + // 128-bit shift right specified in bits. + unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); + Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, + Shift / 8); // Shift is in bits. + } else if (Name == "llvm.x86.avx2.psll.dq") { + // 256-bit shift left specified in bits. + unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); + Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, + Shift / 8); // Shift is in bits. + } else if (Name == "llvm.x86.avx2.psrl.dq") { + // 256-bit shift right specified in bits. + unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); + Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, + Shift / 8); // Shift is in bits. } else if (Name == "llvm.x86.sse2.psll.dq.bs") { - Value *Op0 = ConstantVector::getSplat(16, Builder.getInt8(0)); - Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0), - VectorType::get(Type::getInt8Ty(C),16), - "cast"); - + // 128-bit shift left specified in bytes. unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); - - if (Shift < 16) { - SmallVector Idxs; - for (unsigned i = 16; i != 32; ++i) - Idxs.push_back(Builder.getInt32(i - Shift)); - - Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); - } - - Rep = Builder.CreateBitCast(Op0, - VectorType::get(Type::getInt64Ty(C), 2), - "cast"); + Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, + Shift); } else if (Name == "llvm.x86.sse2.psrl.dq.bs") { - Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0), - VectorType::get(Type::getInt8Ty(C),16), - "cast"); - Value *Op1 = ConstantVector::getSplat(16, Builder.getInt8(0)); - + // 128-bit shift right specified in bytes. unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); - - if (Shift < 16) { - SmallVector Idxs; - for (unsigned i = 0; i != 16; ++i) - Idxs.push_back(Builder.getInt32(i + Shift)); - - Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); - } - Rep = Builder.CreateBitCast(Op1, - VectorType::get(Type::getInt64Ty(C), 2), - "cast"); + Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, + Shift); } else if (Name == "llvm.x86.avx2.psll.dq.bs") { - Value *Op0 = ConstantVector::getSplat(32, Builder.getInt8(0)); - Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0), - VectorType::get(Type::getInt8Ty(C),32), - "cast"); - + // 256-bit shift left specified in bytes. unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); - - if (Shift < 16) { - SmallVector Idxs; - for (unsigned l = 0; l != 32; l += 16) - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = 32 + i - Shift; - if (Idx < 32) Idx -= 16; // end of lane, switch operand. - Idxs.push_back(Builder.getInt32(Idx + l)); - } - - Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); - } - - Rep = Builder.CreateBitCast(Op1, - VectorType::get(Type::getInt64Ty(C), 4), - "cast"); + Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, + Shift); } else if (Name == "llvm.x86.avx2.psrl.dq.bs") { - Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0), - VectorType::get(Type::getInt8Ty(C),32), - "cast"); - Value *Op1 = ConstantVector::getSplat(32, Builder.getInt8(0)); - + // 256-bit shift right specified in bytes. unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); - - if (Shift < 16) { - SmallVector Idxs; - for (unsigned l = 0; l != 32; l += 16) - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = i + Shift; - if (Idx >= 16) Idx += 16; // end of lane, switch operand. - Idxs.push_back(Builder.getInt32(Idx + l)); - } - - Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); - } - - Rep = Builder.CreateBitCast(Op0, - VectorType::get(Type::getInt64Ty(C), 4), - "cast"); + Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, + Shift); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; if (Name == "llvm.x86.avx.vpermil.pd.256") diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9d2c83001d5..079487d19b4 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4283,26 +4283,11 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { } // Constraints = "$src1 = $dst" let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), - (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; - def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), - (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; } -let Predicates = [HasAVX2] in { - def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), - (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; - def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), - (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; -} - let Predicates = [UseSSE2] in { - def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), - (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; - def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), - (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 54049aee78a..e595662e99b 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2270,7 +2270,6 @@ struct MemorySanitizerVisitor : public InstVisitor { case llvm::Intrinsic::x86_avx2_pslli_w: case llvm::Intrinsic::x86_avx2_pslli_d: case llvm::Intrinsic::x86_avx2_pslli_q: - case llvm::Intrinsic::x86_avx2_psll_dq: case llvm::Intrinsic::x86_avx2_psrl_w: case llvm::Intrinsic::x86_avx2_psrl_d: case llvm::Intrinsic::x86_avx2_psrl_q: @@ -2281,14 +2280,12 @@ struct MemorySanitizerVisitor : public InstVisitor { case llvm::Intrinsic::x86_avx2_psrli_q: case llvm::Intrinsic::x86_avx2_psrai_w: case llvm::Intrinsic::x86_avx2_psrai_d: - case llvm::Intrinsic::x86_avx2_psrl_dq: case llvm::Intrinsic::x86_sse2_psll_w: case llvm::Intrinsic::x86_sse2_psll_d: case llvm::Intrinsic::x86_sse2_psll_q: case llvm::Intrinsic::x86_sse2_pslli_w: case llvm::Intrinsic::x86_sse2_pslli_d: case llvm::Intrinsic::x86_sse2_pslli_q: - case llvm::Intrinsic::x86_sse2_psll_dq: case llvm::Intrinsic::x86_sse2_psrl_w: case llvm::Intrinsic::x86_sse2_psrl_d: case llvm::Intrinsic::x86_sse2_psrl_q: @@ -2299,7 +2296,6 @@ struct MemorySanitizerVisitor : public InstVisitor { case llvm::Intrinsic::x86_sse2_psrli_q: case llvm::Intrinsic::x86_sse2_psrai_w: case llvm::Intrinsic::x86_sse2_psrai_d: - case llvm::Intrinsic::x86_sse2_psrl_dq: case llvm::Intrinsic::x86_mmx_psll_w: case llvm::Intrinsic::x86_mmx_psll_d: case llvm::Intrinsic::x86_mmx_psll_q: @@ -2334,10 +2330,6 @@ struct MemorySanitizerVisitor : public InstVisitor { // Byte shifts are not implemented. // case llvm::Intrinsic::x86_avx512_psll_dq_bs: // case llvm::Intrinsic::x86_avx512_psrl_dq_bs: - // case llvm::Intrinsic::x86_avx2_psll_dq_bs: - // case llvm::Intrinsic::x86_avx2_psrl_dq_bs: - // case llvm::Intrinsic::x86_sse2_psll_dq_bs: - // case llvm::Intrinsic::x86_sse2_psrl_dq_bs: case llvm::Intrinsic::x86_sse2_packsswb_128: case llvm::Intrinsic::x86_sse2_packssdw_128: diff --git a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index d2b44cd64ef..c65b0214a77 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -24,3 +24,17 @@ define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { + ; CHECK: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 2696f3b9652..3ecf709c87e 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -457,14 +457,6 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone -define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { - ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] - %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone - - define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vpsllq %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] @@ -545,14 +537,6 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone -define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { - ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] - %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone - - define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vpsrlq %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] diff --git a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll index fa1e9ee66eb..acc30983ea2 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll @@ -46,3 +46,19 @@ define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) { ret <4 x i64> %res } declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) { + ; CHECK: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] + %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) { + ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero + %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 2dec98991cb..da0f17a88de 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -160,14 +160,6 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) { declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone -define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) { - ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] - %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone - - define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) { ; CHECK: vpsllq %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] @@ -248,14 +240,6 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) { declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone -define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) { - ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] - %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone - - define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) { ; CHECK: vpsrlq %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll new file mode 100644 index 00000000000..b0412b96bdb --- /dev/null +++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=pentium4 -mattr=sse2 | FileCheck %s + +define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { + ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8] + %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { + ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone + +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { + ; CHECK: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 99357e73766..cab62a38661 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -410,14 +410,6 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone -define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { - ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] - %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone - - define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: psllq %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] @@ -498,14 +490,6 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone -define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { - ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] - %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone - - define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: psrlq %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]