diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fc2d5ce64e8..a351fa5ca86 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12333,6 +12333,28 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
     KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
                                        Mask.getBitWidth() - 1);
     break;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    // Operand 0 of the node holds the intrinsic ID as a constant.
+    unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    // Number of low result bits the intrinsic may set. It stays 0 for any
+    // intrinsic not listed below, in which case nothing is recorded.
+    unsigned NumLoBits = 0;
+    switch (IntId) {
+    default: break;
+    case Intrinsic::x86_sse_movmsk_ps:     NumLoBits = 4;  break;
+    case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8;  break;
+    case Intrinsic::x86_sse2_movmsk_pd:    NumLoBits = 2;  break;
+    case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4;  break;
+    case Intrinsic::x86_mmx_pmovmskb:      NumLoBits = 8;  break;
+    case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+    }
+    // High bits of movmskp{s|d}, pmovmskb are known zero.
+    if (NumLoBits != 0)
+      KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+                                        Mask.getBitWidth() - NumLoBits);
+    break;
+  }
   }
 }
 
diff --git a/test/CodeGen/X86/2011-05-31-movmsk.ll b/test/CodeGen/X86/movmsk.ll
similarity index 71%
rename from test/CodeGen/X86/2011-05-31-movmsk.ll
rename to test/CodeGen/X86/movmsk.ll
index 2b54d5cc853..2368548bfa8 100644
--- a/test/CodeGen/X86/2011-05-31-movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -77,3 +77,35 @@ entry:
   %shr.i = lshr i32 %2, 31
   ret i32 %shr.i
 }
+
+; rdar://10247336
+; movmskp{s|d} only set the low 4/2 bits; the high bits are known zero.
+; The tests below sign-extend the result to i64 and check that no movslq is emitted.
+
+define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
+entry:
+; CHECK: t1:
+; CHECK: movmskps
+; CHECK-NOT: movslq
+  %0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
+  %idxprom = sext i32 %0 to i64
+  %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
+  %1 = load i32* %arrayidx, align 4
+  ret i32 %1
+}
+
+define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
+entry:
+; CHECK: t2:
+; CHECK: movmskpd
+; CHECK-NOT: movslq
+  %0 = bitcast <4 x float> %x to <2 x double>
+  %1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
+  %idxprom = sext i32 %1 to i64
+  %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
+  %2 = load i32* %arrayidx, align 4
+  ret i32 %2
+}
+
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone