mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-03 09:21:13 +00:00
High bits of movmskp{s|d} and pmovmskb are known zero. rdar://10247336
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141371 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6d2f9cec71
commit
7c1780c5fe
@ -12333,6 +12333,33 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
||||
KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
|
||||
Mask.getBitWidth() - 1);
|
||||
break;
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
unsigned NumLoBits = 0;
|
||||
switch (IntId) {
|
||||
default: break;
|
||||
case Intrinsic::x86_sse_movmsk_ps:
|
||||
case Intrinsic::x86_avx_movmsk_ps_256:
|
||||
case Intrinsic::x86_sse2_movmsk_pd:
|
||||
case Intrinsic::x86_avx_movmsk_pd_256:
|
||||
case Intrinsic::x86_mmx_pmovmskb:
|
||||
case Intrinsic::x86_sse2_pmovmskb_128: {
|
||||
// High bits of movmskp{s|d}, pmovmskb are known zero.
|
||||
switch (IntId) {
|
||||
case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
|
||||
case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
|
||||
case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
|
||||
case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
|
||||
case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
|
||||
case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
|
||||
}
|
||||
KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
|
||||
Mask.getBitWidth() - NumLoBits);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,3 +77,34 @@ entry:
|
||||
%shr.i = lshr i32 %2, 31
|
||||
ret i32 %shr.i
|
||||
}
|
||||
|
||||
; rdar://10247336
|
||||
; movmskp{s|d} only sets the low 4/2 bits; the high bits are known zero
|
||||
|
||||
define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
; CHECK: t1:
|
||||
; CHECK: movmskps
|
||||
; CHECK-NOT: movslq
|
||||
%0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
|
||||
%idxprom = sext i32 %0 to i64
|
||||
%arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
|
||||
%1 = load i32* %arrayidx, align 4
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
; CHECK: t2:
|
||||
; CHECK: movmskpd
|
||||
; CHECK-NOT: movslq
|
||||
%0 = bitcast <4 x float> %x to <2 x double>
|
||||
%1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
|
||||
%idxprom = sext i32 %1 to i64
|
||||
%arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
Loading…
Reference in New Issue
Block a user