mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-14 16:40:42 +00:00
[X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar round intrinsics more correctly.
Now we only pass bit 0 of the DemandedElts to optimize operand 1 as we recurse, since the upper bits are unused. Similarly, we clear bit 0 for optimizing operand 0. Also calculate UndefElts correctly. Simplify InstCombineCalls for these intrinsics to just call SimplifyDemandedVectorElts for the call instruction to reuse this support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289629 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
52ed6069ee
commit
23156f1924
@ -1437,12 +1437,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
|
||||
return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
|
||||
};
|
||||
auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width,
|
||||
unsigned DemandedWidth) {
|
||||
APInt UndefElts(Width, 0);
|
||||
APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth);
|
||||
return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
|
||||
};
|
||||
|
||||
switch (II->getIntrinsicID()) {
|
||||
default: break;
|
||||
@ -1799,33 +1793,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse41_round_ss:
|
||||
case Intrinsic::x86_sse41_round_sd: {
|
||||
// These intrinsics demand the upper elements of the first input vector and
|
||||
// the lowest element of the second input vector.
|
||||
bool MadeChange = false;
|
||||
Value *Arg0 = II->getArgOperand(0);
|
||||
Value *Arg1 = II->getArgOperand(1);
|
||||
unsigned VWidth = Arg0->getType()->getVectorNumElements();
|
||||
if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) {
|
||||
II->setArgOperand(0, V);
|
||||
MadeChange = true;
|
||||
}
|
||||
if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
|
||||
II->setArgOperand(1, V);
|
||||
MadeChange = true;
|
||||
}
|
||||
if (MadeChange)
|
||||
return II;
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse_cmp_ss:
|
||||
case Intrinsic::x86_sse_min_ss:
|
||||
case Intrinsic::x86_sse_max_ss:
|
||||
case Intrinsic::x86_sse2_cmp_sd:
|
||||
case Intrinsic::x86_sse2_min_sd:
|
||||
case Intrinsic::x86_sse2_max_sd:
|
||||
case Intrinsic::x86_sse41_round_ss:
|
||||
case Intrinsic::x86_sse41_round_sd:
|
||||
case Intrinsic::x86_xop_vfrcz_ss:
|
||||
case Intrinsic::x86_xop_vfrcz_sd: {
|
||||
unsigned VWidth = II->getType()->getVectorNumElements();
|
||||
|
@ -1321,25 +1321,33 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
||||
break;
|
||||
}
|
||||
|
||||
// Binary scalar-as-vector operations that work column-wise. A dest element
|
||||
// is a function of the corresponding input elements from the two inputs.
|
||||
// Binary scalar-as-vector operations that work column-wise. The high
|
||||
// elements come from operand 0 and the low element comes from operand 1.
|
||||
case Intrinsic::x86_sse41_round_ss:
|
||||
case Intrinsic::x86_sse41_round_sd:
|
||||
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
|
||||
case Intrinsic::x86_sse41_round_sd: {
|
||||
// Don't use the low element of operand 0.
|
||||
APInt DemandedElts2 = DemandedElts;
|
||||
DemandedElts2.clearBit(0);
|
||||
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
|
||||
UndefElts, Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
|
||||
|
||||
// If lowest element of a scalar op isn't used then use Arg0.
|
||||
if (!DemandedElts[0])
|
||||
return II->getArgOperand(0);
|
||||
|
||||
// Only lower element is used for operand 1.
|
||||
DemandedElts = 1;
|
||||
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
|
||||
UndefElts2, Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
|
||||
|
||||
// If lowest element of a scalar op isn't used then use Arg0.
|
||||
if (DemandedElts.getLoBits(1) != 1)
|
||||
return II->getArgOperand(0);
|
||||
|
||||
// Output elements are undefined if both are undefined. Consider things
|
||||
// like undef&0. The result is known zero, not undef.
|
||||
UndefElts &= UndefElts2;
|
||||
// Take the high undef elements from operand 0 and take the lower element
|
||||
// from operand 1.
|
||||
UndefElts.clearBit(0);
|
||||
UndefElts |= UndefElts2[0];
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_fma_vfmadd_ss:
|
||||
case Intrinsic::x86_fma_vfmsub_ss:
|
||||
|
Loading…
x
Reference in New Issue
Block a user