mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 04:34:41 +00:00
Reapply with r182909 with a fix to the calculation of the new indices for
insertelement instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182976 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6f6ec56b12
commit
903f26d904
@ -234,6 +234,7 @@ private:
|
||||
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
|
||||
Value *EmitGEPOffset(User *GEP);
|
||||
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
|
||||
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
|
||||
|
||||
public:
|
||||
// InsertNewInstBefore - insert an instruction New before instruction Old
|
||||
|
@ -494,6 +494,250 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Return true if we can evaluate the specified expression tree if the vector
|
||||
/// elements were shuffled in a different order.
|
||||
static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
|
||||
unsigned Depth = 100) {
|
||||
// We can always reorder the elements of a constant.
|
||||
if (isa<Constant>(V))
|
||||
return true;
|
||||
|
||||
// We won't reorder vector arguments. No IPO here.
|
||||
Instruction *I = dyn_cast<Instruction>(V);
|
||||
if (!I) return false;
|
||||
|
||||
// Two users may expect different orders of the elements. Don't try it.
|
||||
if (!I->hasOneUse())
|
||||
return false;
|
||||
|
||||
if (Depth == 0) return false;
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
case Instruction::FSub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::FMul:
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
case Instruction::FRem:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp:
|
||||
case Instruction::Trunc:
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
case Instruction::FPToSI:
|
||||
case Instruction::UIToFP:
|
||||
case Instruction::SIToFP:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::FPExt:
|
||||
case Instruction::GetElementPtr: {
|
||||
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
|
||||
if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case Instruction::InsertElement: {
|
||||
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
|
||||
if (!CI) return false;
|
||||
int ElementNumber = CI->getLimitedValue();
|
||||
|
||||
// Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
|
||||
// can't put an element into multiple indices.
|
||||
bool SeenOnce = false;
|
||||
for (int i = 0, e = Mask.size(); i != e; ++i) {
|
||||
if (Mask[i] == ElementNumber) {
|
||||
if (SeenOnce)
|
||||
return false;
|
||||
SeenOnce = true;
|
||||
}
|
||||
}
|
||||
return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Rebuild a new instruction just like 'I' but with the new operands given.
|
||||
/// In the event of type mismatch, the type of the operands is correct.
|
||||
static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
|
||||
// We don't want to use the IRBuilder here because we want the replacement
|
||||
// instructions to appear next to 'I', not the builder's insertion point.
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
case Instruction::FSub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::FMul:
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
case Instruction::FRem:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
BinaryOperator *BO = cast<BinaryOperator>(I);
|
||||
assert(NewOps.size() == 2 && "binary operator with #ops != 2");
|
||||
BinaryOperator *New =
|
||||
BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
|
||||
NewOps[0], NewOps[1], "", BO);
|
||||
if (isa<OverflowingBinaryOperator>(BO)) {
|
||||
New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
|
||||
New->setHasNoSignedWrap(BO->hasNoSignedWrap());
|
||||
}
|
||||
if (isa<PossiblyExactOperator>(BO)) {
|
||||
New->setIsExact(BO->isExact());
|
||||
}
|
||||
return New;
|
||||
}
|
||||
case Instruction::ICmp:
|
||||
assert(NewOps.size() == 2 && "icmp with #ops != 2");
|
||||
return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
|
||||
NewOps[0], NewOps[1]);
|
||||
case Instruction::FCmp:
|
||||
assert(NewOps.size() == 2 && "fcmp with #ops != 2");
|
||||
return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
|
||||
NewOps[0], NewOps[1]);
|
||||
case Instruction::Trunc:
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
case Instruction::FPToSI:
|
||||
case Instruction::UIToFP:
|
||||
case Instruction::SIToFP:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::FPExt: {
|
||||
// It's possible that the mask has a different number of elements from
|
||||
// the original cast. We recompute the destination type to match the mask.
|
||||
Type *DestTy =
|
||||
VectorType::get(I->getType()->getScalarType(),
|
||||
NewOps[0]->getType()->getVectorNumElements());
|
||||
assert(NewOps.size() == 1 && "cast with #ops != 1");
|
||||
return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
|
||||
"", I);
|
||||
}
|
||||
case Instruction::GetElementPtr: {
|
||||
Value *Ptr = NewOps[0];
|
||||
ArrayRef<Value*> Idx = NewOps.slice(1);
|
||||
GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
|
||||
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
|
||||
return GEP;
|
||||
}
|
||||
}
|
||||
llvm_unreachable("failed to rebuild vector instructions");
|
||||
}
|
||||
|
||||
Value *
|
||||
InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
|
||||
// Mask.size() does not need to be equal to the number of vector elements.
|
||||
|
||||
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
|
||||
if (isa<UndefValue>(V)) {
|
||||
return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
|
||||
Mask.size()));
|
||||
}
|
||||
if (isa<ConstantAggregateZero>(V)) {
|
||||
return ConstantAggregateZero::get(
|
||||
VectorType::get(V->getType()->getScalarType(),
|
||||
Mask.size()));
|
||||
}
|
||||
if (Constant *C = dyn_cast<Constant>(V)) {
|
||||
SmallVector<Constant *, 16> MaskValues;
|
||||
for (int i = 0, e = Mask.size(); i != e; ++i) {
|
||||
if (Mask[i] == -1)
|
||||
MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
|
||||
else
|
||||
MaskValues.push_back(Builder->getInt32(Mask[i]));
|
||||
}
|
||||
return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
|
||||
ConstantVector::get(MaskValues));
|
||||
}
|
||||
|
||||
Instruction *I = cast<Instruction>(V);
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
case Instruction::FSub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::FMul:
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
case Instruction::FRem:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp:
|
||||
case Instruction::Trunc:
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
case Instruction::FPToSI:
|
||||
case Instruction::UIToFP:
|
||||
case Instruction::SIToFP:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::FPExt:
|
||||
case Instruction::Select:
|
||||
case Instruction::GetElementPtr: {
|
||||
SmallVector<Value*, 8> NewOps;
|
||||
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
|
||||
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
|
||||
Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
|
||||
NewOps.push_back(V);
|
||||
NeedsRebuild |= (V != I->getOperand(i));
|
||||
}
|
||||
if (NeedsRebuild) {
|
||||
return BuildNew(I, NewOps);
|
||||
}
|
||||
return I;
|
||||
}
|
||||
case Instruction::InsertElement: {
|
||||
int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
|
||||
if (Element < 0 || Element >= (int)Mask.size()) {
|
||||
// Such instructions are valid and exhibit undefined behaviour.
|
||||
return UndefValue::get(I->getType());
|
||||
}
|
||||
|
||||
// The insertelement was inserting at Element. Figure out which element
|
||||
// that becomes after shuffling. The answer is guaranteed to be unique
|
||||
// by CanEvaluateShuffled.
|
||||
int Index = 0;
|
||||
for (int e = Mask.size(); Index != e; ++Index)
|
||||
if (Mask[Index] == Element)
|
||||
break;
|
||||
|
||||
Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
|
||||
return InsertElementInst::Create(V, I->getOperand(1),
|
||||
Builder->getInt32(Index), "", I);
|
||||
}
|
||||
}
|
||||
llvm_unreachable("failed to reorder elements of vector instruction!");
|
||||
}
|
||||
|
||||
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
Value *LHS = SVI.getOperand(0);
|
||||
@ -525,9 +769,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
if (LHS == RHS || isa<UndefValue>(LHS)) {
|
||||
if (isa<UndefValue>(LHS) && LHS == RHS) {
|
||||
// shuffle(undef,undef,mask) -> undef.
|
||||
Value* result = (VWidth == LHSWidth)
|
||||
Value *Result = (VWidth == LHSWidth)
|
||||
? LHS : UndefValue::get(SVI.getType());
|
||||
return ReplaceInstUsesWith(SVI, result);
|
||||
return ReplaceInstUsesWith(SVI, Result);
|
||||
}
|
||||
|
||||
// Remap any references to RHS to use LHS.
|
||||
@ -574,6 +818,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
|
||||
}
|
||||
|
||||
if (isa<UndefValue>(RHS) &&
|
||||
// This isn't necessary for correctness, but the comment block below
|
||||
// claims that there are cases where folding two shuffles into one would
|
||||
// cause worse codegen on some targets.
|
||||
!isa<ShuffleVectorInst>(LHS) &&
|
||||
CanEvaluateShuffled(LHS, Mask)) {
|
||||
Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
|
||||
return ReplaceInstUsesWith(SVI, V);
|
||||
}
|
||||
|
||||
// If the LHS is a shufflevector itself, see if we can combine it with this
|
||||
// one without producing an unusual shuffle.
|
||||
// Cases that might be simplified:
|
||||
|
@ -153,3 +153,24 @@ define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i8> @test13a(i8 %x1, i8 %x2) {
|
||||
; CHECK: @test13a
|
||||
; CHECK-NEXT: insertelement {{.*}} undef, i8 %x1, i32 1
|
||||
; CHECK-NEXT: insertelement {{.*}} i8 %x2, i32 0
|
||||
; CHECK-NEXT: add {{.*}} <i8 7, i8 5>
|
||||
; CHECK-NEXT: ret
|
||||
%A = insertelement <2 x i8> undef, i8 %x1, i32 0
|
||||
%B = insertelement <2 x i8> %A, i8 %x2, i32 1
|
||||
%C = add <2 x i8> %B, <i8 5, i8 7>
|
||||
%D = shufflevector <2 x i8> %C, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
|
||||
ret <2 x i8> %D
|
||||
}
|
||||
|
||||
define <2 x i8> @test13b(i8 %x) {
|
||||
; CHECK: @test13b
|
||||
; CHECK-NEXT: insertelement <2 x i8> undef, i8 %x, i32 1
|
||||
; CHECK-NEXT: ret
|
||||
%A = insertelement <2 x i8> undef, i8 %x, i32 0
|
||||
%B = shufflevector <2 x i8> %A, <2 x i8> undef, <2 x i32> <i32 undef, i32 0>
|
||||
ret <2 x i8> %B
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user