Rename ValueRequiresCast to ShouldOptimizeCast, to better reflect what it does.

Enhance it to return false for vector sign extensions from vector comparisons, which is the idiom used to get a splatted vector for a vector comparison. Doing this breaks vector-casts.ll, so add some compensating transformations to handle the important cases it covers without depending on this canonicalization. This fixes rdar://7434900, a serious pessimization of vector compares.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: 8c5ad3a5da (parent 2e1cdbf92d)
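For context, here is a minimal IR sketch of the idiom the commit message refers to (illustrative only, not part of this commit): a vector compare yields a <4 x i1>, and sign-extending it produces a mask whose elements are all zeros or all ones, i.e. a splatted per-element boolean.

define <4 x i32> @splat_mask(<4 x i32> %a, <4 x i32> %b) {
  %cmp = icmp slt <4 x i32> %a, %b          ; <4 x i1>, one bit per element
  %mask = sext <4 x i1> %cmp to <4 x i32>   ; each element becomes 0 or -1
  ret <4 x i32> %mask
}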
@@ -199,11 +199,12 @@ private:
                                   SmallVectorImpl<Value*> &NewIndices);
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
 
-  /// ValueRequiresCast - Return true if the cast from "V to Ty" actually
-  /// results in any code being generated. It does not require codegen if V is
-  /// simple enough or if the cast can be folded into other casts.
-  bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                         const Type *Ty);
+  /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+  /// results in any code being generated and is interesting to optimize out. If
+  /// the cast can be eliminated by some other simple transformation, we prefer
+  /// to do the simplification first.
+  bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+                          const Type *Ty);
 
   Instruction *visitCallSite(CallSite CS);
   bool transformConstExprCastCall(CallSite CS);
@@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
     if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
       return Res;
 
+  // If and'ing two fcmp, try combine them into one.
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+        return Res;
+
   // fold (and (cast A), (cast B)) -> (cast (and A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() &&
-            SrcTy->isIntOrIntVector() &&
-            // Only do this if the casts both really cause code to be generated.
-            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                              I.getType()) &&
-            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                              I.getType())) {
-          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
-                                            Op1C->getOperand(0), I.getName());
-          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
-        }
-      }
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+      const Type *SrcTy = Op0C->getOperand(0)->getType();
+      if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+          SrcTy == Op1C->getOperand(0)->getType() &&
+          SrcTy->isIntOrIntVector()) {
+        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+        // Only do this if the casts both really cause code to be generated.
+        if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+          Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+        }
+
+        // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+          if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+            if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
+
+        // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+          if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+            if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
+      }
+    }
 
   // (X >> Z) & (Y >> Z)  -> (X&Y) >> Z  for all shifts.
   if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
@@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     }
   }
 
-  // If and'ing two fcmp, try combine them into one.
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
   return Changed ? &I : 0;
 }
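An illustrative example of the input shape the new and(cast(icmp), cast(icmp)) path targets (a sketch with made-up names, not taken from the commit): when FoldAndOfICmps can merge the two compares, a single sext of the merged compare replaces the and of two sexts, instead of sinking the and through the casts.

define <4 x i32> @and_of_sexts(<4 x i32> %x, <4 x i32> %y) {
  %c1 = icmp sgt <4 x i32> %x, %y
  %s1 = sext <4 x i1> %c1 to <4 x i32>
  %c2 = icmp ne <4 x i32> %x, %y
  %s2 = sext <4 x i1> %c2 to <4 x i32>
  ; Candidate for the new path: and(sext(icmp), sext(icmp)).
  %r = and <4 x i32> %s1, %s2
  ret <4 x i32> %r
}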
@@ -1669,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
       return Res;
 
+  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+        return Res;
+
   // fold (or (cast A), (cast B)) -> (cast (or A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
-        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
-            !isa<ICmpInst>(Op1C->getOperand(0))) {
-          const Type *SrcTy = Op0C->getOperand(0)->getType();
-          if (SrcTy == Op1C->getOperand(0)->getType() &&
-              SrcTy->isIntOrIntVector() &&
-              // Only do this if the casts both really cause code to be
-              // generated.
-              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                                I.getType()) &&
-              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                                I.getType())) {
-            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
-                                             Op1C->getOperand(0), I.getName());
-            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
-          }
-        }
-      }
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+      const Type *SrcTy = Op0C->getOperand(0)->getType();
+      if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+          SrcTy == Op1C->getOperand(0)->getType() &&
+          SrcTy->isIntOrIntVector()) {
+        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+        if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+            // Only do this if the casts both really cause code to be
+            // generated.
+            ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+          Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+        }
+
+        // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+          if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+            if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
+
+        // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+          if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+            if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
+      }
+    }
   }
 
-  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
   return Changed ? &I : 0;
 }
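The or path gains the mirror-image transforms; the relocated (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) comment describes the merge FoldOrOfFCmps performs. A made-up sketch (not from the commit) of the or(cast(fcmp), cast(fcmp)) shape the new code can now reach through the sexts:

define <4 x i32> @or_of_uno(<4 x float> %x, <4 x float> %y) {
  %u1 = fcmp uno <4 x float> %x, zeroinitializer
  %s1 = sext <4 x i1> %u1 to <4 x i32>
  %u2 = fcmp uno <4 x float> %y, zeroinitializer
  %s2 = sext <4 x i1> %u2 to <4 x i32>
  ; Candidate for the new path: or(sext(fcmp), sext(fcmp)).
  %r = or <4 x i32> %s1, %s2
  ret <4 x i32> %r
}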
@@ -1986,10 +2018,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       const Type *SrcTy = Op0C->getOperand(0)->getType();
       if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
           // Only do this if the casts both really cause code to be generated.
-          ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                            I.getType()) &&
-          ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                            I.getType())) {
+          ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+                             I.getType()) &&
+          ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+                             I.getType())) {
         Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
                                           Op1C->getOperand(0), I.getName());
         return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
@@ -255,17 +255,26 @@ isEliminableCastPair(
   return Instruction::CastOps(Res);
 }
 
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                                     const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+                                      const Type *Ty) {
   // Noop casts and casts of constants should be eliminated trivially.
   if (V->getType() == Ty || isa<Constant>(V)) return false;
 
-  // If this is another cast that can be eliminated, it isn't codegen either.
+  // If this is another cast that can be eliminated, we prefer to have it
+  // eliminated.
   if (const CastInst *CI = dyn_cast<CastInst>(V))
-    if (isEliminableCastPair(CI, opcode, Ty, TD))
+    if (isEliminableCastPair(CI, opc, Ty, TD))
       return false;
 
+  // If this is a vector sext from a compare, then we don't want to break the
+  // idiom where each element of the extended vector is either zero or all ones.
+  if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty))
+    return false;
+
   return true;
 }
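A sketch (not from the commit) of the eliminable-cast-pair early exit: for a cast whose input is itself a cast that isEliminableCastPair can collapse, the predicate returns false so the cast-pair transform gets to run first.

define i64 @pair(i8 %x) {
  %a = zext i8 %x to i32
  %b = zext i32 %a to i64   ; zext-of-zext: collapses to one zext i8 -> i64
  ret i64 %b
}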
@@ -51,6 +51,22 @@ entry:
 }
 
+; rdar://7434900
+define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) nounwind readnone {
+entry:
+  %cmp = fcmp ult <4 x float> %a, zeroinitializer
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+  %and = and <4 x i32> %sext, %sext5
+  %conv = bitcast <4 x i32> %and to <2 x i64>
+  ret <2 x i64> %conv
+
+; CHECK: @test5
+; CHECK: sext <4 x i1> %cmp to <4 x i32>
+; CHECK: sext <4 x i1> %cmp4 to <4 x i32>
+}
+
 define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
 entry:
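The CHECK lines assert that both vector sexts survive instcombine rather than being narrowed away. Assuming the file's usual instcombine RUN line (it is not shown in this hunk), the test would be driven by something like:

; RUN: opt < %s -instcombine -S | FileCheck %s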