mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-22 02:05:01 +00:00
Fix another miscompile in the llvm.sadd.with.overflow formation logic: it wasn't
checking whether the high bits of the original add result were dead. Inserting a narrower add and zero-extending its result back to the original width is not good enough when those high bits are still used. This is likely to be the fix for 8816. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122177 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d959da92a5
commit
dd7e837374
@ -1585,6 +1585,11 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
|
||||
|
||||
/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
|
||||
/// I = icmp ugt (add (add A, B), CI2), CI1
|
||||
/// If this is of the form:
|
||||
/// sum = a + b
|
||||
/// if (sum+128 >u 255)
|
||||
/// Then replace it with llvm.sadd.with.overflow.i8.
|
||||
///
|
||||
static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
|
||||
ConstantInt *CI2, ConstantInt *CI1,
|
||||
InstCombiner::BuilderTy *Builder) {
|
||||
@ -1595,9 +1600,41 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
|
||||
|
||||
// In order to eliminate the add-with-constant, the compare can be its only
|
||||
// use.
|
||||
Value *AddWithCst = I.getOperand(0);
|
||||
Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
|
||||
if (!AddWithCst->hasOneUse()) return 0;
|
||||
|
||||
|
||||
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
|
||||
if (!CI2->getValue().isPowerOf2()) return 0;
|
||||
unsigned NewWidth = CI2->getValue().countTrailingZeros();
|
||||
if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
|
||||
|
||||
// The width of the new add formed is 1 more than the bias.
|
||||
++NewWidth;
|
||||
|
||||
// Check to see that CI1 is an all-ones value with NewWidth bits.
|
||||
if (CI1->getBitWidth() == NewWidth ||
|
||||
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
|
||||
return 0;
|
||||
|
||||
// In order to replace the original add with a narrower
|
||||
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
|
||||
// and truncates that discard the high bits of the add. Verify that this is
|
||||
// the case.
|
||||
Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
|
||||
for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
|
||||
UI != E; ++UI) {
|
||||
if (*UI == AddWithCst) continue;
|
||||
|
||||
// Only accept truncates for now. We would really like a nice recursive
|
||||
// predicate like SimplifyDemandedBits, but which goes downwards the use-def
|
||||
// chain to see which bits of a value are actually demanded. If the
|
||||
// original add had another add which was then immediately truncated, we
|
||||
// could still do the transformation.
|
||||
TruncInst *TI = dyn_cast<TruncInst>(*UI);
|
||||
if (TI == 0 ||
|
||||
TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
|
||||
}
|
||||
|
||||
const IntegerType *WideType = cast<IntegerType>(CI1->getType());
|
||||
unsigned WideWidth = WideType->getBitWidth();
|
||||
unsigned NarrowWidth = WideWidth / 2;
|
||||
@ -1630,8 +1667,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
|
||||
// If the pattern matches, truncate the inputs to the narrower type and
|
||||
// use the sadd_with_overflow intrinsic to efficiently compute both the
|
||||
// result and the overflow bit.
|
||||
Instruction *OrigAdd =
|
||||
cast<Instruction>(cast<Instruction>(I.getOperand(0))->getOperand(0));
|
||||
Builder->SetInsertPoint(OrigAdd->getParent(),
|
||||
BasicBlock::iterator(OrigAdd));
|
||||
|
||||
|
@ -11,7 +11,7 @@ entry:
|
||||
%conv2 = sext i32 %b to i64
|
||||
%add = add nsw i64 %conv2, %conv
|
||||
%add.off = add i64 %add, 2147483648
|
||||
; CHECK: llvm.sadd.with.overflow
|
||||
; CHECK: llvm.sadd.with.overflow.i32
|
||||
%0 = icmp ugt i64 %add.off, 4294967295
|
||||
br i1 %0, label %if.then, label %if.end
|
||||
|
||||
@ -53,3 +53,24 @@ if.end:
|
||||
ret i32 %conv9
|
||||
}
|
||||
|
||||
; CHECK: test3
|
||||
; This is illegal to transform because the high bits of the original add are
|
||||
; live out.
|
||||
define i64 @test3(i32 %a, i32 %b) nounwind ssp {
|
||||
entry:
|
||||
%conv = sext i32 %a to i64
|
||||
%conv2 = sext i32 %b to i64
|
||||
%add = add nsw i64 %conv2, %conv
|
||||
%add.off = add i64 %add, 2147483648
|
||||
; CHECK-NOT: llvm.sadd.with.overflow
|
||||
%0 = icmp ugt i64 %add.off, 4294967295
|
||||
br i1 %0, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
%call = tail call i32 (...)* @throwAnExceptionOrWhatever() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret i64 %add
|
||||
; CHECK: ret i64
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user