[BDCE] Add a bit-tracking DCE pass

BDCE is a bit-tracking dead code elimination pass. It is based on ADCE (the
"aggressive DCE" pass), with the added capability to track dead bits of integer
valued instructions and remove those instructions when all of the bits are
dead.

Currently, it does not actually do this all-bits-dead removal, but rather
replaces the instruction's uses with a constant zero, and lets instcombine (and
the later run of ADCE) do the rest. Because we essentially get a run of ADCE
"for free" while tracking the dead bits, we also do what ADCE does and remove
actually-dead instructions as well (this includes instructions that are newly
trivially dead because all of their bits were dead, although not all such
instructions can be removed).

The motivation for this is a case like:

int __attribute__((const)) foo(int i);
int bar(int x) {
  x |= (4 & foo(5));
  x |= (8 & foo(3));
  x |= (16 & foo(2));
  x |= (32 & foo(1));
  x |= (64 & foo(0));
  x |= (128& foo(4));
  return x >> 4;
}

As it turns out, if you order the bit-field insertions so that all of the dead
ones come last, then instcombine will remove them. However, if you pick some
other order (such as the one above), the fact that some of the calls to foo()
are useless is not locally obvious, and we don't remove them (without this
pass).

I did a quick compile-time overhead check using sqlite from the test suite
(Release+Asserts). BDCE took ~0.4% of the compilation time (making it about
twice as expensive as ADCE).

I've not looked at why yet, but we eliminate instructions due to having
all-dead bits in:
External/SPEC/CFP2006/447.dealII/447.dealII
External/SPEC/CINT2006/400.perlbench/400.perlbench
External/SPEC/CINT2006/403.gcc/403.gcc
MultiSource/Applications/ClamAV/clamscan
MultiSource/Benchmarks/7zip/7zip-benchmark

llvm-svn: 229462
This commit is contained in:
Hal Finkel 2015-02-17 01:36:59 +00:00
parent 2754714fb9
commit 2bb61ba2fe
10 changed files with 812 additions and 0 deletions

View File

@ -35,6 +35,9 @@ extern "C" {
/** See llvm::createAggressiveDCEPass function. */
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
/** See llvm::createBitTrackingDCEPass function. */
void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
/** See llvm::createAlignmentFromAssumptionsPass function. */
void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM);

View File

@ -65,6 +65,7 @@ void initializeTarget(PassRegistry&);
void initializeAAEvalPass(PassRegistry&);
void initializeAddDiscriminatorsPass(PassRegistry&);
void initializeADCEPass(PassRegistry&);
void initializeBDCEPass(PassRegistry&);
void initializeAliasAnalysisAnalysisGroup(PassRegistry&);
void initializeAliasAnalysisCounterPass(PassRegistry&);
void initializeAliasDebuggerPass(PassRegistry&);

View File

@ -49,6 +49,7 @@ namespace {
(void) llvm::createAAEvalPass();
(void) llvm::createAggressiveDCEPass();
(void) llvm::createBitTrackingDCEPass();
(void) llvm::createAliasAnalysisCounterPass();
(void) llvm::createAliasDebugger();
(void) llvm::createArgumentPromotionPass();

View File

@ -80,6 +80,13 @@ FunctionPass *createDeadStoreEliminationPass();
//
FunctionPass *createAggressiveDCEPass();
//===----------------------------------------------------------------------===//
//
// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
// remove computations of dead bits.
//
FunctionPass *createBitTrackingDCEPass();
//===----------------------------------------------------------------------===//
//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.

View File

@ -252,6 +252,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());

View File

@ -0,0 +1,408 @@
//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Bit-Tracking Dead Code Elimination pass. Some
// instructions (shifts, some ands, ors, etc.) kill some of their input bits.
// We track these dead bits and remove instructions that compute only these
// dead bits.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "bdce"
STATISTIC(NumRemoved, "Number of instructions removed (unused)");
STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
namespace {
// Function pass implementing bit-tracking dead code elimination. It tracks,
// per integer-typed instruction, which result bits are alive, and uses that
// information to trivialize or remove instructions (see runOnFunction).
struct BDCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
BDCE() : FunctionPass(ID) {
initializeBDCEPass(*PassRegistry::getPassRegistry());
}
// Entry point invoked once per function; returns true if the IR was changed.
bool runOnFunction(Function& F) override;
// The pass does not modify the CFG, and needs the assumption cache and the
// dominator tree (both are forwarded to computeKnownBits).
void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
}
// Computes in AB the bits of operand I (operand number OperandNo of UserI)
// that are needed to produce the alive output bits AOut of UserI. The
// KnownZero/KnownOne(2) arguments are caller-owned scratch storage used to
// cache known-bits results across the per-operand calls for one UserI.
void determineLiveOperandBits(const Instruction *UserI,
const Instruction *I, unsigned OperandNo,
const APInt &AOut, APInt &AB,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2);
// Per-function analysis handles; assigned at the start of runOnFunction and
// not initialized by the constructor.
AssumptionCache *AC;
const DataLayout *DL;
DominatorTree *DT;
};
}
char BDCE::ID = 0;
INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
/// Returns true when \p I must be kept no matter whether its result is used:
/// terminators, debug-info intrinsics, landing pads, and any instruction that
/// may have side effects.
static bool isAlwaysLive(Instruction *I) {
  if (isa<TerminatorInst>(I))
    return true;
  if (isa<DbgInfoIntrinsic>(I))
    return true;
  if (isa<LandingPadInst>(I))
    return true;
  return I->mayHaveSideEffects();
}
// Determine which bits of one operand of UserI are alive, given the alive
// bits of UserI's own result.
//
//   UserI      - the instruction whose operand is being examined.
//   I          - the operand (itself an instruction) being examined.
//   OperandNo  - index of I within UserI's operand list.
//   AOut       - alive bits of UserI's result.
//   AB         - in/out: the alive bits of I. Its bit width is taken as the
//                operand's width. Opcodes (and intrinsics) not handled below
//                leave AB exactly as the caller initialized it.
//   KnownZero, KnownOne, KnownZero2, KnownOne2
//              - caller-owned storage for known-bits results, shared between
//                the calls made for the different operands of the same UserI.
void BDCE::determineLiveOperandBits(const Instruction *UserI,
const Instruction *I, unsigned OperandNo,
const APInt &AOut, APInt &AB,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2) {
unsigned BitWidth = AB.getBitWidth();
// We're called once per operand, but for some instructions, we need to
// compute known bits of both operands in order to determine the live bits of
// either (when both operands are instructions themselves). We don't,
// however, want to do this twice, so we cache the result in APInts that live
// in the caller. For the two-relevant-operands case, both operand values are
// provided here.
// V1's known bits go to KnownZero/KnownOne; if V2 is non-null, its known bits
// go to KnownZero2/KnownOne2.
auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1,
const Value *V2) {
KnownZero = APInt(BitWidth, 0);
KnownOne = APInt(BitWidth, 0);
computeKnownBits(const_cast<Value*>(V1), KnownZero, KnownOne, DL, 0, AC,
UserI, DT);
if (V2) {
KnownZero2 = APInt(BitWidth, 0);
KnownOne2 = APInt(BitWidth, 0);
computeKnownBits(const_cast<Value*>(V2), KnownZero2, KnownOne2, DL, 0, AC,
UserI, DT);
}
};
switch (UserI->getOpcode()) {
default: break;
case Instruction::Call:
case Instruction::Invoke:
// Only the intrinsics listed below are modeled; any other call leaves AB
// untouched.
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
// The alive bits of the input are the swapped alive bits of
// the output.
AB = AOut.byteSwap();
break;
case Intrinsic::ctlz:
if (OperandNo == 0) {
// We need some output bits, so we need all bits of the
// input to the left of, and including, the leftmost bit
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
std::min(BitWidth, KnownOne.countLeadingZeros()+1));
}
break;
case Intrinsic::cttz:
if (OperandNo == 0) {
// We need some output bits, so we need all bits of the
// input to the right of, and including, the rightmost bit
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
std::min(BitWidth, KnownOne.countTrailingZeros()+1));
}
break;
}
break;
case Instruction::Add:
case Instruction::Sub:
// Find the highest live output bit. We don't need any more input
// bits than that (adds, and thus subtracts, ripple only to the
// left).
AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
break;
case Instruction::Shl:
// Only the shifted value (operand 0) with a constant shift amount is
// modeled; the amount is clamped to BitWidth-1.
if (OperandNo == 0)
if (ConstantInt *CI =
dyn_cast<ConstantInt>(UserI->getOperand(1))) {
uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
AB = AOut.lshr(ShiftAmt);
// If the shift is nuw/nsw, then the high bits are not dead
// (because we've promised that they *must* be zero).
const ShlOperator *S = cast<ShlOperator>(UserI);
if (S->hasNoSignedWrap())
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
else if (S->hasNoUnsignedWrap())
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
}
break;
case Instruction::LShr:
if (OperandNo == 0)
if (ConstantInt *CI =
dyn_cast<ConstantInt>(UserI->getOperand(1))) {
uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
AB = AOut.shl(ShiftAmt);
// If the shift is exact, then the low bits are not dead
// (they must be zero).
if (cast<LShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
break;
case Instruction::AShr:
if (OperandNo == 0)
if (ConstantInt *CI =
dyn_cast<ConstantInt>(UserI->getOperand(1))) {
uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
AB = AOut.shl(ShiftAmt);
// Because the high input bit is replicated into the
// high-order bits of the result, if we need any of those
// bits, then we must keep the highest input bit.
if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
.getBoolValue())
AB.setBit(BitWidth-1);
// If the shift is exact, then the low bits are not dead
// (they must be zero).
if (cast<AShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
break;
case Instruction::And:
AB = AOut;
// For bits that are known zero, the corresponding bits in the
// other operand are dead (unless they're both zero, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
if (OperandNo == 0) {
ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
AB &= ~KnownZero2;
} else {
// When operand 0 is an instruction, the known bits are not recomputed
// here; this presumably relies on the values cached by the earlier
// operand-0 call for the same UserI.
if (!isa<Instruction>(UserI->getOperand(0)))
ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
AB &= ~(KnownZero & ~KnownZero2);
}
break;
case Instruction::Or:
AB = AOut;
// For bits that are known one, the corresponding bits in the
// other operand are dead (unless they're both one, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
if (OperandNo == 0) {
ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
AB &= ~KnownOne2;
} else {
// Same caching arrangement as in the And case above.
if (!isa<Instruction>(UserI->getOperand(0)))
ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
AB &= ~(KnownOne & ~KnownOne2);
}
break;
case Instruction::Xor:
case Instruction::PHI:
// Bitwise pass-through: an input bit is alive iff the same output bit is.
AB = AOut;
break;
case Instruction::Trunc:
// The output is narrower than the input; widen AOut to the operand width
// (the extra high bits of the input come out dead).
AB = AOut.zext(BitWidth);
break;
case Instruction::ZExt:
// The output is wider than the input; only the low BitWidth bits of AOut
// correspond to input bits.
AB = AOut.trunc(BitWidth);
break;
case Instruction::SExt:
AB = AOut.trunc(BitWidth);
// Because the high input bit is replicated into the
// high-order bits of the result, if we need any of those
// bits, then we must keep the highest input bit.
if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
AOut.getBitWidth() - BitWidth))
.getBoolValue())
AB.setBit(BitWidth-1);
break;
case Instruction::Select:
// Operand 0 is the condition and keeps the caller-provided AB; the two
// selected values inherit the alive bits of the result.
if (OperandNo != 0)
AB = AOut;
break;
}
}
/// Runs bit-tracking dead code elimination on \p F.
///
/// The algorithm has three phases:
///  1. Seed a worklist with the "root" instructions (those for which
///     isAlwaysLive returns true) and with their instruction operands.
///  2. Propagate liveness backwards: for each visited instruction, compute
///     which bits of each integer-typed operand are needed, OR them into the
///     operand's alive-bit set, and re-queue the operand when the set grows.
///  3. Replace the uses of integer instructions with no alive bits by zero,
///     and erase every instruction that was never reached and is not always
///     live.
///
/// \returns true if the function was modified.
bool BDCE::runOnFunction(Function& F) {
  if (skipOptnoneFunction(F))
    return false;

  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DL = F.getParent()->getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  DenseMap<Instruction *, APInt> AliveBits;
  SmallVector<Instruction*, 128> Worklist;

  // The set of visited instructions (non-integer-typed only).
  SmallPtrSet<Instruction*, 128> Visited;

  // Collect the set of "root" instructions that are known live.
  for (Instruction &I : inst_range(F)) {
    if (!isAlwaysLive(&I))
      continue;

    // For integer-valued instructions, set up an initial empty set of alive
    // bits and add the instruction to the work list. For other instructions
    // add their operands to the work list (for integer values operands, mark
    // all bits as live).
    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
      // Do not clobber alive bits that were already recorded for this
      // instruction: a non-integer root visited earlier in iteration order
      // (e.g. a store or return located in a block laid out before the
      // definition) may have marked all of its bits alive. In that case the
      // instruction is already on the work list as well.
      if (!AliveBits.count(&I)) {
        AliveBits[&I] = APInt(IT->getBitWidth(), 0);
        Worklist.push_back(&I);
      }
      continue;
    }

    // Non-integer-typed instructions...
    for (Use &OI : I.operands()) {
      if (Instruction *J = dyn_cast<Instruction>(OI)) {
        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
        Worklist.push_back(J);
      }
    }
    // To save memory, we don't add I to the Visited set here. Instead, we
    // check isAlwaysLive on every instruction when searching for dead
    // instructions later (we need to check isAlwaysLive for the
    // integer-typed instructions anyway).
  }

  // Propagate liveness backwards to operands.
  while (!Worklist.empty()) {
    Instruction *UserI = Worklist.pop_back_val();

    DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
    APInt AOut;
    if (UserI->getType()->isIntegerTy()) {
      AOut = AliveBits[UserI];
      DEBUG(dbgs() << " Alive Out: " << AOut);
    }
    DEBUG(dbgs() << "\n");

    if (!UserI->getType()->isIntegerTy())
      Visited.insert(UserI);

    // Scratch storage shared by the determineLiveOperandBits calls made for
    // the operands of this UserI.
    APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
    // Compute the set of alive bits for each operand. These are ORed into the
    // existing set, if any, and if that changes the set of alive bits, the
    // operand is added to the work-list.
    for (Use &OI : UserI->operands()) {
      if (Instruction *I = dyn_cast<Instruction>(OI)) {
        if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
          unsigned BitWidth = IT->getBitWidth();
          APInt AB = APInt::getAllOnesValue(BitWidth);
          if (UserI->getType()->isIntegerTy() && !AOut &&
              !isAlwaysLive(UserI)) {
            // If all bits of the output are dead (and the user does not have
            // to be kept for other reasons), then all bits of the input are
            // dead as well.
            AB = APInt(BitWidth, 0);
          } else {
            // Bits of each operand that are used to compute alive bits of the
            // output are alive, all others are dead.
            determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
                                     KnownZero, KnownOne,
                                     KnownZero2, KnownOne2);
          }

          // If we've added to the set of alive bits (or the operand has not
          // been previously visited), then re-queue the operand to be visited
          // again.
          APInt ABPrev(BitWidth, 0);
          auto ABI = AliveBits.find(I);
          if (ABI != AliveBits.end())
            ABPrev = ABI->second;

          APInt ABNew = AB | ABPrev;
          if (ABNew != ABPrev || ABI == AliveBits.end()) {
            AliveBits[I] = std::move(ABNew);
            Worklist.push_back(I);
          }
        } else if (!Visited.count(I)) {
          Worklist.push_back(I);
        }
      }
    }
  }

  bool Changed = false;
  // The inverse of the live set is the dead set. These are those instructions
  // which have no side effects and do not influence the control flow or return
  // value of the function, and may therefore be deleted safely.
  // NOTE: We reuse the Worklist vector here for memory efficiency.
  for (Instruction &I : inst_range(F)) {
    // For live instructions that have all dead bits, first make them dead by
    // replacing all uses with something else. Then, if they don't need to
    // remain live (because they have side effects, etc.) we can remove them.
    if (I.getType()->isIntegerTy()) {
      auto ABI = AliveBits.find(&I);
      if (ABI != AliveBits.end()) {
        if (ABI->second.getBoolValue())
          continue;

        DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
        // FIXME: In theory we could substitute undef here instead of zero.
        // This should be reconsidered once we settle on the semantics of
        // undef, poison, etc.
        Value *Zero = ConstantInt::get(I.getType(), 0);
        ++NumSimplified;
        I.replaceAllUsesWith(Zero);
        Changed = true;
      }
    } else if (Visited.count(&I)) {
      continue;
    }

    if (isAlwaysLive(&I))
      continue;

    DEBUG(dbgs() << "BDCE: Removing: " << I << " (unused)\n");
    Worklist.push_back(&I);
    // Drop operand references first so that dead instructions referring to
    // each other can be erased in any order below.
    I.dropAllReferences();
    Changed = true;
  }

  for (Instruction *I : Worklist) {
    ++NumRemoved;
    I->eraseFromParent();
  }

  return Changed;
}
/// Factory for the bit-tracking DCE pass: returns a newly heap-allocated BDCE
/// instance as a FunctionPass.
FunctionPass *llvm::createBitTrackingDCEPass() {
  FunctionPass *NewPass = new BDCE();
  return NewPass;
}

View File

@ -1,6 +1,7 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
AlignmentFromAssumptions.cpp
BDCE.cpp
ConstantHoisting.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp

View File

@ -28,6 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
@ -83,6 +84,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
/// C API wrapper: creates a bit-tracking DCE pass and adds it to the pass
/// manager wrapped by \p PM.
void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
  FunctionPass *BDCEPass = createBitTrackingDCEPass();
  unwrap(PM)->add(BDCEPass);
}
/// C API wrapper: creates an alignment-from-assumptions pass and adds it to
/// the pass manager wrapped by \p PM.
void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
  FunctionPass *AlignPass = createAlignmentFromAssumptionsPass();
  unwrap(PM)->add(AlignPass);
}

View File

@ -0,0 +1,348 @@
; RUN: opt -S -bdce -instsimplify < %s | FileCheck %s
; RUN: opt -S -instsimplify < %s | FileCheck %s -check-prefix=CHECK-IO
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind readnone
define signext i32 @bar(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 4
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 8
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%shr = ashr i32 %or15, 4
ret i32 %shr
; CHECK-LABEL: @bar
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
; Check that instsimplify is not doing this all on its own.
; CHECK-IO-LABEL: @bar
; CHECK-IO: tail call signext i32 @foo(i32 signext 5)
; CHECK-IO: tail call signext i32 @foo(i32 signext 3)
; CHECK-IO: tail call signext i32 @foo(i32 signext 2)
; CHECK-IO: tail call signext i32 @foo(i32 signext 1)
; CHECK-IO: tail call signext i32 @foo(i32 signext 0)
; CHECK-IO: tail call signext i32 @foo(i32 signext 4)
; CHECK-IO: ret i32
}
; Function Attrs: nounwind readnone
declare signext i32 @foo(i32 signext) #0
; Function Attrs: nounwind readnone
define signext i32 @far(i32 signext %x) #1 {
entry:
%call = tail call signext i32 @goo(i32 signext 5) #1
%and = and i32 %call, 4
%or = or i32 %and, %x
%call1 = tail call signext i32 @goo(i32 signext 3) #1
%and2 = and i32 %call1, 8
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @goo(i32 signext 2) #1
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @goo(i32 signext 1) #1
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @goo(i32 signext 0) #1
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @goo(i32 signext 4) #1
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%shr = ashr i32 %or15, 4
ret i32 %shr
; CHECK-LABEL: @far
; Calls to goo(5) and goo(3) are still there, but their results are not used.
; CHECK: tail call signext i32 @goo(i32 signext 5)
; CHECK-NEXT: tail call signext i32 @goo(i32 signext 3)
; CHECK-NEXT: tail call signext i32 @goo(i32 signext 2)
; CHECK: tail call signext i32 @goo(i32 signext 1)
; CHECK: tail call signext i32 @goo(i32 signext 0)
; CHECK: tail call signext i32 @goo(i32 signext 4)
; CHECK: ret i32
; Check that instsimplify is not doing this all on its own.
; CHECK-IO-LABEL: @far
; CHECK-IO: tail call signext i32 @goo(i32 signext 5)
; CHECK-IO: tail call signext i32 @goo(i32 signext 3)
; CHECK-IO: tail call signext i32 @goo(i32 signext 2)
; CHECK-IO: tail call signext i32 @goo(i32 signext 1)
; CHECK-IO: tail call signext i32 @goo(i32 signext 0)
; CHECK-IO: tail call signext i32 @goo(i32 signext 4)
; CHECK-IO: ret i32
}
declare signext i32 @goo(i32 signext) #1
; Function Attrs: nounwind readnone
define signext i32 @tar1(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%bs = tail call i32 @llvm.bswap.i32(i32 %or15) #0
%shr = ashr i32 %bs, 4
ret i32 %shr
; CHECK-LABEL: @tar1
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
}
; Function Attrs: nounwind readnone
declare i32 @llvm.bswap.i32(i32) #0
; Function Attrs: nounwind readnone
define signext i32 @tar2(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%shl = shl i32 %or15, 10
ret i32 %shl
; CHECK-LABEL: @tar2
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
}
; Function Attrs: nounwind readnone
define signext i32 @tar3(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%add = add i32 %or15, 5
%shl = shl i32 %add, 10
ret i32 %shl
; CHECK-LABEL: @tar3
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
}
; Function Attrs: nounwind readnone
define signext i32 @tar4(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%sub = sub i32 %or15, 5
%shl = shl i32 %sub, 10
ret i32 %shl
; CHECK-LABEL: @tar4
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
}
; Function Attrs: nounwind readnone
define signext i32 @tar5(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%xor = xor i32 %or15, 5
%shl = shl i32 %xor, 10
ret i32 %shl
; CHECK-LABEL: @tar5
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
}
; Function Attrs: nounwind readnone
define signext i32 @tar7(i32 signext %x, i1 %b) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%v = select i1 %b, i32 %or15, i32 5
%shl = shl i32 %v, 10
ret i32 %shl
; CHECK-LABEL: @tar7
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i32
}
; Function Attrs: nounwind readnone
define signext i16 @tar8(i32 signext %x) #0 {
entry:
%call = tail call signext i32 @foo(i32 signext 5) #0
%and = and i32 %call, 33554432
%or = or i32 %and, %x
%call1 = tail call signext i32 @foo(i32 signext 3) #0
%and2 = and i32 %call1, 67108864
%or3 = or i32 %or, %and2
%call4 = tail call signext i32 @foo(i32 signext 2) #0
%and5 = and i32 %call4, 16
%or6 = or i32 %or3, %and5
%call7 = tail call signext i32 @foo(i32 signext 1) #0
%and8 = and i32 %call7, 32
%or9 = or i32 %or6, %and8
%call10 = tail call signext i32 @foo(i32 signext 0) #0
%and11 = and i32 %call10, 64
%or12 = or i32 %or9, %and11
%call13 = tail call signext i32 @foo(i32 signext 4) #0
%and14 = and i32 %call13, 128
%or15 = or i32 %or12, %and14
%tr = trunc i32 %or15 to i16
ret i16 %tr
; CHECK-LABEL: @tar8
; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
; CHECK: tail call signext i32 @foo(i32 signext 2)
; CHECK: tail call signext i32 @foo(i32 signext 1)
; CHECK: tail call signext i32 @foo(i32 signext 0)
; CHECK: tail call signext i32 @foo(i32 signext 4)
; CHECK: ret i16
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,33 @@
; RUN: opt -bdce -S < %s | FileCheck %s
declare i32 @strlen(i8*) readonly nounwind
define void @test1() {
call i32 @strlen( i8* null )
ret void
; CHECK-LABEL: @test1
; CHECK-NOT: call
; CHECK: ret void
}
define i32 @test2() {
; invoke of pure function should not be deleted!
invoke i32 @strlen( i8* null ) readnone
to label %Cont unwind label %Other
Cont: ; preds = %0
ret i32 0
Other: ; preds = %0
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
ret i32 1
; CHECK-LABEL: @test2
; CHECK: invoke
; CHECK: ret i32 1
}
declare i32 @__gxx_personality_v0(...)