[PPC64] Convert bool literals to i32

Convert i1 values to i32 values if they should be allocated in GPRs instead of CRs.

Phabricator: http://reviews.llvm.org/D14064
llvm-svn: 254942
This commit is contained in:
Kit Barton 2015-12-07 20:50:29 +00:00
parent c5e25856f0
commit d8708a5236
5 changed files with 464 additions and 0 deletions

View File

@ -13,6 +13,7 @@ tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
PPCBoolRetToInt.cpp
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
PPCCTRLoops.cpp

View File

@ -45,10 +45,12 @@ namespace llvm {
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
namespace PPCII {

View File

@ -0,0 +1,253 @@
//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements converting i1 values to i32 if they could be more
// profitably allocated as GPRs rather than CRs. This pass will become totally
// unnecessary if Register Bank Allocation and Global Instruction Selection ever
// go upstream.
//
// Presently, the pass converts i1 Constants, and Arguments to i32 if the
// transitive closure of their uses includes only PHINodes, CallInsts, and
// ReturnInsts. The rational is that arguments are generally passed and returned
// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
// actually save casts at the Machine Instruction level.
//
// It might be useful to expand this pass to add bit-wise operations to the list
// of safe transitive closure types. Also, we miss some opportunities when LLVM
// represents logical AND and OR operations with control flow rather than data
// flow. For example by lowering the expression: return (A && B && C)
//
// as: return A ? true : B && C.
//
// There's code in SimplifyCFG that code be used to turn control flow in data
// flow using SelectInsts. Selects are slow on some architectures (P7/P8), so
// this probably isn't good in general, but for the special case of i1, the
// Selects could be further lowered to bit operations that are fast everywhere.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Pass.h"
using namespace llvm;
namespace {
#define DEBUG_TYPE "bool-ret-to-int"
STATISTIC(NumBoolRetPromotion,
"Number of times a bool feeding a RetInst was promoted to an int");
STATISTIC(NumBoolCallPromotion,
"Number of times a bool feeding a CallInst was promoted to an int");
STATISTIC(NumBoolToIntPromotion,
"Total number of times a bool was promoted to an int");
class PPCBoolRetToInt : public FunctionPass {
static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
SmallPtrSet<Value *, 8> Defs;
SmallVector<Value *, 8> WorkList;
WorkList.push_back(V);
Defs.insert(V);
while (!WorkList.empty()) {
Value *Curr = WorkList.back();
WorkList.pop_back();
if (User *CurrUser = dyn_cast<User>(Curr))
for (auto &Op : CurrUser->operands())
if (Defs.insert(Op).second)
WorkList.push_back(Op);
}
return Defs;
}
// Translate a i1 value to an equivalent i32 value:
static Value *translate(Value *V) {
Type *Int32Ty = Type::getInt32Ty(V->getContext());
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getZExt(C, Int32Ty);
if (PHINode *P = dyn_cast<PHINode>(V)) {
// Temporarily set the operands to 0. We'll fix this later in
// runOnUse.
Value *Zero = Constant::getNullValue(Int32Ty);
PHINode *Q =
PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
for (unsigned i = 0; i < P->getNumOperands(); ++i)
Q->addIncoming(Zero, P->getIncomingBlock(i));
return Q;
}
Argument *A = dyn_cast<Argument>(V);
Instruction *I = dyn_cast<Instruction>(V);
assert((A || I) && "Unknown value type");
auto InstPt =
A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
return new ZExtInst(V, Int32Ty, "", InstPt);
}
typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
// A PHINode is Promotable if:
// 1. Its type is i1 AND
// 2. All of its uses are ReturnInt, CallInst, PHINode, or DbgInfoIntrinsic
// AND
// 3. All of its operands are Constant or Argument or
// CallInst or PHINode AND
// 4. All of its PHINode uses are Promotable AND
// 5. All of its PHINode operands are Promotable
static PHINodeSet getPromotablePHINodes(const Function &F) {
PHINodeSet Promotable;
// Condition 1
for (auto &BB : F)
for (auto &I : BB)
if (const PHINode *P = dyn_cast<PHINode>(&I))
if (P->getType()->isIntegerTy(1))
Promotable.insert(P);
SmallVector<const PHINode *, 8> ToRemove;
for (const auto &P : Promotable) {
// Condition 2 and 3
auto IsValidUser = [] (const Value *V) -> bool {
return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) ||
isa<DbgInfoIntrinsic>(V);
};
auto IsValidOperand = [] (const Value *V) -> bool {
return isa<Constant>(V) || isa<Argument>(V) || isa<CallInst>(V) ||
isa<PHINode>(V);
};
const auto &Users = P->users();
const auto &Operands = P->operands();
if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
!std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
ToRemove.push_back(P);
}
// Iterate to convergence
auto IsPromotable = [&Promotable] (const Value *V) -> bool {
const PHINode *Phi = dyn_cast<PHINode>(V);
return !Phi || Promotable.count(Phi);
};
while (!ToRemove.empty()) {
for (auto &User : ToRemove)
Promotable.erase(User);
ToRemove.clear();
for (const auto &P : Promotable) {
// Condition 4 and 5
const auto &Users = P->users();
const auto &Operands = P->operands();
if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
!std::all_of(Operands.begin(), Operands.end(), IsPromotable))
ToRemove.push_back(P);
}
}
return Promotable;
}
typedef DenseMap<Value *, Value *> B2IMap;
public:
static char ID;
PPCBoolRetToInt() : FunctionPass(ID) {
initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) {
PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
B2IMap Bool2IntMap;
bool Changed = false;
for (auto &BB : F) {
for (auto &I : BB) {
if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
if (F.getReturnType()->isIntegerTy(1))
Changed |=
runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
if (CallInst *CI = dyn_cast<CallInst>(&I))
for (auto &U : CI->operands())
if (U->getType()->isIntegerTy(1))
Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
}
}
return Changed;
}
static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
B2IMap &BoolToIntMap) {
auto Defs = findAllDefs(U);
// If the values are all Constants or Arguments, don't bother
if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
return false;
// Presently, we only know how to handle PHINode, Constant, and Arguments.
// Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
// could also be handled in the future.
for (const auto &V : Defs)
if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
return false;
for (const auto &V : Defs)
if (const PHINode *P = dyn_cast<PHINode>(V))
if (!PromotablePHINodes.count(P))
return false;
if (isa<ReturnInst>(U.getUser()))
++NumBoolRetPromotion;
if (isa<CallInst>(U.getUser()))
++NumBoolCallPromotion;
++NumBoolToIntPromotion;
for (const auto &V : Defs)
if (!BoolToIntMap.count(V))
BoolToIntMap[V] = translate(V);
// Replace the operands of the translated instructions. There were set to
// zero in the translate function.
for (auto &Pair : BoolToIntMap) {
User *First = dyn_cast<User>(Pair.first);
User *Second = dyn_cast<User>(Pair.second);
assert((!First || Second) && "translated from user to non-user!?");
if (First)
for (unsigned i = 0; i < First->getNumOperands(); ++i)
Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
}
Value *IntRetVal = BoolToIntMap[U];
Type *Int1Ty = Type::getInt1Ty(U->getContext());
Instruction *I = cast<Instruction>(U.getUser());
Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I);
U.set(BackToBool);
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
};
}
char PPCBoolRetToInt::ID = 0;
INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
"Convert i1 constants to i32 if they are returned",
false, false)
FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }

View File

@ -71,6 +71,9 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
}
/// Return the datalayout string of a subtarget.
@ -286,6 +289,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void PPCPassConfig::addIRPasses() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createPPCBoolRetToIntPass());
addPass(createAtomicExpandPass(&getPPCTargetMachine()));
// For the BG/Q (or if explicitly requested), add explicit data prefetch

View File

@ -0,0 +1,203 @@
; RUN: opt -bool-ret-to-int -S -o - < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; CHECK-LABEL: notBoolRet
define signext i32 @notBoolRet() {
entry:
; CHECK: ret i32 1
ret i32 1
}
; CHECK-LABEL: find
define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) {
entry:
%cmp.4 = icmp eq i8** %begin, %end
br i1 %cmp.4, label %cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond: ; preds = %for.body
%cmp = icmp eq i8** %incdec.ptr, %end
br i1 %cmp, label %cleanup.loopexit, label %for.body
for.body: ; preds = %for.body.preheader, %for.cond
%curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
%0 = load i8*, i8** %curr.05, align 8
%call = tail call zeroext i1 %hasProp(i8* %0)
%incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
br i1 %call, label %cleanup.loopexit, label %for.cond
cleanup.loopexit: ; preds = %for.body, %for.cond
; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
%cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
br label %cleanup
cleanup: ; preds = %cleanup.loopexit, %entry
; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
%cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
ret i1 %cleanup.dest.slot.0
}
; CHECK-LABEL: retFalse
define zeroext i1 @retFalse() {
entry:
; CHECK: ret i1 false
ret i1 false
}
; CHECK-LABEL: retCvtFalse
define zeroext i1 @retCvtFalse() {
entry:
; CHECK: ret i1 false
ret i1 trunc(i32 0 to i1)
}
; CHECK-LABEL: find_cont
define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) {
entry:
%cmp.4 = icmp eq i8** %begin, %end
br i1 %cmp.4, label %cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond: ; preds = %for.body
%cmp = icmp eq i8** %incdec.ptr, %end
br i1 %cmp, label %cleanup.loopexit, label %for.body
for.body: ; preds = %for.body.preheader, %for.cond
%curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
%0 = load i8*, i8** %curr.05, align 8
%call = tail call zeroext i1 %hasProp(i8* %0)
%incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
br i1 %call, label %cleanup.loopexit, label %for.cond
cleanup.loopexit: ; preds = %for.body, %for.cond
; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
%cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
br label %cleanup
cleanup: ; preds = %cleanup.loopexit, %entry
; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
%cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
; CHECK: call void %cont(i1 [[REG]]
tail call void %cont(i1 %cleanup.dest.slot.0)
ret void
}
; CHECK-LABEL: find_cont_ret
define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) {
entry:
%cmp.4 = icmp eq i8** %begin, %end
br i1 %cmp.4, label %cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond: ; preds = %for.body
%cmp = icmp eq i8** %incdec.ptr, %end
br i1 %cmp, label %cleanup.loopexit, label %for.body
for.body: ; preds = %for.body.preheader, %for.cond
%curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
%0 = load i8*, i8** %curr.05, align 8
%call = tail call zeroext i1 %hasProp(i8* %0)
%incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
br i1 %call, label %cleanup.loopexit, label %for.cond
cleanup.loopexit: ; preds = %for.body, %for.cond
; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
%cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
br label %cleanup
cleanup: ; preds = %cleanup.loopexit, %entry
; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
%cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
; CHECK: call void %cont(i1 [[REG]]
tail call void %cont(i1 %cleanup.dest.slot.0)
; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
ret i1 %cleanup.dest.slot.0
}
; CHECK-LABEL: arg_operand
define zeroext i1 @arg_operand(i1 %operand) {
entry:
br i1 %operand, label %foo, label %cleanup
foo:
br label %cleanup
cleanup:
; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
%result = phi i1 [ false, %foo ], [ %operand, %entry ]
ret i1 %result
}
; CHECK-LABEL: bad_use
define zeroext i1 @bad_use(i1 %operand) {
entry:
br i1 %operand, label %foo, label %cleanup
foo:
br label %cleanup
cleanup:
; CHECK: [[REG:%.+]] = phi i1
; CHECK: ret i1 [[REG]]
%result = phi i1 [ false, %foo], [ true, %entry ]
%0 = icmp eq i1 %result, %operand
ret i1 %result
}
; CHECK-LABEL: bad_use_closure
define zeroext i1 @bad_use_closure(i1 %operand) {
entry:
br i1 %operand, label %foo, label %cleanup
foo:
%bar = phi i1 [ false, %entry ]
%0 = icmp eq i1 %bar, %operand
br label %cleanup
cleanup:
; CHECK: [[REG:%.+]] = phi i1 [ true
; CHECK: ret i1 [[REG]]
%result = phi i1 [ true, %entry ], [ %bar, %foo]
ret i1 %result
}
; CHECK-LABEL: arg_test
define zeroext i1 @arg_test(i1 %operand) {
entry:
br i1 %operand, label %foo, label %cleanup
foo:
%bar = phi i1 [ false, %entry ]
br label %cleanup
; CHECK-LABEL: cleanup
cleanup:
; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
%result = phi i1 [ %bar, %foo], [ %operand, %entry ]
ret i1 %result
}
declare zeroext i1 @return_i1()
; CHECK-LABEL: call_test
define zeroext i1 @call_test() {
; CHECK: [[REG:%.+]] = call i1
%result = call i1 @return_i1()
; CHECK: ret i1 [[REG]]
ret i1 %result
}