mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-16 16:37:42 +00:00
[BypassSlowDivision] Refactor fast division insertion logic (NFC)
The most important goal of the patch is to break the large insertFastDiv function into separate pieces, so that a different fast insertion logic can later be implemented using some of these pieces. Differential Revision: https://reviews.llvm.org/D29896 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296828 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
15497c13fd
commit
7c6958332a
@ -36,12 +36,21 @@ namespace {
|
||||
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
|
||||
};
|
||||
|
||||
struct DivPhiNodes {
|
||||
PHINode *Quotient;
|
||||
PHINode *Remainder;
|
||||
struct QuotRemPair {
|
||||
Value *Quotient;
|
||||
Value *Remainder;
|
||||
|
||||
DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
|
||||
: Quotient(InQuotient), Remainder(InRemainder) {}
|
||||
QuotRemPair(Value *InQuotient, Value *InRemainder)
|
||||
: Quotient(InQuotient), Remainder(InRemainder) {}
|
||||
};
|
||||
|
||||
/// A quotient and remainder, plus a BB from which they logically "originate".
|
||||
/// If you use Quotient or Remainder in a Phi node, you should use BB as its
|
||||
/// corresponding predecessor.
|
||||
struct QuotRemWithBB {
|
||||
BasicBlock *BB = nullptr;
|
||||
Value *Quotient = nullptr;
|
||||
Value *Remainder = nullptr;
|
||||
};
|
||||
}
|
||||
|
||||
@ -69,92 +78,174 @@ namespace llvm {
|
||||
}
|
||||
};
|
||||
|
||||
typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
|
||||
typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
|
||||
typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
|
||||
}
|
||||
|
||||
// insertFastDiv - Substitutes the div/rem instruction with code that checks the
|
||||
// value of the operands and uses a shorter-faster div/rem instruction when
|
||||
// possible and the longer-slower div/rem instruction otherwise.
|
||||
static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
|
||||
bool UseDivOp, bool UseSignedOp,
|
||||
DivCacheTy &PerBBDivCache) {
|
||||
Function *F = I->getParent()->getParent();
|
||||
// Get instruction operands
|
||||
Value *Dividend = I->getOperand(0);
|
||||
Value *Divisor = I->getOperand(1);
|
||||
namespace {
|
||||
class FastDivInsertionTask {
|
||||
bool IsValidTask = false;
|
||||
Instruction *SlowDivOrRem = nullptr;
|
||||
IntegerType *BypassType = nullptr;
|
||||
BasicBlock *MainBB = nullptr;
|
||||
|
||||
if (isa<ConstantInt>(Divisor)) {
|
||||
// Division by a constant should have been solved and replaced earlier
|
||||
// in the pipeline.
|
||||
return false;
|
||||
QuotRemWithBB createSlowBB(BasicBlock *Successor);
|
||||
QuotRemWithBB createFastBB(BasicBlock *Successor);
|
||||
QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
|
||||
BasicBlock *PhiBB);
|
||||
Value *insertOperandRuntimeCheck();
|
||||
Optional<QuotRemPair> insertFastDivAndRem();
|
||||
|
||||
bool isSignedOp() {
|
||||
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
|
||||
SlowDivOrRem->getOpcode() == Instruction::SRem;
|
||||
}
|
||||
bool isDivisionOp() {
|
||||
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
|
||||
SlowDivOrRem->getOpcode() == Instruction::UDiv;
|
||||
}
|
||||
Type *getSlowType() { return SlowDivOrRem->getType(); }
|
||||
|
||||
public:
|
||||
FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
|
||||
Value *getReplacement(DivCacheTy &Cache);
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
|
||||
const BypassWidthsTy &BypassWidths) {
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
SlowDivOrRem = I;
|
||||
break;
|
||||
default:
|
||||
// I is not a div/rem operation.
|
||||
return;
|
||||
}
|
||||
|
||||
// If the numerator is a constant, bail if it doesn't fit into BypassType.
|
||||
if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
|
||||
if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
|
||||
return false;
|
||||
// Skip division on vector types. Only optimize integer instructions.
|
||||
IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
|
||||
if (!SlowType)
|
||||
return;
|
||||
|
||||
// Basic Block is split before divide
|
||||
BasicBlock *MainBB = &*I->getParent();
|
||||
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
|
||||
// Skip if this bitwidth is not bypassed.
|
||||
auto BI = BypassWidths.find(SlowType->getBitWidth());
|
||||
if (BI == BypassWidths.end())
|
||||
return;
|
||||
|
||||
// Add new basic block for slow divide operation
|
||||
BasicBlock *SlowBB =
|
||||
BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
|
||||
SlowBB->moveBefore(SuccessorBB);
|
||||
IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
|
||||
Value *SlowQuotientV;
|
||||
Value *SlowRemainderV;
|
||||
if (UseSignedOp) {
|
||||
SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
|
||||
SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
|
||||
// Get type for div/rem instruction with bypass bitwidth.
|
||||
IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
|
||||
BypassType = BT;
|
||||
|
||||
// The original basic block.
|
||||
MainBB = I->getParent();
|
||||
|
||||
// The instruction is indeed a slow div or rem operation.
|
||||
IsValidTask = true;
|
||||
}
|
||||
|
||||
/// Reuses previously-computed dividend or remainder from the current BB if
|
||||
/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
|
||||
/// perform the optimization and caches the resulting dividend and remainder.
|
||||
/// If no replacement can be generated, nullptr is returned.
|
||||
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
|
||||
// First, make sure that the task is valid.
|
||||
if (!IsValidTask)
|
||||
return nullptr;
|
||||
|
||||
// Then, look for a value in Cache.
|
||||
Value *Dividend = SlowDivOrRem->getOperand(0);
|
||||
Value *Divisor = SlowDivOrRem->getOperand(1);
|
||||
DivOpInfo Key(isSignedOp(), Dividend, Divisor);
|
||||
auto CacheI = Cache.find(Key);
|
||||
|
||||
if (CacheI == Cache.end()) {
|
||||
// If previous instance does not exist, try to insert fast div.
|
||||
Optional<QuotRemPair> OptResult = insertFastDivAndRem();
|
||||
// Bail out if insertFastDivAndRem has failed.
|
||||
if (!OptResult)
|
||||
return nullptr;
|
||||
CacheI = Cache.insert({Key, *OptResult}).first;
|
||||
}
|
||||
|
||||
QuotRemPair &Value = CacheI->second;
|
||||
return isDivisionOp() ? Value.Quotient : Value.Remainder;
|
||||
}
|
||||
|
||||
/// Add new basic block for slow div and rem operations and put it before
|
||||
/// SuccessorBB.
|
||||
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
|
||||
QuotRemWithBB DivRemPair;
|
||||
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
|
||||
MainBB->getParent(), SuccessorBB);
|
||||
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
|
||||
|
||||
Value *Dividend = SlowDivOrRem->getOperand(0);
|
||||
Value *Divisor = SlowDivOrRem->getOperand(1);
|
||||
|
||||
if (isSignedOp()) {
|
||||
DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
|
||||
DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
|
||||
} else {
|
||||
SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
|
||||
SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
|
||||
DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
|
||||
DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
|
||||
}
|
||||
SlowBuilder.CreateBr(SuccessorBB);
|
||||
|
||||
// Add new basic block for fast divide operation
|
||||
BasicBlock *FastBB =
|
||||
BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
|
||||
FastBB->moveBefore(SlowBB);
|
||||
IRBuilder<> FastBuilder(FastBB, FastBB->begin());
|
||||
Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
|
||||
BypassType);
|
||||
Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
|
||||
BypassType);
|
||||
Builder.CreateBr(SuccessorBB);
|
||||
return DivRemPair;
|
||||
}
|
||||
|
||||
// udiv/urem because optimization only handles positive numbers
|
||||
Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
|
||||
Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
|
||||
ShortDivisorV);
|
||||
Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
|
||||
ShortQuotientV,
|
||||
Dividend->getType());
|
||||
Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
|
||||
ShortRemainderV,
|
||||
Dividend->getType());
|
||||
FastBuilder.CreateBr(SuccessorBB);
|
||||
/// Add new basic block for fast div and rem operations and put it before
|
||||
/// SuccessorBB.
|
||||
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
|
||||
QuotRemWithBB DivRemPair;
|
||||
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
|
||||
MainBB->getParent(), SuccessorBB);
|
||||
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
|
||||
|
||||
// Phi nodes for result of div and rem
|
||||
IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
|
||||
PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
|
||||
QuoPhi->addIncoming(SlowQuotientV, SlowBB);
|
||||
QuoPhi->addIncoming(FastQuotientV, FastBB);
|
||||
PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
|
||||
RemPhi->addIncoming(SlowRemainderV, SlowBB);
|
||||
RemPhi->addIncoming(FastRemainderV, FastBB);
|
||||
Value *Dividend = SlowDivOrRem->getOperand(0);
|
||||
Value *Divisor = SlowDivOrRem->getOperand(1);
|
||||
Value *ShortDivisorV =
|
||||
Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
|
||||
Value *ShortDividendV =
|
||||
Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
|
||||
|
||||
// Replace I with appropriate phi node
|
||||
if (UseDivOp)
|
||||
I->replaceAllUsesWith(QuoPhi);
|
||||
else
|
||||
I->replaceAllUsesWith(RemPhi);
|
||||
I->eraseFromParent();
|
||||
// udiv/urem because this optimization only handles positive numbers.
|
||||
Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
|
||||
Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
|
||||
DivRemPair.Quotient =
|
||||
Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
|
||||
DivRemPair.Remainder =
|
||||
Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
|
||||
Builder.CreateBr(SuccessorBB);
|
||||
|
||||
// Combine operands into a single value with OR for value testing below
|
||||
MainBB->getInstList().back().eraseFromParent();
|
||||
IRBuilder<> MainBuilder(MainBB, MainBB->end());
|
||||
return DivRemPair;
|
||||
}
|
||||
|
||||
/// Creates Phi nodes for result of Div and Rem.
|
||||
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
|
||||
QuotRemWithBB &RHS,
|
||||
BasicBlock *PhiBB) {
|
||||
IRBuilder<> Builder(PhiBB, PhiBB->begin());
|
||||
PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
|
||||
QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
|
||||
QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
|
||||
PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
|
||||
RemPhi->addIncoming(LHS.Remainder, LHS.BB);
|
||||
RemPhi->addIncoming(RHS.Remainder, RHS.BB);
|
||||
return QuotRemPair(QuoPhi, RemPhi);
|
||||
}
|
||||
|
||||
/// Creates a runtime check to test whether both the divisor and dividend fit
|
||||
/// into BypassType. The check is inserted at the end of MainBB. True return
|
||||
/// value means that the operands fit.
|
||||
Value *FastDivInsertionTask::insertOperandRuntimeCheck() {
|
||||
IRBuilder<> Builder(MainBB, MainBB->end());
|
||||
Value *Dividend = SlowDivOrRem->getOperand(0);
|
||||
Value *Divisor = SlowDivOrRem->getOperand(1);
|
||||
|
||||
// We should have bailed out above if the divisor is a constant, but the
|
||||
// dividend may still be a constant. Set OrV to our non-constant operands
|
||||
@ -163,65 +254,54 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
|
||||
|
||||
Value *OrV;
|
||||
if (!isa<ConstantInt>(Dividend))
|
||||
OrV = MainBuilder.CreateOr(Dividend, Divisor);
|
||||
OrV = Builder.CreateOr(Dividend, Divisor);
|
||||
else
|
||||
OrV = Divisor;
|
||||
|
||||
// BitMask is inverted to check if the operands are
|
||||
// larger than the bypass type
|
||||
uint64_t BitMask = ~BypassType->getBitMask();
|
||||
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
|
||||
Value *AndV = Builder.CreateAnd(OrV, BitMask);
|
||||
|
||||
// Compare operand values and branch
|
||||
Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
|
||||
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
|
||||
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
|
||||
|
||||
// Cache phi nodes to be used later in place of other instances
|
||||
// of div or rem with the same sign, dividend, and divisor
|
||||
DivOpInfo Key(UseSignedOp, Dividend, Divisor);
|
||||
DivPhiNodes Value(QuoPhi, RemPhi);
|
||||
PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
|
||||
return true;
|
||||
// Compare operand values
|
||||
Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
|
||||
return Builder.CreateICmpEQ(AndV, ZeroV);
|
||||
}
|
||||
|
||||
// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
|
||||
// the current BB if operands and operation are identical. Otherwise calls
|
||||
// insertFastDiv to perform the optimization and caches the resulting dividend
|
||||
// and remainder.
|
||||
static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
|
||||
bool UseDivOp, bool UseSignedOp,
|
||||
DivCacheTy &PerBBDivCache) {
|
||||
// Get instruction operands
|
||||
DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
|
||||
DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
|
||||
/// Substitutes the div/rem instruction with code that checks the value of the
|
||||
/// operands and uses a shorter-faster div/rem instruction when possible.
|
||||
Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
|
||||
Value *Dividend = SlowDivOrRem->getOperand(0);
|
||||
Value *Divisor = SlowDivOrRem->getOperand(1);
|
||||
|
||||
if (CacheI == PerBBDivCache.end()) {
|
||||
// If previous instance does not exist, insert fast div
|
||||
return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
|
||||
if (isa<ConstantInt>(Divisor)) {
|
||||
// Keep division by a constant for DAGCombiner.
|
||||
return None;
|
||||
}
|
||||
|
||||
// Replace operation value with previously generated phi node
|
||||
DivPhiNodes &Value = CacheI->second;
|
||||
if (UseDivOp) {
|
||||
// Replace all uses of div instruction with quotient phi node
|
||||
I->replaceAllUsesWith(Value.Quotient);
|
||||
} else {
|
||||
// Replace all uses of rem instruction with remainder phi node
|
||||
I->replaceAllUsesWith(Value.Remainder);
|
||||
}
|
||||
// If the numerator is a constant, bail if it doesn't fit into BypassType.
|
||||
if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
|
||||
if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
|
||||
return None;
|
||||
|
||||
// Remove redundant operation
|
||||
I->eraseFromParent();
|
||||
return true;
|
||||
// Split the basic block before the div/rem.
|
||||
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
|
||||
// Remove the unconditional branch from MainBB to SuccessorBB.
|
||||
MainBB->getInstList().back().eraseFromParent();
|
||||
QuotRemWithBB Fast = createFastBB(SuccessorBB);
|
||||
QuotRemWithBB Slow = createSlowBB(SuccessorBB);
|
||||
QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
|
||||
Value *CmpV = insertOperandRuntimeCheck();
|
||||
IRBuilder<> Builder(MainBB, MainBB->end());
|
||||
Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
|
||||
return Result;
|
||||
}
|
||||
|
||||
// bypassSlowDivision - This optimization identifies DIV instructions in a BB
|
||||
// that can be profitably bypassed and carried out with a shorter, faster
|
||||
// divide.
|
||||
bool llvm::bypassSlowDivision(
|
||||
BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
|
||||
DivCacheTy DivCache;
|
||||
/// This optimization identifies DIV/REM instructions in a BB that can be
|
||||
/// profitably bypassed and carried out with a shorter, faster divide.
|
||||
bool llvm::bypassSlowDivision(BasicBlock *BB,
|
||||
const BypassWidthsTy &BypassWidths) {
|
||||
DivCacheTy PerBBDivCache;
|
||||
|
||||
bool MadeChange = false;
|
||||
Instruction* Next = &*BB->begin();
|
||||
@ -231,42 +311,20 @@ bool llvm::bypassSlowDivision(
|
||||
Instruction* I = Next;
|
||||
Next = Next->getNextNode();
|
||||
|
||||
// Get instruction details
|
||||
unsigned Opcode = I->getOpcode();
|
||||
bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
|
||||
bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
|
||||
bool UseSignedOp = Opcode == Instruction::SDiv ||
|
||||
Opcode == Instruction::SRem;
|
||||
|
||||
// Only optimize div or rem ops
|
||||
if (!UseDivOp && !UseRemOp)
|
||||
continue;
|
||||
|
||||
// Skip division on vector types, only optimize integer instructions
|
||||
if (!I->getType()->isIntegerTy())
|
||||
continue;
|
||||
|
||||
// Get bitwidth of div/rem instruction
|
||||
IntegerType *T = cast<IntegerType>(I->getType());
|
||||
unsigned int bitwidth = T->getBitWidth();
|
||||
|
||||
// Continue if bitwidth is not bypassed
|
||||
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
|
||||
if (BI == BypassWidths.end())
|
||||
continue;
|
||||
|
||||
// Get type for div/rem instruction with bypass bitwidth
|
||||
IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
|
||||
|
||||
MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
|
||||
FastDivInsertionTask Task(I, BypassWidths);
|
||||
if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
|
||||
I->replaceAllUsesWith(Replacement);
|
||||
I->eraseFromParent();
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Above we eagerly create divs and rems, as pairs, so that we can efficiently
|
||||
// create divrem machine instructions. Now erase any unused divs / rems so we
|
||||
// don't leave extra instructions sitting around.
|
||||
for (auto &KV : DivCache)
|
||||
for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
|
||||
RecursivelyDeleteTriviallyDeadInstructions(Phi);
|
||||
for (auto &KV : PerBBDivCache)
|
||||
for (Value *V : {KV.second.Quotient, KV.second.Remainder})
|
||||
RecursivelyDeleteTriviallyDeadInstructions(V);
|
||||
|
||||
return MadeChange;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user