llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Amara Emerson 1e1e4fd87f [AArch64][GlobalISel] Optimize conditional branches followed by unconditional branches
If we have an icmp->brcond->br sequence where the brcond just branches to the
next block jumping over the br, while the br takes the false edge, then we can
modify the conditional branch to jump to the br's target while inverting the
condition of the incoming icmp. This means we can eliminate the br as an
unconditional branch to the fallthrough block.

Differential Revision: https://reviews.llvm.org/D64354

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365510 91177308-0d34-0410-b5e6-96231b3b80d8
2019-07-09 16:05:59 +00:00

412 lines
16 KiB
C++

//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
if (MRI.constrainRegAttrs(ToReg, FromReg))
MRI.replaceRegWith(FromReg, ToReg);
else
Builder.buildCopy(ToReg, FromReg);
Observer.finishedChangingAllUsesOfReg();
}
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
MachineOperand &FromRegOp,
Register ToReg) const {
assert(FromRegOp.getParent() && "Expected an operand in an MI");
Observer.changingInstr(*FromRegOp.getParent());
FromRegOp.setReg(ToReg);
Observer.changedInstr(*FromRegOp.getParent());
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (matchCombineCopy(MI)) {
applyCombineCopy(MI);
return true;
}
return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
return false;
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
// Simple Copy Propagation.
// a(sx) = COPY b(sx) -> Replace all uses of a with b.
if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
return true;
return false;
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, SrcReg);
}
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
const LLT &TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
if (!CurrentUse.Ty.isValid()) {
if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
return CurrentUse;
}
// We permit the extend to hoist through basic blocks but this is only
// sensible if the target has extending loads. If you end up lowering back
// into a load and extend during the legalizer then the end result is
// hoisting the extend up to the load.
// Prefer defined extensions to undefined extensions as these are more
// likely to reduce the number of instructions.
if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
return CurrentUse;
else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
OpcodeForCandidate != TargetOpcode::G_ANYEXT)
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
// Prefer sign extensions to zero extensions as sign-extensions tend to be
// more expensive.
if (CurrentUse.Ty == TyForCandidate) {
if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
OpcodeForCandidate == TargetOpcode::G_ZEXT)
return CurrentUse;
else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
OpcodeForCandidate == TargetOpcode::G_SEXT)
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
}
// This is potentially target specific. We've chosen the largest type
// because G_TRUNC is usually free. One potential catch with this is that
// some targets have a reduced number of larger registers than smaller
// registers and this choice potentially increases the live-range for the
// larger value.
if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
}
return CurrentUse;
}
/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHI's is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
MachineOperand &UseMO)>
Inserter) {
MachineInstr &UseMI = *UseMO.getParent();
MachineBasicBlock *InsertBB = UseMI.getParent();
// If the use is a PHI then we want the predecessor block instead.
if (UseMI.isPHI()) {
MachineOperand *PredBB = std::next(&UseMO);
InsertBB = PredBB->getMBB();
}
// If the block is the same block as the def then we want to insert just after
// the def instead of at the start of the block.
if (InsertBB == DefMI.getParent()) {
MachineBasicBlock::iterator InsertPt = &DefMI;
Inserter(InsertBB, std::next(InsertPt), UseMO);
return;
}
// Otherwise we want the start of the BB
Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
PreferredTuple Preferred;
if (matchCombineExtendingLoads(MI, Preferred)) {
applyCombineExtendingLoads(MI, Preferred);
return true;
}
return false;
}
bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
PreferredTuple &Preferred) {
// We match the loads and follow the uses to the extend instead of matching
// the extends and following the def to the load. This is because the load
// must remain in the same position for correctness (unless we also add code
// to find a safe place to sink it) whereas the extend is freely movable.
// It also prevents us from duplicating the load for the volatile case or just
// for performance.
if (MI.getOpcode() != TargetOpcode::G_LOAD &&
MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
return false;
auto &LoadValue = MI.getOperand(0);
assert(LoadValue.isReg() && "Result wasn't a register?");
LLT LoadValueTy = MRI.getType(LoadValue.getReg());
if (!LoadValueTy.isScalar())
return false;
// Most architectures are going to legalize <s8 loads into at least a 1 byte
// load, and the MMOs can only describe memory accesses in multiples of bytes.
// If we try to perform extload combining on those, we can end up with
// %a(s8) = extload %ptr (load 1 byte from %ptr)
// ... which is an illegal extload instruction.
if (LoadValueTy.getSizeInBits() < 8)
return false;
// For non power-of-2 types, they will very likely be legalized into multiple
// loads. Don't bother trying to match them into extending loads.
if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
return false;
// Find the preferred type aside from the any-extends (unless it's the only
// one) and non-extending ops. We'll emit an extending load to that type and
// and emit a variant of (extend (trunc X)) for the others according to the
// relative type sizes. At the same time, pick an extend to use based on the
// extend involved in the chosen type.
unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
? TargetOpcode::G_ANYEXT
: MI.getOpcode() == TargetOpcode::G_SEXTLOAD
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
Preferred = {LLT(), PreferredOpcode, nullptr};
for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
Preferred = ChoosePreferredUse(Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
}
}
// There were no extends
if (!Preferred.MI)
return false;
// It should be impossible to chose an extend without selecting a different
// type since by definition the result of an extend is larger.
assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
return true;
}
void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
PreferredTuple &Preferred) {
// Rewrite the load to the chosen extending load.
Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
// Inserter to insert a truncate back to the original type at a given point
// with some basic CSE to limit truncate duplication to one per BB.
DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
MachineBasicBlock::iterator InsertBefore,
MachineOperand &UseMO) {
MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
if (PreviouslyEmitted) {
Observer.changingInstr(*UseMO.getParent());
UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
Observer.changedInstr(*UseMO.getParent());
return;
}
Builder.setInsertPt(*InsertIntoBB, InsertBefore);
Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
EmittedInsns[InsertIntoBB] = NewMI;
replaceRegOpWith(MRI, UseMO, NewDstReg);
};
Observer.changingInstr(MI);
MI.setDesc(
Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
? TargetOpcode::G_SEXTLOAD
: Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
? TargetOpcode::G_ZEXTLOAD
: TargetOpcode::G_LOAD));
// Rewrite all the uses to fix up the types.
auto &LoadValue = MI.getOperand(0);
SmallVector<MachineOperand *, 4> Uses;
for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
Uses.push_back(&UseMO);
for (auto *UseMO : Uses) {
MachineInstr *UseMI = UseMO->getParent();
// If the extend is compatible with the preferred extend then we should fix
// up the type and extend so that it uses the preferred use.
if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
unsigned UseDstReg = UseMI->getOperand(0).getReg();
MachineOperand &UseSrcMO = UseMI->getOperand(1);
const LLT &UseDstTy = MRI.getType(UseDstReg);
if (UseDstReg != ChosenDstReg) {
if (Preferred.Ty == UseDstTy) {
// If the use has the same type as the preferred use, then merge
// the vregs and erase the extend. For example:
// %1:_(s8) = G_LOAD ...
// %2:_(s32) = G_SEXT %1(s8)
// %3:_(s32) = G_ANYEXT %1(s8)
// ... = ... %3(s32)
// rewrites to:
// %2:_(s32) = G_SEXTLOAD ...
// ... = ... %2(s32)
replaceRegWith(MRI, UseDstReg, ChosenDstReg);
Observer.erasingInstr(*UseMO->getParent());
UseMO->getParent()->eraseFromParent();
} else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
// If the preferred size is smaller, then keep the extend but extend
// from the result of the extending load. For example:
// %1:_(s8) = G_LOAD ...
// %2:_(s32) = G_SEXT %1(s8)
// %3:_(s64) = G_ANYEXT %1(s8)
// ... = ... %3(s64)
/// rewrites to:
// %2:_(s32) = G_SEXTLOAD ...
// %3:_(s64) = G_ANYEXT %2:_(s32)
// ... = ... %3(s64)
replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
} else {
// If the preferred size is large, then insert a truncate. For
// example:
// %1:_(s8) = G_LOAD ...
// %2:_(s64) = G_SEXT %1(s8)
// %3:_(s32) = G_ZEXT %1(s8)
// ... = ... %3(s32)
/// rewrites to:
// %2:_(s64) = G_SEXTLOAD ...
// %4:_(s8) = G_TRUNC %2:_(s32)
// %3:_(s64) = G_ZEXT %2:_(s8)
// ... = ... %3(s64)
InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
InsertTruncAt);
}
continue;
}
// The use is (one of) the uses of the preferred use we chose earlier.
// We're going to update the load to def this value later so just erase
// the old extend.
Observer.erasingInstr(*UseMO->getParent());
UseMO->getParent()->eraseFromParent();
continue;
}
// The use isn't an extend. Truncate back to the type we originally loaded.
// This is free on many targets.
InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
}
MI.getOperand(0).setReg(ChosenDstReg);
Observer.changedInstr(MI);
}
bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
// Try to match the following:
// bb1:
// %c(s32) = G_ICMP pred, %a, %b
// %c1(s1) = G_TRUNC %c(s32)
// G_BRCOND %c1, %bb2
// G_BR %bb3
// bb2:
// ...
// bb3:
// The above pattern does not have a fall through to the successor bb2, always
// resulting in a branch no matter which path is taken. Here we try to find
// and replace that pattern with conditional branch to bb3 and otherwise
// fallthrough to bb2.
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator BrIt(MI);
if (BrIt == MBB->begin())
return false;
assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
MachineInstr *BrCond = &*std::prev(BrIt);
if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
return false;
// Check that the next block is the conditional branch target.
if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
return false;
MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
!MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
return false;
return true;
}
bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
if (!matchCombineBr(MI))
return false;
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
(CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
// Invert the G_ICMP condition.
Observer.changingInstr(*CmpMI);
CmpMI->getOperand(1).setPredicate(InversePred);
Observer.changedInstr(*CmpMI);
// Change the conditional branch target.
Observer.changingInstr(*BrCond);
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
MI.eraseFromParent();
return true;
}
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
return tryCombineExtendingLoads(MI);
}