Re-commit: [globalisel] Add combiner helpers for extending loads and use them in a pre-legalize combiner for AArch64

Summary: Depends on D45541

Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, javed.absar, aemerson

Subscribers: aemerson, rengolin, mgorny, javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D45543

The previous commit failed portions of the test-suite on GreenDragon due to
duplicate COPY instructions and iterator invalidation. Both issues have now
been fixed. To assist with this, a helper (cloneVirtualRegister) has been added
to MachineRegisterInfo that can be used to get another register that has the same
type and class/bank as an existing one.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@343654 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Daniel Sanders 2018-10-03 02:12:17 +00:00
parent db3d49018b
commit 5a81c73c73
14 changed files with 307 additions and 13 deletions

View File

@ -24,6 +24,16 @@ class CombinerInfo;
class TargetPassConfig;
class MachineFunction;
/// Callback interface through which MIR modifications are reported: one
/// notification for an erased instruction and one for a newly created
/// instruction.
class CombinerChangeObserver {
public:
  virtual ~CombinerChangeObserver() = default;

  /// An instruction was erased.
  virtual void erasedInstr(MachineInstr &MI) = 0;

  /// An instruction was created and inserted into the function.
  virtual void createdInstr(MachineInstr &MI) = 0;
};
class Combiner {
public:
Combiner(CombinerInfo &CombinerInfo, const TargetPassConfig *TPC);

View File

@ -20,6 +20,7 @@
namespace llvm {
class CombinerChangeObserver;
class MachineIRBuilder;
class MachineRegisterInfo;
class MachineInstr;
@ -27,14 +28,22 @@ class MachineInstr;
class CombinerHelper {
MachineIRBuilder &Builder;
MachineRegisterInfo &MRI;
CombinerChangeObserver &Observer;
void eraseInstr(MachineInstr &MI);
void scheduleForVisit(MachineInstr &MI);
public:
CombinerHelper(MachineIRBuilder &B);
CombinerHelper(CombinerChangeObserver &Observer, MachineIRBuilder &B);
/// If \p MI is COPY, try to combine it.
/// Returns true if MI changed.
bool tryCombineCopy(MachineInstr &MI);
/// If \p MI is an extend that consumes the result of a load, try to combine it.
/// Returns true if MI changed.
bool tryCombineExtendingLoads(MachineInstr &MI);
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);

View File

@ -17,10 +17,12 @@
#include <cassert>
namespace llvm {
class CombinerChangeObserver;
class LegalizerInfo;
class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
// Contains information relevant to enabling/disabling various combines for a
// pass.
class CombinerInfo {
@ -41,7 +43,8 @@ public:
/// illegal ops that are created.
bool LegalizeIllegalOps; // TODO: Make use of this.
const LegalizerInfo *LInfo;
virtual bool combine(MachineInstr &MI, MachineIRBuilder &B) const = 0;
virtual bool combine(CombinerChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const = 0;
};
} // namespace llvm

View File

@ -60,11 +60,6 @@ struct MachineIRBuilderState {
class MachineIRBuilderBase {
MachineIRBuilderState State;
/// Returns the TargetInstrInfo recorded in the builder state.
/// Asserts that it has been set.
const TargetInstrInfo &getTII() {
assert(State.TII && "TargetInstrInfo is not set");
return *State.TII;
}
void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend);
protected:
@ -107,6 +102,11 @@ public:
/// Constructs the builder with its state initialized from \p BState.
MachineIRBuilderBase(const MachineIRBuilderState &BState) : State(BState) {}
/// Getter for the TargetInstrInfo recorded in the builder state.
/// Asserts that it has been set.
const TargetInstrInfo &getTII() {
assert(State.TII && "TargetInstrInfo is not set");
return *State.TII;
}
/// Getter for the function we currently build.
MachineFunction &getMF() {
assert(State.MF && "MachineFunction is not set");

View File

@ -717,6 +717,10 @@ public:
unsigned createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name = "");
/// Create and return a new virtual register in the function with the same
/// attributes as the given register.
unsigned cloneVirtualRegister(unsigned VReg, StringRef Name = "");
/// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
/// (target independent) virtual register.
LLT getType(unsigned Reg) const {

View File

@ -25,6 +25,34 @@
using namespace llvm;
namespace {
/// This class acts as the glue the joins the CombinerHelper to the overall
/// Combine algorithm. The CombinerHelper is intended to report the
/// modifications it makes to the MIR to the CombinerChangeObserver and the
/// observer subclass will act on these events. In this case, instruction
/// erasure will cancel any future visits to the erased instruction and
/// instruction creation will schedule that instruction for a future visit.
/// Other Combiner implementations may require more complex behaviour from
/// their CombinerChangeObserver subclass.
class WorkListMaintainer : public CombinerChangeObserver {
using WorkListTy = GISelWorkList<512>;
WorkListTy &WorkList;
public:
WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
virtual ~WorkListMaintainer() {}
void erasedInstr(MachineInstr &MI) override {
LLVM_DEBUG(dbgs() << "Erased: "; MI.print(dbgs()); dbgs() << "\n");
WorkList.remove(&MI);
}
void createdInstr(MachineInstr &MI) override {
LLVM_DEBUG(dbgs() << "Created: "; MI.print(dbgs()); dbgs() << "\n");
WorkList.insert(&MI);
}
};
}
Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
: CInfo(Info), TPC(TPC) {
(void)this->TPC; // FIXME: Remove when used.
@ -53,6 +81,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// down RPOT.
Changed = false;
GISelWorkList<512> WorkList;
WorkListMaintainer Observer(WorkList);
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@ -72,7 +101,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";);
Changed |= CInfo.combine(*CurrInst, Builder);
Changed |= CInfo.combine(Observer, *CurrInst, Builder);
}
MFChanged |= Changed;
} while (Changed);

View File

@ -6,18 +6,28 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define DEBUG_TYPE "gi-combine"
using namespace llvm;
/// Builds a helper around \p B; the register info is taken from the function
/// the builder currently targets.
// NOTE(review): this overload does not initialize the Observer reference
// member. It appears to be the pre-change constructor that the two-argument
// overload replaces -- confirm it is not meant to survive.
CombinerHelper::CombinerHelper(MachineIRBuilder &B) :
Builder(B), MRI(Builder.getMF().getRegInfo()) {}
/// Builds a helper that emits instructions through \p B, reads register
/// information from the function \p B currently targets, and reports changes
/// to \p Observer.
CombinerHelper::CombinerHelper(CombinerChangeObserver &Observer,
                               MachineIRBuilder &B)
    : Builder(B),
      MRI(Builder.getMF().getRegInfo()),
      Observer(Observer) {
}
/// Report the erasure of \p MI to the observer. This only forwards the
/// notification; it does not itself remove \p MI from its parent.
void CombinerHelper::eraseInstr(MachineInstr &MI) {
  Observer.erasedInstr(MI);
}
/// Report \p MI to the observer as a newly created instruction.
void CombinerHelper::scheduleForVisit(MachineInstr &MI) {
  Observer.createdInstr(MI);
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
@ -36,6 +46,214 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
return false;
}
bool CombinerHelper::tryCombine(MachineInstr &MI) {
return tryCombineCopy(MI);
namespace {
/// Describes the use currently favoured as the one to fold into the load.
/// Field order matters: the struct is brace-initialized positionally.
struct PreferredTuple {
LLT Ty; // The result type of the extend.
unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
MachineInstr *MI; // The favoured use; null until one has been chosen.
};
/// Decide which of two uses should drive the rewrite of the load.
/// \p CurrentUse is the preference so far; the *ForCandidate arguments
/// describe the use being considered. Returns whichever of the two wins.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT &TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  const PreferredTuple Candidate = {TyForCandidate, OpcodeForCandidate,
                                    MIForCandidate};
  const bool CurrentIsAnyExt =
      CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT;
  const bool CandidateIsAnyExt =
      OpcodeForCandidate == TargetOpcode::G_ANYEXT;

  // No type recorded yet: accept the candidate if its opcode matches the
  // current one, or if the current opcode is any-extend and the candidate is
  // one of the three extends.
  if (!CurrentUse.Ty.isValid()) {
    const bool CandidateIsExtend =
        OpcodeForCandidate == TargetOpcode::G_SEXT ||
        OpcodeForCandidate == TargetOpcode::G_ZEXT || CandidateIsAnyExt;
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        (CurrentIsAnyExt && CandidateIsExtend))
      return Candidate;
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions. When exactly one side is an
  // any-extend, the other side wins.
  if (CurrentIsAnyExt != CandidateIsAnyExt)
    return CandidateIsAnyExt ? CurrentUse : Candidate;

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
        OpcodeForCandidate == TargetOpcode::G_SEXT)
      return Candidate;
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits())
    return Candidate;

  return CurrentUse;
}
} // end anonymous namespace
/// If \p MI is a scalar G_LOAD/G_SEXTLOAD/G_ZEXTLOAD whose result feeds an
/// extend, turn the load into the matching extending load that defines the
/// preferred extend's result directly, and patch every other use of the
/// loaded value (merge, re-extend from the wider value, or truncate back).
/// Returns true if \p MI was changed.
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.

  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;

  auto &LoadValue = MI.getOperand(0);
  assert(LoadValue.isReg() && "Result wasn't a register?");

  // The register currently defined by the load. It stays the load's def until
  // the very end of this function, so it is safe to cache.
  const unsigned LoadReg = LoadValue.getReg();

  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
                                 ? TargetOpcode::G_ANYEXT
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                       ? TargetOpcode::G_SEXT
                                       : TargetOpcode::G_ZEXT;
  PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT || !Preferred.Ty.isValid())
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
  }

  // There were no extends
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  // Emits `DstReg = G_TRUNC SrcReg` immediately before the instruction that
  // owns UseMO.
  const auto TruncateUse = [](MachineIRBuilder &Builder, MachineOperand &UseMO,
                              unsigned DstReg, unsigned SrcReg) {
    MachineInstr &UseMI = *UseMO.getParent();
    MachineBasicBlock &UseMBB = *UseMI.getParent();

    Builder.setInsertPt(UseMBB, MachineBasicBlock::iterator(UseMI));
    Builder.buildTrunc(DstReg, SrcReg);
  };

  // Rewrite the load to the chosen extending load.
  unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types. Operand rewrites and erasures
  // are queued and applied after the walk so that the use list of the loaded
  // register is not modified while it is being iterated.
  SmallVector<MachineInstr *, 1> ScheduleForErase;
  SmallVector<std::pair<MachineOperand *, unsigned>, 4> ScheduleForAssignReg;
  for (auto &UseMO : MRI.use_operands(LoadReg)) {
    MachineInstr *UseMI = UseMO.getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      unsigned UseDstReg = UseMI->getOperand(0).getReg();
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    ... = ... %2(s32)
          // This touches the use list of UseDstReg, not the list being walked.
          MRI.replaceRegWith(UseDstReg, ChosenDstReg);
          ScheduleForErase.push_back(UseMI);
          Observer.erasedInstr(*UseMI);
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          //    ... = ... %3(s64)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          //    ... = ... %3(s64)
          // Only this one operand is redirected, and only after the walk.
          // Replacing the loaded register wholesale would rewrite every
          // operand that mentions it (the load's own def and the non-extend
          // uses that need a truncate included) and would unlink operands
          // from the use list this loop is iterating.
          ScheduleForAssignReg.emplace_back(&UseMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ANYEXT %4:_(s8)
          //    ... = ... %3(s32)
          unsigned NewVReg = MRI.cloneVirtualRegister(LoadReg);
          TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
          ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      ScheduleForErase.push_back(UseMI);
      Observer.erasedInstr(*UseMI);
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    unsigned NewVReg = MRI.cloneVirtualRegister(LoadReg);
    TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
    ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
  }

  // Apply the queued edits now that the walk is over, then make the load
  // define the chosen register.
  for (auto &Assignment : ScheduleForAssignReg)
    Assignment.first->setReg(Assignment.second);
  for (auto &EraseMI : ScheduleForErase)
    EraseMI->eraseFromParent();
  MI.getOperand(0).setReg(ChosenDstReg);

  return true;
}
/// Run the available combines on \p MI in order, stopping at the first one
/// that reports a change. Returns true if \p MI changed.
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  return tryCombineCopy(MI) || tryCombineExtendingLoads(MI);
}

View File

@ -177,6 +177,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
/// Create a new virtual register named \p Name that copies the first
/// VRegInfo entry and the low-level type of \p VReg, notify the delegate (if
/// any), and return the new register.
unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
                                                   StringRef Name) {
  const unsigned NewReg = createIncompleteVirtualRegister(Name);

  // Mirror the attributes of the source register onto the fresh one.
  VRegInfo[NewReg].first = VRegInfo[VReg].first;
  setType(NewReg, getType(VReg));

  if (TheDelegate)
    TheDelegate->MRI_NoteNewVirtualRegister(NewReg);

  return NewReg;
}
void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
// Check that VReg doesn't have a class.
assert((getRegClassOrRegBank(VReg).isNull() ||

View File

@ -53,6 +53,7 @@ FunctionPass *createAArch64CollectLOHPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
FunctionPass *createAArch64PreLegalizeCombiner();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@ -65,6 +66,7 @@ void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);

View File

@ -158,6 +158,7 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
@ -348,6 +349,7 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
@ -449,6 +451,10 @@ bool AArch64PassConfig::addIRTranslator() {
return false;
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
addPass(createAArch64PreLegalizeCombiner());
}
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;

View File

@ -43,6 +43,7 @@ add_llvm_target(AArch64CodeGen
AArch64LoadStoreOptimizer.cpp
AArch64MacroFusion.cpp
AArch64MCInstLower.cpp
AArch64PreLegalizerCombiner.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
AArch64RegisterBankInfo.cpp

View File

@ -54,7 +54,7 @@ false:
}
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s24) = G_LOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ZEXTLOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type_load
; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type_load
define i32 @odd_type_load() {

View File

@ -42,6 +42,7 @@
; RUN: | FileCheck %s --check-prefix DISABLED
; ENABLED: IRTranslator
; ENABLED-NEXT: PreLegalizerCombiner
; VERIFY-NEXT: Verify generated machine code
; ENABLED-NEXT: Legalizer
; VERIFY-NEXT: Verify generated machine code

View File

@ -33,6 +33,7 @@
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: IRTranslator
; CHECK-NEXT: AArch64PreLegalizerCombiner
; CHECK-NEXT: Legalizer
; CHECK-NEXT: RegBankSelect
; CHECK-NEXT: Localizer