SplitKit: Correctly implement partial subregister copies
- This fixes a bug where subregisters incompatible with the vreg's register class were used.
- Implement the case where multiple copies are necessary to cover a given lanemask.

Differential Revision: https://reviews.llvm.org/D30438

llvm-svn: 298025
commit f0b68d3fbc (parent fa289ec7f0)
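Background for the second bullet: a COPY whose destination vreg has only some lanes live cannot always be expressed as a single subregister COPY, because no subregister index may cover exactly the live lanes. In the new AMDGPU test below, for instance, func0 keeps only sub0 and sub3 of an sreg_128 value alive, and sreg_128 has no subregister index spanning just those two lanes. The split editor therefore emits one COPY per covering subregister index and bundles the copies together; the virtual register rewriter later expands the bundle back into individual instructions.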
llvm/lib/CodeGen/SplitKit.cpp

@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -487,12 +488,127 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
   VFP = ValueForcePair(nullptr, true);
 }
 
+SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+    unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
+  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  bool FirstCopy = !Def.isValid();
+  MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+      .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
+              | getInternalReadRegState(!FirstCopy), SubIdx)
+      .addReg(FromReg, 0, SubIdx);
+
+  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+  if (FirstCopy) {
+    SlotIndexes &Indexes = *LIS.getSlotIndexes();
+    Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+    DestLI.createDeadDef(Def, Allocator);
+  } else {
+    CopyMI->bundleWithPred();
+  }
+  LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
+  DestLI.refineSubRanges(Allocator, LaneMask,
+                         [Def, &Allocator](LiveInterval::SubRange &SR) {
+    SR.createDeadDef(Def, Allocator);
+  });
+  return Def;
+}
+
+SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+    LaneBitmask LaneMask, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
+  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+    // The full vreg is copied.
+    MachineInstr *CopyMI =
+        BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+    SlotIndexes &Indexes = *LIS.getSlotIndexes();
+    return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+  }
+
+  // Only a subset of lanes needs to be copied. The following is a simple
+  // heuristic to construct a sequence of COPYs. We could add a target
+  // specific callback if this turns out to be suboptimal.
+  LiveInterval &DestLI = LIS.getInterval(Edit->get(RegIdx));
+
+  // First pass: Try to find a perfectly matching subregister index. If none
+  // exists find the one covering the most lanemask bits.
+  SmallVector<unsigned, 8> PossibleIndexes;
+  unsigned BestIdx = 0;
+  unsigned BestCover = 0;
+  const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+  assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+  for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
+    // Is this index even compatible with the given class?
+    if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
+      continue;
+    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+    // Early exit if we found a perfect match.
+    if (SubRegMask == LaneMask) {
+      BestIdx = Idx;
+      break;
+    }
+
+    // The index must not cover any lanes outside \p LaneMask.
+    if ((SubRegMask & ~LaneMask).any())
+      continue;
+
+    unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+    PossibleIndexes.push_back(Idx);
+    if (PopCount > BestCover) {
+      BestCover = PopCount;
+      BestIdx = Idx;
+    }
+  }
+
+  // Abort if we cannot possibly implement the COPY with the given indexes.
+  if (BestIdx == 0)
+    report_fatal_error("Impossible to implement partial COPY");
+
+  SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
+                                        BestIdx, DestLI, Late, SlotIndex());
+
+  // Greedy heuristic: Keep iterating keeping the best covering subreg index
+  // each time.
+  LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
+  while (LanesLeft.any()) {
+    unsigned BestIdx = 0;
+    int BestCover = INT_MIN;
+    for (unsigned Idx : PossibleIndexes) {
+      LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+      // Early exit if we found a perfect match.
+      if (SubRegMask == LanesLeft) {
+        BestIdx = Idx;
+        break;
+      }
+
+      // Try to cover as much of the remaining lanes as possible but
+      // as few of the already covered lanes as possible.
+      int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
+                - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+      if (Cover > BestCover) {
+        BestCover = Cover;
+        BestIdx = Idx;
+      }
+    }
+
+    if (BestIdx == 0)
+      report_fatal_error("Impossible to implement partial COPY");
+
+    buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+                          DestLI, Late, Def);
+    LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+  }
+
+  return Def;
+}
+
 VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
                                    VNInfo *ParentVNI,
                                    SlotIndex UseIdx,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I) {
   MachineInstr *CopyMI = nullptr;
   SlotIndex Def;
   LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
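The selection logic in buildCopy is self-contained enough to model outside LLVM. Below is a minimal standalone sketch of the same two-phase heuristic, with LaneBitmask reduced to a plain uint32_t and the subregister index table passed in explicitly; all names in it (SubRegIdx, coverLaneMask, the example masks) are invented for illustration and are not LLVM API.

#include <bit>      // std::popcount (C++20)
#include <cassert>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <vector>

// One entry per subregister index compatible with the register class.
struct SubRegIdx { unsigned Idx; uint32_t Mask; };

// Returns the sequence of subregister indexes whose masks cover LaneMask.
std::vector<unsigned> coverLaneMask(uint32_t LaneMask,
                                    const std::vector<SubRegIdx> &Indexes) {
  // First pass: a perfect match wins outright; otherwise remember every
  // index fully inside LaneMask and pick the one covering the most bits.
  std::vector<SubRegIdx> Possible;
  unsigned BestIdx = 0;
  uint32_t BestMask = 0;
  int BestCover = 0;
  for (const SubRegIdx &S : Indexes) {
    if (S.Mask == LaneMask)
      return {S.Idx};               // perfect match: a single copy suffices
    if (S.Mask & ~LaneMask)
      continue;                     // would copy lanes outside the mask
    Possible.push_back(S);
    int PopCount = std::popcount(S.Mask);
    if (PopCount > BestCover) {
      BestCover = PopCount;
      BestIdx = S.Idx;
      BestMask = S.Mask;
    }
  }
  assert(BestIdx != 0 && "impossible to implement partial copy");

  std::vector<unsigned> Result{BestIdx};
  uint32_t LanesLeft = LaneMask & ~BestMask;
  // Greedy phase: prefer indexes covering many remaining lanes and few
  // already-covered ones.
  while (LanesLeft) {
    unsigned Idx = 0;
    uint32_t Mask = 0;
    int Best = INT_MIN;
    for (const SubRegIdx &S : Possible) {
      if (S.Mask == LanesLeft) { Idx = S.Idx; Mask = S.Mask; break; }
      int Cover = std::popcount(S.Mask & LanesLeft)
                - std::popcount(S.Mask & ~LanesLeft);
      if (Cover > Best) { Best = Cover; Idx = S.Idx; Mask = S.Mask; }
    }
    assert(Idx != 0 && "remaining lanes cannot be covered");
    Result.push_back(Idx);
    LanesLeft &= ~Mask;
  }
  return Result;
}

int main() {
  // Hypothetical class: single-lane indexes 1..4, adjacent-pair indexes 5..6.
  std::vector<SubRegIdx> Indexes = {
      {1, 0b0001}, {2, 0b0010}, {3, 0b0100}, {4, 0b1000},
      {5, 0b0011}, {6, 0b1100}};
  for (unsigned Idx : coverLaneMask(0b1011, Indexes)) // lanes 0, 1 and 3
    std::printf("copy subreg index %u\n", Idx);
  return 0;
}

Running it prints index 5 (lanes 0-1) followed by index 4 (lane 3): two copies where a naive per-lane approach would emit three.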
@@ -505,45 +621,29 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
   LiveInterval &OrigLI = LIS.getInterval(Original);
   VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
 
+  unsigned Reg = LI->reg;
   bool DidRemat = false;
   if (OrigVNI) {
     LiveRangeEdit::Remat RM(ParentVNI);
     RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
     if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
-      Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+      Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
       ++NumRemats;
       DidRemat = true;
     }
   }
   if (!DidRemat) {
-    // Can't remat, just insert a copy from parent.
-    CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
-                 .addReg(Edit->getReg());
-    Def = LIS.getSlotIndexes()
-              ->insertMachineInstrInMaps(*CopyMI, Late)
-              .getRegSlot();
+    LaneBitmask LaneMask;
     if (LI->hasSubRanges()) {
-      LaneBitmask LM = LaneBitmask::getNone();
+      LaneMask = LaneBitmask::getNone();
       for (LiveInterval::SubRange &S : LI->subranges())
-        LM |= S.LaneMask;
-
-      if (MRI.getMaxLaneMaskForVReg(LI->reg) != LM) {
-        // Find subreg for the lane mask.
-        unsigned SubIdx = 0;
-        for (unsigned I = 1, E = TRI.getNumSubRegIndices(); I < E; ++I) {
-          if (TRI.getSubRegIndexLaneMask(I) == LM) {
-            SubIdx = I;
-            break;
-          }
-        }
-        if (SubIdx == 0)
-          report_fatal_error("Cannot find subreg index to cover all alive lanes");
-        CopyMI->getOperand(0).setSubReg(SubIdx);
-        CopyMI->getOperand(1).setSubReg(SubIdx);
-        CopyMI->getOperand(0).setIsUndef(true);
-      }
+        LaneMask |= S.LaneMask;
+    } else {
+      LaneMask = LaneBitmask::getAll();
     }
 
     ++NumCopies;
+    Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
   }
 
   // Define the value in Reg.
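Note how the two halves of the patch fit together: only the first copy produced by buildSingleSubRegCopy is inserted into the slot index maps and given a dead def; every further partial copy is attached to its predecessor with bundleWithPred(), so the whole sequence occupies a single SlotIndex and behaves as one definition during live range bookkeeping. The VirtRegRewriter change below is what later dissolves these bundles.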
llvm/lib/CodeGen/SplitKit.h

@@ -405,6 +405,17 @@ private:
   /// deleteRematVictims - Delete defs that are dead after rematerializing.
   void deleteRematVictims();
 
+  /// Add a copy instruction copying \p FromReg to \p ToReg before
+  /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
+  /// necessary to construct a sequence of copies to cover it exactly.
+  SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+      bool Late, unsigned RegIdx);
+
+  SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+      MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
+      unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy);
+
 public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.
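A note on the buildSingleSubRegCopy contract, as it follows from the implementation above: when PrevCopy is an invalid SlotIndex, the function treats the new instruction as the first copy of the sequence, marks the destination operand undef, inserts the instruction into the slot indexes and creates a dead def; when PrevCopy is valid, the copy reads the destination internally, is bundled with its predecessor and reuses PrevCopy's index. Callers therefore pass SlotIndex() for the first copy and the returned index for each subsequent one, as buildCopy does.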
llvm/lib/CodeGen/VirtRegMap.cpp

@@ -167,6 +167,7 @@ class VirtRegRewriter : public MachineFunctionPass {
   bool readsUndefSubreg(const MachineOperand &MO) const;
   void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
   void handleIdentityCopy(MachineInstr &MI) const;
+  void expandCopyBundle(MachineInstr &MI) const;
 
 public:
   static char ID;
@@ -372,6 +373,34 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
   DEBUG(dbgs() << " deleted.\n");
 }
 
+/// The liverange splitting logic sometimes produces bundles of copies when
+/// subregisters are involved. Expand these into a sequence of copy instructions
+/// after processing the last in the bundle. Does not update LiveIntervals
+/// which we shouldn't need for this instruction anymore.
+void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
+  if (!MI.isCopy())
+    return;
+
+  if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+    // Only do this when the complete bundle is made out of COPYs.
+    for (MachineBasicBlock::reverse_instr_iterator I =
+         std::next(MI.getReverseIterator()); I->isBundledWithSucc(); ++I) {
+      if (!I->isCopy())
+        return;
+    }
+
+    for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
+         I->isBundledWithPred(); ) {
+      MachineInstr &MI = *I;
+      ++I;
+
+      MI.unbundleFromPred();
+      if (Indexes)
+        Indexes->insertMachineInstrInMaps(MI);
+    }
+  }
+}
+
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<unsigned, 8> SuperDeads;
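To see the unbundling order without the MachineInstr machinery, here is a toy model of the traversal above — hypothetical, flag-based stand-ins for isBundledWithPred()/isBundledWithSucc(), not LLVM API — that walks a bundle backwards exactly like expandCopyBundle: first vetting that every member is a COPY, then unbundling from the last member to the first.

#include <cstdio>
#include <vector>

// Invented stand-in for MachineInstr: just a name plus bundle flags.
struct Instr {
  const char *Name;
  bool IsCopy;
  bool BundledWithPred = false;
  bool BundledWithSucc = false;
};

// Mirrors expandCopyBundle: MIIdx is the last instruction of a bundle.
void expandCopyBundle(std::vector<Instr> &Block, size_t MIIdx) {
  Instr &MI = Block[MIIdx];
  if (!MI.IsCopy || !MI.BundledWithPred || MI.BundledWithSucc)
    return;

  // Only expand when the complete bundle is made out of COPYs: scan
  // backwards over every instruction still bundled with a successor.
  for (size_t I = MIIdx; I-- > 0 && Block[I].BundledWithSucc; )
    if (!Block[I].IsCopy)
      return;

  // Unbundle from the last member towards the first.
  for (size_t I = MIIdx; Block[I].BundledWithPred; --I) {
    Block[I].BundledWithPred = false;
    Block[I - 1].BundledWithSucc = false;
    std::printf("unbundled %s\n", Block[I].Name);
    // The real pass re-inserts the instruction into the SlotIndexes here.
  }
}

int main() {
  // A three-copy bundle: the head is bundled with its successor only,
  // the tail with its predecessor only.
  std::vector<Instr> Block = {
      {"copy0", true, false, true},
      {"copy1", true, true, true},
      {"copy2", true, true, false}};
  expandCopyBundle(Block, 2);  // prints copy2, then copy1
  return 0;
}

In this toy run the bundle is expanded when the rewriter reaches copy2; copy2 and copy1 drop their predecessor flags in that order, and copy0, which never had one, simply stops being a bundle head.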
@@ -463,6 +492,8 @@ void VirtRegRewriter::rewrite() {
 
       DEBUG(dbgs() << "> " << *MI);
 
+      expandCopyBundle(*MI);
+
       // We can remove identity copies right now.
       handleIdentityCopy(*MI);
     }
llvm/test/CodeGen/AMDGPU/splitkit.mir (new file, 64 lines)

@@ -0,0 +1,64 @@
# RUN: llc -o - %s -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
--- |
  define void @func0() #0 { ret void }
  define void @func1() #0 { ret void }

  attributes #0 = { "amdgpu-num-sgpr"="12" }
...
---
# Make sure we only get a single spill+reload even if liverange splitting
# created a sequence of multiple copy instructions.
# CHECK-LABEL: name: func0
# CHECK: SI_SPILL_S128_SAVE
# CHECK-NOT: SI_SPILL_S128_SAVE
# CHECK: S_NOP 0
# CHECK: SI_SPILL_S128_RESTORE
# CHECK-NOT: SI_SPILL_S128_RESTORE
name: func0
body: |
  bb.0:
    S_NOP 0, implicit-def undef %0.sub0 : sreg_128
    S_NOP 0, implicit-def %0.sub3 : sreg_128

    ; Clobber registers
    S_NOP 0, implicit-def dead %sgpr0, implicit-def dead %sgpr1, implicit-def dead %sgpr2, implicit-def dead %sgpr3, implicit-def dead %sgpr4, implicit-def dead %sgpr5, implicit-def dead %sgpr6, implicit-def dead %sgpr7, implicit-def dead %sgpr8, implicit-def dead %sgpr9, implicit-def dead %sgpr10, implicit-def dead %sgpr11

    S_NOP 0, implicit %0.sub0
    S_NOP 0, implicit %0.sub3
    S_NOP 0, implicit %0.sub0
    S_NOP 0, implicit %0.sub3
...
---
# LiveRange splitting should split this into 2 intervals with the second getting
# allocated to sgpr0_sgpr1 and the first to something else so we see two copies
# in between for the two subregisters that are alive.
# CHECK-LABEL: name: func1
# CHECK: [[REG0:%sgpr[0-9]+]] = COPY %sgpr0
# CHECK: [[REG1:%sgpr[0-9]+]] = COPY %sgpr2
# CHECK: S_NOP 0
# CHECK: S_NOP 0, implicit [[REG0]]
# CHECK: S_NOP 0, implicit [[REG1]]
# CHECK: %sgpr0 = COPY [[REG0]]
# CHECK: %sgpr2 = COPY [[REG1]]
# CHECK: S_NOP
# CHECK: S_NOP 0, implicit %sgpr0
# CHECK: S_NOP 0, implicit %sgpr2
name: func1
tracksRegLiveness: true
body: |
  bb.0:
    liveins: %sgpr0, %sgpr1, %sgpr2
    undef %0.sub0 : sreg_128 = COPY %sgpr0
    %0.sub2 = COPY %sgpr2

    S_NOP 0, implicit-def dead %sgpr0, implicit-def dead %sgpr1

    S_NOP 0, implicit %0.sub0
    S_NOP 0, implicit %0.sub2

    ; Clobber everything but sgpr0-sgpr3
    S_NOP 0, implicit-def dead %sgpr4, implicit-def dead %sgpr5, implicit-def dead %sgpr6, implicit-def dead %sgpr7, implicit-def dead %sgpr8, implicit-def dead %sgpr9, implicit-def dead %sgpr10, implicit-def dead %sgpr11, implicit-def dead %sgpr12, implicit-def dead %sgpr13, implicit-def dead %sgpr14, implicit-def dead %sgpr15, implicit-def dead %vcc_lo, implicit-def dead %vcc_hi

    S_NOP 0, implicit %0.sub0
    S_NOP 0, implicit %0.sub2
...