mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-04 14:22:26 +00:00
AMDGPU: Do not clobber SCC in SIWholeQuadMode
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: http://reviews.llvm.org/D22198
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281230 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ce3adfd9b4
commit
01a133c760
@ -343,11 +343,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||||||
const DebugLoc &DL, unsigned DestReg,
|
const DebugLoc &DL, unsigned DestReg,
|
||||||
unsigned SrcReg, bool KillSrc) const {
|
unsigned SrcReg, bool KillSrc) const {
|
||||||
|
|
||||||
// If we are trying to copy to or from SCC, there is a bug somewhere else in
|
|
||||||
// the backend. While it may be theoretically possible to do this, it should
|
|
||||||
// never be necessary.
|
|
||||||
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
|
|
||||||
|
|
||||||
static const int16_t Sub0_15[] = {
|
static const int16_t Sub0_15[] = {
|
||||||
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
|
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
|
||||||
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
|
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
|
||||||
@ -392,6 +387,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||||||
ArrayRef<int16_t> SubIndices;
|
ArrayRef<int16_t> SubIndices;
|
||||||
|
|
||||||
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
|
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
|
||||||
|
if (SrcReg == AMDGPU::SCC) {
|
||||||
|
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
|
||||||
|
.addImm(-1)
|
||||||
|
.addImm(0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
|
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
|
||||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
|
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
|
||||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||||
@ -418,6 +420,12 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
} else if (DestReg == AMDGPU::SCC) {
|
||||||
|
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
|
||||||
|
BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
|
||||||
|
.addReg(SrcReg, getKillRegState(KillSrc))
|
||||||
|
.addImm(0);
|
||||||
|
return;
|
||||||
} else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
|
} else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
|
||||||
assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
|
assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
|
||||||
Opcode = AMDGPU::S_MOV_B64;
|
Opcode = AMDGPU::S_MOV_B64;
|
||||||
|
@ -129,6 +129,14 @@ private:
|
|||||||
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
|
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
|
||||||
char analyzeFunction(MachineFunction &MF);
|
char analyzeFunction(MachineFunction &MF);
|
||||||
|
|
||||||
|
bool requiresCorrectState(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator Before);
|
||||||
|
MachineBasicBlock::iterator
|
||||||
|
prepareInsertion(MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
|
||||||
|
MachineBasicBlock::iterator Last, bool PreferLast,
|
||||||
|
bool SaveSCC);
|
||||||
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
|
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
|
||||||
unsigned SaveWQM, unsigned LiveMaskReg);
|
unsigned SaveWQM, unsigned LiveMaskReg);
|
||||||
void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
|
void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
|
||||||
@ -398,32 +406,140 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
|
|||||||
return GlobalFlags;
|
return GlobalFlags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether \p MI really requires the exec state computed during analysis.
|
||||||
|
///
|
||||||
|
/// Scalar instructions must occasionally be marked WQM for correct propagation
|
||||||
|
/// (e.g. thread masks leading up to branches), but when it comes to actual
|
||||||
|
/// execution, they don't care about EXEC.
|
||||||
|
bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
|
||||||
|
if (MI.isTerminator())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Skip instructions that are not affected by EXEC
|
||||||
|
if (TII->isScalarUnit(MI))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Generic instructions such as COPY will either disappear by register
|
||||||
|
// coalescing or be lowered to SALU or VALU instructions.
|
||||||
|
if (MI.isTransient()) {
|
||||||
|
if (MI.getNumExplicitOperands() >= 1) {
|
||||||
|
const MachineOperand &Op = MI.getOperand(0);
|
||||||
|
if (Op.isReg()) {
|
||||||
|
if (TRI->isSGPRReg(*MRI, Op.getReg())) {
|
||||||
|
// SGPR instructions are not affected by EXEC
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit a save/restore pair for SCC immediately in front of \p Before.
///
/// Two COPY instructions are created: the first moves SCC into a fresh
/// SReg_32 virtual register, the second moves that register back into SCC.
/// Both are registered with LiveIntervals and the interval of the new virtual
/// register is computed.
///
/// \returns an iterator to the restoring COPY, so that a caller inserting
/// before the returned position places its code between the save and the
/// restore.
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Before) {
  const unsigned ScratchReg =
      MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const MCInstrDesc &CopyDesc = TII->get(AMDGPU::COPY);
  const DebugLoc NoLoc;

  // ScratchReg <- SCC
  MachineInstr *SaveMI =
      BuildMI(MBB, Before, NoLoc, CopyDesc, ScratchReg).addReg(AMDGPU::SCC);
  // SCC <- ScratchReg
  MachineInstr *RestoreMI =
      BuildMI(MBB, Before, NoLoc, CopyDesc, AMDGPU::SCC).addReg(ScratchReg);

  // Keep the slot index maps and live intervals in sync with the new code.
  LIS->InsertMachineInstrInMaps(*SaveMI);
  LIS->InsertMachineInstrInMaps(*RestoreMI);
  LIS->createAndComputeVirtRegInterval(ScratchReg);

  return RestoreMI;
}
|
||||||
|
|
||||||
|
// Return an iterator in the (inclusive) range [First, Last] at which
|
||||||
|
// instructions can be safely inserted, keeping in mind that some of the
|
||||||
|
// instructions we want to add necessarily clobber SCC.
|
||||||
|
MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
|
||||||
|
MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
|
||||||
|
MachineBasicBlock::iterator Last, bool PreferLast, bool SaveSCC) {
|
||||||
|
if (!SaveSCC)
|
||||||
|
return PreferLast ? Last : First;
|
||||||
|
|
||||||
|
LiveRange &LR = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
|
||||||
|
auto MBBE = MBB.end();
|
||||||
|
SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
|
||||||
|
: LIS->getMBBEndIdx(&MBB);
|
||||||
|
SlotIndex LastIdx =
|
||||||
|
Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
|
||||||
|
SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
|
||||||
|
const LiveRange::Segment *S;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
S = LR.getSegmentContaining(Idx);
|
||||||
|
if (!S)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (PreferLast) {
|
||||||
|
SlotIndex Next = S->start.getBaseIndex();
|
||||||
|
if (Next < FirstIdx)
|
||||||
|
break;
|
||||||
|
Idx = Next;
|
||||||
|
} else {
|
||||||
|
SlotIndex Next = S->end.getNextIndex().getBaseIndex();
|
||||||
|
if (Next > LastIdx)
|
||||||
|
break;
|
||||||
|
Idx = Next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MachineBasicBlock::iterator MBBI;
|
||||||
|
|
||||||
|
if (MachineInstr *MI = LIS->getInstructionFromIndex(Idx))
|
||||||
|
MBBI = MI;
|
||||||
|
else {
|
||||||
|
assert(Idx == LIS->getMBBEndIdx(&MBB));
|
||||||
|
MBBI = MBB.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (S)
|
||||||
|
MBBI = saveSCC(MBB, MBBI);
|
||||||
|
|
||||||
|
return MBBI;
|
||||||
|
}
|
||||||
|
|
||||||
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
|
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator Before,
|
MachineBasicBlock::iterator Before,
|
||||||
unsigned SaveWQM, unsigned LiveMaskReg) {
|
unsigned SaveWQM, unsigned LiveMaskReg) {
|
||||||
|
MachineInstr *MI;
|
||||||
|
|
||||||
if (SaveWQM) {
|
if (SaveWQM) {
|
||||||
BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
|
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
|
||||||
SaveWQM)
|
SaveWQM)
|
||||||
.addReg(LiveMaskReg);
|
.addReg(LiveMaskReg);
|
||||||
} else {
|
} else {
|
||||||
BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
|
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
|
||||||
AMDGPU::EXEC)
|
AMDGPU::EXEC)
|
||||||
.addReg(AMDGPU::EXEC)
|
.addReg(AMDGPU::EXEC)
|
||||||
.addReg(LiveMaskReg);
|
.addReg(LiveMaskReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LIS->InsertMachineInstrInMaps(*MI);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
|
void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator Before,
|
MachineBasicBlock::iterator Before,
|
||||||
unsigned SavedWQM) {
|
unsigned SavedWQM) {
|
||||||
|
MachineInstr *MI;
|
||||||
|
|
||||||
if (SavedWQM) {
|
if (SavedWQM) {
|
||||||
BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
|
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
|
||||||
.addReg(SavedWQM);
|
.addReg(SavedWQM);
|
||||||
} else {
|
} else {
|
||||||
BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
|
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
|
||||||
AMDGPU::EXEC)
|
AMDGPU::EXEC)
|
||||||
.addReg(AMDGPU::EXEC);
|
.addReg(AMDGPU::EXEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LIS->InsertMachineInstrInMaps(*MI);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
|
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
|
||||||
@ -447,76 +563,77 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
|
|||||||
unsigned SavedWQMReg = 0;
|
unsigned SavedWQMReg = 0;
|
||||||
bool WQMFromExec = isEntry;
|
bool WQMFromExec = isEntry;
|
||||||
char State = isEntry ? StateExact : StateWQM;
|
char State = isEntry ? StateExact : StateWQM;
|
||||||
MachineInstr *FirstNonWQM = nullptr;
|
|
||||||
|
|
||||||
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
|
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
|
||||||
while (II != IE) {
|
if (isEntry)
|
||||||
MachineInstr &MI = *II;
|
++II; // Skip the instruction that saves LiveMask
|
||||||
++II;
|
|
||||||
|
|
||||||
// Skip instructions that are not affected by EXEC
|
|
||||||
if (TII->isScalarUnit(MI) && !MI.isTerminator())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Generic instructions such as COPY will either disappear by register
|
|
||||||
// coalescing or be lowered to SALU or VALU instructions.
|
|
||||||
if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
|
|
||||||
if (MI.getNumExplicitOperands() >= 1) {
|
|
||||||
const MachineOperand &Op = MI.getOperand(0);
|
|
||||||
if (Op.isReg()) {
|
|
||||||
if (TRI->isSGPRReg(*MRI, Op.getReg())) {
|
|
||||||
// SGPR instructions are not affected by EXEC
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
MachineBasicBlock::iterator First = IE;
|
||||||
|
for (;;) {
|
||||||
|
MachineBasicBlock::iterator Next = II;
|
||||||
char Needs = 0;
|
char Needs = 0;
|
||||||
char OutNeeds = 0;
|
char OutNeeds = 0;
|
||||||
auto InstrInfoIt = Instructions.find(&MI);
|
|
||||||
if (InstrInfoIt != Instructions.end()) {
|
|
||||||
Needs = InstrInfoIt->second.Needs;
|
|
||||||
OutNeeds = InstrInfoIt->second.OutNeeds;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Keep track of the first consecutive non-WQM instruction, so that we
|
if (First == IE)
|
||||||
// switch away from WQM as soon as possible, potentially saving a small
|
First = II;
|
||||||
// bit of bandwidth on loads.
|
|
||||||
if (Needs == StateWQM)
|
|
||||||
FirstNonWQM = nullptr;
|
|
||||||
else if (!FirstNonWQM)
|
|
||||||
FirstNonWQM = &MI;
|
|
||||||
|
|
||||||
// State switching
|
if (II != IE) {
|
||||||
if (Needs && State != Needs) {
|
MachineInstr &MI = *II;
|
||||||
if (Needs == StateExact) {
|
|
||||||
assert(!SavedWQMReg);
|
|
||||||
|
|
||||||
if (!WQMFromExec && (OutNeeds & StateWQM))
|
if (requiresCorrectState(MI)) {
|
||||||
SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
auto III = Instructions.find(&MI);
|
||||||
|
if (III != Instructions.end()) {
|
||||||
toExact(MBB, FirstNonWQM, SavedWQMReg, LiveMaskReg);
|
Needs = III->second.Needs;
|
||||||
} else {
|
OutNeeds = III->second.OutNeeds;
|
||||||
assert(WQMFromExec == (SavedWQMReg == 0));
|
}
|
||||||
toWQM(MBB, &MI, SavedWQMReg);
|
|
||||||
SavedWQMReg = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
State = Needs;
|
if (MI.isTerminator() && !Needs && OutNeeds == StateExact)
|
||||||
|
Needs = StateExact;
|
||||||
|
|
||||||
|
if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
|
||||||
|
MI.getOperand(3).setImm(1);
|
||||||
|
|
||||||
|
++Next;
|
||||||
|
} else {
|
||||||
|
// End of basic block
|
||||||
|
if (BI.OutNeeds & StateWQM)
|
||||||
|
Needs = StateWQM;
|
||||||
|
else if (BI.OutNeeds == StateExact)
|
||||||
|
Needs = StateExact;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
|
if (Needs) {
|
||||||
MI.getOperand(3).setImm(1);
|
if (Needs != State) {
|
||||||
}
|
MachineBasicBlock::iterator Before =
|
||||||
|
prepareInsertion(MBB, First, II, Needs == StateWQM,
|
||||||
|
Needs == StateExact || WQMFromExec);
|
||||||
|
|
||||||
if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
|
if (Needs == StateExact) {
|
||||||
assert(WQMFromExec == (SavedWQMReg == 0));
|
if (!WQMFromExec && (OutNeeds & StateWQM))
|
||||||
toWQM(MBB, MBB.end(), SavedWQMReg);
|
SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||||
} else if (BI.OutNeeds == StateExact && State != StateExact) {
|
|
||||||
toExact(MBB, FirstNonWQM ? MachineBasicBlock::iterator(FirstNonWQM)
|
toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
|
||||||
: MBB.getFirstTerminator(),
|
} else {
|
||||||
0, LiveMaskReg);
|
assert(WQMFromExec == (SavedWQMReg == 0));
|
||||||
|
|
||||||
|
toWQM(MBB, Before, SavedWQMReg);
|
||||||
|
|
||||||
|
if (SavedWQMReg) {
|
||||||
|
LIS->createAndComputeVirtRegInterval(SavedWQMReg);
|
||||||
|
SavedWQMReg = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
State = Needs;
|
||||||
|
}
|
||||||
|
|
||||||
|
First = IE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (II == IE)
|
||||||
|
break;
|
||||||
|
II = Next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -524,8 +641,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
|
|||||||
for (MachineInstr *MI : LiveMaskQueries) {
|
for (MachineInstr *MI : LiveMaskQueries) {
|
||||||
const DebugLoc &DL = MI->getDebugLoc();
|
const DebugLoc &DL = MI->getDebugLoc();
|
||||||
unsigned Dest = MI->getOperand(0).getReg();
|
unsigned Dest = MI->getOperand(0).getReg();
|
||||||
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
|
MachineInstr *Copy =
|
||||||
.addReg(LiveMaskReg);
|
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
|
||||||
|
.addReg(LiveMaskReg);
|
||||||
|
|
||||||
|
LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
|
||||||
MI->eraseFromParent();
|
MI->eraseFromParent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -559,8 +679,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
|
|
||||||
if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
|
if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
|
||||||
LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||||
BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
|
MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
|
||||||
.addReg(AMDGPU::EXEC);
|
TII->get(AMDGPU::COPY), LiveMaskReg)
|
||||||
|
.addReg(AMDGPU::EXEC);
|
||||||
|
LIS->InsertMachineInstrInMaps(*MI);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GlobalFlags == StateWQM) {
|
if (GlobalFlags == StateWQM) {
|
||||||
@ -583,5 +705,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
for (auto BII : Blocks)
|
for (auto BII : Blocks)
|
||||||
processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
|
processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
|
||||||
|
|
||||||
|
// Physical registers like SCC aren't tracked by default anyway, so just
|
||||||
|
// removing the ranges we computed is the simplest option for maintaining
|
||||||
|
// the analysis results.
|
||||||
|
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -466,6 +466,42 @@ else:
|
|||||||
ret <4 x float> %dtex
|
ret <4 x float> %dtex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Test awareness that s_wqm_b64 clobbers SCC.
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: {{^}}test_scc:
|
||||||
|
; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
|
||||||
|
; CHECK: s_wqm_b64 exec, exec
|
||||||
|
; CHECK: s_cmp_
|
||||||
|
; CHECK-NEXT: s_cbranch_scc
|
||||||
|
; CHECK: ; %if
|
||||||
|
; CHECK: s_and_b64 exec, exec, [[ORIG]]
|
||||||
|
; CHECK: image_sample
|
||||||
|
; CHECK: ; %else
|
||||||
|
; CHECK: s_and_b64 exec, exec, [[ORIG]]
|
||||||
|
; CHECK: image_sample
|
||||||
|
; CHECK: ; %end
|
||||||
|
define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 {
main_body:
  ; Branch on a comparison of the inreg argument %sel.
  %is.positive = icmp sgt i32 %sel, 0
  br i1 %is.positive, label %if, label %else

if:
  ; Image sample with a scalar i32 coordinate.
  %sample.if = call <4 x float> @llvm.SI.image.sample.i32(i32 0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  br label %end

else:
  ; Image sample with a constant <2 x i32> coordinate.
  %sample.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 0, i32 1>, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  br label %end

end:
  %result = phi <4 x float> [ %sample.if, %if ], [ %sample.else, %else ]

  ; Buffer store indexed by %idx, executed after the two paths rejoin.
  call void @llvm.amdgcn.buffer.store.f32(float 1.0, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)

  ret <4 x float> %result
}
|
||||||
|
|
||||||
|
|
||||||
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
|
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
|
||||||
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
|
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
|
||||||
@ -474,6 +510,7 @@ declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1,
|
|||||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
|
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
|
||||||
|
|
||||||
declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
||||||
|
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
||||||
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
||||||
|
|
||||||
declare void @llvm.AMDGPU.kill(float)
|
declare void @llvm.AMDGPU.kill(float)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user