mirror of
https://github.com/RPCS3/llvm.git
synced 2026-07-01 21:04:04 -04:00
[AMDGPU] Extend the SI Load/Store optimizer
Summary: Extend the SI Load/Store optimizer to merge MIMG load instructions. Handle different flavours of image_load and image_sample instructions. When the instructions of the same subclass differ only in dmask, merge them and update dmask accordingly. Reviewers: nhaehnle Reviewed By: nhaehnle Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64911 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374984 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -97,6 +97,7 @@ enum InstClassEnum {
|
||||
S_BUFFER_LOAD_IMM,
|
||||
BUFFER_LOAD,
|
||||
BUFFER_STORE,
|
||||
MIMG,
|
||||
};
|
||||
|
||||
enum RegisterEnum {
|
||||
@@ -105,6 +106,7 @@ enum RegisterEnum {
|
||||
SOFFSET = 0x4,
|
||||
VADDR = 0x8,
|
||||
ADDR = 0x10,
|
||||
SSAMP = 0x20,
|
||||
};
|
||||
|
||||
class SILoadStoreOptimizer : public MachineFunctionPass {
|
||||
@@ -117,6 +119,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
|
||||
unsigned Width0;
|
||||
unsigned Width1;
|
||||
unsigned BaseOff;
|
||||
unsigned DMask0;
|
||||
unsigned DMask1;
|
||||
InstClassEnum InstClass;
|
||||
bool GLC0;
|
||||
bool GLC1;
|
||||
@@ -205,6 +209,7 @@ private:
|
||||
AliasAnalysis *AA = nullptr;
|
||||
bool OptimizeAgain;
|
||||
|
||||
static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII);
|
||||
static bool offsetsCanBeCombined(CombineInfo &CI);
|
||||
static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
|
||||
static unsigned getNewOpcode(const CombineInfo &CI);
|
||||
@@ -220,6 +225,7 @@ private:
|
||||
unsigned write2Opcode(unsigned EltSize) const;
|
||||
unsigned write2ST64Opcode(unsigned EltSize) const;
|
||||
MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
|
||||
MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI);
|
||||
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
|
||||
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
|
||||
MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
|
||||
@@ -273,6 +279,11 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
|
||||
// FIXME: Handle d16 correctly
|
||||
return AMDGPU::getMUBUFElements(Opc);
|
||||
}
|
||||
if (TII.isMIMG(MI)) {
|
||||
uint64_t DMaskImm =
|
||||
TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm();
|
||||
return countPopulation(DMaskImm);
|
||||
}
|
||||
|
||||
switch (Opc) {
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
|
||||
@@ -306,6 +317,15 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
|
||||
return BUFFER_STORE;
|
||||
}
|
||||
}
|
||||
if (TII.isMIMG(Opc)) {
|
||||
// Ignore instructions encoded without vaddr.
|
||||
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1)
|
||||
return UNKNOWN;
|
||||
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
|
||||
if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc))
|
||||
return UNKNOWN;
|
||||
return MIMG;
|
||||
}
|
||||
return UNKNOWN;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
|
||||
@@ -331,6 +351,11 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
|
||||
default:
|
||||
if (TII.isMUBUF(Opc))
|
||||
return AMDGPU::getMUBUFBaseOpcode(Opc);
|
||||
if (TII.isMIMG(Opc)) {
|
||||
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
|
||||
assert(Info);
|
||||
return Info->BaseOpcode;
|
||||
}
|
||||
return -1;
|
||||
case AMDGPU::DS_READ_B32:
|
||||
case AMDGPU::DS_READ_B32_gfx9:
|
||||
@@ -367,6 +392,14 @@ static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (TII.isMIMG(Opc)) {
|
||||
unsigned result = VADDR | SRSRC;
|
||||
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
|
||||
if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler)
|
||||
result |= SSAMP;
|
||||
return result;
|
||||
}
|
||||
|
||||
switch (Opc) {
|
||||
default:
|
||||
return 0;
|
||||
@@ -416,14 +449,18 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
|
||||
break;
|
||||
}
|
||||
|
||||
int OffsetIdx =
|
||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
|
||||
Offset0 = I->getOperand(OffsetIdx).getImm();
|
||||
if (InstClass == MIMG) {
|
||||
DMask0 = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
|
||||
} else {
|
||||
int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
|
||||
Offset0 = I->getOperand(OffsetIdx).getImm();
|
||||
}
|
||||
|
||||
Width0 = getOpcodeWidth(*I, TII);
|
||||
|
||||
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
|
||||
Offset0 &= 0xffff;
|
||||
} else {
|
||||
} else if (InstClass != MIMG) {
|
||||
GLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
|
||||
if (InstClass != S_BUFFER_LOAD_IMM) {
|
||||
SLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
|
||||
@@ -455,6 +492,10 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
|
||||
AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
|
||||
}
|
||||
|
||||
if (Regs & SSAMP) {
|
||||
AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < NumAddresses; i++) {
|
||||
AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]);
|
||||
AddrReg[i] = &I->getOperand(AddrIdx[i]);
|
||||
@@ -467,13 +508,19 @@ void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI
|
||||
const SIInstrInfo &TII) {
|
||||
Paired = MI;
|
||||
assert(InstClass == getInstClass(Paired->getOpcode(), TII));
|
||||
int OffsetIdx =
|
||||
AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset);
|
||||
Offset1 = Paired->getOperand(OffsetIdx).getImm();
|
||||
|
||||
if (InstClass == MIMG) {
|
||||
DMask1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dmask)->getImm();
|
||||
} else {
|
||||
int OffsetIdx =
|
||||
AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset);
|
||||
Offset1 = Paired->getOperand(OffsetIdx).getImm();
|
||||
}
|
||||
|
||||
Width1 = getOpcodeWidth(*Paired, TII);
|
||||
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
|
||||
Offset1 &= 0xffff;
|
||||
} else {
|
||||
} else if (InstClass != MIMG) {
|
||||
GLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::glc)->getImm();
|
||||
if (InstClass != S_BUFFER_LOAD_IMM) {
|
||||
SLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::slc)->getImm();
|
||||
@@ -588,7 +635,44 @@ static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
|
||||
return MMO;
|
||||
}
|
||||
|
||||
bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII) {
|
||||
assert(CI.InstClass == MIMG);
|
||||
|
||||
// Ignore instructions with tfe/lwe set.
|
||||
const auto *TFEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::tfe);
|
||||
const auto *LWEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::lwe);
|
||||
|
||||
if ((TFEOp && TFEOp->getImm()) || (LWEOp && LWEOp->getImm()))
|
||||
return false;
|
||||
|
||||
// Check other optional immediate operands for equality.
|
||||
unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
|
||||
AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
|
||||
AMDGPU::OpName::da, AMDGPU::OpName::r128};
|
||||
|
||||
for (auto op : OperandsToMatch) {
|
||||
int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
|
||||
if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx)
|
||||
return false;
|
||||
if (Idx != -1 &&
|
||||
CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm())
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check DMask for overlaps.
|
||||
unsigned MaxMask = std::max(CI.DMask0, CI.DMask1);
|
||||
unsigned MinMask = std::min(CI.DMask0, CI.DMask1);
|
||||
|
||||
unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask);
|
||||
if ((1u << AllowedBitsForMin) <= MinMask)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
|
||||
assert(CI.InstClass != MIMG);
|
||||
|
||||
// XXX - Would the same offset be OK? Is there any reason this would happen or
|
||||
// be useful?
|
||||
if (CI.Offset0 == CI.Offset1)
|
||||
@@ -744,13 +828,18 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
|
||||
if (Match) {
|
||||
CI.setPaired(MBBI, *TII);
|
||||
|
||||
// Check both offsets fit in the reduced range.
|
||||
// Check both offsets (or masks for MIMG) can be combined and fit in the
|
||||
// reduced range.
|
||||
bool canBeCombined =
|
||||
CI.InstClass == MIMG
|
||||
? dmasksCanBeCombined(CI, *TII)
|
||||
: widthsFit(*STM, CI) && offsetsCanBeCombined(CI);
|
||||
|
||||
// We also need to go through the list of instructions that we plan to
|
||||
// move and make sure they are all safe to move down past the merged
|
||||
// instruction.
|
||||
if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
|
||||
if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
|
||||
return true;
|
||||
if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
|
||||
return true;
|
||||
}
|
||||
|
||||
// We've found a load/store that we couldn't merge for some reason.
|
||||
@@ -945,6 +1034,60 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
|
||||
return Write2;
|
||||
}
|
||||
|
||||
/// Replace the two MIMG instructions CI.I and CI.Paired with one instruction
/// whose dmask is CI.DMask0 | CI.DMask1, followed by two COPYs that move the
/// sub-registers of the merged result back into the original destination
/// operands.
///
/// All new instructions are inserted immediately before CI.Paired; the
/// instructions in CI.InstsToMove are then placed after the second COPY and
/// both original instructions are erased.
///
/// \returns an iterator to the newly built merged instruction.
MachineBasicBlock::iterator
SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) {
  MachineBasicBlock *Block = CI.I->getParent();
  DebugLoc Loc = CI.I->getDebugLoc();
  const unsigned MergedOpc = getNewOpcode(CI);

  // Destination register wide enough for the merged result.
  const TargetRegisterClass *WideRC = getTargetRegisterClass(CI);
  Register WideReg = MRI->createVirtualRegister(WideRC);

  const unsigned CombinedDMask = CI.DMask0 | CI.DMask1;
  const unsigned DMaskOpIdx =
      AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask);

  // Clone every operand of CI.I after operand 0 onto the merged instruction,
  // substituting the combined dmask for the original one.
  auto Builder = BuildMI(*Block, CI.Paired, Loc, TII->get(MergedOpc), WideReg);
  const unsigned NumOps = CI.I->getNumOperands();
  for (unsigned OpNo = 1; OpNo != NumOps; ++OpNo) {
    if (OpNo != DMaskOpIdx) {
      Builder.add(CI.I->getOperand(OpNo));
      continue;
    }
    Builder.addImm(CombinedDMask);
  }

  // It shouldn't be possible to get this far if the two instructions
  // don't have a single memoperand, because MachineInstr::mayAlias()
  // will return true if this is the case.
  assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());

  const MachineMemOperand *FirstMMO = *CI.I->memoperands_begin();
  const MachineMemOperand *SecondMMO = *CI.Paired->memoperands_begin();

  MachineInstr *Merged = Builder.addMemOperand(
      combineKnownAdjacentMMOs(*Block->getParent(), FirstMMO, SecondMMO));

  // Sub-register indices that select each original result out of WideReg.
  std::pair<unsigned, unsigned> SubIdxPair = getSubRegIdxs(CI);
  const unsigned FirstSubIdx = SubIdxPair.first;
  const unsigned SecondSubIdx = SubIdxPair.second;

  // Copy to the old destination registers.
  const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
  const auto *FirstDst = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
  const auto *SecondDst =
      TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);

  // Copy to same destination including flags and sub reg.
  BuildMI(*Block, CI.Paired, Loc, CopyDesc)
      .add(*FirstDst)
      .addReg(WideReg, 0, FirstSubIdx);
  // The second COPY is the last use of WideReg, hence the kill flag.
  MachineInstr *LastCopy = BuildMI(*Block, CI.Paired, Loc, CopyDesc)
                               .add(*SecondDst)
                               .addReg(WideReg, RegState::Kill, SecondSubIdx);

  moveInstsAfter(LastCopy, CI.InstsToMove);

  CI.I->eraseFromParent();
  CI.Paired->eraseFromParent();
  return Merged;
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
|
||||
MachineBasicBlock *MBB = CI.I->getParent();
|
||||
@@ -1077,6 +1220,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
|
||||
case 4:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
|
||||
}
|
||||
case MIMG:
|
||||
assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width));
|
||||
return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1086,7 +1232,13 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
|
||||
if (CI.Width0 == 0 || CI.Width0 == 0 || CI.Width0 + CI.Width1 > 4)
|
||||
return std::make_pair(0, 0);
|
||||
|
||||
bool ReverseOrder = CI.Offset0 > CI.Offset1;
|
||||
bool ReverseOrder;
|
||||
if (CI.InstClass == MIMG) {
|
||||
assert((countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1) &&
|
||||
"No overlaps");
|
||||
ReverseOrder = CI.DMask0 > CI.DMask1;
|
||||
} else
|
||||
ReverseOrder = CI.Offset0 > CI.Offset1;
|
||||
|
||||
static const unsigned Idxs[4][4] = {
|
||||
{AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
|
||||
@@ -1655,6 +1807,15 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
|
||||
OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
|
||||
}
|
||||
break;
|
||||
case MIMG:
|
||||
if (findMatchingInst(CI)) {
|
||||
Modified = true;
|
||||
removeCombinedInst(MergeList, *CI.Paired);
|
||||
MachineBasicBlock::iterator NewMI = mergeImagePair(CI);
|
||||
CI.setMI(NewMI, *TII, *STM);
|
||||
OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Clear the InstsToMove after we have finished searching so we don't have
|
||||
// stale values left over if we search for this CI again in another pass
|
||||
|
||||
@@ -0,0 +1,471 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
# GFX9-LABEL: name: image_load_merged_v1v3
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
|
||||
|
||||
name: image_load_merged_v1v3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
# GFX9-LABEL: name: image_load_merged_v1v3_reversed
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2
|
||||
|
||||
name: image_load_merged_v1v3_reversed
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_merged_v2v2
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
|
||||
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
|
||||
|
||||
name: image_load_merged_v2v2
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
%7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_merged_v2v2_reversed
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
|
||||
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
|
||||
|
||||
name: image_load_merged_v2v2_reversed
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
%7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_merged_v3v1
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
|
||||
|
||||
name: image_load_merged_v3v1
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_merged_v3v1_reversed
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
|
||||
|
||||
name: image_load_merged_v3v1_reversed
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_divided_merged
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
|
||||
name: image_load_divided_merged
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
%10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_divided_not_merged
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_divided_not_merged
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vreg_128 = COPY %2
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sreg_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_dmask_overlapped_not_merged
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_dmask_overlapped_not_merged
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_dmask_not_disjoint_not_merged
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_dmask_not_disjoint_not_merged
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_not_merged_0
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_0
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_not_merged_1
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_1
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%5:vgpr_32 = COPY %2.sub3
|
||||
%6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_not_merged_10
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_10
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_not_merged_3
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# GFX9-LABEL: name: image_load_not_merged_4
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_4
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Negative test for the image-load merge: both loads use the same register
# operands (%5, %3), but besides dmask (8 vs 7) they also differ in the third
# immediate after dmask (0 vs 1; presumably the slc modifier -- confirm against
# the MIMG operand order). The checks below expect both loads to be left as
# they are, i.e. not combined.
# GFX9-LABEL: name: image_load_not_merged_5
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_5
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Negative test for the image-load merge: both loads use the same register
# operands (%5, %3), but besides dmask (8 vs 7) they also differ in the fourth
# immediate after dmask (0 vs 1; presumably the r128 modifier -- confirm
# against the MIMG operand order). The checks below expect both loads to be
# left as they are, i.e. not combined.
# GFX9-LABEL: name: image_load_not_merged_6
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_6
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Negative test for the image-load merge: both loads use the same register
# operands (%5, %3), but besides dmask (8 vs 7) the first load has the fifth
# immediate after dmask set to 1 while the second has 0 (presumably the tfe
# modifier -- confirm against the MIMG operand order). The first load is the
# V2 variant with an 8-byte memory operand. The checks below expect both loads
# to be left as they are, i.e. not combined.
# GFX9-LABEL: name: image_load_not_merged_7
|
||||
# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_7
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Negative test for the image-load merge: both loads use the same register
# operands (%5, %3), but besides dmask (8 vs 7) the first load has the sixth
# immediate after dmask set to 1 while the second has 0 (presumably the lwe
# modifier -- confirm against the MIMG operand order). The first load is the
# V2 variant with an 8-byte memory operand. The checks below expect both loads
# to be left as they are, i.e. not combined.
# GFX9-LABEL: name: image_load_not_merged_8
|
||||
# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_8
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Negative test for the image-load merge: both loads use the same register
# operands (%5, %3), but besides dmask (8 vs 7) they also differ in the seventh
# immediate after dmask (-1 vs 0; presumably the da modifier -- confirm against
# the MIMG operand order). The checks below expect both loads to be left as
# they are, i.e. not combined.
# GFX9-LABEL: name: image_load_not_merged_9
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
|
||||
name: image_load_not_merged_9
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Positive test for the image-load merge on IMAGE_LOAD_MIP: the V1 and V3
# loads share their register operands (%5, %3) and every immediate except
# dmask (1 and 14). The checks below expect a single IMAGE_LOAD_MIP_V4_V4 with
# dmask 15 and a 16-byte memory operand, followed by a COPY of sub0 into a
# vgpr_32 and a COPY of sub1_sub2_sub3 into a vreg_96.
# GFX9-LABEL: name: image_load_mip_merged_v1v3
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
|
||||
|
||||
name: image_load_mip_merged_v1v3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
|
||||
|
||||
# Positive test for the image-load merge on IMAGE_LOAD_MIP_PCK: the V1 and V3
# loads share their register operands (%5, %3) and every immediate except
# dmask (1 and 14). The checks below expect a single IMAGE_LOAD_MIP_PCK_V4_V4
# with dmask 15 and a 16-byte memory operand, followed by a COPY of sub0 into
# a vgpr_32 and a COPY of sub1_sub2_sub3 into a vreg_96.
# GFX9-LABEL: name: image_load_mip_pck_merged_v1v3
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
|
||||
|
||||
name: image_load_mip_pck_merged_v1v3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
|
||||
|
||||
# Positive test for the image-load merge on IMAGE_LOAD_MIP_PCK_SGN: the V1 and
# V3 loads share their register operands (%5, %3) and every immediate except
# dmask (1 and 14). The checks below expect a single
# IMAGE_LOAD_MIP_PCK_SGN_V4_V4 with dmask 15 and a 16-byte memory operand,
# followed by a COPY of sub0 into a vgpr_32 and a COPY of sub1_sub2_sub3 into
# a vreg_96.
# GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
|
||||
|
||||
name: image_load_mip_pck_sgn_merged_v1v3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Positive test for the image-load merge on IMAGE_LOAD_PCK: the V1 and V3
# loads share their register operands (%5, %3) and every immediate except
# dmask (1 and 14). The checks below expect a single IMAGE_LOAD_PCK_V4_V4 with
# dmask 15 and a 16-byte memory operand, followed by a COPY of sub0 into a
# vgpr_32 and a COPY of sub1_sub2_sub3 into a vreg_96.
# GFX9-LABEL: name: image_load_pck_merged_v1v3
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
|
||||
|
||||
name: image_load_pck_merged_v1v3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
|
||||
# Positive test for the image-load merge on IMAGE_LOAD_PCK_SGN: the V1 and V3
# loads share their register operands (%5, %3) and every immediate except
# dmask (1 and 14). The checks below expect a single IMAGE_LOAD_PCK_SGN_V4_V4
# with dmask 15 and a 16-byte memory operand, followed by a COPY of sub0 into
# a vgpr_32 and a COPY of sub1_sub2_sub3 into a vreg_96.
# GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3
|
||||
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
|
||||
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
|
||||
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
|
||||
|
||||
name: image_load_pck_sgn_merged_v1v3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
||||
%3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
|
||||
%4:vgpr_32 = COPY %2.sub3
|
||||
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
|
||||
%6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
|
||||
%7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
|
||||
...
|
||||
---
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user