AMDGPU: Constant fold when immediate is materialized
In future commits these patterns will appear after moveToVALU changes.

llvm-svn: 291615
commit 6b917afcf9
parent 740f03ad29
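The first test in the new MIR file below shows the shape of the fold: both operands of an S_AND_B32 are materialized by moves, so the whole expression folds to a single move of the evaluated constant (1234567 & 9999 == 1543):

    %7 = S_MOV_B32 1234567
    %8 = S_MOV_B32 9999
    %9 = S_AND_B32 killed %7, killed %8, implicit-def dead %scc

is rewritten (per the GCN check lines in the test) to:

    %10 = V_MOV_B32_e32 1543, implicit %exec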
lib/Target/AMDGPU/SIFoldOperands.cpp

@@ -25,25 +25,6 @@ using namespace llvm;
 
 namespace {
 
-class SIFoldOperands : public MachineFunctionPass {
-public:
-  static char ID;
-
-public:
-  SIFoldOperands() : MachineFunctionPass(ID) {
-    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override { return "SI Fold Operands"; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-};
-
 struct FoldCandidate {
   MachineInstr *UseMI;
   union {
@@ -79,6 +60,36 @@ struct FoldCandidate {
   }
 };
 
+class SIFoldOperands : public MachineFunctionPass {
+public:
+  static char ID;
+  MachineRegisterInfo *MRI;
+  const SIInstrInfo *TII;
+  const SIRegisterInfo *TRI;
+
+  void foldOperand(MachineOperand &OpToFold,
+                   MachineInstr *UseMI,
+                   unsigned UseOpIdx,
+                   SmallVectorImpl<FoldCandidate> &FoldList,
+                   SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
+
+  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+
+public:
+  SIFoldOperands() : MachineFunctionPass(ID) {
+    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return "SI Fold Operands"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
 } // End anonymous namespace.
 
 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
@@ -141,7 +152,7 @@ static bool updateOperand(FoldCandidate &Fold,
   return false;
 }
 
-static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
+static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
                               const MachineInstr *MI) {
   for (auto Candidate : FoldList) {
     if (Candidate.UseMI == MI)
@@ -150,7 +161,7 @@ static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
   return false;
 }
 
-static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                              MachineInstr *MI, unsigned OpNo,
                              MachineOperand *OpToFold,
                              const SIInstrInfo *TII) {
@@ -227,12 +238,12 @@ static bool isUseSafeToFold(const MachineInstr &MI,
   //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
 
-static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
-                        unsigned UseOpIdx,
-                        std::vector<FoldCandidate> &FoldList,
-                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
-                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
-                        MachineRegisterInfo &MRI) {
+void SIFoldOperands::foldOperand(
+  MachineOperand &OpToFold,
+  MachineInstr *UseMI,
+  unsigned UseOpIdx,
+  SmallVectorImpl<FoldCandidate> &FoldList,
+  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
 
   if (!isUseSafeToFold(*UseMI, UseOp))
@@ -264,7 +275,7 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
     unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
 
     for (MachineRegisterInfo::use_iterator
-           RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
+           RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
          RSUse != RSE; ++RSUse) {
 
       MachineInstr *RSUseMI = RSUse->getParent();
@@ -272,7 +283,7 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
         continue;
 
       foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
-                  CopiesToReplace, TII, TRI, MRI);
+                  CopiesToReplace);
     }
 
     return;
@@ -287,8 +298,8 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
     unsigned DestReg = UseMI->getOperand(0).getReg();
     const TargetRegisterClass *DestRC
       = TargetRegisterInfo::isVirtualRegister(DestReg) ?
-      MRI.getRegClass(DestReg) :
-      TRI.getPhysRegClass(DestReg);
+      MRI->getRegClass(DestReg) :
+      TRI->getPhysRegClass(DestReg);
 
     unsigned MovOp = TII->getMovOpcode(DestRC);
     if (MovOp == AMDGPU::COPY)
@@ -318,7 +329,7 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
 
   const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
   const TargetRegisterClass *FoldRC =
-    TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+    TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
 
   APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
             OpToFold.getImm());
@@ -328,8 +339,8 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
     unsigned UseReg = UseOp.getReg();
     const TargetRegisterClass *UseRC
       = TargetRegisterInfo::isVirtualRegister(UseReg) ?
-      MRI.getRegClass(UseReg) :
-      TRI.getPhysRegClass(UseReg);
+      MRI->getRegClass(UseReg) :
+      TRI->getPhysRegClass(UseReg);
 
     assert(Imm.getBitWidth() == 64);
 
@@ -349,20 +360,51 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
 }
 
 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
-                                  int32_t LHS, int32_t RHS) {
+                                  uint32_t LHS, uint32_t RHS) {
   switch (Opcode) {
   case AMDGPU::V_AND_B32_e64:
+  case AMDGPU::V_AND_B32_e32:
   case AMDGPU::S_AND_B32:
     Result = LHS & RHS;
     return true;
   case AMDGPU::V_OR_B32_e64:
+  case AMDGPU::V_OR_B32_e32:
   case AMDGPU::S_OR_B32:
     Result = LHS | RHS;
     return true;
   case AMDGPU::V_XOR_B32_e64:
+  case AMDGPU::V_XOR_B32_e32:
   case AMDGPU::S_XOR_B32:
     Result = LHS ^ RHS;
     return true;
+  case AMDGPU::V_LSHL_B32_e64:
+  case AMDGPU::V_LSHL_B32_e32:
+  case AMDGPU::S_LSHL_B32:
+    // The instruction ignores the high bits for out of bounds shifts.
+    Result = LHS << (RHS & 31);
+    return true;
+  case AMDGPU::V_LSHLREV_B32_e64:
+  case AMDGPU::V_LSHLREV_B32_e32:
+    Result = RHS << (LHS & 31);
+    return true;
+  case AMDGPU::V_LSHR_B32_e64:
+  case AMDGPU::V_LSHR_B32_e32:
+  case AMDGPU::S_LSHR_B32:
+    Result = LHS >> (RHS & 31);
+    return true;
+  case AMDGPU::V_LSHRREV_B32_e64:
+  case AMDGPU::V_LSHRREV_B32_e32:
+    Result = RHS >> (LHS & 31);
+    return true;
+  case AMDGPU::V_ASHR_I32_e64:
+  case AMDGPU::V_ASHR_I32_e32:
+  case AMDGPU::S_ASHR_I32:
+    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
+    return true;
+  case AMDGPU::V_ASHRREV_I32_e64:
+  case AMDGPU::V_ASHRREV_I32_e32:
+    Result = static_cast<int32_t>(RHS) >> (LHS & 31);
+    return true;
   default:
     return false;
   }
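A standalone sketch of why the shift cases above mask the amount with 31 and why the sources were changed to uint32_t (illustrative only, not part of the patch; the function names are made up): the hardware reads only the low 5 bits of a 32-bit shift amount, and unsigned operands keep the left shift free of signed-overflow undefined behavior, with an explicit cast recovering the arithmetic right shift.

    #include <cassert>
    #include <cstdint>

    // Mirrors the S_LSHL_B32 evaluation rule: out-of-range shift amounts
    // wrap modulo 32, as on the hardware.
    static uint32_t foldLshl(uint32_t LHS, uint32_t RHS) {
      return LHS << (RHS & 31);
    }

    // Mirrors S_ASHR_I32: reinterpret the first source as signed so the
    // shift replicates the sign bit. (Right shift of a negative value is
    // formally implementation-defined in C++, but arithmetic on every
    // compiler LLVM supports; the patch relies on the same behavior.)
    static int32_t foldAshr(uint32_t LHS, uint32_t RHS) {
      return static_cast<int32_t>(LHS) >> (RHS & 31);
    }

    int main() {
      assert(foldLshl(1, 44) == 4096);        // 44 & 31 == 12
      assert(foldAshr(0xFFFFFFF0u, 2) == -4); // sign-extending shift
    }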
@@ -390,33 +432,47 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
   stripExtraCopyOperands(MI);
 }
 
-// Try to simplify operations with a constant that may appear after instruction
-// selection.
-static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
-                              const SIInstrInfo *TII,
-                              MachineInstr *MI) {
-  unsigned Opc = MI->getOpcode();
+static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
+                                               MachineOperand &Op) {
+  if (Op.isReg()) {
+    // If this has a subregister, it obviously is a register source.
+    if (Op.getSubReg() != AMDGPU::NoSubRegister)
+      return &Op;
 
-  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
-      Opc == AMDGPU::S_NOT_B32) {
-    MachineOperand &Src0 = MI->getOperand(1);
-    if (Src0.isImm()) {
-      Src0.setImm(~Src0.getImm());
-      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
-      return true;
+    MachineInstr *Def = MRI.getVRegDef(Op.getReg());
+    if (Def->isMoveImmediate()) {
+      MachineOperand &ImmSrc = Def->getOperand(1);
+      if (ImmSrc.isImm())
+        return &ImmSrc;
     }
-
-    return false;
   }
 
-  if (!MI->isCommutable())
+  return &Op;
+}
+
+// Try to simplify operations with a constant that may appear after instruction
+// selection.
+// TODO: See if a frame index with a fixed offset can fold.
+static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
+                              const SIInstrInfo *TII,
+                              MachineInstr *MI,
+                              MachineOperand *ImmOp) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
+      Opc == AMDGPU::S_NOT_B32) {
+    MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
+    mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
+    return true;
+  }
+
+  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+  if (Src1Idx == -1)
     return false;
 
   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
-  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
+  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
 
-  MachineOperand *Src0 = &MI->getOperand(Src0Idx);
-  MachineOperand *Src1 = &MI->getOperand(Src1Idx);
   if (!Src0->isImm() && !Src1->isImm())
     return false;
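One subtlety this introduces, spelled out in the next hunk: after getImmOrMaterializedImm, Src0/Src1 may point at the immediate operand of the defining move instruction rather than at an operand of MI itself. Using operands quoted from the new test, hypothetically:

    // %26 = S_MOV_B32 1234567               <- Src1 may alias this operand
    // %27 = V_AND_B32_e64 %24, %26, ...     <- MI being folded
    // Writing through Src1 (the old Src0->setImm path) would rewrite %26,
    // which can have other users; the result must instead be written to
    // MI's own operand, which is what ChangeToImmediate(NewImm) does below.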
@@ -431,19 +487,26 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
     const SIRegisterInfo &TRI = TII->getRegisterInfo();
     bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
 
-    Src0->setImm(NewImm);
+    // Be careful to change the right operand, src0 may belong to a different
+    // instruction.
+    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
     MI->RemoveOperand(Src1Idx);
     mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
     return true;
   }
 
+  if (!MI->isCommutable())
+    return false;
+
+  if (Src0->isImm() && !Src1->isImm()) {
+    std::swap(Src0, Src1);
+    std::swap(Src0Idx, Src1Idx);
+  }
+
   int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
-  if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::S_OR_B32) {
+  if (Opc == AMDGPU::V_OR_B32_e64 ||
+      Opc == AMDGPU::V_OR_B32_e32 ||
+      Opc == AMDGPU::S_OR_B32) {
     if (Src1Val == 0) {
       // y = or x, 0 => y = copy x
       MI->RemoveOperand(Src1Idx);
@@ -459,6 +522,7 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
   }
 
   if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
+      MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
      MI->getOpcode() == AMDGPU::S_AND_B32) {
     if (Src1Val == 0) {
       // y = and x, 0 => y = v_mov_b32 0
@@ -476,29 +540,136 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
   }
 
   if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
+      MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
       MI->getOpcode() == AMDGPU::S_XOR_B32) {
     if (Src1Val == 0) {
       // y = xor x, 0 => y = copy x
       MI->RemoveOperand(Src1Idx);
       mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
       return true;
     }
   }
 
   return false;
 }
 
+void SIFoldOperands::foldInstOperand(MachineInstr &MI,
+                                     MachineOperand &OpToFold) const {
+  // We need mutate the operands of new mov instructions to add implicit
+  // uses of EXEC, but adding them invalidates the use_iterator, so defer
+  // this.
+  SmallVector<MachineInstr *, 4> CopiesToReplace;
+  SmallVector<FoldCandidate, 4> FoldList;
+  MachineOperand &Dst = MI.getOperand(0);
+
+  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+  if (FoldingImm) {
+    unsigned NumLiteralUses = 0;
+    MachineOperand *NonInlineUse = nullptr;
+    int NonInlineUseOpNo = -1;
+
+    MachineRegisterInfo::use_iterator NextUse, NextInstUse;
+    for (MachineRegisterInfo::use_iterator
+           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
+         Use != E; Use = NextUse) {
+      NextUse = std::next(Use);
+      MachineInstr *UseMI = Use->getParent();
+      unsigned OpNo = Use.getOperandNo();
+
+      // Folding the immediate may reveal operations that can be constant
+      // folded or replaced with a copy. This can happen for example after
+      // frame indices are lowered to constants or from splitting 64-bit
+      // constants.
+      //
+      // We may also encounter cases where one or both operands are
+      // immediates materialized into a register, which would ordinarily not
+      // be folded due to multiple uses or operand constraints.
+
+      if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
+        DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n');
+
+        // Some constant folding cases change the same immediate's use to a new
+        // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
+        // again. The same constant folded instruction could also have a second
+        // use operand.
+        NextUse = MRI->use_begin(Dst.getReg());
+        continue;
+      }
+
+      // Try to fold any inline immediate uses, and then only fold other
+      // constants if they have one use.
+      //
+      // The legality of the inline immediate must be checked based on the use
+      // operand, not the defining instruction, because 32-bit instructions
+      // with 32-bit inline immediate sources may be used to materialize
+      // constants used in 16-bit operands.
+      //
+      // e.g. it is unsafe to fold:
+      //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
+      //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
+
+      // Folding immediates with more than one use will increase program size.
+      // FIXME: This will also reduce register usage, which may be better
+      // in some cases. A better heuristic is needed.
+      if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
+        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
+      } else {
+        if (++NumLiteralUses == 1) {
+          NonInlineUse = &*Use;
+          NonInlineUseOpNo = OpNo;
+        }
+      }
+    }
+
+    if (NumLiteralUses == 1) {
+      MachineInstr *UseMI = NonInlineUse->getParent();
+      foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
+    }
+  } else {
+    // Folding register.
+    for (MachineRegisterInfo::use_iterator
+           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
+         Use != E; ++Use) {
+      MachineInstr *UseMI = Use->getParent();
+
+      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
+                  FoldList, CopiesToReplace);
+    }
+  }
+
+  MachineFunction *MF = MI.getParent()->getParent();
+  // Make sure we add EXEC uses to any new v_mov instructions created.
+  for (MachineInstr *Copy : CopiesToReplace)
+    Copy->addImplicitDefUseOperands(*MF);
+
+  for (FoldCandidate &Fold : FoldList) {
+    if (updateOperand(Fold, *TRI)) {
+      // Clear kill flags.
+      if (Fold.isReg()) {
+        assert(Fold.OpToFold && Fold.OpToFold->isReg());
+        // FIXME: Probably shouldn't bother trying to fold if not an
+        // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
+        // copies.
+        MRI->clearKillFlags(Fold.OpToFold->getReg());
+      }
+      DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
+            static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+    }
+  }
+}
+
 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(*MF.getFunction()))
     return false;
 
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-       BI != BE; ++BI) {
+                                                  BI != BE; ++BI) {
 
     MachineBasicBlock &MBB = *BI;
     MachineBasicBlock::iterator I, Next;
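The use-list walk in foldInstOperand above caches NextUse but resets it to use_begin() whenever a fold rewrites the user. A minimal generic sketch of that idiom (plain C++ with illustrative names, not LLVM's API); in the pass this terminates because each successful constant fold strictly simplifies an instruction:

    #include <iterator>
    #include <list>

    // Walk a mutable "use list" where visiting an element may restructure
    // the list (as tryConstantFoldOp can). On mutation, restart from the
    // beginning instead of trusting the precomputed successor, which may
    // have been invalidated.
    template <typename T, typename VisitFn>
    void visitUses(std::list<T> &Uses, VisitFn Visit) {
      for (auto I = Uses.begin(); I != Uses.end();) {
        auto Next = std::next(I);
        if (Visit(*I))          // true => the list may have changed under us
          Next = Uses.begin();  // conservative: rescan from the start
        I = Next;
      }
    }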
@@ -512,8 +683,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       MachineOperand &OpToFold = MI.getOperand(1);
       bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
 
-      // FIXME: We could also be folding things like FrameIndexes and
-      // TargetIndexes.
+      // FIXME: We could also be folding things like TargetIndexes.
       if (!FoldingImm && !OpToFold.isReg())
         continue;
@@ -532,90 +702,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
           !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
         continue;
 
-      // We need mutate the operands of new mov instructions to add implicit
-      // uses of EXEC, but adding them invalidates the use_iterator, so defer
-      // this.
-      SmallVector<MachineInstr *, 4> CopiesToReplace;
-
-      std::vector<FoldCandidate> FoldList;
-      if (FoldingImm) {
-        unsigned NumLiteralUses = 0;
-        MachineOperand *NonInlineUse = nullptr;
-        int NonInlineUseOpNo = -1;
-
-        // Try to fold any inline immediate uses, and then only fold other
-        // constants if they have one use.
-        //
-        // The legality of the inline immediate must be checked based on the use
-        // operand, not the defining instruction, because 32-bit instructions
-        // with 32-bit inline immediate sources may be used to materialize
-        // constants used in 16-bit operands.
-        //
-        // e.g. it is unsafe to fold:
-        //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
-        //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
-
-        // Folding immediates with more than one use will increase program size.
-        // FIXME: This will also reduce register usage, which may be better
-        // in some cases. A better heuristic is needed.
-        for (MachineRegisterInfo::use_iterator
-               Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
-             Use != E; ++Use) {
-          MachineInstr *UseMI = Use->getParent();
-          unsigned OpNo = Use.getOperandNo();
-
-          if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
-            foldOperand(OpToFold, UseMI, OpNo, FoldList,
-                        CopiesToReplace, TII, TRI, MRI);
-          } else {
-            if (++NumLiteralUses == 1) {
-              NonInlineUse = &*Use;
-              NonInlineUseOpNo = OpNo;
-            }
-          }
-        }
-
-        if (NumLiteralUses == 1) {
-          MachineInstr *UseMI = NonInlineUse->getParent();
-          foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList,
-                      CopiesToReplace, TII, TRI, MRI);
-        }
-      } else {
-        // Folding register.
-        for (MachineRegisterInfo::use_iterator
-               Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
-             Use != E; ++Use) {
-          MachineInstr *UseMI = Use->getParent();
-
-          foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
-                      CopiesToReplace, TII, TRI, MRI);
-        }
-      }
-
-      // Make sure we add EXEC uses to any new v_mov instructions created.
-      for (MachineInstr *Copy : CopiesToReplace)
-        Copy->addImplicitDefUseOperands(MF);
-
-      for (FoldCandidate &Fold : FoldList) {
-        if (updateOperand(Fold, TRI)) {
-          // Clear kill flags.
-          if (Fold.isReg()) {
-            assert(Fold.OpToFold && Fold.OpToFold->isReg());
-            // FIXME: Probably shouldn't bother trying to fold if not an
-            // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
-            // copies.
-            MRI.clearKillFlags(Fold.OpToFold->getReg());
-          }
-          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
-                static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
-
-          // Folding the immediate may reveal operations that can be constant
-          // folded or replaced with a copy. This can happen for example after
-          // frame indices are lowered to constants or from splitting 64-bit
-          // constants.
-          tryConstantFoldOp(MRI, TII, Fold.UseMI);
-        }
-      }
+      foldInstOperand(MI, OpToFold);
     }
   }
   return false;
858 test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (new file)

@@ -0,0 +1,858 @@
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
--- |
  define void @s_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
    %and = and i32 %a, 1234567
    store volatile i32 %and, i32 addrspace(1)* %out
    ret void
  }

  define void @v_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %idxprom = sext i32 %tid to i64
    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
    %a = load i32, i32 addrspace(1)* %gep.a
    %and = and i32 %a, 1234567
    store i32 %and, i32 addrspace(1)* %gep.out
    ret void
  }

  define void @s_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
    %shl = shl i32 %a, 12
    store volatile i32 %shl, i32 addrspace(1)* %out
    ret void
  }

  define void @v_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %idxprom = sext i32 %tid to i64
    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
    %a = load i32, i32 addrspace(1)* %gep.a
    %shl = shl i32 %a, 12
    store i32 %shl, i32 addrspace(1)* %gep.out
    ret void
  }

  define void @s_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
    %ashr = ashr i32 %a, 12
    store volatile i32 %ashr, i32 addrspace(1)* %out
    ret void
  }

  define void @v_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %idxprom = sext i32 %tid to i64
    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
    %a = load i32, i32 addrspace(1)* %gep.a
    %ashr = ashr i32 %a, 12
    store i32 %ashr, i32 addrspace(1)* %gep.out
    ret void
  }

  define void @s_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
    %lshr = lshr i32 %a, 12
    store volatile i32 %lshr, i32 addrspace(1)* %out
    ret void
  }

  define void @v_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %idxprom = sext i32 %tid to i64
    %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
    %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
    %a = load i32, i32 addrspace(1)* %gep.a
    %lshr = lshr i32 %a, 12
    store i32 %lshr, i32 addrspace(1)* %gep.out
    ret void
  }

  declare i32 @llvm.amdgcn.workitem.id.x() #1

  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }

...
---
# GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
# GCN: %10 = V_MOV_B32_e32 1543, implicit %exec
# GCN: BUFFER_STORE_DWORD_OFFSET killed %10,
name: s_fold_and_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_64_xexec }
  - { id: 2, class: sreg_32_xm0 }
  - { id: 3, class: sreg_32_xm0 }
  - { id: 4, class: sreg_32_xm0 }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: sreg_128 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1

    %0 = COPY %sgpr0_sgpr1
    %1 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %2 = COPY %1.sub1
    %3 = COPY %1.sub0
    %4 = S_MOV_B32 61440
    %5 = S_MOV_B32 -1
    %6 = REG_SEQUENCE killed %2, 1, killed %3, 2, killed %4, 3, killed %5, 4
    %7 = S_MOV_B32 1234567
    %8 = S_MOV_B32 9999
    %9 = S_AND_B32 killed %7, killed %8, implicit-def dead %scc
    %10 = COPY %9
    BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
    S_ENDPGM

...
---
# GCN-LABEL: name: v_fold_and_imm_regimm_32{{$}}

# GCN: %9 = V_MOV_B32_e32 646, implicit %exec
# GCN: FLAT_STORE_DWORD %19, %9,

# GCN: %10 = V_MOV_B32_e32 646, implicit %exec
# GCN: FLAT_STORE_DWORD %19, %10

# GCN: %11 = V_MOV_B32_e32 646, implicit %exec
# GCN: FLAT_STORE_DWORD %19, %11,

# GCN: %12 = V_MOV_B32_e32 1234567, implicit %exec
# GCN: FLAT_STORE_DWORD %19, %12,

# GCN: %13 = V_MOV_B32_e32 63, implicit %exec
# GCN: FLAT_STORE_DWORD %19, %13,

name: v_fold_and_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 20, class: sreg_32_xm0 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: sreg_32_xm0 }
  - { id: 27, class: vgpr_32 }
  - { id: 28, class: vgpr_32 }
  - { id: 29, class: vgpr_32 }
  - { id: 30, class: vgpr_32 }
  - { id: 31, class: vgpr_32 }
  - { id: 32, class: vreg_64 }
  - { id: 33, class: vreg_64 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
  - { id: 36, class: vgpr_32 }
  - { id: 37, class: vreg_64 }
  - { id: 44, class: vgpr_32 }

liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '%vgpr0', virtual-reg: '%3' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1, %vgpr0

    %3 = COPY %vgpr0
    %0 = COPY %sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %31 = V_ASHRREV_I32_e64 31, %3, implicit %exec
    %32 = REG_SEQUENCE %3, 1, %31, 2
    %33 = V_LSHLREV_B64 2, killed %32, implicit %exec
    %20 = COPY %4.sub1
    %44 = V_ADD_I32_e32 %4.sub0, %33.sub0, implicit-def %vcc, implicit %exec
    %36 = COPY killed %20
    %35 = V_ADDC_U32_e32 %33.sub1, %36, implicit-def %vcc, implicit %vcc, implicit %exec
    %37 = REG_SEQUENCE %44, 1, killed %35, 2
    %24 = V_MOV_B32_e32 982, implicit %exec
    %26 = S_MOV_B32 1234567
    %34 = V_MOV_B32_e32 63, implicit %exec

    %27 = V_AND_B32_e64 %26, %24, implicit %exec
    FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %28 = V_AND_B32_e64 %24, %26, implicit %exec
    FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %29 = V_AND_B32_e32 %26, %24, implicit %exec
    FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %30 = V_AND_B32_e64 %26, %26, implicit %exec
    FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %31 = V_AND_B32_e64 %34, %34, implicit %exec
    FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    S_ENDPGM

...
---
# GCN-LABEL: name: s_fold_shl_imm_regimm_32{{$}}
# GCN: %13 = V_MOV_B32_e32 4096, implicit %exec
# GCN: BUFFER_STORE_DWORD_OFFSET killed %13,

name: s_fold_shl_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sreg_32_xm0 }
  - { id: 13, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1

    %0 = COPY %sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %5 = S_MOV_B32 1
    %6 = COPY %4.sub1
    %7 = COPY %4.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
    %12 = S_LSHL_B32 killed %5, 12, implicit-def dead %scc
    %13 = COPY %12
    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
    S_ENDPGM

...
---
# GCN-LABEL: name: v_fold_shl_imm_regimm_32{{$}}

# GCN: %11 = V_MOV_B32_e32 40955904, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %11,

# GCN: %12 = V_MOV_B32_e32 24, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %12,

# GCN: %13 = V_MOV_B32_e32 4096, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %13,

# GCN: %14 = V_MOV_B32_e32 24, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %14,

# GCN: %15 = V_MOV_B32_e32 0, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %15,

# GCN: %22 = V_MOV_B32_e32 4096, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %22,

# GCN: %23 = V_MOV_B32_e32 1, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %23,

# GCN: %25 = V_MOV_B32_e32 2, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %25,

# GCN: %26 = V_MOV_B32_e32 7927808, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %26,

# GCN: %28 = V_MOV_B32_e32 -8, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %28,

name: v_fold_shl_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: vgpr_32 }
  - { id: 3, class: sreg_64_xexec }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_64 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vreg_64 }
  - { id: 17, class: vreg_64 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vreg_64 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: sreg_32_xm0 }
  - { id: 28, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '%vgpr0', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1, %vgpr0

    %2 = COPY %vgpr0
    %0 = COPY %sgpr0_sgpr1
    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
    %16 = REG_SEQUENCE %2, 1, %15, 2
    %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
    %9 = COPY %3.sub1
    %21 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def %vcc, implicit %exec
    %19 = COPY killed %9
    %18 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def %vcc, implicit %vcc, implicit %exec
    %20 = REG_SEQUENCE %21, 1, killed %18, 2
    %10 = V_MOV_B32_e32 9999, implicit %exec
    %24 = V_MOV_B32_e32 3871, implicit %exec
    %6 = V_MOV_B32_e32 1, implicit %exec
    %7 = S_MOV_B32 1
    %27 = S_MOV_B32 -4

    %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec
    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec
    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %14 = V_LSHL_B32_e64 12, %7, implicit %exec
    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %15 = V_LSHL_B32_e64 12, %24, implicit %exec
    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %22 = V_LSHL_B32_e64 %6, 12, implicit %exec
    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %23 = V_LSHL_B32_e64 %6, 32, implicit %exec
    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %25 = V_LSHL_B32_e32 %6, %6, implicit %exec
    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec
    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %28 = V_LSHL_B32_e32 %27, %6, implicit %exec
    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    S_ENDPGM

...
---
# GCN-LABEL: name: s_fold_ashr_imm_regimm_32{{$}}
# GCN: %11 = V_MOV_B32_e32 243, implicit %exec
# GCN: BUFFER_STORE_DWORD_OFFSET killed %11, killed %8,
name: s_fold_ashr_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sreg_32_xm0 }
  - { id: 13, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1

    %0 = COPY %sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %5 = S_MOV_B32 999123
    %6 = COPY %4.sub1
    %7 = COPY %4.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
    %12 = S_ASHR_I32 killed %5, 12, implicit-def dead %scc
    %13 = COPY %12
    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
    S_ENDPGM

...
---
# GCN-LABEL: name: v_fold_ashr_imm_regimm_32{{$}}
# GCN: %11 = V_MOV_B32_e32 3903258, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %11,

# GCN: %12 = V_MOV_B32_e32 62452139, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %12,

# GCN: %13 = V_MOV_B32_e32 1678031, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %13,

# GCN: %14 = V_MOV_B32_e32 3, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %14,

# GCN: %15 = V_MOV_B32_e32 -1, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %15,

# GCN: %22 = V_MOV_B32_e32 62500, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %22,

# GCN: %23 = V_MOV_B32_e32 500000, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %23,

# GCN: %25 = V_MOV_B32_e32 1920, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %25,

# GCN: %26 = V_MOV_B32_e32 487907, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %26,

# GCN: %28 = V_MOV_B32_e32 -1, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %28,

name: v_fold_ashr_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: vgpr_32 }
  - { id: 3, class: sreg_64_xexec }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vreg_64 }
  - { id: 17, class: vreg_64 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vreg_64 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: sreg_32_xm0 }
  - { id: 28, class: vgpr_32 }
  - { id: 32, class: sreg_32_xm0 }
  - { id: 33, class: sreg_32_xm0 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '%vgpr0', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1, %vgpr0

    %2 = COPY %vgpr0
    %0 = COPY %sgpr0_sgpr1
    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
    %16 = REG_SEQUENCE %2, 1, %15, 2
    %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
    %9 = COPY %3.sub1
    %21 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def %vcc, implicit %exec
    %19 = COPY killed %9
    %18 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def %vcc, implicit %vcc, implicit %exec
    %20 = REG_SEQUENCE %21, 1, killed %18, 2
    %10 = V_MOV_B32_e32 999234234, implicit %exec
    %24 = V_MOV_B32_e32 3871, implicit %exec
    %6 = V_MOV_B32_e32 1000000, implicit %exec
    %7 = S_MOV_B32 13424252
    %8 = S_MOV_B32 4
    %27 = S_MOV_B32 -4
    %32 = S_MOV_B32 1
    %33 = S_MOV_B32 3841
    %34 = V_MOV_B32_e32 3841, implicit %exec
    %35 = V_MOV_B32_e32 2, implicit %exec

    %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec
    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec
    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %13 = V_ASHR_I32_e64 %7, 3, implicit %exec
    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %14 = V_ASHR_I32_e64 7, %32, implicit %exec
    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %15 = V_ASHR_I32_e64 %27, %24, implicit %exec
    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %22 = V_ASHR_I32_e64 %6, 4, implicit %exec
    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %23 = V_ASHR_I32_e64 %6, %33, implicit %exec
    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %25 = V_ASHR_I32_e32 %34, %34, implicit %exec
    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec
    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %28 = V_ASHR_I32_e32 %27, %35, implicit %exec
    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    S_ENDPGM

...
---
# GCN-LABEL: name: s_fold_lshr_imm_regimm_32{{$}}
# GCN: %11 = V_MOV_B32_e32 1048332, implicit %exec
# GCN: BUFFER_STORE_DWORD_OFFSET killed %11, killed %8,
name: s_fold_lshr_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sreg_32_xm0 }
  - { id: 13, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1

    %0 = COPY %sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %5 = S_MOV_B32 -999123
    %6 = COPY %4.sub1
    %7 = COPY %4.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
    %12 = S_LSHR_B32 killed %5, 12, implicit-def dead %scc
    %13 = COPY %12
    BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
    S_ENDPGM

...
---
# GCN-LABEL: name: v_fold_lshr_imm_regimm_32{{$}}
# GCN: %11 = V_MOV_B32_e32 3903258, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %11,

# GCN: %12 = V_MOV_B32_e32 62452139, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %12,

# GCN: %13 = V_MOV_B32_e32 1678031, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %13,

# GCN: %14 = V_MOV_B32_e32 3, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %14,

# GCN: %15 = V_MOV_B32_e32 1, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %15,

# GCN: %22 = V_MOV_B32_e32 62500, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %22,

# GCN: %23 = V_MOV_B32_e32 500000, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %23,

# GCN: %25 = V_MOV_B32_e32 1920, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %25,

# GCN: %26 = V_MOV_B32_e32 487907, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %26,

# GCN: %28 = V_MOV_B32_e32 1073741823, implicit %exec
# GCN: FLAT_STORE_DWORD %20, %28,

name: v_fold_lshr_imm_regimm_32
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: vgpr_32 }
  - { id: 3, class: sreg_64_xexec }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_32_xm0 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vreg_64 }
  - { id: 17, class: vreg_64 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vreg_64 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: sreg_32_xm0 }
  - { id: 28, class: vgpr_32 }
  - { id: 32, class: sreg_32_xm0 }
  - { id: 33, class: sreg_32_xm0 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
liveins:
  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '%vgpr0', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    liveins: %sgpr0_sgpr1, %vgpr0

    %2 = COPY %vgpr0
    %0 = COPY %sgpr0_sgpr1
    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
    %16 = REG_SEQUENCE %2, 1, %15, 2
    %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
    %9 = COPY %3.sub1
    %21 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def %vcc, implicit %exec
    %19 = COPY killed %9
    %18 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def %vcc, implicit %vcc, implicit %exec
    %20 = REG_SEQUENCE %21, 1, killed %18, 2
    %10 = V_MOV_B32_e32 999234234, implicit %exec
    %24 = V_MOV_B32_e32 3871, implicit %exec
    %6 = V_MOV_B32_e32 1000000, implicit %exec
    %7 = S_MOV_B32 13424252
    %8 = S_MOV_B32 4
    %27 = S_MOV_B32 -4
    %32 = S_MOV_B32 1
    %33 = S_MOV_B32 3841
    %34 = V_MOV_B32_e32 3841, implicit %exec
    %35 = V_MOV_B32_e32 2, implicit %exec

    %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec
    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec
    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %13 = V_LSHR_B32_e64 %7, 3, implicit %exec
    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %14 = V_LSHR_B32_e64 7, %32, implicit %exec
    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %15 = V_LSHR_B32_e64 %27, %24, implicit %exec
    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %22 = V_LSHR_B32_e64 %6, 4, implicit %exec
    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %23 = V_LSHR_B32_e64 %6, %33, implicit %exec
    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %25 = V_LSHR_B32_e32 %34, %34, implicit %exec
    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec
    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    %28 = V_LSHR_B32_e32 %27, %35, implicit %exec
    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)

    S_ENDPGM

...