AMDGPU: Delete dead code

There is no context where s_mov_b64 is emitted that could potentially be
moved to the VALU.
It is currently only emitted for materializing
immediates, which can't be dependent on vector sources.

The immediate splitting is already done when selecting
constants. I'm not sure in what contexts, if any, the register
splitting would have been used before.
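
For reference, a minimal sketch of what that selection-time splitting looks
like: two s_mov_b32s tied back together with a REG_SEQUENCE. This is
illustrative only (the helper name and surrounding details are assumptions,
not the exact in-tree selection code):

// Assumes the AMDGPU backend's generated headers for opcode, register
// class, and subregister enums.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

SDNode *selectConstant64(SelectionDAG &DAG, const SDLoc &DL, uint64_t Imm) {
  // Materialize each 32-bit half with its own scalar move.
  SDValue Lo = SDValue(
      DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                         DAG.getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)),
      0);
  SDValue Hi = SDValue(
      DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                         DAG.getTargetConstant(Imm >> 32, DL, MVT::i32)),
      0);
  // Recombine the halves into a 64-bit SGPR pair.
  const SDValue Ops[] = {
      DAG.getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      Lo, DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      Hi, DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
  return DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::i64, Ops);
}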

Also clean up the use of s_mov_b64 in place of v_mov_b64_pseudo.
This isn't strictly required; it just skips the extra step
of eliminating the copy from the SReg_64.
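
For context, v_mov_b64_pseudo has no real hardware encoding; it is expanded
after register allocation into two 32-bit VALU moves. A rough sketch of that
expansion (an expandPostRAPseudo-style hook; the helper name and exact
operand details here are assumptions):

// Assumes the AMDGPU backend's generated headers for the opcode and
// subregister enums.
void expandVMovB64(const SIInstrInfo &TII, const SIRegisterInfo &RI,
                   MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) {
  DebugLoc DL = MI->getDebugLoc();
  unsigned Dst = MI->getOperand(0).getReg();
  uint64_t Imm = MI->getOperand(1).getImm();

  // Write each 32-bit half of the VGPR pair with v_mov_b32, marking the
  // full 64-bit register as implicitly defined.
  BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32),
          RI.getSubReg(Dst, AMDGPU::sub0))
      .addImm(Imm & 0xFFFFFFFF)
      .addReg(Dst, RegState::Implicit | RegState::Define);
  BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32),
          RI.getSubReg(Dst, AMDGPU::sub1))
      .addImm(Imm >> 32)
      .addReg(Dst, RegState::Implicit | RegState::Define);
  MI->eraseFromParent();
}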

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246080 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault 2015-08-26 20:48:08 +00:00
parent 9506598fb2
commit 6f57102c6d
3 changed files with 4 additions and 68 deletions

lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -1588,36 +1588,6 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
   return MachineOperand::CreateReg(SubReg, false);
 }
 
-unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
-                                    MachineBasicBlock::iterator MI,
-                                    MachineRegisterInfo &MRI,
-                                    const TargetRegisterClass *RC,
-                                    const MachineOperand &Op) const {
-  MachineBasicBlock *MBB = MI->getParent();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned Dst = MRI.createVirtualRegister(RC);
-
-  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
-                             LoDst)
-    .addImm(Op.getImm() & 0xFFFFFFFF);
-  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
-                             HiDst)
-    .addImm(Op.getImm() >> 32);
-
-  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
-    .addReg(LoDst)
-    .addImm(AMDGPU::sub0)
-    .addReg(HiDst)
-    .addImm(AMDGPU::sub1);
-
-  Worklist.push_back(Lo);
-  Worklist.push_back(Hi);
-
-  return Dst;
-}
-
 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
   assert(Inst->getNumExplicitOperands() == 3);
@@ -2170,30 +2140,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
         moveSMRDToVALU(Inst, MRI);
       }
       break;
-    case AMDGPU::S_MOV_B64: {
-      DebugLoc DL = Inst->getDebugLoc();
-      // If the source operand is a register we can replace this with a
-      // copy.
-      if (Inst->getOperand(1).isReg()) {
-        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
-          .addOperand(Inst->getOperand(0))
-          .addOperand(Inst->getOperand(1));
-        Worklist.push_back(Copy);
-      } else {
-        // Otherwise, we need to split this into two movs, because there is
-        // no 64-bit VALU move instruction.
-        unsigned Reg = Inst->getOperand(0).getReg();
-        unsigned Dst = split64BitImm(Worklist,
-                                     Inst,
-                                     MRI,
-                                     MRI.getRegClass(Reg),
-                                     Inst->getOperand(1));
-        MRI.replaceRegWith(Reg, Dst);
-      }
-
-      Inst->eraseFromParent();
-      continue;
-    }
     case AMDGPU::S_AND_B64:
       splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
       Inst->eraseFromParent();

lib/Target/AMDGPU/SIInstrInfo.h

@@ -39,12 +39,6 @@ private:
                            unsigned SubIdx,
                            const TargetRegisterClass *SubRC) const;
 
-  unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
-                         MachineBasicBlock::iterator MI,
-                         MachineRegisterInfo &MRI,
-                         const TargetRegisterClass *RC,
-                         const MachineOperand &Op) const;
-
   void swapOperands(MachineBasicBlock::iterator Inst) const;
 
   void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,

lib/Target/AMDGPU/SIInstructions.td

@@ -2817,10 +2817,6 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
 // -1. For the non-rtn variants, the manual says it does
 // DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
 // will always do the increment so I'm assuming it's the same.
-//
-// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
-// needs to be a VGPR. The SGPR copy pass will fix this, and it's
-// easier since there is no v_mov_b64.
 class DSAtomicIncRetPat<DS inst, ValueType vt,
                         Instruction LoadImm, PatFrag frag> : Pat <
   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
@@ -2836,9 +2832,9 @@ class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
 
 // 32-bit atomics.
 def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
-                        S_MOV_B32, si_atomic_load_add_local>;
+                        V_MOV_B32_e32, si_atomic_load_add_local>;
 def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
-                        S_MOV_B32, si_atomic_load_sub_local>;
+                        V_MOV_B32_e32, si_atomic_load_sub_local>;
 
 def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
 def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
@@ -2855,9 +2851,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
 
 // 64-bit atomics.
 def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
-                        S_MOV_B64, si_atomic_load_add_local>;
+                        V_MOV_B64_PSEUDO, si_atomic_load_add_local>;
 def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
-                        S_MOV_B64, si_atomic_load_sub_local>;
+                        V_MOV_B64_PSEUDO, si_atomic_load_sub_local>;
 
 def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
 def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
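
As an aside on the DSAtomicIncRetPat comment above: a tiny C++ model of the
manual's described DS_INC semantics, showing why loading D0 with -1 (uint_max)
turns the operation into a plain wrapping increment (illustration only):

#include <cstdint>

// Manual: DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1.
// With D0 == UINT32_MAX the compare only fires when Mem == UINT32_MAX,
// and there the wrapped Mem + 1 is also 0, so the result always equals
// Mem + 1 (mod 2^32), i.e. an atomic add of 1.
uint32_t dsInc(uint32_t Mem, uint32_t D0) {
  return (Mem >= D0) ? 0 : Mem + 1;
}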