mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-26 14:25:18 +00:00
R600/SI: Lower 64-bit immediates using REG_SEQUENCE
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205561 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6d394eda49
commit
50c16fb65c
@ -48,6 +48,7 @@ public:
|
||||
virtual void PostprocessISelDAG();
|
||||
|
||||
private:
|
||||
bool isInlineImmediate(SDNode *N) const;
|
||||
inline SDValue getSmallIPtrImm(unsigned Imm);
|
||||
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
|
||||
const R600InstrInfo *TII);
|
||||
@ -103,6 +104,12 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
|
||||
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
|
||||
}
|
||||
|
||||
/// \brief Check whether \p N is reported as an inline immediate.
///
/// Forwards \p N to SITargetLowering::analyzeImmediate() and compares the
/// result against 0.
/// NOTE(review): 0 is presumably the "fits as an inline constant" result of
/// analyzeImmediate(); its other return values are not interpreted here --
/// confirm against SITargetLowering.
///
/// \returns true if analyzeImmediate() returned 0 for \p N, false otherwise.
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
|
||||
// Unchecked downcast: static_cast performs no runtime type check, so this
// assumes the active target lowering really is an SITargetLowering.
const SITargetLowering *TL
|
||||
= static_cast<const SITargetLowering *>(getTargetLowering());
|
||||
return TL->analyzeImmediate(N) == 0;
|
||||
}
|
||||
|
||||
/// \brief Determine the register class for \p OpNo
|
||||
/// \returns The register class of the virtual register that will be used for
|
||||
/// the given operand number \p OpNo or NULL if the register class cannot be
|
||||
@ -357,6 +364,37 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
|
||||
SDLoc(N), N->getValueType(0), Ops);
|
||||
}
|
||||
|
||||
case ISD::Constant:
|
||||
case ISD::ConstantFP: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
|
||||
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
|
||||
break;
|
||||
|
||||
uint64_t Imm;
|
||||
if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
|
||||
Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
|
||||
else {
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
|
||||
assert(C);
|
||||
Imm = C->getZExtValue();
|
||||
}
|
||||
|
||||
SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
|
||||
CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
|
||||
SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
|
||||
CurDAG->getConstant(Imm >> 32, MVT::i32));
|
||||
const SDValue Ops[] = {
|
||||
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
|
||||
SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
|
||||
SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
|
||||
};
|
||||
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
|
||||
N->getValueType(0), Ops);
|
||||
}
|
||||
|
||||
case AMDGPUISD::REGISTER_LOAD: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
|
@ -1029,9 +1029,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
|
||||
return -1;
|
||||
}
|
||||
Imm.I = Node->getSExtValue();
|
||||
} else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
|
||||
} else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
|
||||
if (N->getValueType(0) != MVT::f32)
|
||||
return -1;
|
||||
Imm.F = Node->getValueAPF().convertToFloat();
|
||||
else
|
||||
} else
|
||||
return -1; // It isn't an immediate
|
||||
|
||||
if ((Imm.I >= -16 && Imm.I <= 64) ||
|
||||
|
@ -117,8 +117,7 @@ def mubuf_vaddr_offset : PatFrag<
|
||||
>;
|
||||
|
||||
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
|
||||
return
|
||||
(*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0;
|
||||
return isInlineImmediate(N);
|
||||
}]>;
|
||||
|
||||
class SGPRImm <dag frag> : PatLeaf<frag, [{
|
||||
|
@ -1777,21 +1777,6 @@ def : Pat <
|
||||
(S_MOV_B64 InlineImm<i64>:$imm)
|
||||
>;
|
||||
|
||||
// i64 immediates aren't supported in hardware, split them into two 32-bit values
|
||||
def : Pat <
|
||||
(i64 imm:$imm),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
|
||||
(S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
|
||||
(S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(f64 fpimm:$imm),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
|
||||
(V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0),
|
||||
(V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1)
|
||||
>;
|
||||
|
||||
/********** ====================== **********/
|
||||
/********** Interpolation Patterns **********/
|
||||
/********** ====================== **********/
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK: @fconst_f64
|
||||
; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00
|
||||
; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00
|
||||
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 1075052544
|
||||
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
|
||||
|
||||
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
%r1 = load double addrspace(1)* %in
|
||||
|
@ -31,8 +31,9 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
|
||||
|
||||
; SI-LABEL: @trunc_shl_i64:
|
||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}},
|
||||
; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
|
||||
; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
|
||||
; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]],
|
||||
; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
|
||||
; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
|
||||
; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
|
||||
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
|
||||
%aa = add i64 %a, 234 ; Prevent shrinking store.
|
||||
|
Loading…
x
Reference in New Issue
Block a user