mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-14 00:14:19 +00:00
R600: Move code handling literal folding into R600ISelLowering.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190644 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fe78318614
commit
f57d692c11
@ -193,8 +193,6 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
|
||||
}
|
||||
|
||||
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
unsigned int Opc = N->getOpcode();
|
||||
if (N->isMachineOpcode()) {
|
||||
return NULL; // Already selected.
|
||||
@ -310,109 +308,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
|
||||
SDLoc(N), N->getValueType(0), Ops);
|
||||
}
|
||||
|
||||
case ISD::ConstantFP:
|
||||
case ISD::Constant: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
// XXX: Custom immediate lowering not implemented yet. Instead we use
|
||||
// pseudo instructions defined in SIInstructions.td
|
||||
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t ImmValue = 0;
|
||||
unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
|
||||
|
||||
if (N->getOpcode() == ISD::ConstantFP) {
|
||||
// XXX: 64-bit Immediates not supported yet
|
||||
assert(N->getValueType(0) != MVT::f64);
|
||||
|
||||
ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
|
||||
APFloat Value = C->getValueAPF();
|
||||
float FloatValue = Value.convertToFloat();
|
||||
if (FloatValue == 0.0) {
|
||||
ImmReg = AMDGPU::ZERO;
|
||||
} else if (FloatValue == 0.5) {
|
||||
ImmReg = AMDGPU::HALF;
|
||||
} else if (FloatValue == 1.0) {
|
||||
ImmReg = AMDGPU::ONE;
|
||||
} else {
|
||||
ImmValue = Value.bitcastToAPInt().getZExtValue();
|
||||
}
|
||||
} else {
|
||||
// XXX: 64-bit Immediates not supported yet
|
||||
assert(N->getValueType(0) != MVT::i64);
|
||||
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
|
||||
if (C->getZExtValue() == 0) {
|
||||
ImmReg = AMDGPU::ZERO;
|
||||
} else if (C->getZExtValue() == 1) {
|
||||
ImmReg = AMDGPU::ONE_INT;
|
||||
} else {
|
||||
ImmValue = C->getZExtValue();
|
||||
}
|
||||
}
|
||||
|
||||
for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
|
||||
Use != SDNode::use_end(); Use = Next) {
|
||||
Next = llvm::next(Use);
|
||||
std::vector<SDValue> Ops;
|
||||
for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
|
||||
Ops.push_back(Use->getOperand(i));
|
||||
}
|
||||
|
||||
if (!Use->isMachineOpcode()) {
|
||||
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
|
||||
// We can only use literal constants (e.g. AMDGPU::ZERO,
|
||||
// AMDGPU::ONE, etc) in machine opcodes.
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
switch(Use->getMachineOpcode()) {
|
||||
case AMDGPU::REG_SEQUENCE: break;
|
||||
default:
|
||||
if (!TII->isALUInstr(Use->getMachineOpcode()) ||
|
||||
(TII->get(Use->getMachineOpcode()).TSFlags &
|
||||
R600_InstFlag::VECTOR)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that we aren't already using an immediate.
|
||||
// XXX: It's possible for an instruction to have more than one
|
||||
// immediate operand, but this is not supported yet.
|
||||
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
|
||||
int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
|
||||
AMDGPU::OpName::literal);
|
||||
if (ImmIdx == -1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (TII->getOperandIdx(Use->getMachineOpcode(),
|
||||
AMDGPU::OpName::dst) != -1) {
|
||||
// subtract one from ImmIdx, because the DST operand is usually index
|
||||
// 0 for MachineInstrs, but we have no DST in the Ops vector.
|
||||
ImmIdx--;
|
||||
}
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
|
||||
assert(C);
|
||||
|
||||
if (C->getZExtValue() != 0) {
|
||||
// This instruction is already using an immediate.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Set the immediate value
|
||||
Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
|
||||
}
|
||||
}
|
||||
// Set the immediate register
|
||||
Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
|
||||
|
||||
CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
SDNode *Result = SelectCode(N);
|
||||
|
||||
|
@ -1632,7 +1632,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
|
||||
static bool
|
||||
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
|
||||
SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
|
||||
SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
|
||||
if (!Src.isMachineOpcode())
|
||||
@ -1705,6 +1705,51 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
|
||||
Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||
return true;
|
||||
}
|
||||
case AMDGPU::MOV_IMM_I32:
|
||||
case AMDGPU::MOV_IMM_F32: {
|
||||
unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
|
||||
uint64_t ImmValue = 0;
|
||||
|
||||
|
||||
if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
|
||||
ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
|
||||
float FloatValue = FPC->getValueAPF().convertToFloat();
|
||||
if (FloatValue == 0.0) {
|
||||
ImmReg = AMDGPU::ZERO;
|
||||
} else if (FloatValue == 0.5) {
|
||||
ImmReg = AMDGPU::HALF;
|
||||
} else if (FloatValue == 1.0) {
|
||||
ImmReg = AMDGPU::ONE;
|
||||
} else {
|
||||
ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
|
||||
}
|
||||
} else {
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
|
||||
uint64_t Value = C->getZExtValue();
|
||||
if (Value == 0) {
|
||||
ImmReg = AMDGPU::ZERO;
|
||||
} else if (Value == 1) {
|
||||
ImmReg = AMDGPU::ONE_INT;
|
||||
} else {
|
||||
ImmValue = Value;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that we aren't already using an immediate.
|
||||
// XXX: It's possible for an instruction to have more than one
|
||||
// immediate operand, but this is not supported yet.
|
||||
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
|
||||
if (!Imm.getNode())
|
||||
return false;
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
|
||||
assert(C);
|
||||
if (C->getZExtValue())
|
||||
return false;
|
||||
Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
|
||||
}
|
||||
Src = DAG.getRegister(ImmReg, MVT::i32);
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -1768,7 +1813,13 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
|
||||
if (HasDst)
|
||||
SelIdx--;
|
||||
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
|
||||
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
|
||||
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
|
||||
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||
}
|
||||
} else if (Opcode == AMDGPU::REG_SEQUENCE) {
|
||||
for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
|
||||
SDValue &Src = Ops[i];
|
||||
if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
|
||||
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||
}
|
||||
} else {
|
||||
@ -1798,10 +1849,14 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
|
||||
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
|
||||
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
|
||||
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
|
||||
if (HasDst)
|
||||
int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
|
||||
if (HasDst) {
|
||||
SelIdx--;
|
||||
ImmIdx--;
|
||||
}
|
||||
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
|
||||
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
|
||||
SDValue &Imm = Ops[ImmIdx];
|
||||
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
|
||||
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||
}
|
||||
}
|
||||
|
@ -46,3 +46,19 @@ entry:
|
||||
store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @inline_literal_dot4
|
||||
; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
|
||||
; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
|
||||
; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
|
||||
; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0
|
||||
define void @inline_literal_dot4(float addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call float @llvm.AMDGPU.dp4(<4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
|
||||
store float %0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
attributes #1 = { readnone }
|
||||
|
Loading…
x
Reference in New Issue
Block a user