diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 78036a8a145..e4fb07dea37 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -50,7 +50,7 @@ public: private: inline SDValue getSmallIPtrImm(unsigned Imm); bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, - const R600InstrInfo *TII, std::vector Cst); + const R600InstrInfo *TII); bool FoldOperands(unsigned, const R600InstrInfo *, std::vector &); bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector &); @@ -158,12 +158,100 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { } SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { + const R600InstrInfo *TII = + static_cast(TM.getInstrInfo()); unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { return NULL; // Already selected. } switch (Opc) { default: break; + case AMDGPUISD::CONST_ADDRESS: { + for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I); + I != SDNode::use_end(); I = Next) { + Next = llvm::next(I); + if (!I->isMachineOpcode()) { + continue; + } + unsigned Opcode = I->getMachineOpcode(); + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + int SrcIdx = I.getOperandNo(); + int SelIdx; + // Unlike MachineInstrs, SDNodes do not have results in their operand + // list, so we need to increment the SrcIdx, since + // R600InstrInfo::getOperandIdx is based on the MachineInstr indices. 
+ if (HasDst) { + SrcIdx++; + } + + SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx); + if (SelIdx < 0) { + continue; + } + + SDValue CstOffset; + if (N->getValueType(0).isVector() || + !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset)) + continue; + + // Gather constants values + int SrcIndices[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + }; + std::vector Consts; + for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) { + int OtherSrcIdx = SrcIndices[i]; + int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); + if (OtherSrcIdx < 0 || OtherSelIdx < 0) { + continue; + } + if (HasDst) { + OtherSrcIdx--; + OtherSelIdx--; + } + if (RegisterSDNode *Reg = + dyn_cast(I->getOperand(OtherSrcIdx))) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast(I->getOperand(OtherSelIdx)); + Consts.push_back(Cst->getZExtValue()); + } + } + } + + ConstantSDNode *Cst = dyn_cast(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + continue; + + // Convert back to SDNode indices + if (HasDst) { + SrcIdx--; + SelIdx--; + } + std::vector Ops; + for (int i = 0, e = I->getNumOperands(); i != e; ++i) { + if (i == SrcIdx) { + Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32)); + } else if (i == SelIdx) { + Ops.push_back(CstOffset); + } else { + Ops.push_back(I->getOperand(i)); + } + } + CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size()); 
+ } + break; + } case ISD::BUILD_VECTOR: { const AMDGPUSubtarget &ST = TM.getSubtarget(); if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { @@ -224,7 +312,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } - const R600InstrInfo *TII = static_cast(TM.getInstrInfo()); uint64_t ImmValue = 0; unsigned ImmReg = AMDGPU::ALU_LITERAL_X; @@ -342,7 +429,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (Result && Result->isMachineOpcode() && !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) && TII->hasInstrModifiers(Result->getMachineOpcode())) { - // Fold FNEG/FABS/CONST_ADDRESS + // Fold FNEG/FABS // TODO: Isel can generate multiple MachineInst, we need to recursively // parse Result bool IsModified = false; @@ -382,24 +469,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, - SDValue &Abs, const R600InstrInfo *TII, - std::vector Consts) { + SDValue &Abs, const R600InstrInfo *TII) { switch (Src.getOpcode()) { - case AMDGPUISD::CONST_ADDRESS: { - SDValue CstOffset; - if (Src.getValueType().isVector() || - !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset)) - return false; - - ConstantSDNode *Cst = dyn_cast(CstOffset); - Consts.push_back(Cst->getZExtValue()); - if (!TII->fitsConstReadLimitations(Consts)) - return false; - - Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Sel = CstOffset; - return true; - } case ISD::FNEG: Src = Src.getOperand(0); Neg = CurDAG->getTargetConstant(1, MVT::i32); @@ -441,19 +512,6 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, -1 }; - // Gather constants values - std::vector Consts; - for (unsigned j = 0; j < 3; j++) { - int SrcIdx = OperandIdx[j]; - if (SrcIdx < 0) - break; - if (RegisterSDNode *Reg = dyn_cast(Ops[SrcIdx - 1])) { - if (Reg->getReg() == AMDGPU::ALU_CONST) { - ConstantSDNode *Cst = dyn_cast(Ops[SelIdx[j] - 
1]); - Consts.push_back(Cst->getZExtValue()); - } - } - } for (unsigned i = 0; i < 3; i++) { if (OperandIdx[i] < 0) @@ -463,7 +521,7 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue FakeAbs; SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; - if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts)) + if (FoldOperand(Src, Sel, Neg, Abs, TII)) return true; } return false; @@ -512,20 +570,6 @@ bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode, TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) }; - // Gather constants values - std::vector Consts; - for (unsigned j = 0; j < 8; j++) { - int SrcIdx = OperandIdx[j]; - if (SrcIdx < 0) - break; - if (RegisterSDNode *Reg = dyn_cast(Ops[SrcIdx - 1])) { - if (Reg->getReg() == AMDGPU::ALU_CONST) { - ConstantSDNode *Cst = dyn_cast(Ops[SelIdx[j] - 1]); - Consts.push_back(Cst->getZExtValue()); - } - } - } - for (unsigned i = 0; i < 8; i++) { if (OperandIdx[i] < 0) return false; @@ -533,7 +577,7 @@ bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode, SDValue &Sel = Ops[SelIdx[i] - 1]; SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue &Abs = Ops[AbsIdx[i] - 1]; - if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts)) + if (FoldOperand(Src, Sel, Neg, Abs, TII)) return true; } return false; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index dd613d56a60..a2bc2c3a9fa 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1154,6 +1154,30 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(MergedValues, 2, DL); } + // For most operations returning SDValue() will result in the node being + // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so + // we need to manually expand loads that may be legal in some address spaces + // and illegal in others. 
SEXT loads from CONSTANT_BUFFER_0 are supported + // for compute shaders, since the data is sign extended when it is uploaded + // to the buffer. However, SEXT loads from other address spaces are not + // supported, so we need to expand them here. + if (LoadNode->getExtensionType() == ISD::SEXTLOAD) { + EVT MemVT = LoadNode->getMemoryVT(); + assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); + SDValue ShiftAmount = + DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32); + SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr, + LoadNode->getPointerInfo(), MemVT, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->getAlignment()); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount); + + SDValue MergedValues[2] = { Sra, Chain }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { return SDValue(); } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 0c059aa3c41..3bc170f538b 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -186,6 +186,42 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { } } +int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { + static const unsigned OpTable[] = { + AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 + }; + + assert (SrcNum < 3); + return getOperandIdx(Opcode, OpTable[SrcNum]); +} + +#define SRC_SEL_ROWS 11 +int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { + static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = { + {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, + {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, + {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, + {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, + {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, + 
{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, + {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, + {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, + {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, + {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, + {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} + }; + + for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) { + if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) { + return getOperandIdx(Opcode, SrcSelTable[i][1]); + } + } + return -1; +} +#undef SRC_SEL_ROWS + SmallVector, 3> R600InstrInfo::getSrcs(MachineInstr *MI) const { SmallVector, 3> Result; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 1ba4160747b..cdaa2fbefc8 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -76,6 +76,13 @@ namespace llvm { bool mustBeLastInClause(unsigned Opcode) const; + /// \returns The operand index for the given source number. Legal values + /// for SrcNum are 0, 1, and 2. + int getSrcIdx(unsigned Opcode, unsigned SrcNum) const; + /// \returns The operand Index for the Sel operand given an index to one + /// of the instruction's src operands. + int getSelIdx(unsigned Opcode, unsigned SrcIdx) const; + /// \returns a pair for each src of an ALU instructions. /// The first member of a pair is the register id. /// If register is ALU_CONST, second member is SEL. 
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index b001ad0de00..501c5567fff 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -36,7 +36,7 @@ entry: ; SHA-256 Ma function ; ((x & z) | (y & (x | z))) ; R600-CHECK: @bfi_sha256_ma -; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], +; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W ; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}