mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-23 04:35:43 +00:00
1f7a90d793
[DebugInfo] Add debug locations to constant SD nodes This adds debug location to constant nodes of Selection DAG and updates all places that create constants to pass debug locations (see PR13269). Can't guarantee that all locations are correct, but in a lot of cases choice is obvious, so most of them should be. At least all tests pass. Tests for these changes do not cover everything, instead just check it for SDNodes, ARM and AArch64 where it's easy to get incorrect locations on constants. This is not complete fix as FastISel contains workaround for wrong debug locations, which drops locations from instructions on processing constants, but there isn't currently a way to use debug locations from constants there as llvm::Constant doesn't cache it (yet). Although this is a bit different issue, not directly related to these changes. Differential Revision: http://reviews.llvm.org/D9084 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235989 91177308-0d34-0410-b5e6-96231b3b80d8
1333 lines
48 KiB
C++
1333 lines
48 KiB
C++
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//==-----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
/// \brief Defines an instruction selector for the AMDGPU target.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "AMDGPUInstrInfo.h"
|
|
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
|
#include "AMDGPURegisterInfo.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "R600InstrInfo.h"
|
|
#include "SIDefines.h"
|
|
#include "SIISelLowering.h"
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "llvm/CodeGen/FunctionLoweringInfo.h"
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
|
#include "llvm/IR/Function.h"
|
|
|
|
using namespace llvm;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Instruction Selector Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {

/// Instruction selector for the AMDGPU target: maps SelectionDAG operations
/// onto AMDGPU machine instructions.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget of the machine function being selected. It is (re)assigned at
  // the start of runOnMachineFunction() and consulted whenever selection
  // depends on the hardware generation.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  // SelectionDAGISel / pass interface.
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  // Immediate and operand-folding helpers.
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification of the IR value behind a memory operand.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  // Store predicates, one per address space.
  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  // Load predicates, one per address space / source kind.
  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // Register class expected for operand OpNo of an already selected node.
  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Addressing-mode matchers.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (base + immediate offset) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF addressing.
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;

  SDNode *SelectAddrSpaceCast(SDNode *N);

  // VOP3 source-modifier matchers.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Nodes that are selected by hand from Select().
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Scalar bit-field-extract selection.
  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
|
|
|
|
/// \brief This pass converts a legalized DAG into a AMDGPU-specific
|
|
// DAG, ready for instruction scheduling.
|
|
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
|
|
return new AMDGPUDAGToDAGISel(TM);
|
|
}
|
|
|
|
/// Construct the selector for \p TM.
///
/// Subtarget starts out null; it is assigned for each function at the top of
/// runOnMachineFunction(), so it never holds an indeterminate value.
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM), Subtarget(nullptr) {}
|
|
|
|
/// Cache the AMDGPU subtarget of \p MF, then hand over to the generic
/// SelectionDAGISel driver and return its result.
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  const auto &GenericST = MF.getSubtarget();
  Subtarget = &static_cast<const AMDGPUSubtarget &>(GenericST);

  const bool Changed = SelectionDAGISel::runOnMachineFunction(MF);
  return Changed;
}
|
|
|
|
// Out-of-line destructor; there is nothing to release here.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {}
|
|
|
|
/// \returns true when SITargetLowering::analyzeImmediate() reports 0 for
/// \p N.
///
/// The target lowering is unconditionally treated as an SITargetLowering.
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const auto *SILowering =
      static_cast<const SITargetLowering *>(getTargetLowering());
  const auto ImmKind = SILowering->analyzeImmediate(N);
  return ImmKind == 0;
}
|
|
|
|
/// \brief Determine the register class for \p OpNo
|
|
/// \returns The register class of the virtual register that will be used for
|
|
/// the given operand number \OpNo or NULL if the register class cannot be
|
|
/// determined.
|
|
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
|
|
unsigned OpNo) const {
|
|
if (!N->isMachineOpcode())
|
|
return nullptr;
|
|
|
|
switch (N->getMachineOpcode()) {
|
|
default: {
|
|
const MCInstrDesc &Desc =
|
|
Subtarget->getInstrInfo()->get(N->getMachineOpcode());
|
|
unsigned OpIdx = Desc.getNumDefs() + OpNo;
|
|
if (OpIdx >= Desc.getNumOperands())
|
|
return nullptr;
|
|
int RegClass = Desc.OpInfo[OpIdx].RegClass;
|
|
if (RegClass == -1)
|
|
return nullptr;
|
|
|
|
return Subtarget->getRegisterInfo()->getRegClass(RegClass);
|
|
}
|
|
case AMDGPU::REG_SEQUENCE: {
|
|
unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
|
|
const TargetRegisterClass *SuperRC =
|
|
Subtarget->getRegisterInfo()->getRegClass(RCID);
|
|
|
|
SDValue SubRegOp = N->getOperand(OpNo + 1);
|
|
unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
|
|
return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
|
|
SubRegIdx);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Split \p Addr into two 32-bit address components.
///
/// - ADD: \p R1 and \p R2 receive the two addends.
/// - FrameIndex: \p R1 is the matching target frame index, \p R2 is zero.
/// - anything else: \p R1 is \p Addr itself, \p R2 is zero.
///
/// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectADDRParam(SDValue Addr, SDValue &R1,
                                         SDValue &R2) {
  const unsigned AddrOpc = Addr.getOpcode();

  if (AddrOpc == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
    return true;
  }

  // Every remaining form uses a zero second component; only the first
  // component depends on whether the address is a frame index.
  R1 = Addr;
  if (AddrOpc == ISD::FrameIndex) {
    if (FrameIndexSDNode *FrameIdx = dyn_cast<FrameIndexSDNode>(Addr))
      R1 = CurDAG->getTargetFrameIndex(FrameIdx->getIndex(), MVT::i32);
  }
  R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}
|
|
|
|
/// Like SelectADDRParam(), except that target external symbols and target
/// global addresses are rejected (returns false without touching \p R1 or
/// \p R2).
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  const unsigned AddrOpc = Addr.getOpcode();
  const bool IsSymbolic = AddrOpc == ISD::TargetExternalSymbol ||
                          AddrOpc == ISD::TargetGlobalAddress;
  if (IsSymbolic)
    return false;

  return SelectADDRParam(Addr, R1, R2);
}
|
|
|
|
|
|
/// 64-bit counterpart of SelectADDR().
///
/// Target external symbols and target global addresses are rejected.
/// Otherwise:
/// - ADD: \p R1 and \p R2 receive the two addends.
/// - FrameIndex: \p R1 is the matching i64 target frame index, \p R2 is zero.
/// - anything else: \p R1 is \p Addr itself, \p R2 is an i64 zero.
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  const unsigned AddrOpc = Addr.getOpcode();

  if (AddrOpc == ISD::TargetExternalSymbol ||
      AddrOpc == ISD::TargetGlobalAddress)
    return false;

  if (AddrOpc == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
    return true;
  }

  // All remaining forms pair the first component with a zero offset.
  R1 = Addr;
  if (AddrOpc == ISD::FrameIndex) {
    if (FrameIndexSDNode *FrameIdx = dyn_cast<FrameIndexSDNode>(Addr))
      R1 = CurDAG->getTargetFrameIndex(FrameIdx->getIndex(), MVT::i64);
  }
  R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  return true;
}
|
|
|
|
/// Select a machine instruction for \p N.
///
/// Nodes that need custom handling are dealt with in the switch below; every
/// case that ends in \c break (and every opcode without a case) falls through
/// to the TableGen-generated SelectCode(). Nodes that already carry a machine
/// opcode get their node id reset to -1 and are reported as done (nullptr).
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const unsigned Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  // Several cases below only apply to particular hardware generations.
  const auto Gen = Subtarget->getGeneration();

  switch (Opc) {
  default:
    break;

  // i64 ADD/SUB are selected here rather than custom lowered during DAG
  // legalization, so that i64 ADDs used for address calculation can still be
  // folded into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) == MVT::i64 &&
        Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
      return SelectADD_SUB_I64(N);
    break;
  }

  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VecVT = N->getValueType(0);
    const unsigned NumElts = VecVT.getVectorNumElements();
    EVT EltVT = VecVT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));

    unsigned RegClassID;
    if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      // Use a VGPR class unless some already selected user expects an SGPR
      // class for this operand.
      bool UseVReg = true;
      for (SDNode::use_iterator UI = N->use_begin(), UE = SDNode::use_end();
           UI != UE; ++UI) {
        if (!UI->isMachineOpcode())
          continue;

        const TargetRegisterClass *UserRC =
            getOperandRegClass(*UI, UI.getOperandNo());
        if (UserRC &&
            static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(UserRC))
          UseVReg = false;
      }

      switch (NumElts) {
      case 1:
        RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::SReg_32RegClassID;
        break;
      case 2:
        RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID
                             : AMDGPU::SReg_64RegClassID;
        break;
      case 4:
        RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID
                             : AMDGPU::SReg_128RegClassID;
        break;
      case 8:
        RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID
                             : AMDGPU::SReg_256RegClassID;
        break;
      case 16:
        RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID
                             : AMDGPU::SReg_512RegClassID;
        break;
      default:
        llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through
      // TwoAddressInstructions pass. We want to avoid 128 bits copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumElts) {
      case 2:
        RegClassID = AMDGPU::R600_Reg64RegClassID;
        break;
      case 4:
        RegClassID = (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
                         ? AMDGPU::R600_Reg128VerticalRegClassID
                         : AMDGPU::R600_Reg128RegClassID;
        break;
      default:
        llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector is just a copy into the chosen register class.
    if (NumElts == 1)
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);

    assert(NumElts <= 16 && "Vectors with more than 16 elements not "
                            "supported yet");
    // Inline capacity:
    //   16 = Max Num Vector Elements
    //    2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    //    1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    const unsigned NumOps = N->getNumOperands();
    for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
      SDValue Elt = N->getOperand(Idx);
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(Elt)) {
        IsRegSeq = false;
        break;
      }

      const unsigned Slot = 1 + (2 * Idx);
      RegSeqArgs[Slot] = Elt;
      RegSeqArgs[Slot + 1] = CurDAG->getTargetConstant(
          TRI->getSubRegFromChannel(Idx), DL, MVT::i32);
    }

    if (NumOps != NumElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NumOps < NumElts);

      MachineSDNode *ImpDef =
          CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, EltVT);
      for (unsigned Idx = NumOps; Idx < NumElts; ++Idx) {
        const unsigned Slot = 1 + (2 * Idx);
        RegSeqArgs[Slot] = SDValue(ImpDef, 0);
        RegSeqArgs[Slot + 1] = CurDAG->getTargetConstant(
            TRI->getSubRegFromChannel(Idx), DL, MVT::i32);
      }
    }

    if (IsRegSeq)
      return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                  RegSeqArgs);
    break;
  }

  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Gen <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;

    SDLoc DL(N);
    EVT PairVT = N->getValueType(0);

    // Pick the register class and the sub-register indices of the two halves.
    unsigned PairRCID, LoSubIdx, HiSubIdx;
    if (PairVT == MVT::i128) {
      PairRCID = AMDGPU::SReg_128RegClassID;
      LoSubIdx = AMDGPU::sub0_sub1;
      HiSubIdx = AMDGPU::sub2_sub3;
    } else if (PairVT == MVT::i64) {
      PairRCID = AMDGPU::SReg_64RegClassID;
      LoSubIdx = AMDGPU::sub0;
      HiSubIdx = AMDGPU::sub1;
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }

    RC = CurDAG->getTargetConstant(PairRCID, DL, MVT::i32);
    SubReg0 = CurDAG->getTargetConstant(LoSubIdx, DL, MVT::i32);
    SubReg1 = CurDAG->getTargetConstant(HiSubIdx, DL, MVT::i32);

    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, PairVT, Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Only 64-bit constants that are not inline immediates are split by
    // hand. The generation check must stay first: isInlineImmediate() treats
    // the target lowering as an SITargetLowering.
    if (Gen < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(N);
    const uint64_t Imm =
        FPConst ? FPConst->getValueAPF().bitcastToAPInt().getZExtValue()
                : cast<ConstantSDNode>(N)->getZExtValue();

    // Materialize each 32-bit half with S_MOV_B32 and glue the halves back
    // together with a REG_SEQUENCE.
    SDLoc DL(N);
    SDValue LoImm = CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, LoImm);
    SDValue HiImm = CurDAG->getConstant(Imm >> 32, DL, MVT::i32);
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, HiImm);

    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }

  case ISD::LOAD: {
    // To simplify the TableGen patterns, we replace all i64 loads with
    // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
    // during DAG legalization; however, some places (ExpandUnalignedLoad)
    // in the DAG legalizer make assumptions when i64 is legal, so doing this
    // promotion early can cause problems.
    LoadSDNode *LD = cast<LoadSDNode>(N);
    if (N->getValueType(0) != MVT::i64 ||
        LD->getExtensionType() != ISD::NON_EXTLOAD)
      break;

    SDLoc DL(N);
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, DL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, MVT::i64, NewLoad);

    // Redirect the chain users first, then the value users.
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
    SelectCode(NewLoad.getNode());

    // Continue by selecting the bitcast that replaced the loaded value.
    N = BitCast.getNode();
    break;
  }

  case ISD::STORE: {
    // Handle i64 stores here for the same reason mentioned above for loads.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue StoredVal = ST->getValue();
    if (StoredVal.getValueType() != MVT::i64 || ST->isTruncatingStore())
      break;

    SDLoc DL(N);
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, DL, MVT::v2i32, StoredVal);
    SDValue NewStore = CurDAG->getStore(ST->getChain(), DL, NewValue,
                                        ST->getBasePtr(), ST->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);

    if (NewValue.getOpcode() != ISD::BITCAST) {
      // getNode() may fold the bitcast if its input was another bitcast. If
      // that happens we should only select the new store.
      N = NewStore.getNode();
      break;
    }

    Select(NewStore.getNode());
    return SelectCode(NewValue.getNode());
  }

  case AMDGPUISD::REGISTER_LOAD: {
    if (Gen <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;

    SDValue Addr, Offset;
    SDLoc DL(N);
    SelectADDRIndirect(N->getOperand(1), Addr, Offset);

    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
    SDVTList ResultVTs = CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other);
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL, ResultVTs, Ops);
  }

  case AMDGPUISD::REGISTER_STORE: {
    if (Gen <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;

    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    SDLoc DL(N);

    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
    SDVTList ResultVTs = CurDAG->getVTList(MVT::Other);
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
                                  ResultVTs, Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Gen < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *OffsetC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *WidthC = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!OffsetC || !WidthC)
      break;

    const uint32_t OffsetVal = OffsetC->getZExtValue();
    const uint32_t WidthVal = WidthC->getZExtValue();
    const unsigned BFEOpc =
        (Opc == AMDGPUISD::BFE_I32) ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    return getS_BFE(BFEOpc, SDLoc(N), N->getOperand(0), OffsetVal, WidthVal);
  }

  case AMDGPUISD::DIV_SCALE:
    return SelectDIV_SCALE(N);

  case ISD::CopyToReg: {
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }

  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);

  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    if (N->getValueType(0) == MVT::i32 &&
        Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
      return SelectS_BFE(N);
    break;
  }

  return SelectCode(N);
}
|
|
|
|
|
|
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
|
|
assert(AS != 0 && "Use checkPrivateAddress instead.");
|
|
if (!Ptr)
|
|
return false;
|
|
|
|
return Ptr->getType()->getPointerAddressSpace() == AS;
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
|
|
if (Op->getPseudoValue())
|
|
return true;
|
|
|
|
if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
|
|
return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
|
|
|
|
return false;
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
|
|
const Value *MemVal = N->getMemOperand()->getValue();
|
|
return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
|
|
const Value *MemVal = N->getMemOperand()->getValue();
|
|
if (CbId == -1)
|
|
return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
|
|
|
|
return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
|
|
if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
|
|
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
|
|
N->getMemoryVT().bitsLT(MVT::i32))
|
|
return true;
|
|
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
|
|
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
|
|
MachineMemOperand *MMO = N->getMemOperand();
|
|
if (checkPrivateAddress(N->getMemOperand())) {
|
|
if (MMO) {
|
|
const PseudoSourceValue *PSV = MMO->getPseudoValue();
|
|
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
|
|
if (checkPrivateAddress(N->getMemOperand())) {
|
|
// Check to make sure we are not a constant pool load or a constant load
|
|
// that is marked as a private load
|
|
if (isCPLoad(N) || isConstantLoad(N, -1)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
const Value *MemVal = N->getMemOperand()->getValue();
|
|
if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
|
|
!checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \returns the human-readable name of this pass.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  static const char PassName[] =
      "AMDGPU DAG->DAG Pattern Instruction Selection";
  return PassName;
}
|
|
|
|
// If DEBUGTMP is defined, drop the INT64_C macro; DEBUGTMP itself is then
// undefined either way.
#if defined(DEBUGTMP)
#undef INT64_C
#endif
#undef DEBUGTMP
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Complex Patterns
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Match a constant address. On success \p IntPtr receives the constant's
/// zero-extended value divided by 4, as a target pointer-sized constant.
///
/// \returns false (leaving \p IntPtr untouched) if \p Addr is not a constant.
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  ConstantSDNode *ConstAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!ConstAddr)
    return false;

  const uint64_t ScaledAddr = ConstAddr->getZExtValue() / 4;
  IntPtr = CurDAG->getIntPtrConstant(ScaledAddr, SDLoc(Addr), true);
  return true;
}
|
|
|
|
/// Match a non-constant address: \p BaseReg becomes \p Addr and \p Offset a
/// zero target pointer-sized constant.
///
/// \returns false (leaving the outputs untouched) if \p Addr is a constant.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (isa<ConstantSDNode>(Addr))
    return false;

  BaseReg = Addr;
  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
  return true;
}
|
|
|
|
/// Split \p Addr into a base and an i32 immediate offset.
///
/// - (add base, C) with C passing the isInt<16> test: base + C.
/// - a plain constant C passing the isInt<16> test: AMDGPU::ZERO register + C.
/// - anything else: \p Addr itself with a zero offset.
///
/// The isInt<16> test is applied to the zero-extended constant value.
///
/// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  if (Addr.getOpcode() == ISD::ADD) {
    ConstantSDNode *AddImm = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (AddImm && isInt<16>(AddImm->getZExtValue())) {
      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(AddImm->getZExtValue(), SDLoc(Addr),
                                         MVT::i32);
      return true;
    }
  }

  // If the pointer address is constant, we can move it to the offset field.
  ConstantSDNode *ConstAddr = dyn_cast<ConstantSDNode>(Addr);
  if (ConstAddr && isInt<16>(ConstAddr->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(ConstAddr->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}
|
|
|
|
/// Split an indirect address into a base and an i32 immediate offset.
///
/// - constant C: INDIRECT_BASE_ADDR register + C.
/// - (add base, C) or (or base, C): base + C.
/// - anything else: \p Addr itself with a zero offset.
///
/// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  SDLoc DL(Addr);

  if (ConstantSDNode *ConstAddr = dyn_cast<ConstantSDNode>(Addr)) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(ConstAddr->getZExtValue(), DL, MVT::i32);
    return true;
  }

  const unsigned AddrOpc = Addr.getOpcode();
  if (AddrOpc == ISD::ADD || AddrOpc == ISD::OR) {
    if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(Imm->getZExtValue(), DL, MVT::i32);
      return true;
    }
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}
|
|
|
|
/// Select an i64 ADD or SUB as two 32-bit scalar operations.
///
/// Both operands are split into sub0/sub1 halves. The low halves go through
/// S_ADD_U32 / S_SUB_U32, whose glue result feeds S_ADDC_U32 / S_SUBB_U32 on
/// the high halves. The two results are recombined into an SReg_64 with a
/// REG_SEQUENCE that replaces \p N.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  const bool IsAdd = N->getOpcode() == ISD::ADD;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Pull one 32-bit half out of a 64-bit operand.
  auto ExtractHalf = [&](SDValue Src, SDValue SubIdx) -> SDValue {
    SDNode *Half = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                          MVT::i32, Src, SubIdx);
    return SDValue(Half, 0);
  };

  SDValue LHSLo = ExtractHalf(LHS, Sub0);
  SDValue LHSHi = ExtractHalf(LHS, Sub1);
  SDValue RHSLo = ExtractHalf(RHS, Sub0);
  SDValue RHSHi = ExtractHalf(RHS, Sub1);

  // The low operation also produces glue, which carries into the high one.
  SDVTList LoVTs = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue LoArgs[] = { LHSLo, RHSLo };

  unsigned LoOpc = AMDGPU::S_SUB_U32;
  unsigned HiOpc = AMDGPU::S_SUBB_U32;
  if (IsAdd) {
    LoOpc = AMDGPU::S_ADD_U32;
    HiOpc = AMDGPU::S_ADDC_U32;
  }

  SDNode *LoResult = CurDAG->getMachineNode(LoOpc, DL, LoVTs, LoArgs);
  SDValue Carry(LoResult, 1);
  SDNode *HiResult =
      CurDAG->getMachineNode(HiOpc, DL, MVT::i32, LHSHi, RHSHi, Carry);

  SDValue SeqArgs[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(LoResult, 0),
    Sub0,
    SDValue(HiResult, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, SeqArgs);
}
|
|
|
|
// We need to handle this here because tablegen doesn't support matching
|
|
// instructions with multiple outputs.
|
|
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
|
SDLoc SL(N);
|
|
EVT VT = N->getValueType(0);
|
|
|
|
assert(VT == MVT::f32 || VT == MVT::f64);
|
|
|
|
unsigned Opc
|
|
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
|
|
|
|
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
|
|
SDValue Ops[8];
|
|
|
|
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
|
|
SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
|
|
SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
|
|
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
|
|
}
|
|
|
|
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
|
|
unsigned OffsetBits) const {
|
|
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
|
|
(OffsetBits == 8 && !isUInt<8>(Offset)))
|
|
return false;
|
|
|
|
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
|
|
return true;
|
|
|
|
// On Southern Islands instruction with a negative base value and an offset
|
|
// don't seem to work.
|
|
return CurDAG->SignBitIsZero(Base);
|
|
}
|
|
|
|
/// Match a DS address as base register plus one immediate offset.
///
/// - base + constant with a legal 16-bit offset: split as is.
/// - a constant address that fits in 16 unsigned bits: a zero base
///   (materialized with V_MOV_B32_e32) plus the address as the offset.
/// - anything else: \p Addr itself with a zero i16 offset.
///
/// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue BasePart = Addr.getOperand(0);
    SDValue ImmPart = Addr.getOperand(1);
    const int64_t ImmVal = cast<ConstantSDNode>(ImmPart)->getSExtValue();
    if (isDSOffsetLegal(BasePart, ImmVal, 16)) {
      // (add n0, c0)
      Base = BasePart;
      Offset = ImmPart;
      return true;
    }
  }

  SDLoc DL(Addr);

  // If we have a constant address, prefer to put the constant into the
  // offset. This can save moves to load the constant address since multiple
  // operations can share the zero base address register, and enables merging
  // into read2 / write2 instructions.
  const ConstantSDNode *ConstAddr = dyn_cast<ConstantSDNode>(Addr);
  if (ConstAddr && isUInt<16>(ConstAddr->getZExtValue())) {
    SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
    MachineSDNode *ZeroBase =
        CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
    Base = SDValue(ZeroBase, 0);
    Offset = Addr;
    return true;
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
|
|
|
|
/// Split \p Addr into a \p Base and two consecutive 8-bit dword offsets
/// (\p Offset0 and \p Offset1 == Offset0 + 1). Always succeeds; the fallback
/// is the whole address as the base with offsets 0 and 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  // (add n0, c0): convert the byte offset to dwords and use it directly when
  // the second (larger) offset is still legal.
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue BasePart = Addr.getOperand(0);
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Addr.getOperand(1));
    const unsigned FirstDWord = ConstNode->getZExtValue() / 4;
    const unsigned SecondDWord = FirstDWord + 1;
    if (isDSOffsetLegal(BasePart, SecondDWord, 8)) {
      Base = BasePart;
      Offset0 = CurDAG->getTargetConstant(FirstDWord, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(SecondDWord, DL, MVT::i8);
      return true;
    }
  }

  // Constant address: use a zero base register and put the address into the
  // two offsets when both fit in 8 bits.
  if (const ConstantSDNode *ConstAddr = dyn_cast<ConstantSDNode>(Addr)) {
    const unsigned FirstDWord = ConstAddr->getZExtValue() / 4;
    const unsigned SecondDWord = FirstDWord + 1;
    // The constant address is expected to be a multiple of 4.
    assert(4 * FirstDWord == ConstAddr->getZExtValue());

    if (isUInt<8>(FirstDWord) && isUInt<8>(SecondDWord)) {
      SDValue ZeroImm = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *ZeroBase =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, ZeroImm);
      Base = SDValue(ZeroBase, 0);
      Offset0 = CurDAG->getTargetConstant(FirstDWord, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(SecondDWord, DL, MVT::i8);
      return true;
    }
  }

  // Default case: the whole address is the base.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
|
|
|
|
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
|
|
return isUInt<12>(Imm->getZExtValue());
|
|
}
|
|
|
|
/// Decompose \p Addr into the operands of a MUBUF instruction.
///
/// GLC, SLC, TFE, Idxen and Offen are always set to the constant 0. On
/// return, Addr64 is 1 when the address was split into \p Ptr plus a separate
/// \p VAddr value (the "addr64" forms), and 0 when \p Ptr carries the address
/// and \p VAddr is the constant 0 (the "offset" forms). A constant addend is
/// placed in \p Offset when it is a legal immediate, otherwise in \p SOffset
/// (through an S_MOV_B32) when it fits in 32 bits.
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return;
    }

    // The constant could not be encoded at all. Fall through and treat Addr
    // like any other node; every output written above is overwritten below.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // default case -> offset
  //
  // Addr64 must be reset here: the base+constant branch above may have set it
  // to 1 before falling through (for instance when Addr is an OR that
  // isBaseWithConstantOffset() accepted), and this case does not use the
  // addr64 form.
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
|
|
|
|
/// Match \p Addr as an addr64 MUBUF access.
///
/// Runs SelectMUBUF() and succeeds only when it reported the addr64 form, in
/// which case \p SRsrc is built from the pointer part with wrapAddr64Rsrc().
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;
  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  // Not an addr64 address: nothing to select here.
  if (!cast<ConstantSDNode>(Addr64)->getSExtValue())
    return false;

  SDLoc DL(Addr);
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());
  MachineSDNode *RsrcNode = Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr);
  SRsrc = SDValue(RsrcNode, 0);
  return true;
}
|
|
|
|
/// Variant of SelectMUBUFAddr64() for patterns that only expose the SLC bit;
/// the GLC and TFE results of the full overload are discarded.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SDLoc DL(Addr);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Scratch outputs that callers of this overload have no use for.
  SDValue UnusedGLC, UnusedTFE;
  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, UnusedGLC, SLC,
                           UnusedTFE);
}
|
|
|
|
/// Build the MUBUF operands for a scratch access at \p Addr.
///
/// \p Rsrc is built from the SCRATCH_RSRC_DWORD0/1 external symbols,
/// \p SOffset is a copy from the preloaded scratch wave offset register, and
/// \p Addr is split into \p VAddr plus a legal \p ImmOffset when possible.
/// Always returns true.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  // Register the preloaded scratch wave offset so that its live-in virtual
  // register can be read back for SOffset below.
  unsigned WaveOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
  Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                WaveOffsetReg, MVT::i32);

  // Materialize the two resource dwords from their external symbols.
  SDValue RsrcSym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
  MachineSDNode *MovDword0 =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, RsrcSym0);
  SDValue RsrcDword0(MovDword0, 0);

  SDValue RsrcSym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
  MachineSDNode *MovDword1 =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, RsrcSym1);
  SDValue RsrcDword1(MovDword1, 0);

  // Combine both dwords into one 64-bit register sequence.
  const SDValue SeqOps[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    RsrcDword0,
    CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
    RsrcDword1,
    CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
  };
  MachineSDNode *PtrSeq =
      CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v2i32, SeqOps);
  SDValue ScratchPtr(PtrSeq, 0);

  Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                   MRI.getLiveInVirtReg(WaveOffsetReg),
                                   MVT::i32);

  // (add n0, c1): fold the constant into the immediate offset when legal.
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isLegalMUBUFImmOffset(ConstNode)) {
      VAddr = Addr.getOperand(0);
      ImmOffset =
          CurDAG->getTargetConstant(ConstNode->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node): use the address as is, with a zero immediate offset.
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
|
|
|
|
/// Match \p Addr as an offset-only MUBUF access.
///
/// Runs SelectMUBUF() and succeeds only when Offen, Idxen and Addr64 all came
/// back as 0, in which case \p SRsrc is built from the pointer with the
/// default data format and an all-ones 32-bit size.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  // Any of these flags being set means this is not the plain offset form.
  if (cast<ConstantSDNode>(Offen)->getSExtValue() ||
      cast<ConstantSDNode>(Idxen)->getSExtValue() ||
      cast<ConstantSDNode>(Addr64)->getSExtValue())
    return false;

  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
  const uint64_t SizeBits = APInt::getAllOnesValue(32).getZExtValue(); // Size
  const uint64_t RsrcBits = TII->getDefaultRsrcDataFormat() | SizeBits;

  SDLoc DL(Addr);
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());
  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, RsrcBits), 0);
  return true;
}
|
|
|
|
/// Variant of SelectMUBUFOffset() for patterns that only expose the GLC bit;
/// the SLC and TFE results of the full overload are discarded.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  // Scratch outputs that callers of this overload have no use for.
  SDValue UnusedSLC, UnusedTFE;
  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, UnusedSLC,
                           UnusedTFE);
}
|
|
|
|
// FIXME: This is incorrect and only enough to be able to compile.
|
|
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
|
|
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
|
|
SDLoc DL(N);
|
|
|
|
assert(Subtarget->hasFlatAddressSpace() &&
|
|
"addrspacecast only supported with flat address space!");
|
|
|
|
assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
|
|
ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
|
|
"Cannot cast address space to / from constant address!");
|
|
|
|
assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
|
|
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
|
|
"Can only cast to / from flat address space!");
|
|
|
|
// The flat instructions read the address as the index of the VGPR holding the
|
|
// address, so casting should just be reinterpreting the base VGPR, so just
|
|
// insert trunc / bitcast / zext.
|
|
|
|
SDValue Src = ASC->getOperand(0);
|
|
EVT DestVT = ASC->getValueType(0);
|
|
EVT SrcVT = Src.getValueType();
|
|
|
|
unsigned SrcSize = SrcVT.getSizeInBits();
|
|
unsigned DestSize = DestVT.getSizeInBits();
|
|
|
|
if (SrcSize > DestSize) {
|
|
assert(SrcSize == 64 && DestSize == 32);
|
|
return CurDAG->getMachineNode(
|
|
TargetOpcode::EXTRACT_SUBREG,
|
|
DL,
|
|
DestVT,
|
|
Src,
|
|
CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
|
|
}
|
|
|
|
|
|
if (DestSize > SrcSize) {
|
|
assert(SrcSize == 32 && DestSize == 64);
|
|
|
|
// FIXME: This is probably wrong, we should never be defining
|
|
// a register class with both VGPRs and SGPRs
|
|
SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
|
|
MVT::i32);
|
|
|
|
const SDValue Ops[] = {
|
|
RC,
|
|
Src,
|
|
CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
|
|
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
|
|
CurDAG->getConstant(0, DL, MVT::i32)), 0),
|
|
CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
|
|
};
|
|
|
|
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
|
|
DL, N->getValueType(0), Ops);
|
|
}
|
|
|
|
assert(SrcSize == 64 && DestSize == 64);
|
|
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
|
|
}
|
|
|
|
/// Create a scalar bitfield-extract machine node with opcode \p Opcode that
/// extracts \p Width bits of \p Val starting at bit \p Offset.
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // S_BFE_I32 / S_BFE_U32 take the offset and width packed into their second
  // source: bits [5:0] contain the offset and bits [22:16] the width.
  const uint32_t WidthField = Width << 16;
  const uint32_t Packed = Offset | WidthField;

  SDValue PackedImm = CurDAG->getTargetConstant(Packed, DL, MVT::i32);
  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedImm);
}
|
|
|
|
/// Try to turn a shift-left followed by a shift-right into a single BFE:
///
///   "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
///   "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
///
/// Predicate: 0 < b <= c < 32. Falls back to SelectCode() otherwise.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *ShlAmt = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *ShrAmt = dyn_cast<ConstantSDNode>(N->getOperand(1));

  // Both shift amounts must be constants.
  if (!ShlAmt || !ShrAmt)
    return SelectCode(N);

  const uint32_t LeftBits = ShlAmt->getZExtValue();
  const uint32_t RightBits = ShrAmt->getZExtValue();
  const bool InRange = LeftBits > 0 && LeftBits <= RightBits && RightBits < 32;
  if (!InRange)
    return SelectCode(N);

  // An arithmetic shift right needs the sign-extending form.
  unsigned Opcode = AMDGPU::S_BFE_U32;
  if (N->getOpcode() == ISD::SRA)
    Opcode = AMDGPU::S_BFE_I32;

  return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), RightBits - LeftBits,
                  32 - RightBits);
}
|
|
|
|
/// Try to select \p N (an AND, SRL or SRA node) as a scalar bitfield extract.
///
/// Handles a shift combined with a constant mask directly and forwards
/// shift-of-shift-left forms to SelectS_BFEFromShifts(). Anything that does
/// not match is handed to SelectCode().
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();

        // getZExtValue() yields a 64-bit value, and shifting it by 64 or more
        // is undefined behaviour in C++. Such a shift amount can never form a
        // valid extract, so leave those nodes to SelectCode().
        if (ShiftVal < 64) {
          uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

          if (isMask_32(MaskVal)) {
            uint32_t WidthVal = countPopulation(MaskVal);

            return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                            ShiftVal, WidthVal);
          }
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}
|
|
|
|
/// Peel an optional FNEG and then an optional FABS off \p In, returning the
/// remaining value in \p Src and the matching SISrcMods bits in \p SrcMods.
/// Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  SDValue Stripped = In;
  unsigned ModBits = 0;

  // The negation is looked for first, so fneg(fabs(x)) folds both modifiers.
  if (Stripped.getOpcode() == ISD::FNEG) {
    ModBits |= SISrcMods::NEG;
    Stripped = Stripped.getOperand(0);
  }
  if (Stripped.getOpcode() == ISD::FABS) {
    ModBits |= SISrcMods::ABS;
    Stripped = Stripped.getOperand(0);
  }

  Src = Stripped;
  SrcMods = CurDAG->getTargetConstant(ModBits, SDLoc(In), MVT::i32);
  return true;
}
|
|
|
|
/// Like SelectVOP3Mods(), but also produces the \p Clamp and \p Omod
/// operands, both of which are currently always 0.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  // FIXME: Handle Clamp and Omod
  SDLoc Loc(In);
  Clamp = CurDAG->getTargetConstant(0, Loc, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, Loc, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}
|
|
|
|
/// Like SelectVOP3Mods(), but also produces the \p Omod operand, which is
/// currently always 0.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  SDLoc Loc(In);
  Omod = CurDAG->getTargetConstant(0, Loc, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}
|
|
|
|
/// Like SelectVOP3Mods(), with \p Clamp and \p Omod both set to the same
/// zero constant.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  SDValue ZeroImm = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  Omod = ZeroImm;
  Clamp = ZeroImm;
  return SelectVOP3Mods(In, Src, SrcMods);
}
|
|
|
|
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
|
const AMDGPUTargetLowering& Lowering =
|
|
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
|
|
bool IsModified = false;
|
|
do {
|
|
IsModified = false;
|
|
// Go over all selected nodes and try to fold them a bit more
|
|
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
|
|
E = CurDAG->allnodes_end(); I != E; ++I) {
|
|
|
|
SDNode *Node = I;
|
|
|
|
MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
|
|
if (!MachineNode)
|
|
continue;
|
|
|
|
SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
|
|
if (ResNode != Node) {
|
|
ReplaceUses(Node, ResNode);
|
|
IsModified = true;
|
|
}
|
|
}
|
|
CurDAG->RemoveDeadNodes();
|
|
} while (IsModified);
|
|
}
|