//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
|
|
#include "AMDGPURegisterInfo.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "AMDILDevices.h"
|
|
#include "AMDILIntrinsicInfo.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
|
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
|
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
|
#include "llvm/IR/CallingConv.h"
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/Intrinsics.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Target/TargetInstrInfo.h"
|
|
#include "llvm/Target/TargetOptions.h"
|
|
|
|
using namespace llvm;

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering() {
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM =
      getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are supported.

  for (unsigned int x = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg directly; custom-lower it into a shift pair
    // (see LowerSIGN_EXTEND_INREG below).
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }

  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU has no combined divrem instruction, signed or unsigned.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU has no single instruction for [S|U]MUL_LOHI.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU has no rotate or byte-swap instructions.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU has no bit-counting instructions.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64, but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these types correctly.
  // This needs vector comparisons to work.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
}
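
// A rough legend for the table built above: Legal leaves the node for
// instruction selection as-is, Expand asks the legalizer to rewrite it in
// terms of simpler nodes, and Custom routes it back through this target's
// lowering hooks. For example, with ISD::SDIV marked Custom for i32, a node
// built as
//
//   SDValue Div = DAG.getNode(ISD::SDIV, DL, MVT::i32, A, B);
//
// ends up in LowerSDIV below (dispatched via the target's LowerOperation
// hook, which lives outside this file) instead of being matched directly.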

bool AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                              const CallInst &I,
                                              unsigned Intrinsic) const {
  return false;
}

// The backend supports 32- and 64-bit floating-point immediates.
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  MVT::SimpleValueType ScalarTy = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarTy == MVT::f32 || ScalarTy == MVT::f64;
}

bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
  MVT::SimpleValueType ScalarTy = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarTy != MVT::f32 && ScalarTy != MVT::f64;
}
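
// Taken together, the two hooks above say: f32 and f64 immediates are both
// cheap on this target, so there is nothing to gain from shrinking an f64
// constant to f32 plus an extend, and shrinking is declined for exactly the
// types whose immediates are legal.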

// Determine which bits of Op are known to be zero or one and return them in
// KnownZero/KnownOne. Op is expected to be a target-specific node. Used by
// the DAG combiner.
void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const {
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
  switch (Op.getOpcode()) {
  default: break;
  case ISD::SELECT_CC:
    // A select_cc produces one of its two value operands (operands 2 and 3);
    // the compare operands (0 and 1) say nothing about the result's bits.
    DAG.ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth + 1);
    DAG.ComputeMaskedBits(Op.getOperand(3), KnownZero2, KnownOne2, Depth + 1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
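
// Illustration of the intersection above: since the select picks one of its
// two values at run time, a bit of the result is only known if it is known,
// and agrees, in both. E.g. if the true value is known to be 0b1000 and the
// false value 0b1100, bit 3 is known one, bit 2 is unknown, and all
// remaining bits are known zero.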

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the operand is narrower than 32 bits, extend it to 32 bits so the
    // shift pair below keeps the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, convert the value back to its
    // original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
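
// Worked example of the shift pair above, sign-extending an i8 held in an
// i32: with Data = 0x000000B4 and shiftBits = 24,
//   shl 24  ->  0xB4000000
//   sra 24  ->  0xFFFFFFB4
// i.e. bit 7 of the original i8 is replicated across the upper 24 bits,
// which is exactly what sign extension requires.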

EVT AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}
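
// A few sample mappings, for illustration:
//   genIntType(8,  4) -> i32    (4 x 8  = 32 bits, one 32-bit lane)
//   genIntType(16, 4) -> v2i32  (4 x 16 = 64 bits, two 32-bit lanes)
//   genIntType(64, 2) -> v2i64  (2 x 64 = 128 bits, two 64-bit lanes)
// The total bit width is repacked into 32-bit lanes, or 64-bit lanes when
// size == 64.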

SDValue AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  return DAG.getNode(AMDGPUISD::BRANCH_COND,
                     Op.getDebugLoc(),
                     Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
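
// A worked pass through LowerSDIV24 with i8 operands LHS = -7, RHS = 2:
//   jq = -7 ^ 2 = -5;  -5 >> 6 = -1;  -1 | 1 = -1   (the quotient's sign)
//   fq = trunc(-7.0 / 2.0) = -3.0, so iq = -3
//   fr = |mad(3.0, 2.0, -7.0)| = 1.0 and |fb| = 2.0
//   fr >= fb is false, so jq is replaced by 0 and the result is -3 + 0 = -3,
// matching C's truncating division. The jq correction only applies when the
// approximate divide leaves fq one step shy of the true quotient.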

SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates code equivalent to the following AMDIL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
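
// The add/xor pairs above are the classic branchless absolute value: with
// m = (x < 0) ? -1 : 0, (x + m) ^ m == |x|. For x = -7, m = -1:
//   -7 + -1 = -8;  -8 ^ -1 = 7.
// The division itself is then done unsigned, and the same identity applied
// with m = r10 ^ r11 (the XOR of the operand sign masks) negates the
// quotient exactly when the operand signs differ.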

SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
  return SDValue(Op.getNode(), 0);
}

SDValue AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates code equivalent to the following AMDIL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
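
// This mirrors LowerSDIV32 but produces a remainder: take absolute values,
// compute r0 - (r0 / r1) * r1 unsigned, then restore the sign. Only r10,
// the sign of the dividend, feeds the final fixup, matching C's rule that
// the remainder takes the sign of the dividend: -7 % 2 == -1 and 7 % -2 == 1.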

SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
  return SDValue(Op.getNode(), 0);
}