mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-12 05:56:28 +00:00
[NVPTX] Add lowering of i128 params.
The patch adds support for lowering i128 params. The changes are quite small: i128 is handled as a "special case" of the integer type. With this patch, we lower i128 params the same way as aggregates of size 16 bytes: .param .b8 _ [16].

Currently, NVPTX cannot deal with 128-bit integers:
* in some cases because of failed assertions like ValVTs.size() == OutVals.size() && "Bad return value decomposition"
* in other cases because it emits PTX with .i128 or .u128 types (which are not valid [1])

[1] http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#fundamental-types

Differential Revision: https://reviews.llvm.org/D34555
Patch by: Denys Zariaiev (denys.zariaiev@gmail.com)
llvm-svn: 307326
This commit is contained in:
parent
231abab692
commit
dc71dcb613
@ -400,7 +400,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
|
||||
O << " (";
|
||||
|
||||
if (isABI) {
|
||||
if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
|
||||
if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
|
||||
unsigned size = 0;
|
||||
if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
|
||||
size = ITy->getBitWidth();
|
||||
@ -418,7 +418,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
|
||||
} else if (isa<PointerType>(Ty)) {
|
||||
O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
|
||||
<< " func_retval0";
|
||||
} else if (Ty->isAggregateType() || Ty->isVectorTy()) {
|
||||
} else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
|
||||
unsigned totalsz = DL.getTypeAllocSize(Ty);
|
||||
unsigned retAlignment = 0;
|
||||
if (!getAlign(*F, 0, retAlignment))
|
||||
@ -1425,6 +1425,14 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
|
||||
else
|
||||
O << " .align " << GVar->getAlignment();
|
||||
|
||||
// Special case for i128
|
||||
if (ETy->isIntegerTy(128)) {
|
||||
O << " .b8 ";
|
||||
getSymbol(GVar)->print(O, MAI);
|
||||
O << "[16]";
|
||||
return;
|
||||
}
|
||||
|
||||
if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
|
||||
O << " .";
|
||||
O << getPTXFundamentalTypeStr(ETy);
|
||||
@ -1551,7 +1559,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
|
||||
}
|
||||
|
||||
if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
|
||||
if (Ty->isAggregateType() || Ty->isVectorTy()) {
|
||||
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
|
||||
// Just print .param .align <a> .b8 .param[size];
|
||||
// <a> = PAL.getparamalignment
|
||||
// size = typeallocsize of element type
|
||||
|
@ -169,6 +169,19 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
|
||||
SmallVector<EVT, 16> TempVTs;
|
||||
SmallVector<uint64_t, 16> TempOffsets;
|
||||
|
||||
// Special case for i128 - decompose to (i64, i64)
|
||||
if (Ty->isIntegerTy(128)) {
|
||||
ValueVTs.push_back(EVT(MVT::i64));
|
||||
ValueVTs.push_back(EVT(MVT::i64));
|
||||
|
||||
if (Offsets) {
|
||||
Offsets->push_back(StartingOffset + 0);
|
||||
Offsets->push_back(StartingOffset + 8);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
|
||||
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
|
||||
EVT VT = TempVTs[i];
|
||||
@ -1263,7 +1276,7 @@ std::string NVPTXTargetLowering::getPrototype(
|
||||
O << "()";
|
||||
} else {
|
||||
O << "(";
|
||||
if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
|
||||
if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) {
|
||||
unsigned size = 0;
|
||||
if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
|
||||
size = ITy->getBitWidth();
|
||||
@ -1281,7 +1294,7 @@ std::string NVPTXTargetLowering::getPrototype(
|
||||
O << ".param .b" << size << " _";
|
||||
} else if (isa<PointerType>(retTy)) {
|
||||
O << ".param .b" << PtrVT.getSizeInBits() << " _";
|
||||
} else if (retTy->isAggregateType() || retTy->isVectorTy()) {
|
||||
} else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) {
|
||||
auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
|
||||
O << ".param .align " << retAlignment << " .b8 _["
|
||||
<< DL.getTypeAllocSize(retTy) << "]";
|
||||
@ -1303,7 +1316,7 @@ std::string NVPTXTargetLowering::getPrototype(
|
||||
first = false;
|
||||
|
||||
if (!Outs[OIdx].Flags.isByVal()) {
|
||||
if (Ty->isAggregateType() || Ty->isVectorTy()) {
|
||||
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
|
||||
unsigned align = 0;
|
||||
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
|
||||
// +1 because index 0 is reserved for return type alignment
|
||||
@ -1459,7 +1472,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
unsigned AllocSize = DL.getTypeAllocSize(Ty);
|
||||
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
bool NeedAlign; // Does argument declaration specify alignment?
|
||||
if (Ty->isAggregateType() || Ty->isVectorTy()) {
|
||||
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
|
||||
// declare .param .align <align> .b8 .param<n>[<size>];
|
||||
SDValue DeclareParamOps[] = {
|
||||
Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
|
||||
@ -1635,8 +1648,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
// these three types to match the logic in
|
||||
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
|
||||
// Plus, this behavior is consistent with nvcc's.
|
||||
if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() ||
|
||||
RetTy->isPointerTy()) {
|
||||
if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||
|
||||
(RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {
|
||||
// Scalar needs to be at least 32bit wide
|
||||
if (resultsz < 32)
|
||||
resultsz = 32;
|
||||
@ -2367,7 +2380,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
|
||||
|
||||
if (theArgs[i]->use_empty()) {
|
||||
// argument is dead
|
||||
if (Ty->isAggregateType()) {
|
||||
if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {
|
||||
SmallVector<EVT, 16> vtparts;
|
||||
|
||||
ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
|
||||
|
@ -81,7 +81,7 @@ static std::string computeDataLayout(bool is64Bit) {
|
||||
if (!is64Bit)
|
||||
Ret += "-p:32:32";
|
||||
|
||||
Ret += "-i64:64-v16:16-v32:32-n16:32:64";
|
||||
Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
|
||||
|
||||
return Ret;
|
||||
}
|
||||
|
7
test/CodeGen/NVPTX/i128-global.ll
Normal file
7
test/CodeGen/NVPTX/i128-global.ll
Normal file
@ -0,0 +1,7 @@
|
||||
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
; CHECK: .visible .global .align 16 .b8 G1[16] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
@G1 = global i128 1
|
||||
|
||||
; CHECK: .visible .global .align 16 .b8 G2[16];
|
||||
@G2 = global i128 0
|
58
test/CodeGen/NVPTX/i128-param.ll
Normal file
58
test/CodeGen/NVPTX/i128-param.ll
Normal file
@ -0,0 +1,58 @@
|
||||
; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: .visible .func callee(
|
||||
; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16],
|
||||
; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16],
|
||||
define void @callee(i128, i128, i128*) {
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1];
|
||||
|
||||
; CHECK: mul.lo.s64 %[[REG4:rd[0-9]+]], %[[REG0]], %[[REG3]];
|
||||
; CHECK-NEXT: mul.hi.u64 %[[REG5:rd[0-9]+]], %[[REG0]], %[[REG2]];
|
||||
; CHECK-NEXT: add.s64 %[[REG6:rd[0-9]+]], %[[REG5]], %[[REG4]];
|
||||
; CHECK-NEXT: mul.lo.s64 %[[REG7:rd[0-9]+]], %[[REG1]], %[[REG2]];
|
||||
; CHECK-NEXT: add.s64 %[[REG8:rd[0-9]+]], %[[REG6]], %[[REG7]];
|
||||
; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]];
|
||||
%a = mul i128 %0, %1
|
||||
|
||||
store i128 %a, i128* %2
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: .visible .entry caller_kernel(
|
||||
; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16],
|
||||
; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16],
|
||||
define ptx_kernel void @caller_kernel(i128, i128, i128*) {
|
||||
start:
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0];
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
|
||||
|
||||
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]}
|
||||
; CHECK: .param .align 16 .b8 param1[16];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]}
|
||||
; CHECK: } // callseq [[CALLSEQ_ID]]
|
||||
call void @callee(i128 %0, i128 %1, i128* %2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: .visible .func caller_func(
|
||||
; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16],
|
||||
; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16],
|
||||
define void @caller_func(i128, i128, i128*) {
|
||||
start:
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0]
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
|
||||
|
||||
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]}
|
||||
; CHECK: .param .align 16 .b8 param1[16];
|
||||
; CHECK: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]}
|
||||
; CHECK: } // callseq [[CALLSEQ_ID]]
|
||||
call void @callee(i128 %0, i128 %1, i128* %2)
|
||||
|
||||
ret void
|
||||
}
|
28
test/CodeGen/NVPTX/i128-retval.ll
Normal file
28
test/CodeGen/NVPTX/i128-retval.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee(
|
||||
define i128 @callee(i128) {
|
||||
; CHECK: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
|
||||
; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]}
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: .visible .func caller(
|
||||
define void @caller(i128, i128*) {
|
||||
start:
|
||||
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0];
|
||||
; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]], [caller_param_1];
|
||||
|
||||
; CHECK: { // callseq 0, 0
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK: call.uni (retval0),
|
||||
; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0+0];
|
||||
; CHECK: } // callseq 0
|
||||
%a = call i128 @callee(i128 %0)
|
||||
|
||||
; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]];
|
||||
; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]];
|
||||
store i128 %a, i128* %1
|
||||
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user