mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[NVPTX] Fix sign/zero-extending ldg/ldu instruction selection
Summary: We don't have sign-/zero-extending ldg/ldu instructions defined, so we need to emulate them with explicit CVT instructions. Previously only the i8 case was handled; the other load widths were not.
Fixes PR26185.
Reviewers: jingyue, jlebar
Subscribers: jholewinski
Differential Revision: http://reviews.llvm.org/D19615
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268272 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1e2f626feb
commit
2ab91e26c5
@ -2062,61 +2062,33 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
|
||||
//
|
||||
// i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
|
||||
//
|
||||
// Since we load an i8 value, the matching logic above will have selected an
|
||||
// LDG instruction that reads i8 and stores it in an i16 register (NVPTX does
|
||||
// not expose 8-bit registers):
|
||||
//
|
||||
// i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0
|
||||
//
|
||||
// To get the correct type in this case, truncate back to i8 and then extend
|
||||
// to the original load type.
|
||||
// In this case, the matching logic above will select a load for the original
|
||||
// memory type (in this case, i8) and our types will not match (the node needs
|
||||
// to return an i32 in this case). Our LDG/LDU nodes do not support the
|
||||
// concept of sign-/zero-extension, so emulate it here by adding an explicit
|
||||
// CVT instruction. Ptxas should clean up any redundancies here.
|
||||
|
||||
EVT OrigType = N->getValueType(0);
|
||||
LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N);
|
||||
if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) {
|
||||
unsigned CvtOpc = 0;
|
||||
LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
|
||||
|
||||
switch (LDSD->getExtensionType()) {
|
||||
default:
|
||||
llvm_unreachable("An extension is required for i8 loads");
|
||||
break;
|
||||
case ISD::SEXTLOAD:
|
||||
switch (OrigType.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled integer load type");
|
||||
break;
|
||||
case MVT::i32:
|
||||
CvtOpc = NVPTX::CVT_s32_s8;
|
||||
break;
|
||||
case MVT::i64:
|
||||
CvtOpc = NVPTX::CVT_s64_s8;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ISD::EXTLOAD:
|
||||
case ISD::ZEXTLOAD:
|
||||
switch (OrigType.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled integer load type");
|
||||
break;
|
||||
case MVT::i32:
|
||||
CvtOpc = NVPTX::CVT_u32_u8;
|
||||
break;
|
||||
case MVT::i64:
|
||||
CvtOpc = NVPTX::CVT_u64_u8;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (OrigType != EltVT && LdNode) {
|
||||
// We have an extending-load. The instruction we selected operates on the
|
||||
// smaller type, but the SDNode we are replacing has the larger type. We
|
||||
// need to emit a CVT to make the types match.
|
||||
bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
|
||||
unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
|
||||
EltVT.getSimpleVT(), IsSigned);
|
||||
|
||||
// For each output value, truncate to i8 (since the upper 8 bits are
|
||||
// undefined) and then extend to the desired type.
|
||||
// For each output value, apply the manual sign/zero-extension and make sure
|
||||
// all users of the load go through that CVT.
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
SDValue Res(LD, i);
|
||||
SDValue OrigVal(N, i);
|
||||
|
||||
SDNode *CvtNode =
|
||||
CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
|
||||
CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
|
||||
CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
|
||||
DL, MVT::i32));
|
||||
ReplaceUses(OrigVal, SDValue(CvtNode, 0));
|
||||
}
|
||||
}
|
||||
@ -5199,3 +5171,57 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
|
||||
/// conversion from \p SrcTy to \p DestTy.
|
||||
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
|
||||
bool IsSigned) {
|
||||
switch (SrcTy.SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled source type");
|
||||
case MVT::i8:
|
||||
switch (DestTy.SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled dest type");
|
||||
case MVT::i16:
|
||||
return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
|
||||
case MVT::i32:
|
||||
return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
|
||||
case MVT::i64:
|
||||
return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
|
||||
}
|
||||
case MVT::i16:
|
||||
switch (DestTy.SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled dest type");
|
||||
case MVT::i8:
|
||||
return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
|
||||
case MVT::i32:
|
||||
return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
|
||||
case MVT::i64:
|
||||
return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
|
||||
}
|
||||
case MVT::i32:
|
||||
switch (DestTy.SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled dest type");
|
||||
case MVT::i8:
|
||||
return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
|
||||
case MVT::i16:
|
||||
return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
|
||||
case MVT::i64:
|
||||
return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
|
||||
}
|
||||
case MVT::i64:
|
||||
switch (DestTy.SimpleTy) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled dest type");
|
||||
case MVT::i8:
|
||||
return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
|
||||
case MVT::i16:
|
||||
return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
|
||||
case MVT::i32:
|
||||
return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -93,6 +93,7 @@ private:
|
||||
|
||||
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
|
||||
|
||||
static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, bool IsSigned);
|
||||
};
|
||||
} // end namespace llvm
|
||||
|
||||
|
@ -377,6 +377,8 @@ let hasSideEffects = 0 in {
|
||||
}
|
||||
|
||||
// Generate cvts from all types to all types.
|
||||
defm CVT_s8 : CVT_FROM_ALL<"s8", Int16Regs>;
|
||||
defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>;
|
||||
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
|
||||
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
|
||||
defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
|
||||
|
34
test/CodeGen/NVPTX/bug26185-2.ll
Normal file
34
test/CodeGen/NVPTX/bug26185-2.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
|
||||
|
||||
; Verify that we correctly emit code for extending ldg/ldu. We do not expose
|
||||
; extending variants in the backend, but the ldg/ldu selection code may pick
|
||||
; extending loads as candidates. We do want to support this, so make sure we
|
||||
; emit the necessary cvt.* instructions to implement the extension and let ptxas
|
||||
; emit the real extending loads.
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; CHECK-LABEL: spam
|
||||
; Kernel under test: reads one i16 from the read-only global buffer %arg,
; sign-extends it to i64, squares it, and accumulates the result into the i64
; stored at %arg1. %arg2 is not referenced in the body.
define ptx_kernel void @spam(i8 addrspace(1)* noalias nocapture readonly %arg, i8 addrspace(1)* noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
|
||||
bb:
|
||||
%tmp = bitcast i8 addrspace(1)* %arg to i16 addrspace(1)*
|
||||
%tmp4 = bitcast i8 addrspace(1)* %arg1 to i64 addrspace(1)*
|
||||
; Element index into the i16 view of %arg is %arg3 + 8.
%tmp5 = add nsw i64 %arg3, 8
|
||||
%tmp6 = getelementptr i16, i16 addrspace(1)* %tmp, i64 %tmp5
|
||||
; The i16 load itself is expected to be emitted as a plain 16-bit
; ld.global.nc with no extension folded into it.
; CHECK: ld.global.nc.u16
|
||||
%tmp7 = load i16, i16 addrspace(1)* %tmp6, align 2
|
||||
; The sign extension of the loaded value must appear as a separate cvt
; instruction following the load.
; CHECK: cvt.s32.s16
|
||||
%tmp8 = sext i16 %tmp7 to i64
|
||||
%tmp9 = mul nsw i64 %tmp8, %tmp8
|
||||
; Accumulate the squared value into the i64 at %arg1.
%tmp10 = load i64, i64 addrspace(1)* %tmp4, align 8
|
||||
%tmp11 = add nsw i64 %tmp9, %tmp10
|
||||
store i64 %tmp11, i64 addrspace(1)* %tmp4, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind "polly.skip.fn" }
|
||||
|
||||
!nvvm.annotations = !{!0}
|
||||
|
||||
!0 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64, i64)* @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}
|
Loading…
Reference in New Issue
Block a user