mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-27 23:33:55 +00:00
[NVPTX] aligned byte-buffers for vector return types
Summary: Fixes PR21100, which is caused by an inconsistency between the declared return type and the expected return type at the call site. The new behavior is consistent with nvcc and with the NVPTXTargetLowering::getPrototype function. Test Plan: test/CodeGen/NVPTX/vector-return.ll Reviewers: jholewinski Reviewed By: jholewinski Subscribers: llvm-commits, meheff, eliben, jholewinski Differential Revision: http://reviews.llvm.org/D5612 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220607 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b31d53a60f
commit
1d1d705a95
@ -1355,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
// .param .align 16 .b8 retval0[<size-in-bytes>], or
|
||||
// .param .b<size-in-bits> retval0
|
||||
unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
|
||||
if (retTy->isSingleValueType()) {
|
||||
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
|
||||
// these three types to match the logic in
|
||||
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
|
||||
// Plus, this behavior is consistent with nvcc's.
|
||||
if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
|
||||
retTy->isPointerTy()) {
|
||||
// Scalar needs to be at least 32bit wide
|
||||
if (resultsz < 32)
|
||||
resultsz = 32;
|
||||
|
14
test/CodeGen/NVPTX/vector-return.ll
Normal file
14
test/CodeGen/NVPTX/vector-return.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
|
||||
|
||||
declare <2 x float> @bar(<2 x float> %input)
|
||||
|
||||
define void @foo(<2 x float> %input, <2 x float>* %output) {
|
||||
; CHECK-LABEL: @foo
|
||||
entry:
|
||||
%call = tail call <2 x float> @bar(<2 x float> %input)
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0];
|
||||
store <2 x float> %call, <2 x float>* %output, align 8
|
||||
; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]}
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user