mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-10 14:10:58 +00:00
ee36276e53
Summary: This is done by first adding two additional instructions that convert the address returned by the alloca to the local address space and then back to generic. All uses of the alloca instruction are then replaced with the converted generic address. We can then rely on the NVPTXFavorNonGenericAddrSpace pass to combine the generic addrspacecast with the corresponding Load, Store, Bitcast, and GEP instructions. Patched by Xuetian Weng (xweng@google.com). Test Plan: test/CodeGen/NVPTX/lower-alloca.ll Reviewers: jholewinski, jingyue Reviewed By: jingyue Subscribers: meheff, broune, eliben, jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D10483 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239964 91177308-0d34-0410-b5e6-96231b3b80d8
23 lines
772 B
LLVM
23 lines
772 B
LLVM
; Checks that a stack allocation in a kernel is accessed through
; addrspace(5), both in the transformed IR and in the generated PTX.
;
; IR-level run; its output is verified with the default CHECK prefix.
; RUN: opt < %s -S -nvptx-lower-alloca -nvptx-favor-non-generic -dce | FileCheck %s
; Code-generation run for nvptx64 / sm_35; verified with the PTX prefix.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
|
|
|
|
; Kernel under test. It allocates one i32 on the stack, stores 0 to it and
; then hands the (generic) pointer to @callee.
define void @kernel() {
; Anchor the IR checks on this function so they cannot match text that
; appears earlier in the opt output.
; CHECK-LABEL: @kernel(
; PTX-LABEL: .visible .entry kernel(
  %A = alloca i32
; The alloca result is expected to be cast to addrspace(5) and the store
; rewritten to go through that pointer; llc should then emit st.local.
; CHECK: addrspacecast i32* %A to i32 addrspace(5)*
; CHECK: store i32 0, i32 addrspace(5)* {{%.+}}
; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
  store i32 0, i32* %A
  ; The callee still takes a plain i32*, so %A is also used as a generic
  ; pointer here.
  call void @callee(i32* %A)
  ret void
}
|
|
|
|
; External function with no body in this file; @kernel passes it the
; generic i32* produced by the alloca.
declare void @callee(i32*)
|
|
|
|
; NVVM annotation list. Entry !0 attaches the "kernel" property with value 1
; to @kernel; the llc run expects @kernel to be emitted as a .visible .entry.
!nvvm.annotations = !{!0}
!0 = !{void ()* @kernel, !"kernel", i32 1}
|