Mirror of https://github.com/RPCSX/llvm.git, synced 2025-02-02 18:42:36 +00:00
877859e49f
Summary: With this change (plus some changes to prevent !invariant from being clobbered within llvm), clang will be able to model the __ldg CUDA builtin as an invariant load, rather than as a target-specific llvm intrinsic. This will let the optimizer play with these loads -- specifically, we should be able to vectorize them in the load-store vectorizer.

Reviewers: tra

Subscribers: jholewinski, hfinkel, llvm-commits, chandlerc

Differential Revision: https://reviews.llvm.org/D23477

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281152 91177308-0d34-0410-b5e6-96231b3b80d8
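For context, a minimal CUDA sketch of the kind of kernel this change is aimed at; the kernel name and parameters are illustrative, not taken from the patch. Per the summary above, a __ldg read of read-only global data is intended to lower to an ordinary load tagged with !invariant.load metadata, which the NVPTX backend then selects as ld.global.nc, as the test file below checks.

// Illustrative kernel; names are hypothetical. __ldg reads through the
// read-only (non-coherent) data cache on sm_35+. With this change, clang
// can model it as a plain load carrying !invariant.load metadata instead
// of a target-specific intrinsic, so passes such as the load-store
// vectorizer can treat it like any other load.
__global__ void scale(const float* __restrict__ in, float* __restrict__ out, float k) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  out[i] = k * __ldg(&in[i]);  // would become: load float, ... !invariant.load
}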
28 lines · 696 B · LLVM
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s

; Check that invariant loads from the global addrspace are lowered to
; ld.global.nc.

; CHECK-LABEL: @ld_global
define i32 @ld_global(i32 addrspace(1)* %ptr) {
; CHECK: ld.global.nc.{{[a-z]}}32
  %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
  ret i32 %a
}

; CHECK-LABEL: @ld_not_invariant
define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
; CHECK: ld.global.{{[a-z]}}32
  %a = load i32, i32 addrspace(1)* %ptr
  ret i32 %a
}

; CHECK-LABEL: @ld_not_global_addrspace
define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
; CHECK: ld.{{[a-z]}}32
  %a = load i32, i32 addrspace(0)* %ptr
  ret i32 %a
}

!0 = !{}