mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[NVPTX] Make the alignment an explicit argument to ldu/ldg
Summary: Instead of specifying the alignment as metadata, which may be destroyed by transformation passes, make the alignment the second argument to ldu/ldg intrinsic calls.
Test Plan: ldu-ldg.ll ldu-i8.ll ldu-reg-plus-offset.ll
Reviewers: eliben, meheff, jholewinski
Reviewed By: meheff, jholewinski
Subscribers: jholewinski, llvm-commits
Differential Revision: http://reviews.llvm.org/D5093
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216731 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c5484a2704
commit
87a2b36cf6
@ -797,24 +797,30 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
||||
|
||||
// Generated within nvvm. Use for ldu on sm_20 or later
|
||||
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.i">;
|
||||
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.f">;
|
||||
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.p">;
|
||||
|
||||
// Generated within nvvm. Use for ldg on sm_35 or later
|
||||
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldg.global.i">;
|
||||
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldg.global.f">;
|
||||
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldg.global.p">;
|
||||
|
||||
// Use for generic pointers
|
||||
|
@ -3270,16 +3270,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
|
||||
Info.vol = 0;
|
||||
Info.readMem = true;
|
||||
Info.writeMem = false;
|
||||
|
||||
// alignment is available as metadata.
|
||||
// Grab it and set the alignment.
|
||||
assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
|
||||
MDNode *AlignMD = I.getMetadata("align");
|
||||
assert(AlignMD && "Must have a non-null MDNode");
|
||||
assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
|
||||
Value *Align = AlignMD->getOperand(0);
|
||||
int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
|
||||
Info.align = Alignment;
|
||||
Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -3299,16 +3290,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
|
||||
Info.vol = 0;
|
||||
Info.readMem = true;
|
||||
Info.writeMem = false;
|
||||
|
||||
// alignment is available as metadata.
|
||||
// Grab it and set the alignment.
|
||||
assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
|
||||
MDNode *AlignMD = I.getMetadata("align");
|
||||
assert(AlignMD && "Must have a non-null MDNode");
|
||||
assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
|
||||
Value *Align = AlignMD->getOperand(0);
|
||||
int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
|
||||
Info.align = Alignment;
|
||||
Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -2,15 +2,13 @@
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32)
|
||||
|
||||
define i8 @foo(i8* %a) {
|
||||
; Ensure we properly truncate off the high-order 24 bits
|
||||
; CHECK: ldu.global.u8
|
||||
; CHECK: cvt.u32.u16
|
||||
; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4)
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
!0 = metadata !{i32 4}
|
||||
|
@ -1,40 +1,36 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
|
||||
declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
|
||||
declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
|
||||
declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
|
||||
declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
|
||||
|
||||
|
||||
; CHECK: func0
|
||||
define i8 @func0(i8 addrspace(1)* %ptr) {
|
||||
; ldu.global.u8
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
; CHECK: func1
|
||||
define i32 @func1(i32 addrspace(1)* %ptr) {
|
||||
; ldu.global.u32
|
||||
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
|
||||
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
; CHECK: func2
|
||||
define i8 @func2(i8 addrspace(1)* %ptr) {
|
||||
; ld.global.nc.u8
|
||||
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
|
||||
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
; CHECK: func3
|
||||
define i32 @func3(i32 addrspace(1)* %ptr) {
|
||||
; ld.global.nc.u32
|
||||
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
|
||||
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
|
||||
|
||||
!0 = metadata !{i32 4}
|
||||
|
@ -7,15 +7,13 @@ define void @reg_plus_offset(i32* %a) {
|
||||
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
|
||||
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
|
||||
%p2 = getelementptr i32* %a, i32 8
|
||||
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
|
||||
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
|
||||
%p3 = getelementptr i32* %a, i32 9
|
||||
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
|
||||
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
|
||||
%t3 = mul i32 %t1, %t2
|
||||
store i32 %t3, i32* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = metadata !{ i32 4 }
|
||||
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32)
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
|
Loading…
Reference in New Issue
Block a user