mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-28 10:46:11 +00:00
[SeparateConstOffsetFromGEP] Preserve metadata when splitting GEPs
Summary: The !amdgpu.uniform metadata on a GEP needs to be preserved on the new GEPs created when it is split; otherwise bad things happen on AMDGPU.
Reviewers: arsenm, nhaehnle, jingyue, broune, majnemer, bjarke.roune, dblaikie
Subscribers: wdng, tpr, llvm-commits
Differential Revision: https://reviews.llvm.org/D42744
llvm-svn: 323907
This commit is contained in:
parent
862987abe7
commit
8e7d149a31
@ -1071,6 +1071,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
|
||||
NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
|
||||
ConstantInt::get(IntPtrTy, Index, true),
|
||||
GEP->getName(), GEP);
|
||||
NewGEP->copyMetadata(*GEP);
|
||||
// Inherit the inbounds attribute of the original GEP.
|
||||
cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
|
||||
} else {
|
||||
@ -1095,6 +1096,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
|
||||
Type::getInt8Ty(GEP->getContext()), NewGEP,
|
||||
ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
|
||||
GEP);
|
||||
NewGEP->copyMetadata(*GEP);
|
||||
// Inherit the inbounds attribute of the original GEP.
|
||||
cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
|
||||
if (GEP->getType() != I8PtrTy)
|
||||
|
@ -92,3 +92,48 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
|
||||
store float %tmp21, float addrspace(1)* %output, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; IR-LABEL: @keep_metadata(
|
||||
; IR: getelementptr {{.*}} !amdgpu.uniform
|
||||
; IR: getelementptr {{.*}} !amdgpu.uniform
|
||||
; IR: getelementptr {{.*}} !amdgpu.uniform
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
|
||||
main_body:
|
||||
%22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
|
||||
%23 = bitcast float %22 to i32
|
||||
%24 = shl i32 %23, 1
|
||||
%25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24, !amdgpu.uniform !0
|
||||
%26 = load <8 x i32>, <8 x i32> addrspace(6)* %25, align 32, !invariant.load !0
|
||||
%27 = shl i32 %23, 2
|
||||
%28 = or i32 %27, 3
|
||||
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
|
||||
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
|
||||
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
|
||||
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
|
||||
%33 = extractelement <4 x float> %32, i32 0
|
||||
%34 = extractelement <4 x float> %32, i32 1
|
||||
%35 = extractelement <4 x float> %32, i32 2
|
||||
%36 = extractelement <4 x float> %32, i32 3
|
||||
%37 = bitcast float %4 to i32
|
||||
%38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
|
||||
%39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
|
||||
%40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
|
||||
%41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
|
||||
%42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
|
||||
%43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
|
||||
ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
|
||||
|
||||
|
||||
!0 = !{}
|
||||
|
||||
attributes #5 = { "InitialPSInputAddr"="45175" }
|
||||
attributes #6 = { nounwind readnone speculatable }
|
||||
attributes #7 = { nounwind readonly }
|
||||
attributes #8 = { nounwind readnone }
|
||||
|
Loading…
Reference in New Issue
Block a user