mirror of
https://github.com/RPCS3/llvm.git
synced 2026-07-01 21:04:04 -04:00
[InstCombine][AMDGPU] Fix crash with v3i16/v3f16 buffer intrinsics
Summary: This is something of a workaround to avoid a crash later on in the type legalizer (WidenVectorResult()). Also added some f16 tests, including a non-working v3f16 case with a FIXME. Reviewers: arsenm, tpr, nhaehnle Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68865 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374993 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -971,6 +971,13 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
|
||||
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
|
||||
APInt DemandedElts,
|
||||
int DMaskIdx) {
|
||||
|
||||
// FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported.
|
||||
if (DMaskIdx < 0 &&
|
||||
II->getType()->getScalarSizeInBits() != 32 &&
|
||||
DemandedElts.getActiveBits() == 3)
|
||||
return nullptr;
|
||||
|
||||
unsigned VWidth = II->getType()->getVectorNumElements();
|
||||
if (VWidth == 1)
|
||||
return nullptr;
|
||||
|
||||
@@ -1474,6 +1474,51 @@ declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32
|
||||
|
||||
declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
|
||||
|
||||
; Test: extract element 3 of a <4 x half> raw tbuffer load.
; The CHECK lines expect the <4 x half> load and the extractelement to be
; left unchanged.
; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
|
||||
; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
|
||||
; CHECK-NEXT: ret half %elt1
|
||||
define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
|
||||
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
%elt1 = extractelement <4 x half> %data, i32 3
|
||||
ret half %elt1
|
||||
}
|
||||
|
||||
; Test: extract element 2 of a <4 x half> raw tbuffer load.
; The CHECK lines currently expect the load to stay <4 x half>; the FIXME
; below records that it should eventually shrink to a <3 x half> load.
; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16).
|
||||
; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
|
||||
; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 2
|
||||
; CHECK-NEXT: ret half %elt1
|
||||
define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
|
||||
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
%elt1 = extractelement <4 x half> %data, i32 2
|
||||
ret half %elt1
|
||||
}
|
||||
|
||||
; Test: extract element 1 of a <4 x half> raw tbuffer load.
; The CHECK lines expect the load to be narrowed to a <2 x half> load, with
; the extractelement reading index 1 of the narrower vector.
; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
|
||||
; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
|
||||
; CHECK-NEXT: ret half %elt1
|
||||
define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
|
||||
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
%elt1 = extractelement <4 x half> %data, i32 1
|
||||
ret half %elt1
|
||||
}
|
||||
|
||||
; Test: extract element 0 of a <4 x half> raw tbuffer load.
; The CHECK lines expect a scalar half load whose result is returned
; directly, with no extractelement remaining.
; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
|
||||
; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
; CHECK-NEXT: ret half %data
|
||||
define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
|
||||
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
|
||||
%elt1 = extractelement <4 x half> %data, i32 0
|
||||
ret half %elt1
|
||||
}
|
||||
|
||||
declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
|
||||
declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
|
||||
declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
|
||||
declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; llvm.amdgcn.struct.tbuffer.load
|
||||
; --------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user