[InstCombine][AMDGPU] Fix crash with v3i16/v3f16 buffer intrinsics

Summary:
This is something of a workaround to avoid a crash later on in type
legalizer (WidenVectorResult()).
Also added some f16 tests, including a non-working v3f16 case with
a FIXME.

Reviewers: arsenm, tpr, nhaehnle

Reviewed By: arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68865

llvm-svn: 374993
This commit is contained in:
Piotr Sobczak 2019-10-16 11:14:01 +00:00
parent 74dc11383e
commit f5c1ced2d0
2 changed files with 52 additions and 0 deletions

View File

@ -971,6 +971,13 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
int DMaskIdx) {
// FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported.
if (DMaskIdx < 0 &&
II->getType()->getScalarSizeInBits() != 32 &&
DemandedElts.getActiveBits() == 3)
return nullptr;
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;

View File

@ -1474,6 +1474,51 @@ declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32
declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
; CHECK-NEXT: ret half %elt1
define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
%elt1 = extractelement <4 x half> %data, i32 3
ret half %elt1
}
; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16).
; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 2
; CHECK-NEXT: ret half %elt1
define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
%elt1 = extractelement <4 x half> %data, i32 2
ret half %elt1
}
; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
; CHECK-NEXT: ret half %elt1
define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
%elt1 = extractelement <4 x half> %data, i32 1
ret half %elt1
}
; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
%elt1 = extractelement <4 x half> %data, i32 0
ret half %elt1
}
declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
; --------------------------------------------------------------------
; llvm.amdgcn.struct.tbuffer.load
; --------------------------------------------------------------------