[InstCombine] Allow values with multiple users in SimplifyDemandedVectorElts

Summary:
Allow SimplifyDemandedVectorElts to skip its single-use check so that
operands can still be simplified when DemandedElts is known to contain
the union of the elements used by all users.
It is the responsibility of the caller of SimplifyDemandedVectorElts to
supply the correct DemandedElts.

Simplify a series of extractelement instructions if only a subset of
elements is used.

Reviewers: reames, arsenm, majnemer, nhaehnle

Reviewed By: nhaehnle

Subscribers: wdng, jvesely, nhaehnle, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67345

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375395 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Piotr Sobczak
2019-10-21 08:12:47 +00:00
parent 919bed3ee7
commit ec10cb25d9
4 changed files with 186 additions and 31 deletions
@@ -152,11 +152,10 @@ define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32>
ret <3 x float> %shuf
}
; FIXME: Not handled even though only 2 elts used
; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt0 = extractelement <2 x float> %data, i32 0
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0
; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
; CHECK-NEXT: ret { float, float } %ins1
@@ -169,6 +168,74 @@ define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i3
ret { float, float } %ins1
}
; Lanes 0, 1 and 2 of a <4 x float> buffer load are read through three
; separate extractelement instructions, so the load result has several users.
; The expected output narrows the call to the <3 x float> variant of the
; intrinsic and retypes the three extracts to match.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_2(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt0 = extractelement <3 x float> %data, i32 0
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1
; CHECK-NEXT: %elt2 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
; CHECK-NEXT: %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
; CHECK-NEXT: %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
; CHECK-NEXT: ret { float, float, float } %ins2
define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
; Full-width load; lane 3 of the result is never referenced below.
%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; Three independent users of %data, one per lane 0..2.
%elt0 = extractelement <4 x float> %data, i32 0
%elt1 = extractelement <4 x float> %data, i32 1
%elt2 = extractelement <4 x float> %data, i32 2
; Pack the three scalars into the returned aggregate.
%ins0 = insertvalue { float, float, float } undef, float %elt0, 0
%ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
%ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
ret { float, float, float } %ins2
}
; The <4 x float> load result is used by two extractelement instructions
; (lanes 0 and 2) and by a shufflevector that reads lane 1, so the users
; together read lanes 0, 1 and 2 only. The expected output narrows the call
; to the <3 x float> variant of the intrinsic, expresses the insertelement
; chain as a shuffle of lanes <0, 2>, and marks the shuffle lane that came
; from the undef operand as undef. The return is matched as well so that
; nothing unexpected can follow the fadd.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 undef, i32 1>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
; Full-width load; lane 3 of the result is never referenced below.
%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; First and second users of %data: scalar reads of lanes 0 and 2.
%elt0 = extractelement <4 x float> %data, i32 0
%elt2 = extractelement <4 x float> %data, i32 2
; Build <elt0, elt2> one lane at a time.
%ins0 = insertelement <2 x float> undef, float %elt0, i32 0
%ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
; Third user of %data. Mask index 4 selects lane 0 of the second (undef)
; operand; mask index 1 selects lane 1 of %data.
%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 4, i32 1>
%ret = fadd <2 x float> %ins1, %shuf
ret <2 x float> %ret
}
; Same shape as the _3 variant, but %data is the SECOND shufflevector operand
; and the first operand is undef. The users together read lanes 0, 1 and 2 of
; the load result. The expected output narrows the call to the <3 x float>
; variant of the intrinsic and shows the shuffle with %data as its first
; operand and the mask <1, undef>.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
; Full-width load; lane 3 of the result is never referenced below.
%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; Scalar reads of lanes 0 and 2.
%elt0 = extractelement <4 x float> %data, i32 0
%elt2 = extractelement <4 x float> %data, i32 2
; Build <elt0, elt2> one lane at a time.
%ins0 = insertelement <2 x float> undef, float %elt0, i32 0
%ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
; Mask index 5 selects lane 1 of the second operand (%data); mask index 1
; selects lane 1 of the first operand, which is undef.
%shuf = shufflevector <4 x float> undef, <4 x float> %data, <2 x i32> <i32 5, i32 1>
%ret = fadd <2 x float> %ins1, %shuf
ret <2 x float> %ret
}
; %data is read by one extractelement (lane 2) and appears as BOTH operands
; of a shufflevector that reads lanes 0 and 1, so the users together read
; lanes 0, 1 and 2 only. The expected output narrows the call to the
; <3 x float> variant of the intrinsic, expresses the insertelement chain as
; a shuffle of lanes <2, 2>, and folds the two-operand shuffle into a
; single-operand one with mask <0, 1>. The return is matched as well so that
; nothing unexpected can follow the fadd.
; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 2, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
; Full-width load; lane 3 of the result is never referenced below.
%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; Single scalar read of lane 2, inserted into both lanes of the result.
%elt2 = extractelement <4 x float> %data, i32 2
%ins0 = insertelement <2 x float> undef, float %elt2, i32 0
%ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
; Both operands are %data. Mask index 0 selects lane 0 of the first operand;
; mask index 5 selects lane 1 of the second operand.
%shuf = shufflevector <4 x float> %data, <4 x float> %data, <2 x i32> <i32 0, i32 5>
%ret = fadd <2 x float> %ins1, %shuf
ret <2 x float> %ret
}
; CHECK-LABEL: @extract_elt0_buffer_load_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret float %data