[SelectionDAG] Add basic demanded elements support to ComputeNumSignBits for BITCAST nodes

This only adds support for the existing 'large element' scalar/vector to 'small element' vector bitcast case.

The next step would be to support cases where the large elements aren't all sign bits, and determine the equivalent small-element sign-bit count based on the demanded elements.

llvm-svn: 340143
Simon Pilgrim 2018-08-19 17:47:50 +00:00
parent 42be58650a
commit 255b336590
2 changed files with 38 additions and 45 deletions


@@ -3245,7 +3245,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // Requires handling of DemandedElts and Endianness.
     if ((SrcBits % VTBits) == 0) {
       assert(Op.getValueType().isVector() && "Expected bitcast to vector");
-      Tmp = ComputeNumSignBits(N0, Depth + 1);
+
+      unsigned Scale = SrcBits / VTBits;
+      APInt SrcDemandedElts(NumElts / Scale, 0);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SrcDemandedElts.setBit(i / Scale);
+
+      Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
       if (Tmp == SrcBits)
         return VTBits;
     }
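
To make the demanded-elements scaling concrete, here is a minimal standalone C++ sketch. It is not the LLVM code itself: it uses a plain uint64_t mask in place of APInt, and the helper name scaleDemandedElts plus the v2i64 -> v4i32 example are made up for illustration. It only shows how each demanded small element of the bitcast result marks its covering large source element as demanded.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Simplified stand-in for the APInt loop above: map a demanded-elements
// mask of the small-element result vector onto the large-element source
// vector of a bitcast, where each source element covers 'Scale' result
// elements.
static uint64_t scaleDemandedElts(uint64_t DemandedElts, unsigned NumElts,
                                  unsigned Scale) {
  assert(Scale != 0 && NumElts % Scale == 0 && "NumElts must be a multiple of Scale");
  uint64_t SrcDemandedElts = 0; // covers NumElts / Scale source elements
  for (unsigned i = 0; i != NumElts; ++i)
    if (DemandedElts & (uint64_t(1) << i))
      SrcDemandedElts |= uint64_t(1) << (i / Scale);
  return SrcDemandedElts;
}

int main() {
  // Example: bitcast v2i64 -> v4i32, so Scale = 64 / 32 = 2.
  // Demanding result elements {0, 3} touches source elements {0, 1}.
  unsigned NumElts = 4, Scale = 2;
  printf("source demanded mask: %#llx\n",
         (unsigned long long)scaleDemandedElts(0b1001, NumElts, Scale)); // 0x3
  // Demanding only result elements {0, 1} touches just source element 0.
  printf("source demanded mask: %#llx\n",
         (unsigned long long)scaleDemandedElts(0b0011, NumElts, Scale)); // 0x1
  return 0;
}

In the actual change, the result is reported as all sign bits (VTBits) only when the recursive ComputeNumSignBits over the scaled source mask shows the demanded source elements are entirely sign bits (Tmp == SrcBits).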


@@ -107,29 +107,25 @@ define <8 x i16> @trunc_ashr_v4i32_icmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
; X86-SSE-NEXT: movdqa %xmm0, %xmm3
; X86-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
; X86-SSE-NEXT: movdqa %xmm1, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
; X86-SSE-NEXT: movdqa %xmm1, %xmm4
; X86-SSE-NEXT: movdqa %xmm1, %xmm3
; X86-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
; X86-SSE-NEXT: movdqa %xmm0, %xmm4
; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X86-SSE-NEXT: psrlq $63, %xmm4
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.940656e-324,-0.000000e+00]
; X86-SSE-NEXT: xorpd %xmm2, %xmm1
; X86-SSE-NEXT: psubq %xmm2, %xmm1
; X86-SSE-NEXT: psrlq $63, %xmm3
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X86-SSE-NEXT: xorpd %xmm2, %xmm0
; X86-SSE-NEXT: psubq %xmm2, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: psrlq $63, %xmm3
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
; X86-SSE-NEXT: xorpd %xmm2, %xmm1
; X86-SSE-NEXT: psubq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE-NEXT: pslld $16, %xmm1
; X86-SSE-NEXT: psrad $16, %xmm1
; X86-SSE-NEXT: pslld $16, %xmm0
; X86-SSE-NEXT: psrad $16, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: packssdw %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
@@ -151,10 +147,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X86-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
@@ -167,37 +160,33 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
; X64-SSE-NEXT: movdqa %xmm0, %xmm3
; X64-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
; X64-SSE-NEXT: movdqa %xmm1, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
; X64-SSE-NEXT: movdqa %xmm1, %xmm4
; X64-SSE-NEXT: movdqa %xmm1, %xmm3
; X64-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
; X64-SSE-NEXT: movdqa %xmm0, %xmm4
; X64-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X64-SSE-NEXT: psrlq $63, %xmm4
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X64-SSE-NEXT: movapd {{.*#+}} xmm2 = [1,9223372036854775808]
; X64-SSE-NEXT: xorpd %xmm2, %xmm1
; X64-SSE-NEXT: psubq %xmm2, %xmm1
; X64-SSE-NEXT: psrlq $63, %xmm3
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X64-SSE-NEXT: xorpd %xmm2, %xmm0
; X64-SSE-NEXT: psubq %xmm2, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: psrlq $63, %xmm3
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
; X64-SSE-NEXT: xorpd %xmm2, %xmm1
; X64-SSE-NEXT: psubq %xmm2, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-SSE-NEXT: pslld $16, %xmm1
; X64-SSE-NEXT: psrad $16, %xmm1
; X64-SSE-NEXT: pslld $16, %xmm0
; X64-SSE-NEXT: psrad $16, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: packssdw %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
@@ -220,10 +209,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X64-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
@@ -234,9 +220,9 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X64-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; X64-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>