[X86][AVX] Decode constant bits from insert_subvector(c1, c2, c3)

This mostly happens when SimplifyDemandedVectorElts reduces a vector to insert_subvector(undef, c1, 0).

llvm-svn: 363499
This commit is contained in:
Simon Pilgrim 2019-06-15 17:05:24 +00:00
parent 5dd61974f9
commit 990f3ceb67
5 changed files with 31 additions and 29 deletions

View File

@ -5967,6 +5967,29 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Insert constant bits from a base and sub vector sources.
if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
isa<ConstantSDNode>(Op.getOperand(2))) {
// TODO - support insert_subvector through bitcasts.
if (EltSizeInBits != VT.getScalarSizeInBits())
return false;
APInt UndefSubElts;
SmallVector<APInt, 32> EltSubBits;
if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
UndefSubElts, EltSubBits,
AllowWholeUndefs, AllowPartialUndefs) &&
getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
UndefElts, EltBits, AllowWholeUndefs,
AllowPartialUndefs)) {
unsigned BaseIdx = Op.getConstantOperandVal(2);
UndefElts.insertBits(UndefSubElts, BaseIdx);
for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
EltBits[BaseIdx + i] = EltSubBits[i];
return true;
}
}
// Extract constant bits from a subvector's source.
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
isa<ConstantSDNode>(Op.getOperand(1))) {

View File

@ -1805,10 +1805,8 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp,
define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask9:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,3]
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [4,1,0,2]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermd %ymm3, %ymm1, %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [4,1,12,2]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vpermt2d %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper

View File

@ -207,23 +207,10 @@ define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
ret <8 x float> %1
}
define <8 x float> @demandedelts_vpermilvar_8f32_movsldup(<8 x float> %a0, i32 %a1) {
; AVX1-LABEL: demandedelts_vpermilvar_8f32_movsldup:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <u,0,2,2,4,4,6,6>
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],mem[4,5,6,7]
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: demandedelts_vpermilvar_8f32_movsldup:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: demandedelts_vpermilvar_8f32_movsldup:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX512-NEXT: ret{{[l|q]}}
; CHECK-LABEL: demandedelts_vpermilvar_8f32_movsldup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: ret{{[l|q]}}
%1 = insertelement <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>, i32 %a1, i32 0
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
%3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

View File

@ -933,10 +933,7 @@ define <8 x double> @combine_vpermi2var_8f64_as_permpd(<8 x double> %x0, <8 x do
;
; X64-LABEL: combine_vpermi2var_8f64_as_permpd:
; X64: # %bb.0:
; X64-NEXT: vmovapd {{.*#+}} zmm2 = <u,2,1,3,4,6,5,7>
; X64-NEXT: vinsertf32x4 $0, {{.*}}(%rip), %zmm2, %zmm2
; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,1,1,6,7,5,5]
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,2,2,5,7,6,6]
; X64-NEXT: retq
%res0 = insertelement <8 x i64> <i64 0, i64 2, i64 1, i64 3, i64 4, i64 6, i64 5, i64 7>, i64 %a2, i32 0
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %res0, <8 x double> %x1, i8 -1)

View File

@ -155,10 +155,7 @@ define <4 x double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x
;
; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd:
; X64: # %bb.0:
; X64-NEXT: vmovapd {{.*#+}} xmm2 = <u,4,2,7>
; X64-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],mem[2,3]
; X64-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
; X64-NEXT: retq
%res0 = insertelement <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i64 %a2, i32 0
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0)