mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-15 20:51:35 +00:00
[X86][AVX] Decode constant bits from insert_subvector(c1, c2, c3)
This mostly happens due to SimplifyDemandedVectorElts reducing a vector to insert_subvector(undef, c1, 0). llvm-svn: 363499
This commit is contained in:
parent
5dd61974f9
commit
990f3ceb67
@ -5967,6 +5967,29 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
|
||||
return CastBitData(UndefSrcElts, SrcEltBits);
|
||||
}
|
||||
|
||||
// Insert constant bits from a base and sub vector sources.
|
||||
if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
isa<ConstantSDNode>(Op.getOperand(2))) {
|
||||
// TODO - support insert_subvector through bitcasts.
|
||||
if (EltSizeInBits != VT.getScalarSizeInBits())
|
||||
return false;
|
||||
|
||||
APInt UndefSubElts;
|
||||
SmallVector<APInt, 32> EltSubBits;
|
||||
if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
|
||||
UndefSubElts, EltSubBits,
|
||||
AllowWholeUndefs, AllowPartialUndefs) &&
|
||||
getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
|
||||
UndefElts, EltBits, AllowWholeUndefs,
|
||||
AllowPartialUndefs)) {
|
||||
unsigned BaseIdx = Op.getConstantOperandVal(2);
|
||||
UndefElts.insertBits(UndefSubElts, BaseIdx);
|
||||
for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
|
||||
EltBits[BaseIdx + i] = EltSubBits[i];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Extract constant bits from a subvector's source.
|
||||
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
|
||||
isa<ConstantSDNode>(Op.getOperand(1))) {
|
||||
|
@ -1805,10 +1805,8 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp,
|
||||
define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
|
||||
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask9:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,3]
|
||||
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [4,1,0,2]
|
||||
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; CHECK-NEXT: vpermd %ymm3, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [4,1,12,2]
|
||||
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; CHECK-NEXT: vpermt2d %ymm0, %ymm2, %ymm1
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: vzeroupper
|
||||
|
@ -207,23 +207,10 @@ define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
|
||||
ret <8 x float> %1
|
||||
}
|
||||
define <8 x float> @demandedelts_vpermilvar_8f32_movsldup(<8 x float> %a0, i32 %a1) {
|
||||
; AVX1-LABEL: demandedelts_vpermilvar_8f32_movsldup:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <u,0,2,2,4,4,6,6>
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],mem[4,5,6,7]
|
||||
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX2-LABEL: demandedelts_vpermilvar_8f32_movsldup:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; AVX2-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512-LABEL: demandedelts_vpermilvar_8f32_movsldup:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; AVX512-NEXT: ret{{[l|q]}}
|
||||
; CHECK-LABEL: demandedelts_vpermilvar_8f32_movsldup:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%1 = insertelement <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>, i32 %a1, i32 0
|
||||
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
|
||||
%3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
@ -933,10 +933,7 @@ define <8 x double> @combine_vpermi2var_8f64_as_permpd(<8 x double> %x0, <8 x do
|
||||
;
|
||||
; X64-LABEL: combine_vpermi2var_8f64_as_permpd:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vmovapd {{.*#+}} zmm2 = <u,2,1,3,4,6,5,7>
|
||||
; X64-NEXT: vinsertf32x4 $0, {{.*}}(%rip), %zmm2, %zmm2
|
||||
; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
|
||||
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,1,1,6,7,5,5]
|
||||
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,2,2,5,7,6,6]
|
||||
; X64-NEXT: retq
|
||||
%res0 = insertelement <8 x i64> <i64 0, i64 2, i64 1, i64 3, i64 4, i64 6, i64 5, i64 7>, i64 %a2, i32 0
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %res0, <8 x double> %x1, i8 -1)
|
||||
|
@ -155,10 +155,7 @@ define <4 x double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x
|
||||
;
|
||||
; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vmovapd {{.*#+}} xmm2 = <u,4,2,7>
|
||||
; X64-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],mem[2,3]
|
||||
; X64-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
|
||||
; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
|
||||
; X64-NEXT: retq
|
||||
%res0 = insertelement <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i64 %a2, i32 0
|
||||
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0)
|
||||
|
Loading…
x
Reference in New Issue
Block a user