From c8d2256f8e4f714894bfff61767d8563189dc4b3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 30 Jul 2019 11:35:13 +0000 Subject: [PATCH] [X86][AVX] SimplifyDemandedVectorElts - handle extraction from X86ISD::SUBV_BROADCAST source (PR42819) PR42819 showed that we couldn't handle the case where we demanded a 'sub-sub-vector' of the SUBV_BROADCAST 'sub-vector' source. This patch recognizes these cases and extracts the sub-sub-vector instead of trying to broadcast to a type smaller than the 'sub-vector' source. llvm-svn: 367306 --- lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++------- test/CodeGen/X86/oddsubvector.ll | 32 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 334efa8b17b..9dfa661e7ab 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -34214,14 +34214,16 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( // Subvector broadcast. case X86ISD::SUBV_BROADCAST: { SDLoc DL(Op); - SDValue Ext = Op.getOperand(0); - if (Ext.getValueSizeInBits() != ExtSizeInBits) { - MVT ExtSVT = Ext.getSimpleValueType().getScalarType(); - MVT ExtVT = - MVT::getVectorVT(ExtSVT, ExtSizeInBits / ExtSVT.getSizeInBits()); - Ext = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, ExtVT, Ext); + SDValue Src = Op.getOperand(0); + if (Src.getValueSizeInBits() > ExtSizeInBits) + Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); + else if (Src.getValueSizeInBits() < ExtSizeInBits) { + MVT SrcSVT = Src.getSimpleValueType().getScalarType(); + MVT SrcVT = + MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits()); + Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src); } - return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ext, 0, + return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, TLO.DAG, DL, ExtSizeInBits)); } // Byte shifts by immediate. 
diff --git a/test/CodeGen/X86/oddsubvector.ll b/test/CodeGen/X86/oddsubvector.ll index d9cc5232a83..d11bf712baa 100644 --- a/test/CodeGen/X86/oddsubvector.ll +++ b/test/CodeGen/X86/oddsubvector.ll @@ -158,3 +158,35 @@ define void @PR40815(%struct.Mat4* nocapture readonly dereferenceable(64), %stru store <4 x float> %5, <4 x float>* %13, align 16 ret void } + +define <16 x i32> @PR42819(<8 x i32>* %a0) { +; SSE-LABEL: PR42819: +; SSE: # %bb.0: +; SSE-NEXT: movdqu (%rdi), %xmm3 +; SSE-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7,8,9,10,11] +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: retq +; +; AVX-LABEL: PR42819: +; AVX: # %bb.0: +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,1,2] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm0[5,6,7] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: PR42819: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; AVX512-NEXT: movw $-8192, %ax # imm = 0xE000 +; AVX512-NEXT: kmovw %eax, %k1 +; AVX512-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: retq + %1 = load <8 x i32>, <8 x i32>* %a0, align 4 + %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 16, i32 17, i32 18> + ret <16 x i32> %3 +}