From 75e33f71c2dae584b13a7d1186ae0a038ba98838 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 27 Sep 2021 10:39:28 +0100 Subject: [PATCH] [X86] combineVectorHADDSUB - remove the broken HOP(x,x) merging code (PR51974) The intention of this code turns out to be superfluous as we can handle this with shuffle combining, and it has a critical flaw in that it doesn't check for dependencies. Fixes PR51974 (cherry picked from commit 468ff703e114599ce8fb7457bd3c7ef0b219e952) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 23 ------------------- llvm/test/CodeGen/X86/horizontal-shuffle-2.ll | 19 +++++++++++++++ 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f3b1e6ca70ad..4b13b5b540b6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44076,32 +44076,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG, "Unexpected horizontal add/sub opcode"); if (!shouldUseHorizontalOp(true, DAG, Subtarget)) { - // For slow-hop targets, if we have a hop with a single op, see if we already - // have another user that we can reuse and shuffle the result. MVT VT = N->getSimpleValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (VT.is128BitVector() && LHS == RHS) { - for (SDNode *User : LHS->uses()) { - if (User != N && User->getOpcode() == N->getOpcode()) { - MVT ShufVT = VT.isFloatingPoint() ? 
MVT::v4f32 : MVT::v4i32; - if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) { - return DAG.getBitcast( - VT, - DAG.getVectorShuffle(ShufVT, SDLoc(N), - DAG.getBitcast(ShufVT, SDValue(User, 0)), - DAG.getUNDEF(ShufVT), {0, 1, 0, 1})); - } - if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) { - return DAG.getBitcast( - VT, - DAG.getVectorShuffle(ShufVT, SDLoc(N), - DAG.getBitcast(ShufVT, SDValue(User, 0)), - DAG.getUNDEF(ShufVT), {2, 3, 2, 3})); - } - } - } - } // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)). if (LHS != RHS && LHS.getOpcode() == N->getOpcode() && diff --git a/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll b/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll index 78c30e431574..9c5d7d4dd31b 100644 --- a/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll +++ b/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll @@ -171,6 +171,25 @@ define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) { ret <4 x float> %3 } +define <8 x i16> @PR51974(<8 x i16> %a0) { +; SSE-LABEL: PR51974: +; SSE: ## %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: phaddw %xmm0, %xmm1 +; SSE-NEXT: phaddw %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: PR51974: +; AVX: ## %bb.0: +; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vphaddw %xmm0, %xmm1, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %r0 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a0) + %r1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %r0, <8 x i16> %a0) + ret <8 x i16> %r1 +} + declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)