From 26708fa166d7fde12549a2e1394ca19a85e4f435 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 1 Jul 2022 10:36:01 +0100 Subject: [PATCH] Revert rG057db2002bb3: [X86] combineAndnp - constant fold ANDNP(C,X) -> AND(~C,X) If the LHS op has a single use then using the more general AND op is likely to allow commutation, load folding, generic folds etc. Reverted due to reports from @alexfh about it causing an infinite loop (repro still pending). --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +--------- llvm/test/CodeGen/X86/combine-udiv.ll | 25 ++++++++++--------- .../X86/insert-into-constant-vector.ll | 10 +++++--- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d5f97566b793..61c1fd25031d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -50981,19 +50981,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, if (SDValue Not = IsNOT(N0, DAG)) return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1); - // Constant fold NOT(N0) to allow us to use AND. + // TODO: Constant fold NOT(N0) to allow us to use AND. // TODO: Do this in IsNOT with suitable oneuse checks? - if (getTargetConstantFromNode(N0) && N0->hasOneUse()) { - APInt UndefElts; - SmallVector EltBits; - if (getTargetConstantBitsFromNode(N0, VT.getScalarSizeInBits(), UndefElts, - EltBits)) { - for (APInt &Elt : EltBits) - Elt = ~Elt; - SDValue Not = getConstVector(EltBits, UndefElts, VT, DAG, SDLoc(N)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, Not, N1); - } - } // Attempt to recursively combine a bitmask ANDNP with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index c609d22cda47..8d1aff586c7c 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -688,18 +688,19 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { define <8 x i16> @pr38477(<8 x i16> %a0) { ; SSE2-LABEL: pr38477: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4957,57457,4103,16385,35545,2048,2115] -; SSE2-NEXT: pmulhuw %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: psubw %xmm1, %xmm2 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: paddw %xmm1, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535] -; SSE2-NEXT: pandn %xmm2, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4957,57457,4103,16385,35545,2048,2115] +; SSE2-NEXT: pmulhuw %xmm0, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: psubw %xmm2, %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: paddw %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,0,65535] +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: pr38477: diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll index 7fc1c173ba90..749ca979b491 100644 --- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll +++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll @@ -13,15 +13,17 @@ define <16 x i8> @elt0_v16i8(i8 %x) { ; X86-SSE2-LABEL: elt0_v16i8: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; X86-SSE2-NEXT: andnps %xmm1, %xmm0 ; X86-SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: retl ; ; X64-SSE2-LABEL: elt0_v16i8: ; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movd %edi, %xmm0 -; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: movd %edi, %xmm1 +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; X64-SSE2-NEXT: pandn %xmm1, %xmm0 ; X64-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE2-NEXT: retq ;