From c341b7c0ef5ef55c815d1cdcd26fa4c0eb4d22e0 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 5 Feb 2014 16:17:36 +0000 Subject: [PATCH] AVX-512: optimized icmp -> sext -> icmp pattern git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 88 +++++++++++++++++++++++------- test/CodeGen/X86/avx512-vec-cmp.ll | 65 ++++++++++++++++------ 2 files changed, 114 insertions(+), 39 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 04086b7b894..1307bc5e0c5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4811,6 +4811,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16); + } else if (VT.getScalarType() == MVT::i1) { + assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type"); + SDValue Cst = DAG.getTargetConstant(0, MVT::i1); + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, + Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, + Ops, VT.getVectorNumElements()); } else llvm_unreachable("Unexpected vector type"); @@ -9135,6 +9142,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); InVT = ExtVT; } + SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); const Constant *C = (dyn_cast(Cst))->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, getPointerTy()); @@ -9999,38 +10007,44 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } -static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); + SDLoc dl(Op); assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 && Op.getValueType().getScalarType() == MVT::i1 && "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); - SDLoc dl(Op); - + unsigned Opc = 0; bool Unsigned = false; + bool Swap = false; unsigned SSECC; switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: SSECC = 4; break; - case ISD::SETEQ: SSECC = 0; break; - case ISD::SETUGT: Unsigned = true; - case ISD::SETGT: SSECC = 6; break; // NLE - case ISD::SETULT: Unsigned = true; - case ISD::SETLT: SSECC = 1; break; - case ISD::SETUGE: Unsigned = true; - case ISD::SETGE: SSECC = 5; break; // NLT - case ISD::SETULE: Unsigned = true; + case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break; + case ISD::SETUGT: SSECC = 6; Unsigned = true; break; + case ISD::SETLT: Swap = true; //fall-through + case ISD::SETGT: Opc = X86ISD::PCMPGTM; break; + case ISD::SETULT: SSECC = 1; Unsigned = true; break; + case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT + case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap + case ISD::SETULE: Unsigned = true; //fall-through case ISD::SETLE: SSECC = 2; break; } - unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM; + + if (Swap) + std::swap(Op0, Op1); + if (Opc) + return DAG.getNode(Opc, dl, VT, Op0, Op1); + Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM; return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); - } static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, @@ -10086,7 +10100,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (Subtarget->hasAVX512()) { if (Op1.getValueType().is512BitVector() || (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) - return LowerIntVSETCC_AVX512(Op, DAG); + return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); // In AVX-512 architecture setcc returns mask with i1 elements, // But there is no compare instruction for i8 and i16 elements. @@ -10108,17 +10122,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; - case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break; + case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; - case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break; + case ISD::SETGT: Opc = X86ISD::PCMPGT; break; case ISD::SETGE: Swap = true; - case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; + case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break; case ISD::SETULT: Swap = true; - case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; + case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break; case ISD::SETUGE: Swap = true; - case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; + case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; } @@ -14040,6 +14054,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; case X86ISD::TESTM: return "X86ISD::TESTM"; + case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; @@ -19203,10 +19218,13 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, // Optimize x == -y --> x+y == 0 // x != -y --> x+y != 0 -static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { ISD::CondCode CC = cast(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast(LHS.getOperand(0))) @@ -19224,6 +19242,34 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, DAG.getConstant(0, addV.getValueType()), CC); } + + if (VT.getScalarType() == MVT::i1) { + bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && + (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); + bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode()); + if (!IsSEXT0 && !IsVZero0) + return SDValue(); + bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) && + (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1); + bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (!IsSEXT1 && !IsVZero1) + return SDValue(); + + if (IsSEXT0 && IsVZero1) { + assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type"); + if (CC == ISD::SETEQ) + return DAG.getNOT(DL, LHS.getOperand(0), VT); + return LHS.getOperand(0); + } + if (IsSEXT1 && IsVZero0) { + assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type"); + if (CC == ISD::SETEQ) + return DAG.getNOT(DL, RHS.getOperand(0), VT); + return RHS.getOperand(0); + } + } + return SDValue(); } @@ -19508,7 +19554,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget); - case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); + case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index bc7c148d23e..d762f0083e3 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -5,9 +5,9 @@ ; CHECK: vmovups ; CHECK: ret define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { - %mask = fcmp ole <16 x float> %x, %y - %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y - ret <16 x float> %max + %mask = fcmp ole <16 x float> %x, %y + %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y + ret <16 x float> %max } ; CHECK-LABEL: test2 @@ -15,9 +15,9 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { ; CHECK: vmovupd ; CHECK: ret define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { - %mask = fcmp ole <8 x double> %x, %y - %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y - ret <8 x double> %max + %mask = fcmp ole <8 x double> %x, %y + %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y + ret <8 x double> %max } ; CHECK-LABEL: test3 @@ -26,9 +26,9 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { ; CHECK: ret define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind { %y = load <16 x i32>* %yp, align 4 - %mask = icmp eq <16 x i32> %x, %y - %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 - ret <16 x i32> %max + %mask = icmp eq <16 x i32> %x, %y + %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 + ret <16 x i32> %max } ; CHECK-LABEL: @test4_unsigned @@ -36,9 +36,9 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin ; CHECK: vmovdqu32 ; CHECK: ret define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind { - %mask = icmp uge <16 x i32> %x, %y - %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y - ret <16 x i32> %max + %mask = icmp uge <16 x i32> %x, %y + %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y + ret <16 x i32> %max } ; CHECK-LABEL: test5 @@ -46,9 +46,9 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK: vmovdqu64 {{.*}}%k1 ; CHECK: ret define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { - %mask = icmp eq <8 x i64> %x, %y - %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y - ret <8 x i64> %max + %mask = icmp eq <8 x i64> %x, %y + %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y + ret <8 x i64> %max } ; CHECK-LABEL: test6_unsigned @@ -56,9 +56,9 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK: vmovdqu64 {{.*}}%k1 ; CHECK: ret define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind { - %mask = icmp ugt <8 x i64> %x, %y - %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y - ret <8 x i64> %max + %mask = icmp ugt <8 x i64> %x, %y + %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y + ret <8 x i64> %max } ; CHECK-LABEL: test7 @@ -133,3 +133,32 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b) %conv = zext <16 x i1> %cmpvector_i to <16 x i32> ret <16 x i32> %conv } + +; CHECK-LABEL: test14 +; CHECK: vpcmp +; CHECK-NOT: vpcmp +; CHECK: vmovdqu32 {{.*}}{%k1} {z} +; CHECK: ret +define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) { + %sub_r = sub <16 x i32> %a, %b + %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a + %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32> + %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer + %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r + ret <16 x i32>%res +} + +; CHECK-LABEL: test15 +; CHECK: vpcmpgtq +; CHECK-NOT: vpcmp +; CHECK: vmovdqu64 {{.*}}{%k1} {z} +; CHECK: ret +define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) { + %sub_r = sub <8 x i64> %a, %b + %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a + %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64> + %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer + %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r + ret <8 x i64>%res +} +