mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-26 22:26:16 +00:00
AVX-512: optimized icmp -> sext -> icmp pattern
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200849 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
abb572fbb9
commit
c341b7c0ef
@ -4811,6 +4811,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
|
||||
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
|
||||
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
|
||||
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
|
||||
} else if (VT.getScalarType() == MVT::i1) {
|
||||
assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
|
||||
SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
|
||||
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
|
||||
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
|
||||
Ops, VT.getVectorNumElements());
|
||||
} else
|
||||
llvm_unreachable("Unexpected vector type");
|
||||
|
||||
@ -9135,6 +9142,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
||||
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
|
||||
InVT = ExtVT;
|
||||
}
|
||||
|
||||
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
|
||||
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
|
||||
SDValue CP = DAG.getConstantPool(C, getPointerTy());
|
||||
@ -9999,38 +10007,44 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
|
||||
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
|
||||
}
|
||||
|
||||
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
|
||||
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
SDValue CC = Op.getOperand(2);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
|
||||
Op.getValueType().getScalarType() == MVT::i1 &&
|
||||
"Cannot set masked compare for this operation");
|
||||
|
||||
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
SDLoc dl(Op);
|
||||
|
||||
unsigned Opc = 0;
|
||||
bool Unsigned = false;
|
||||
bool Swap = false;
|
||||
unsigned SSECC;
|
||||
switch (SetCCOpcode) {
|
||||
default: llvm_unreachable("Unexpected SETCC condition");
|
||||
case ISD::SETNE: SSECC = 4; break;
|
||||
case ISD::SETEQ: SSECC = 0; break;
|
||||
case ISD::SETUGT: Unsigned = true;
|
||||
case ISD::SETGT: SSECC = 6; break; // NLE
|
||||
case ISD::SETULT: Unsigned = true;
|
||||
case ISD::SETLT: SSECC = 1; break;
|
||||
case ISD::SETUGE: Unsigned = true;
|
||||
case ISD::SETGE: SSECC = 5; break; // NLT
|
||||
case ISD::SETULE: Unsigned = true;
|
||||
case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
|
||||
case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
|
||||
case ISD::SETLT: Swap = true; //fall-through
|
||||
case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
|
||||
case ISD::SETULT: SSECC = 1; Unsigned = true; break;
|
||||
case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
|
||||
case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
|
||||
case ISD::SETULE: Unsigned = true; //fall-through
|
||||
case ISD::SETLE: SSECC = 2; break;
|
||||
}
|
||||
unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
|
||||
|
||||
if (Swap)
|
||||
std::swap(Op0, Op1);
|
||||
if (Opc)
|
||||
return DAG.getNode(Opc, dl, VT, Op0, Op1);
|
||||
Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
|
||||
return DAG.getNode(Opc, dl, VT, Op0, Op1,
|
||||
DAG.getConstant(SSECC, MVT::i8));
|
||||
|
||||
}
|
||||
|
||||
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
|
||||
@ -10086,7 +10100,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
|
||||
if (Subtarget->hasAVX512()) {
|
||||
if (Op1.getValueType().is512BitVector() ||
|
||||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
|
||||
return LowerIntVSETCC_AVX512(Op, DAG);
|
||||
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
|
||||
|
||||
// In the AVX-512 architecture, setcc returns a mask with i1 elements,
|
||||
// but there is no compare instruction for i8 and i16 elements.
|
||||
@ -10108,17 +10122,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
|
||||
switch (SetCCOpcode) {
|
||||
default: llvm_unreachable("Unexpected SETCC condition");
|
||||
case ISD::SETNE: Invert = true;
|
||||
case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
|
||||
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
|
||||
case ISD::SETLT: Swap = true;
|
||||
case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
|
||||
case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
|
||||
case ISD::SETGE: Swap = true;
|
||||
case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
|
||||
case ISD::SETLE: Opc = X86ISD::PCMPGT;
|
||||
Invert = true; break;
|
||||
case ISD::SETULT: Swap = true;
|
||||
case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
|
||||
case ISD::SETUGT: Opc = X86ISD::PCMPGT;
|
||||
FlipSigns = true; break;
|
||||
case ISD::SETUGE: Swap = true;
|
||||
case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
|
||||
case ISD::SETULE: Opc = X86ISD::PCMPGT;
|
||||
FlipSigns = true; Invert = true; break;
|
||||
}
|
||||
|
||||
@ -14040,6 +14054,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::PTEST: return "X86ISD::PTEST";
|
||||
case X86ISD::TESTP: return "X86ISD::TESTP";
|
||||
case X86ISD::TESTM: return "X86ISD::TESTM";
|
||||
case X86ISD::TESTNM: return "X86ISD::TESTNM";
|
||||
case X86ISD::KORTEST: return "X86ISD::KORTEST";
|
||||
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
|
||||
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
|
||||
@ -19203,10 +19218,13 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
// Optimize x == -y --> x+y == 0
|
||||
// x != -y --> x+y != 0
|
||||
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget* Subtarget) {
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc DL(N);
|
||||
|
||||
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
|
||||
@ -19224,6 +19242,34 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
|
||||
addV, DAG.getConstant(0, addV.getValueType()), CC);
|
||||
}
|
||||
|
||||
if (VT.getScalarType() == MVT::i1) {
|
||||
bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
|
||||
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
|
||||
bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
|
||||
if (!IsSEXT0 && !IsVZero0)
|
||||
return SDValue();
|
||||
bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
|
||||
(RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
|
||||
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
|
||||
|
||||
if (!IsSEXT1 && !IsVZero1)
|
||||
return SDValue();
|
||||
|
||||
if (IsSEXT0 && IsVZero1) {
|
||||
assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
|
||||
if (CC == ISD::SETEQ)
|
||||
return DAG.getNOT(DL, LHS.getOperand(0), VT);
|
||||
return LHS.getOperand(0);
|
||||
}
|
||||
if (IsSEXT1 && IsVZero0) {
|
||||
assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
|
||||
if (CC == ISD::SETEQ)
|
||||
return DAG.getNOT(DL, RHS.getOperand(0), VT);
|
||||
return RHS.getOperand(0);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -19508,7 +19554,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
|
||||
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
|
||||
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
|
||||
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
|
||||
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
|
||||
|
@ -5,9 +5,9 @@
|
||||
; CHECK: vmovups
|
||||
; CHECK: ret
|
||||
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
|
||||
%mask = fcmp ole <16 x float> %x, %y
|
||||
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
|
||||
ret <16 x float> %max
|
||||
%mask = fcmp ole <16 x float> %x, %y
|
||||
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
|
||||
ret <16 x float> %max
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test2
|
||||
@ -15,9 +15,9 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
|
||||
; CHECK: vmovupd
|
||||
; CHECK: ret
|
||||
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
|
||||
%mask = fcmp ole <8 x double> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
|
||||
ret <8 x double> %max
|
||||
%mask = fcmp ole <8 x double> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
|
||||
ret <8 x double> %max
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test3
|
||||
@ -26,9 +26,9 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
|
||||
; CHECK: ret
|
||||
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
|
||||
%y = load <16 x i32>* %yp, align 4
|
||||
%mask = icmp eq <16 x i32> %x, %y
|
||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
||||
ret <16 x i32> %max
|
||||
%mask = icmp eq <16 x i32> %x, %y
|
||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
||||
ret <16 x i32> %max
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @test4_unsigned
|
||||
@ -36,9 +36,9 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin
|
||||
; CHECK: vmovdqu32
|
||||
; CHECK: ret
|
||||
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
|
||||
%mask = icmp uge <16 x i32> %x, %y
|
||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
|
||||
ret <16 x i32> %max
|
||||
%mask = icmp uge <16 x i32> %x, %y
|
||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
|
||||
ret <16 x i32> %max
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test5
|
||||
@ -46,9 +46,9 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
|
||||
; CHECK: vmovdqu64 {{.*}}%k1
|
||||
; CHECK: ret
|
||||
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
|
||||
%mask = icmp eq <8 x i64> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
|
||||
ret <8 x i64> %max
|
||||
%mask = icmp eq <8 x i64> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
|
||||
ret <8 x i64> %max
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test6_unsigned
|
||||
@ -56,9 +56,9 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
|
||||
; CHECK: vmovdqu64 {{.*}}%k1
|
||||
; CHECK: ret
|
||||
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
|
||||
%mask = icmp ugt <8 x i64> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
|
||||
ret <8 x i64> %max
|
||||
%mask = icmp ugt <8 x i64> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
|
||||
ret <8 x i64> %max
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test7
|
||||
@ -133,3 +133,32 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
|
||||
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
|
||||
ret <16 x i32> %conv
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test14
|
||||
; CHECK: vpcmp
|
||||
; CHECK-NOT: vpcmp
|
||||
; CHECK: vmovdqu32 {{.*}}{%k1} {z}
|
||||
; CHECK: ret
|
||||
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
|
||||
%sub_r = sub <16 x i32> %a, %b
|
||||
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
|
||||
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
|
||||
%mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
|
||||
%res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
|
||||
ret <16 x i32>%res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test15
|
||||
; CHECK: vpcmpgtq
|
||||
; CHECK-NOT: vpcmp
|
||||
; CHECK: vmovdqu64 {{.*}}{%k1} {z}
|
||||
; CHECK: ret
|
||||
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
|
||||
%sub_r = sub <8 x i64> %a, %b
|
||||
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
|
||||
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
|
||||
%mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
|
||||
%res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
|
||||
ret <8 x i64>%res
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user