[Codegen] (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 fold

Summary:
This was originally reported in D62818.
https://rise4fun.com/Alive/oPH

InstCombine does the opposite fold, in the hope that the `C l>>/<< Y` expression
will be hoisted out of a loop if `Y` is invariant and `X` is not.
But as can be seen from the diffs here, if it does not get hoisted,
the produced assembly is almost universally worse.

Much like with my recent "hoist add/sub by/from const" patches,
we should get an almost universal win if we hoist the constant:
there is almost always an "and/test by imm" instruction,
but a "shift of imm" not so much, so we may avoid having to
materialize the immediate, and thus need one less register.
And since we now shift not a constant but some other value,
the live range of that other value may be reduced.

Special care needs to be taken not to disturb the x86 `BT` / Hexagon `tstbit`
instruction patterns, and not to get into an endless combine loop.

Reviewers: RKSimon, efriedma, t.p.northover, craig.topper, spatel, arsenm

Reviewed By: spatel

Subscribers: hiraditya, MaskRay, wuzish, xbolva00, nikic, nemanjai, jvesely, wdng, nhaehnle, javed.absar, tpr, kristof.beyls, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62871

llvm-svn: 366955
This commit is contained in:
Roman Lebedev 2019-07-24 22:57:22 +00:00
parent 8ee06aa75f
commit a0525b084a
16 changed files with 1330 additions and 1500 deletions

View File

@ -539,6 +539,12 @@ public:
return hasAndNotCompare(X);
}
/// Target hook: does the target provide a native bit-test instruction, i.e.
/// one that evaluates
///   (X & (1 << Y)) ==/!= 0
/// Callers can use the answer either to avoid breaking that pattern apart,
/// or to create it when it would be recognized.
/// The default implementation reports that no such instruction exists.
virtual bool hasBitTest(SDValue X, SDValue Y) const {
  return false;
}
/// There are two ways to clear extreme bits (either low or high):
/// Mask: x & (-1 << y) (the instcombine canonical form)
/// Shifts: x >> y << y
@ -571,6 +577,38 @@ public:
return false;
}
/// Target hook deciding whether the pattern
///   (X & (C l>>/<< Y)) ==/!= 0
/// should be rewritten as
///   ((X <</l>> Y) & C) ==/!= 0
/// Returns true when the rewritten form is wanted.
/// \p CC is the constant being shifted in the original pattern; \p XC is
/// non-null only when 'X' itself is a constant.
/// WARNING: if 'X' is a constant, the fold may deadlock!
/// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
///        here because it can end up being not linked in.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
    SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
    SelectionDAG &DAG) const {
  const bool XIsConstant = XC != nullptr;

  // Targets with a 'bit test' instruction want the shape
  //   ((1 << Y) & C) ==/!= 0
  // to be formed, and - once it exists - never folded back.
  if (hasBitTest(X, Y)) {
    // The shift is already '1 << Y': leave the bit test alone.
    const bool IsBitTestNow = OldShiftOpcode == ISD::SHL && CC->isOne();
    if (IsBitTestNow)
      return false;

    // The transform would yield '1 << Y': go ahead and create the bit test.
    const bool BecomesBitTest =
        XIsConstant && NewShiftOpcode == ISD::SHL && XC->isOne();
    if (BecomesBitTest)
      return true;
  }

  // Transforming with a constant 'X' would immediately be undone again,
  // which results in an endless combine loop. Hence the default policy is
  // to fold exactly when 'X' is not a constant.
  return !XIsConstant;
}
/// These two forms are equivalent:
/// sub %y, (xor %x, -1)
/// add (add %x, 1), %y
@ -4108,6 +4146,11 @@ private:
DAGCombinerInfo &DCI,
const SDLoc &DL) const;
// Try to perform the fold
//   (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// on an [in]equality comparison of N0 against the zero constant N1C.
// Yields the replacement setcc, or an empty SDValue when the pattern does
// not match or the target hook declines the transform.
SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const;
SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,

View File

@ -2760,6 +2760,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
// Fold:
//   (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// N0 is the left-hand side of the comparison, N1C the zero it is compared
// against. An empty SDValue is returned when nothing was folded.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  // Only a single-use 'and' on the LHS of the comparison is interesting.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Pieces of the pattern; 'X' is chosen by the caller of the matcher below,
  // everything else is filled in by the matcher itself.
  SDValue X, C, Y;
  unsigned NewShiftOpcode;

  // Recognize 'V' as a single-use '(C l>>/<< Y)' and ask the target whether
  // the fold is wanted for the currently selected 'X'.
  auto IsHoistableShiftOfConst = [&](SDValue V) -> bool {
    if (!V.hasOneUse())
      return false;

    // Only logical shifts qualify; the rewritten form uses the opposite one.
    const unsigned OldShiftOpcode = V.getOpcode();
    if (OldShiftOpcode == ISD::SHL)
      NewShiftOpcode = ISD::SRL;
    else if (OldShiftOpcode == ISD::SRL)
      NewShiftOpcode = ISD::SHL;
    else
      return false;

    // The shifted value has to be a constant (or a splat of one).
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;

    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // 'and' is commutative, so the shift may sit on either side: try operand 1
  // as the shift first, then operand 0.
  const SDValue AndLHS = N0.getOperand(0);
  const SDValue AndRHS = N0.getOperand(1);
  X = AndLHS;
  if (!IsHoistableShiftOfConst(AndRHS)) {
    X = AndRHS;
    if (!IsHoistableShiftOfConst(AndLHS))
      return SDValue();
  }

  // Build:
  //   ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  const EVT VT = X.getValueType();
  SDValue ShiftedX = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, ShiftedX, C);
  SDValue NewSetCC = DAG.getSetCC(DL, SCCVT, Masked, N1C, Cond);
  return NewSetCC;
}
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
@ -3328,6 +3399,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
if (C1.isNullValue())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
}
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.

View File

@ -12042,6 +12042,19 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
return Mask->getValue().isPowerOf2();
}
// AArch64 policy for the fold
//   (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// The generic TargetLowering answer acts as a veto; beyond that, scalar
// integers always fold, while other (vector) types fold only when the new
// shift is a 'shl'.
bool AArch64TargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // If the baseline recommends against the fold, respect that.
  const bool BaselineWantsFold =
      TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  if (!BaselineWantsFold)
    return false;

  // Scalars are always fine.
  if (X.getValueType().isScalarInteger())
    return true;

  // Otherwise this is a vector shift, for which 'shl' is preferred.
  return NewShiftOpcode == ISD::SHL;
}
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64unctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();

View File

@ -488,6 +488,11 @@ public:
return VT.getSizeInBits() >= 64; // vector 'bic'
}
/// AArch64 override of the hook that decides whether
///   (X & (C l>>/<< Y)) ==/!= 0
/// should be turned into
///   ((X <</l>> Y) & C) ==/!= 0
/// Returns true when the transformed form should be produced.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
if (DAG.getMachineFunction().getFunction().hasMinSize())
return false;

View File

@ -1817,6 +1817,10 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
// Bit-test availability query for Hexagon: answers true exactly when the
// value type of X is a scalar integer (the 'tstbit' case).
bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  const auto TestedVT = X.getValueType();
  return TestedVT.isScalarInteger(); // 'tstbit'
}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}

View File

@ -127,6 +127,8 @@ namespace HexagonISD {
bool isCheapToSpeculateCtlz() const override { return true; }
bool isCtlzFast() const override { return true; }
/// Whether a bit-test instruction is available for the pattern
/// (X & (1 << Y)) ==/!= 0.
bool hasBitTest(SDValue X, SDValue Y) const override;
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add

View File

@ -5022,6 +5022,33 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
return Subtarget.hasSSE2();
}
// Bit-test availability query for X86: answers true exactly when the value
// type of X is a scalar integer (the 'bt' case).
bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  const auto TestedVT = X.getValueType();
  return TestedVT.isScalarInteger(); // 'bt'
}
// X86 policy for the fold
//   (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// Returns true when the transformed form should be produced.
bool X86TargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // The generic policy comes first: if it recommends against the fold,
  // there is nothing more to decide.
  const bool BaselineWantsFold =
      TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  if (!BaselineWantsFold)
    return false;

  // The fold is beneficial if any of these hold (checked in this order):
  //  - the value is a scalar integer;
  //  - all shift amounts are identical, which is good enough even for the
  //    rudimentary SSE2 shifts;
  //  - AVX2, with its powerful shift operations, is available.
  if (X.getValueType().isScalarInteger() ||
      DAG.isSplatValue(Y, /*AllowUndefs=*/true) ||
      Subtarget.hasAVX2())
    return true;

  // For pre-AVX2 vectors the best codegen is the variant that ends up
  // with a 'shl'.
  return NewShiftOpcode == ISD::SHL;
}
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&

View File

@ -840,6 +840,13 @@ namespace llvm {
bool hasAndNot(SDValue Y) const override;
/// Whether a bit-test instruction is available for the pattern
/// (X & (1 << Y)) ==/!= 0.
bool hasBitTest(SDValue X, SDValue Y) const override;
/// X86 override of the hook that decides whether
///   (X & (C l>>/<< Y)) ==/!= 0
/// should be turned into
///   ((X <</l>> Y) & C) ==/!= 0
/// Returns true when the transformed form should be produced.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;

View File

@ -15,11 +15,9 @@
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i8 128, %y
@ -31,11 +29,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i8 1, %y
@ -47,11 +43,9 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #24
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x18
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i8 24, %y
@ -65,11 +59,9 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32768
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i16 32768, %y
@ -81,11 +73,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i16 1, %y
@ -97,11 +87,9 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4080
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0xff0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i16 4080, %y
@ -115,9 +103,8 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2147483648
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, w0
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x80000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i32 2147483648, %y
@ -129,9 +116,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, w0
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i32 1, %y
@ -143,9 +129,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16776960
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, w0
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: tst w8, #0xffff00
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i32 16776960, %y
@ -159,9 +144,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-9223372036854775808
; CHECK-NEXT: lsr x8, x8, x1
; CHECK-NEXT: tst x8, x0
; CHECK-NEXT: lsl x8, x0, x1
; CHECK-NEXT: tst x8, #0x8000000000000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i64 9223372036854775808, %y
@ -173,9 +157,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: lsr x8, x8, x1
; CHECK-NEXT: tst x8, x0
; CHECK-NEXT: lsl x8, x0, x1
; CHECK-NEXT: tst x8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i64 1, %y
@ -187,9 +170,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474976645120
; CHECK-NEXT: lsr x8, x8, x1
; CHECK-NEXT: tst x8, x0
; CHECK-NEXT: lsl x8, x0, x1
; CHECK-NEXT: tst x8, #0xffffffff0000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i64 281474976645120, %y
@ -205,10 +187,9 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_4xi32_splat_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@ -238,10 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@ -288,12 +268,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_ne:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: lsl w8, w0, w1
; CHECK-NEXT: ubfx w0, w8, #7, #1
; CHECK-NEXT: ret
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x

View File

@ -15,11 +15,10 @@
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-128
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i8 128, %y
@ -31,11 +30,10 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i8 1, %y
@ -47,11 +45,10 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #24
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, #0x18
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i8 24, %y
@ -65,11 +62,10 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-32768
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i16 32768, %y
@ -81,11 +77,10 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i16 1, %y
@ -97,11 +92,10 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4080
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: tst w8, #0xff0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i16 4080, %y
@ -115,9 +109,8 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2147483648
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: tst w8, w0
; CHECK-NEXT: lsr w8, w0, w1
; CHECK-NEXT: tst w8, #0x80000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i32 2147483648, %y
@ -129,9 +122,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: tst w8, w0
; CHECK-NEXT: lsr w8, w0, w1
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i32 1, %y
@ -143,9 +135,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16776960
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: tst w8, w0
; CHECK-NEXT: lsr w8, w0, w1
; CHECK-NEXT: tst w8, #0xffff00
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i32 16776960, %y
@ -159,9 +150,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_signbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-9223372036854775808
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: tst x8, x0
; CHECK-NEXT: lsr x8, x0, x1
; CHECK-NEXT: tst x8, #0x8000000000000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i64 9223372036854775808, %y
@ -173,9 +163,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_lowestbit_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: tst x8, x0
; CHECK-NEXT: lsr x8, x0, x1
; CHECK-NEXT: tst x8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i64 1, %y
@ -187,9 +176,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474976645120
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: tst x8, x0
; CHECK-NEXT: lsr x8, x0, x1
; CHECK-NEXT: tst x8, #0xffffffff0000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i64 281474976645120, %y
@ -283,12 +271,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_ne:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-128
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: tst w8, #0xff
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: lsr w8, w8, w1
; CHECK-NEXT: ubfx w0, w8, #7, #1
; CHECK-NEXT: ret
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x

View File

@ -16,10 +16,10 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
; SI-NEXT: s_mov_b32 s7, s0
; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; SI-NEXT: v_and_b32_e32 v0, 7, v0
; SI-NEXT: v_lshl_b32_e32 v0, 1, v0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_and_b32_e32 v0, v2, v0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2
; SI-NEXT: v_and_b32_e32 v0, 1, v0
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
; SI-NEXT: ; return to shader part epilog
@ -37,10 +37,10 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
; VI-NEXT: s_mov_b32 s7, s0
; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; VI-NEXT: v_and_b32_e32 v0, 7, v0
; VI-NEXT: v_lshlrev_b32_e64 v0, v0, 1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_and_b32_e32 v0, v2, v0
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, v0, v2
; VI-NEXT: v_and_b32_e32 v0, 1, v0
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; VI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v0
; VI-NEXT: ; return to shader part epilog

File diff suppressed because it is too large Load Diff

View File

@ -6,8 +6,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) {
; CHECK-LABEL: and_cmp_variable_power_of_two:
; CHECK: # %bb.0:
; CHECK-NEXT: subfic 4, 4, 32
; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%shl = shl i32 1, %y
%and = and i32 %x, %shl
@ -18,8 +17,7 @@ define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) {
define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) {
; CHECK-LABEL: and_cmp_variable_power_of_two_64:
; CHECK: # %bb.0:
; CHECK-NEXT: subfic 4, 4, 64
; CHECK-NEXT: rldcl 3, 3, 4, 63
; CHECK-NEXT: srd 3, 3, 4
; CHECK-NEXT: blr
%shl = shl i64 1, %y
%and = and i64 %x, %shl
@ -30,9 +28,8 @@ define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) {
define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) {
; CHECK-LABEL: and_ncmp_variable_power_of_two:
; CHECK: # %bb.0:
; CHECK-NEXT: subfic 4, 4, 32
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%shl = shl i32 1, %y
%and = and i32 %x, %shl
@ -43,9 +40,8 @@ define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) {
define i1 @and_ncmp_variable_power_of_two_64(i64 %x, i64 %y) {
; CHECK-LABEL: and_ncmp_variable_power_of_two_64:
; CHECK: # %bb.0:
; CHECK-NEXT: not 3, 3
; CHECK-NEXT: subfic 4, 4, 64
; CHECK-NEXT: rldcl 3, 3, 4, 63
; CHECK-NEXT: srd 3, 3, 4
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%shl = shl i64 1, %y
%and = and i64 %x, %shl

View File

@ -23,19 +23,18 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $-128, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $-128, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: testb $-128, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = lshr i8 128, %y
@ -48,19 +47,18 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_lowestbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $1, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb $1, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_lowestbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $1, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: testb $1, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = lshr i8 1, %y
@ -73,19 +71,18 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $24, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_bitsinmiddle_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $24, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: testb $24, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = lshr i8 24, %y
@ -100,36 +97,33 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $32768, %ecx # imm = 0x8000
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $32768, %eax # imm = 0x8000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testw %di, %ax
; X64-BMI1-NEXT: shll %cl, %edi
; X64-BMI1-NEXT: testl $32768, %edi # imm = 0x8000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_signbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32768, %eax # imm = 0x8000
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testw %di, %ax
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i16 32768, %y
@ -142,36 +136,33 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_lowestbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $1, %al
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_lowestbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $1, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testb $1, %al
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_lowestbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testw %di, %ax
; X64-BMI1-NEXT: shll %cl, %edi
; X64-BMI1-NEXT: testb $1, %dil
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_lowestbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $1, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testw %di, %ax
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: testb $1, %al
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i16 1, %y
@ -184,36 +175,33 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testw %di, %ax
; X64-BMI1-NEXT: shll %cl, %edi
; X64-BMI1-NEXT: testl $4080, %edi # imm = 0xFF0
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testw %di, %ax
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i16 4080, %y
@ -228,36 +216,33 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testl %edi, %eax
; X64-BMI1-NEXT: shll %cl, %edi
; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_signbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i32 2147483648, %y
@ -270,36 +255,33 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_lowestbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $1, %al
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_lowestbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $1, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testb $1, %al
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_lowestbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testl %edi, %eax
; X64-BMI1-NEXT: shll %cl, %edi
; X64-BMI1-NEXT: testb $1, %dil
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_lowestbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $1, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: testb $1, %al
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i32 1, %y
@ -312,36 +294,33 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testl %edi, %eax
; X64-BMI1-NEXT: shll %cl, %edi
; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i32 16776960, %y
@ -357,55 +336,44 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: xorl %esi, %esi
; X86-BMI1-NEXT: shrdl %cl, %eax, %esi
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %eax, %esi
; X86-BMI1-NEXT: shll %cl, %esi
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %eax, %esi
; X86-BMI1-NEXT: cmovnel %edx, %eax
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: orl %esi, %eax
; X86-BMI1-NEXT: cmovnel %esi, %edx
; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i64_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: xorl %esi, %esi
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovnel %eax, %esi
; X86-BMI2-NEXT: cmovnel %edx, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: orl %esi, %eax
; X86-BMI2-NEXT: cmovel %edx, %eax
; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i64_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT: shrq %cl, %rax
; X64-BMI1-NEXT: testq %rdi, %rax
; X64-BMI1-NEXT: shlq %cl, %rdi
; X64-BMI1-NEXT: shrq $63, %rdi
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_signbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: testq %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shrq $63, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i64 9223372036854775808, %y
@ -415,34 +383,42 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; X86-LABEL: scalar_i64_lowestbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $1, %edx
; X86-NEXT: shrdl %cl, %eax, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: cmovnel %eax, %edx
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: orl $0, %edx
; X86-NEXT: sete %al
; X86-NEXT: retl
; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovel %eax, %edx
; X86-BMI1-NEXT: testb $1, %dl
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI2-NEXT: cmovel %ecx, %edx
; X86-BMI2-NEXT: testb $1, %dl
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i64_lowestbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: movl $1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT: shrq %cl, %rax
; X64-BMI1-NEXT: testq %rdi, %rax
; X64-BMI1-NEXT: shlq %cl, %rdi
; X64-BMI1-NEXT: testb $1, %dil
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_lowestbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $1, %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: testq %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: testb $1, %al
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i64 1, %y
@ -456,17 +432,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $65535, %eax # imm = 0xFFFF
; X86-BMI1-NEXT: movl $-65536, %edx # imm = 0xFFFF0000
; X86-BMI1-NEXT: shrdl %cl, %eax, %edx
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: xorl %esi, %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %eax, %esi
; X86-BMI1-NEXT: shll %cl, %esi
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %eax, %edx
; X86-BMI1-NEXT: cmovel %eax, %esi
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: orl %edx, %esi
; X86-BMI1-NEXT: cmovnel %esi, %edx
; X86-BMI1-NEXT: movzwl %dx, %ecx
; X86-BMI1-NEXT: cmovel %esi, %eax
; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; X86-BMI1-NEXT: orl %ecx, %eax
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
@ -475,17 +452,17 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $65535, %eax # imm = 0xFFFF
; X86-BMI2-NEXT: movl $-65536, %edx # imm = 0xFFFF0000
; X86-BMI2-NEXT: shrdl %cl, %eax, %edx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: xorl %esi, %esi
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovnel %eax, %edx
; X86-BMI2-NEXT: movzwl %dx, %ecx
; X86-BMI2-NEXT: cmovel %eax, %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: orl %edx, %esi
; X86-BMI2-NEXT: andl $-65536, %esi # imm = 0xFFFF0000
; X86-BMI2-NEXT: orl %ecx, %esi
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
@ -493,18 +470,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT: shrq %cl, %rax
; X64-BMI1-NEXT: testq %rdi, %rax
; X64-BMI1-NEXT: shlq %cl, %rdi
; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI1-NEXT: testq %rax, %rdi
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: testq %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
; X64-BMI2-NEXT: testq %rcx, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i64 281474976645120, %y
@ -518,37 +495,48 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
;------------------------------------------------------------------------------;
define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE2-LABEL: vec_4xi32_splat_eq:
; SSE2: # %bb.0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psrld %xmm2, %xmm5
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
; SSE2-NEXT: psrld %xmm1, %xmm3
; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
; SSE2-NEXT: andps %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
; X86-SSE2-LABEL: vec_4xi32_splat_eq:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
;
; AVX2-LABEL: vec_4xi32_splat_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
;
; X64-SSE2-LABEL: vec_4xi32_splat_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pxor %xmm2, %xmm2
; X64-SSE2-NEXT: pslld $23, %xmm1
; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X64-SSE2-NEXT: retq
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@ -594,37 +582,48 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; SSE2: # %bb.0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = <1,1,u,1>
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psrld %xmm2, %xmm5
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
; SSE2-NEXT: psrld %xmm1, %xmm3
; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
; SSE2-NEXT: andps %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
;
; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
;
; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pxor %xmm2, %xmm2
; X64-SSE2-NEXT: pslld $23, %xmm1
; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X64-SSE2-NEXT: retq
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@ -713,20 +712,19 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $-128, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: setne %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_ne:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $-128, %al
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: setne %al
; X64-NEXT: shlb %cl, %al
; X64-NEXT: shrb $7, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x

View File

@ -23,19 +23,18 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $-128, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $-128, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: testb $-128, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = shl i8 128, %y
@ -68,19 +67,18 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $24, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_bitsinmiddle_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $24, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: testb $24, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = shl i8 24, %y
@ -95,36 +93,36 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-32768, %ecx # imm = 0x8000
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000
; X64-BMI1-NEXT: movzwl %di, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shll %cl, %eax
; X64-BMI1-NEXT: testw %di, %ax
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_signbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $-32768, %eax # imm = 0x8000
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: testw %di, %ax
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i16 32768, %y
@ -157,36 +155,36 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
; X64-BMI1-NEXT: movzwl %di, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shll %cl, %eax
; X64-BMI1-NEXT: testw %di, %ax
; X64-BMI1-NEXT: shrl %cl, %eax
; X64-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: testw %di, %ax
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i16 4080, %y
@ -201,36 +199,33 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shll %cl, %eax
; X64-BMI1-NEXT: testl %edi, %eax
; X64-BMI1-NEXT: shrl %cl, %edi
; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_signbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl %edi, %eax
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i32 2147483648, %y
@ -263,36 +258,33 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT: shll %cl, %eax
; X64-BMI1-NEXT: testl %edi, %eax
; X64-BMI1-NEXT: shrl %cl, %edi
; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: testl %edi, %eax
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i32 16776960, %y
@ -304,35 +296,43 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; i64 scalar
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; X86-LABEL: scalar_i64_signbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
; X86-NEXT: shldl %cl, %eax, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: cmovnel %eax, %edx
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: orl $0, %edx
; X86-NEXT: sete %al
; X86-NEXT: retl
; X86-BMI1-LABEL: scalar_i64_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovel %eax, %edx
; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i64_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI2-NEXT: cmovel %ecx, %edx
; X86-BMI2-NEXT: testl $-2147483648, %edx # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i64_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT: shlq %cl, %rax
; X64-BMI1-NEXT: testq %rdi, %rax
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: shrq %cl, %rdi
; X64-BMI1-NEXT: btq $63, %rdi
; X64-BMI1-NEXT: setae %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_signbit_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: testq %rdi, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: btq $63, %rax
; X64-BMI2-NEXT: setae %al
; X64-BMI2-NEXT: retq
%t0 = shl i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
@ -395,17 +395,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl $-65536, %eax # imm = 0xFFFF0000
; X86-BMI1-NEXT: movl $65535, %edx # imm = 0xFFFF
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: xorl %esi, %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %edx, %esi
; X86-BMI1-NEXT: shrl %cl, %esi
; X86-BMI1-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %eax, %edx
; X86-BMI1-NEXT: cmovel %eax, %esi
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: orl %edx, %esi
; X86-BMI1-NEXT: cmovnel %esi, %eax
; X86-BMI1-NEXT: cmovel %esi, %edx
; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; X86-BMI1-NEXT: movzwl %dx, %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
@ -414,17 +415,17 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-65536, %eax # imm = 0xFFFF0000
; X86-BMI2-NEXT: movl $65535, %edx # imm = 0xFFFF
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI2-NEXT: xorl %esi, %esi
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovnel %eax, %edx
; X86-BMI2-NEXT: cmovel %eax, %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: orl %edx, %esi
; X86-BMI2-NEXT: cmovnel %edx, %eax
; X86-BMI2-NEXT: cmovel %edx, %esi
; X86-BMI2-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; X86-BMI2-NEXT: movzwl %si, %ecx
; X86-BMI2-NEXT: orl %eax, %ecx
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
@ -432,18 +433,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT: shlq %cl, %rax
; X64-BMI1-NEXT: testq %rdi, %rax
; X64-BMI1-NEXT: shrq %cl, %rdi
; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI1-NEXT: testq %rax, %rdi
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: testq %rdi, %rax
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
; X64-BMI2-NEXT: testq %rcx, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i64 281474976645120, %y
@ -477,10 +478,10 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; AVX2-LABEL: vec_4xi32_splat_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
;
; X64-SSE2-LABEL: vec_4xi32_splat_eq:
@ -577,10 +578,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
;
; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
@ -709,20 +710,19 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb $-128, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: setne %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_ne:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movb $-128, %al
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %al
; X64-NEXT: testb %dil, %al
; X64-NEXT: setne %al
; X64-NEXT: shrb %cl, %al
; X64-NEXT: shrb $7, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x