mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-08 20:30:50 +00:00
[PowerPC] Add support for the CMPB instruction
Newer POWER cores, and the A2, support the cmpb instruction. This instruction compares its operands, treating each of the 8 bytes in the GPRs separately, returning a 'mask' result of 0 (for false) or -1 (for true) in each byte. Code generation support is added, in the form of a PPCISelDAGToDAG DAG-preprocessing routine, that recognizes patterns close to what the instruction computes (either exactly, or related by a constant masking operation), and generates the cmpb instruction (along with any necessary constant masking operation). This can be expanded if use cases arise. llvm-svn: 225106
This commit is contained in:
parent
48119df5c9
commit
fa0f576b41
@ -88,6 +88,8 @@ def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
|
||||
"Enable the popcnt[dw] instructions">;
|
||||
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
|
||||
"Enable the ldbrx instruction">;
|
||||
def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
|
||||
"Enable the cmpb instruction">;
|
||||
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
|
||||
"Enable Book E instructions">;
|
||||
def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
|
||||
@ -116,7 +118,6 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
|
||||
// Note: Future features to add when support is extended to more
|
||||
// recent ISA levels:
|
||||
//
|
||||
// CMPB p6, p6x, p7 cmpb
|
||||
// DFP p6, p6x, p7 decimal floating-point instructions
|
||||
// POPCNTB p5 through p7 popcntb and related instructions
|
||||
|
||||
@ -258,7 +259,7 @@ def : ProcessorModel<"a2", PPCA2Model,
|
||||
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
|
||||
FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureFPRND, FeatureFPCVT, FeatureISEL,
|
||||
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
|
||||
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
|
||||
/*, Feature64BitRegs */, DeprecatedMFTB]>;
|
||||
def : ProcessorModel<"a2q", PPCA2Model,
|
||||
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
||||
@ -266,7 +267,7 @@ def : ProcessorModel<"a2q", PPCA2Model,
|
||||
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
|
||||
FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureFPRND, FeatureFPCVT, FeatureISEL,
|
||||
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
|
||||
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
|
||||
/*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
|
||||
def : ProcessorModel<"pwr3", G5Model,
|
||||
[DirectivePwr3, FeatureAltivec,
|
||||
@ -292,14 +293,14 @@ def : ProcessorModel<"pwr6", G5Model,
|
||||
[DirectivePwr6, FeatureAltivec,
|
||||
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
|
||||
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
|
||||
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
|
||||
FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
|
||||
DeprecatedMFTB, DeprecatedDST]>;
|
||||
def : ProcessorModel<"pwr6x", G5Model,
|
||||
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
|
||||
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
|
||||
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
|
||||
FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
|
||||
FeatureFPRND, Feature64Bit,
|
||||
DeprecatedMFTB, DeprecatedDST]>;
|
||||
def : ProcessorModel<"pwr7", P7Model,
|
||||
@ -308,7 +309,7 @@ def : ProcessorModel<"pwr7", P7Model,
|
||||
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
|
||||
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureFPRND, FeatureFPCVT, FeatureISEL,
|
||||
FeaturePOPCNTD, FeatureLDBRX,
|
||||
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
|
||||
Feature64Bit /*, Feature64BitRegs */,
|
||||
DeprecatedMFTB, DeprecatedDST]>;
|
||||
def : ProcessorModel<"pwr8", P8Model,
|
||||
@ -317,7 +318,7 @@ def : ProcessorModel<"pwr8", P8Model,
|
||||
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
|
||||
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureFPRND, FeatureFPCVT, FeatureISEL,
|
||||
FeaturePOPCNTD, FeatureLDBRX,
|
||||
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
|
||||
Feature64Bit /*, Feature64BitRegs */,
|
||||
DeprecatedMFTB, DeprecatedDST]>;
|
||||
def : Processor<"ppc", G3Itineraries, [Directive32]>;
|
||||
|
@ -83,6 +83,7 @@ namespace {
|
||||
return true;
|
||||
}
|
||||
|
||||
void PreprocessISelDAG() override;
|
||||
void PostprocessISelDAG() override;
|
||||
|
||||
/// getI32Imm - Return a target constant with the specified value, of type
|
||||
@ -215,6 +216,8 @@ private:
|
||||
void PeepholePPC64ZExt();
|
||||
void PeepholeCROps();
|
||||
|
||||
SDValue combineToCMPB(SDNode *N);
|
||||
|
||||
bool AllUsersSelectZero(SDNode *N);
|
||||
void SwapAllSelectUsers(SDNode *N);
|
||||
};
|
||||
@ -684,7 +687,6 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) {
|
||||
return SelectInt64(CurDAG, dl, Imm);
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
class BitPermutationSelector {
|
||||
struct ValueBit {
|
||||
@ -2872,6 +2874,254 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
||||
// If the target supports the cmpb instruction, do the idiom recognition here.
|
||||
// We don't do this as a DAG combine because we don't want to do it as nodes
|
||||
// are being combined (because we might miss part of the eventual idiom). We
|
||||
// don't want to do it during instruction selection because we want to reuse
|
||||
// the logic for lowering the masking operations already part of the
|
||||
// instruction selector.
|
||||
SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
|
||||
SDLoc dl(N);
|
||||
|
||||
assert(N->getOpcode() == ISD::OR &&
|
||||
"Only OR nodes are supported for CMPB");
|
||||
|
||||
SDValue Res;
|
||||
if (!PPCSubTarget->hasCMPB())
|
||||
return Res;
|
||||
|
||||
if (N->getValueType(0) != MVT::i32 &&
|
||||
N->getValueType(0) != MVT::i64)
|
||||
return Res;
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
SDValue RHS, LHS;
|
||||
bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
uint64_t Mask = 0, Alt = 0;
|
||||
|
||||
auto IsByteSelectCC = [this](SDValue O, unsigned &b,
|
||||
uint64_t &Mask, uint64_t &Alt,
|
||||
SDValue &LHS, SDValue &RHS) {
|
||||
if (O.getOpcode() != ISD::SELECT_CC)
|
||||
return false;
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
|
||||
|
||||
if (!isa<ConstantSDNode>(O.getOperand(2)) ||
|
||||
!isa<ConstantSDNode>(O.getOperand(3)))
|
||||
return false;
|
||||
|
||||
uint64_t PM = O.getConstantOperandVal(2);
|
||||
uint64_t PAlt = O.getConstantOperandVal(3);
|
||||
for (b = 0; b < 8; ++b) {
|
||||
uint64_t Mask = UINT64_C(0xFF) << (8*b);
|
||||
if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
|
||||
break;
|
||||
}
|
||||
|
||||
if (b == 8)
|
||||
return false;
|
||||
Mask |= PM;
|
||||
Alt |= PAlt;
|
||||
|
||||
if (!isa<ConstantSDNode>(O.getOperand(1)) ||
|
||||
O.getConstantOperandVal(1) != 0) {
|
||||
SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
|
||||
if (Op0.getOpcode() == ISD::TRUNCATE)
|
||||
Op0 = Op0.getOperand(0);
|
||||
if (Op1.getOpcode() == ISD::TRUNCATE)
|
||||
Op1 = Op1.getOperand(0);
|
||||
|
||||
if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
|
||||
Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
|
||||
isa<ConstantSDNode>(Op0.getOperand(1))) {
|
||||
|
||||
unsigned Bits = Op0.getValueType().getSizeInBits();
|
||||
if (b != Bits/8-1)
|
||||
return false;
|
||||
if (Op0.getConstantOperandVal(1) != Bits-8)
|
||||
return false;
|
||||
|
||||
LHS = Op0.getOperand(0);
|
||||
RHS = Op1.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// When we have small integers (i16 to be specific), the form present
|
||||
// post-legalization uses SETULT in the SELECT_CC for the
|
||||
// higher-order byte, depending on the fact that the
|
||||
// even-higher-order bytes are known to all be zero, for example:
|
||||
// select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
|
||||
// (so when the second byte is the same, because all higher-order
|
||||
// bits from bytes 3 and 4 are known to be zero, the result of the
|
||||
// xor can be at most 255)
|
||||
if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
|
||||
isa<ConstantSDNode>(O.getOperand(1))) {
|
||||
|
||||
uint64_t ULim = O.getConstantOperandVal(1);
|
||||
if (ULim != (UINT64_C(1) << b*8))
|
||||
return false;
|
||||
|
||||
// Now we need to make sure that the upper bytes are known to be
|
||||
// zero.
|
||||
unsigned Bits = Op0.getValueType().getSizeInBits();
|
||||
if (!CurDAG->MaskedValueIsZero(Op0,
|
||||
APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
|
||||
return false;
|
||||
|
||||
LHS = Op0.getOperand(0);
|
||||
RHS = Op0.getOperand(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (CC != ISD::SETEQ)
|
||||
return false;
|
||||
|
||||
SDValue Op = O.getOperand(0);
|
||||
if (Op.getOpcode() == ISD::AND) {
|
||||
if (!isa<ConstantSDNode>(Op.getOperand(1)))
|
||||
return false;
|
||||
if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
|
||||
return false;
|
||||
|
||||
SDValue XOR = Op.getOperand(0);
|
||||
if (XOR.getOpcode() == ISD::TRUNCATE)
|
||||
XOR = XOR.getOperand(0);
|
||||
if (XOR.getOpcode() != ISD::XOR)
|
||||
return false;
|
||||
|
||||
LHS = XOR.getOperand(0);
|
||||
RHS = XOR.getOperand(1);
|
||||
return true;
|
||||
} else if (Op.getOpcode() == ISD::SRL) {
|
||||
if (!isa<ConstantSDNode>(Op.getOperand(1)))
|
||||
return false;
|
||||
unsigned Bits = Op.getValueType().getSizeInBits();
|
||||
if (b != Bits/8-1)
|
||||
return false;
|
||||
if (Op.getConstantOperandVal(1) != Bits-8)
|
||||
return false;
|
||||
|
||||
SDValue XOR = Op.getOperand(0);
|
||||
if (XOR.getOpcode() == ISD::TRUNCATE)
|
||||
XOR = XOR.getOperand(0);
|
||||
if (XOR.getOpcode() != ISD::XOR)
|
||||
return false;
|
||||
|
||||
LHS = XOR.getOperand(0);
|
||||
RHS = XOR.getOperand(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
|
||||
while (!Queue.empty()) {
|
||||
SDValue V = Queue.pop_back_val();
|
||||
|
||||
for (const SDValue &O : V.getNode()->ops()) {
|
||||
unsigned b;
|
||||
uint64_t M = 0, A = 0;
|
||||
SDValue OLHS, ORHS;
|
||||
if (O.getOpcode() == ISD::OR) {
|
||||
Queue.push_back(O);
|
||||
} else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
|
||||
if (!LHS) {
|
||||
LHS = OLHS;
|
||||
RHS = ORHS;
|
||||
BytesFound[b] = true;
|
||||
Mask |= M;
|
||||
Alt |= A;
|
||||
} else if ((LHS == ORHS && RHS == OLHS) ||
|
||||
(RHS == ORHS && LHS == OLHS)) {
|
||||
BytesFound[b] = true;
|
||||
Mask |= M;
|
||||
Alt |= A;
|
||||
} else {
|
||||
return Res;
|
||||
}
|
||||
} else {
|
||||
return Res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned LastB = 0, BCnt = 0;
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
if (BytesFound[LastB]) {
|
||||
++BCnt;
|
||||
LastB = i;
|
||||
}
|
||||
|
||||
if (!LastB || BCnt < 2)
|
||||
return Res;
|
||||
|
||||
// Because we'll be zero-extending the output anyway if don't have a specific
|
||||
// value for each input byte (via the Mask), we can 'anyext' the inputs.
|
||||
if (LHS.getValueType() != VT) {
|
||||
LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
|
||||
RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
|
||||
}
|
||||
|
||||
Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
|
||||
|
||||
bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
|
||||
if (NonTrivialMask && !Alt) {
|
||||
// Res = Mask & CMPB
|
||||
Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, VT));
|
||||
} else if (Alt) {
|
||||
// Res = (CMPB & Mask) | (~CMPB & Alt)
|
||||
// Which, as suggested here:
|
||||
// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
|
||||
// can be written as:
|
||||
// Res = Alt ^ ((Alt ^ Mask) & CMPB)
|
||||
// useful because the (Alt ^ Mask) can be pre-computed.
|
||||
Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
|
||||
CurDAG->getConstant(Mask ^ Alt, VT));
|
||||
Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, VT));
|
||||
}
|
||||
|
||||
return Res;
|
||||
}
|
||||
|
||||
void PPCDAGToDAGISel::PreprocessISelDAG() {
|
||||
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
||||
++Position;
|
||||
|
||||
bool MadeChange = false;
|
||||
while (Position != CurDAG->allnodes_begin()) {
|
||||
SDNode *N = --Position;
|
||||
if (N->use_empty())
|
||||
continue;
|
||||
|
||||
SDValue Res;
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::OR:
|
||||
Res = combineToCMPB(N);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Res) {
|
||||
DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
|
||||
DEBUG(N->dump(CurDAG));
|
||||
DEBUG(dbgs() << "\nNew: ");
|
||||
DEBUG(Res.getNode()->dump(CurDAG));
|
||||
DEBUG(dbgs() << "\n");
|
||||
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (MadeChange)
|
||||
CurDAG->RemoveDeadNodes();
|
||||
}
|
||||
|
||||
/// PostprocessISelDAG - Perform some late peephole optimizations
|
||||
/// on the DAG representation.
|
||||
void PPCDAGToDAGISel::PostprocessISelDAG() {
|
||||
|
@ -759,6 +759,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
|
||||
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
|
||||
case PPCISD::VPERM: return "PPCISD::VPERM";
|
||||
case PPCISD::CMPB: return "PPCISD::CMPB";
|
||||
case PPCISD::Hi: return "PPCISD::Hi";
|
||||
case PPCISD::Lo: return "PPCISD::Lo";
|
||||
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
|
||||
|
@ -61,6 +61,9 @@ namespace llvm {
|
||||
///
|
||||
VPERM,
|
||||
|
||||
/// The CMPB instruction (takes two operands of i32 or i64).
|
||||
CMPB,
|
||||
|
||||
/// Hi/Lo - These represent the high and low 16-bit parts of a global
|
||||
/// address respectively. These nodes have two operands, the first of
|
||||
/// which must be a TargetGlobalAddress, and the second of which must be a
|
||||
|
@ -591,6 +591,11 @@ def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
|
||||
"popcntd $rA, $rS", IIC_IntGeneral,
|
||||
[(set i64:$rA, (ctpop i64:$rS))]>;
|
||||
|
||||
let isCodeGenOnly = 1, isCommutable = 1 in
|
||||
def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
|
||||
"cmpb $rA, $rS, $rB", IIC_IntGeneral,
|
||||
[(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>;
|
||||
|
||||
// popcntw also does a population count on the high 32 bits (storing the
|
||||
// results in the high 32-bits of the output). We'll ignore that here (which is
|
||||
// safe because we never separately use the high part of the 64-bit registers).
|
||||
|
@ -118,6 +118,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
|
||||
|
||||
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
|
||||
|
||||
// DAG node for PPCISD::CMPB; it is matched by the selection patterns of the
// CMPB (i32) and CMPB8 (i64) instruction definitions.
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
|
||||
|
||||
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
|
||||
// amounts. These nodes are generated by the multi-precision shift code.
|
||||
def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
|
||||
@ -1865,6 +1867,11 @@ defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
|
||||
defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
|
||||
"extsh", "$rA, $rS", IIC_IntSimple,
|
||||
[(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
|
||||
|
||||
let isCommutable = 1 in
|
||||
def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
|
||||
"cmpb $rA, $rS, $rB", IIC_IntGeneral,
|
||||
[(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>;
|
||||
}
|
||||
let isCompare = 1, hasSideEffects = 0 in {
|
||||
def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
|
||||
|
@ -108,6 +108,7 @@ void PPCSubtarget::initializeEnvironment() {
|
||||
HasFPCVT = false;
|
||||
HasISEL = false;
|
||||
HasPOPCNTD = false;
|
||||
HasCMPB = false;
|
||||
HasLDBRX = false;
|
||||
IsBookE = false;
|
||||
HasOnlyMSYNC = false;
|
||||
|
@ -102,6 +102,7 @@ protected:
|
||||
bool HasFPCVT;
|
||||
bool HasISEL;
|
||||
bool HasPOPCNTD;
|
||||
bool HasCMPB;
|
||||
bool HasLDBRX;
|
||||
bool IsBookE;
|
||||
bool HasOnlyMSYNC;
|
||||
@ -220,6 +221,7 @@ public:
|
||||
bool hasMFOCRF() const { return HasMFOCRF; }
|
||||
bool hasISEL() const { return HasISEL; }
|
||||
bool hasPOPCNTD() const { return HasPOPCNTD; }
|
||||
bool hasCMPB() const { return HasCMPB; }
|
||||
bool hasLDBRX() const { return HasLDBRX; }
|
||||
bool isBookE() const { return IsBookE; }
|
||||
bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
|
||||
|
50
test/CodeGen/PowerPC/cmpb-ppc32.ll
Normal file
50
test/CodeGen/PowerPC/cmpb-ppc32.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-p:32:32-i64:64-n32"
|
||||
target triple = "powerpc-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define zeroext i16 @test16(i16 zeroext %x, i16 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i16 %y, %x
|
||||
%1 = and i16 %0, 255
|
||||
%cmp = icmp eq i16 %1, 0
|
||||
%cmp20 = icmp ult i16 %0, 256
|
||||
%conv25 = select i1 %cmp, i32 255, i32 0
|
||||
%conv27 = select i1 %cmp20, i32 65280, i32 0
|
||||
%or = or i32 %conv25, %conv27
|
||||
%conv29 = trunc i32 %or to i16
|
||||
ret i16 %conv29
|
||||
|
||||
; CHECK-LABEL: @test16
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: rlwinm 3, [[REG1]], 0, 16, 31
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i32 @test32(i32 %x, i32 %y) #0 {
|
||||
entry:
|
||||
%0 = xor i32 %y, %x
|
||||
%1 = and i32 %0, 255
|
||||
%cmp = icmp eq i32 %1, 0
|
||||
%2 = and i32 %0, 65280
|
||||
%cmp28 = icmp eq i32 %2, 0
|
||||
%3 = and i32 %0, 16711680
|
||||
%cmp34 = icmp eq i32 %3, 0
|
||||
%cmp40 = icmp ult i32 %0, 16777216
|
||||
%conv44 = select i1 %cmp, i32 255, i32 0
|
||||
%conv45 = select i1 %cmp28, i32 65280, i32 0
|
||||
%conv47 = select i1 %cmp34, i32 16711680, i32 0
|
||||
%conv50 = select i1 %cmp40, i32 -16777216, i32 0
|
||||
%or = or i32 %conv45, %conv50
|
||||
%or49 = or i32 %or, %conv44
|
||||
%or52 = or i32 %or49, %conv47
|
||||
ret i32 %or52
|
||||
|
||||
; CHECK-LABEL: @test32
|
||||
; CHECK: cmpb 3, 4, 3
|
||||
; CHECK-NOT: rlwinm
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
204
test/CodeGen/PowerPC/cmpb.ll
Normal file
204
test/CodeGen/PowerPC/cmpb.ll
Normal file
@ -0,0 +1,204 @@
|
||||
; RUN: llc -mcpu pwr7 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define zeroext i16 @test16(i16 zeroext %x, i16 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i16 %y, %x
|
||||
%1 = and i16 %0, 255
|
||||
%cmp = icmp eq i16 %1, 0
|
||||
%cmp20 = icmp ult i16 %0, 256
|
||||
%conv25 = select i1 %cmp, i32 255, i32 0
|
||||
%conv27 = select i1 %cmp20, i32 65280, i32 0
|
||||
%or = or i32 %conv25, %conv27
|
||||
%conv29 = trunc i32 %or to i16
|
||||
ret i16 %conv29
|
||||
|
||||
; CHECK-LABEL: @test16
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: rldicl 3, [[REG1]], 0, 48
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define zeroext i16 @test16p1(i16 zeroext %x, i16 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i16 %y, %x
|
||||
%1 = and i16 %0, 255
|
||||
%cmp = icmp eq i16 %1, 0
|
||||
%cmp20 = icmp ult i16 %0, 256
|
||||
%conv28 = select i1 %cmp, i32 5, i32 0
|
||||
%conv30 = select i1 %cmp20, i32 65280, i32 0
|
||||
%or = or i32 %conv28, %conv30
|
||||
%conv32 = trunc i32 %or to i16
|
||||
ret i16 %conv32
|
||||
|
||||
; CHECK-LABEL: @test16p1
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: andi. 3, [[REG1]], 65285
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define zeroext i16 @test16p2(i16 zeroext %x, i16 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i16 %y, %x
|
||||
%1 = and i16 %0, 255
|
||||
%cmp = icmp eq i16 %1, 0
|
||||
%cmp20 = icmp ult i16 %0, 256
|
||||
%conv28 = select i1 %cmp, i32 255, i32 0
|
||||
%conv30 = select i1 %cmp20, i32 1280, i32 0
|
||||
%or = or i32 %conv28, %conv30
|
||||
%conv32 = trunc i32 %or to i16
|
||||
ret i16 %conv32
|
||||
|
||||
; CHECK-LABEL: @test16p2
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: andi. 3, [[REG1]], 1535
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define zeroext i16 @test16p3(i16 zeroext %x, i16 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i16 %y, %x
|
||||
%1 = and i16 %0, 255
|
||||
%cmp = icmp eq i16 %1, 0
|
||||
%cmp20 = icmp ult i16 %0, 256
|
||||
%conv27 = select i1 %cmp, i32 255, i32 0
|
||||
%conv29 = select i1 %cmp20, i32 1024, i32 1280
|
||||
%or = or i32 %conv27, %conv29
|
||||
%conv31 = trunc i32 %or to i16
|
||||
ret i16 %conv31
|
||||
|
||||
; CHECK-LABEL: @test16p3
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 0, 55
|
||||
; CHECK: xori 3, [[REG2]], 1280
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define zeroext i32 @test32(i32 zeroext %x, i32 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i32 %y, %x
|
||||
%1 = and i32 %0, 255
|
||||
%cmp = icmp eq i32 %1, 0
|
||||
%2 = and i32 %0, 65280
|
||||
%cmp28 = icmp eq i32 %2, 0
|
||||
%3 = and i32 %0, 16711680
|
||||
%cmp34 = icmp eq i32 %3, 0
|
||||
%cmp40 = icmp ult i32 %0, 16777216
|
||||
%conv44 = select i1 %cmp, i32 255, i32 0
|
||||
%conv45 = select i1 %cmp28, i32 65280, i32 0
|
||||
%conv47 = select i1 %cmp34, i32 16711680, i32 0
|
||||
%conv50 = select i1 %cmp40, i32 -16777216, i32 0
|
||||
%or = or i32 %conv45, %conv50
|
||||
%or49 = or i32 %or, %conv44
|
||||
%or52 = or i32 %or49, %conv47
|
||||
ret i32 %or52
|
||||
|
||||
; CHECK-LABEL: @test32
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: rldicl 3, [[REG1]], 0, 32
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define zeroext i32 @test32p1(i32 zeroext %x, i32 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i32 %y, %x
|
||||
%1 = and i32 %0, 255
|
||||
%cmp = icmp eq i32 %1, 0
|
||||
%2 = and i32 %0, 65280
|
||||
%cmp28 = icmp eq i32 %2, 0
|
||||
%3 = and i32 %0, 16711680
|
||||
%cmp34 = icmp eq i32 %3, 0
|
||||
%cmp40 = icmp ult i32 %0, 16777216
|
||||
%conv47 = select i1 %cmp, i32 255, i32 0
|
||||
%conv48 = select i1 %cmp28, i32 65280, i32 0
|
||||
%conv50 = select i1 %cmp34, i32 458752, i32 0
|
||||
%conv53 = select i1 %cmp40, i32 -16777216, i32 0
|
||||
%or = or i32 %conv48, %conv53
|
||||
%or52 = or i32 %or, %conv47
|
||||
%or55 = or i32 %or52, %conv50
|
||||
ret i32 %or55
|
||||
|
||||
; CHECK-LABEL: @test32p1
|
||||
; CHECK: li [[REG1:[0-9]+]], 0
|
||||
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
|
||||
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
|
||||
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define zeroext i32 @test32p2(i32 zeroext %x, i32 zeroext %y) #0 {
|
||||
entry:
|
||||
%0 = xor i32 %y, %x
|
||||
%1 = and i32 %0, 255
|
||||
%cmp = icmp eq i32 %1, 0
|
||||
%2 = and i32 %0, 65280
|
||||
%cmp22 = icmp eq i32 %2, 0
|
||||
%cmp28 = icmp ult i32 %0, 16777216
|
||||
%conv32 = select i1 %cmp, i32 255, i32 0
|
||||
%conv33 = select i1 %cmp22, i32 65280, i32 0
|
||||
%conv35 = select i1 %cmp28, i32 -16777216, i32 0
|
||||
%or = or i32 %conv33, %conv35
|
||||
%or37 = or i32 %or, %conv32
|
||||
ret i32 %or37
|
||||
|
||||
; CHECK-LABEL: @test32p2
|
||||
; CHECK: li [[REG1:[0-9]+]], 0
|
||||
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
|
||||
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
|
||||
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test64(i64 %x, i64 %y) #0 {
|
||||
entry:
|
||||
%shr19 = lshr i64 %x, 56
|
||||
%conv21 = trunc i64 %shr19 to i32
|
||||
%shr43 = lshr i64 %y, 56
|
||||
%conv45 = trunc i64 %shr43 to i32
|
||||
%0 = xor i64 %y, %x
|
||||
%1 = and i64 %0, 255
|
||||
%cmp = icmp eq i64 %1, 0
|
||||
%2 = and i64 %0, 65280
|
||||
%cmp52 = icmp eq i64 %2, 0
|
||||
%3 = and i64 %0, 16711680
|
||||
%cmp58 = icmp eq i64 %3, 0
|
||||
%4 = and i64 %0, 4278190080
|
||||
%cmp64 = icmp eq i64 %4, 0
|
||||
%5 = and i64 %0, 1095216660480
|
||||
%cmp70 = icmp eq i64 %5, 0
|
||||
%6 = and i64 %0, 280375465082880
|
||||
%cmp76 = icmp eq i64 %6, 0
|
||||
%7 = and i64 %0, 71776119061217280
|
||||
%cmp82 = icmp eq i64 %7, 0
|
||||
%cmp88 = icmp eq i32 %conv21, %conv45
|
||||
%conv92 = select i1 %cmp, i64 255, i64 0
|
||||
%conv93 = select i1 %cmp52, i64 65280, i64 0
|
||||
%or = or i64 %conv92, %conv93
|
||||
%conv95 = select i1 %cmp58, i64 16711680, i64 0
|
||||
%or97 = or i64 %or, %conv95
|
||||
%conv98 = select i1 %cmp64, i64 4278190080, i64 0
|
||||
%or100 = or i64 %or97, %conv98
|
||||
%conv101 = select i1 %cmp70, i64 1095216660480, i64 0
|
||||
%or103 = or i64 %or100, %conv101
|
||||
%conv104 = select i1 %cmp76, i64 280375465082880, i64 0
|
||||
%or106 = or i64 %or103, %conv104
|
||||
%conv107 = select i1 %cmp82, i64 71776119061217280, i64 0
|
||||
%or109 = or i64 %or106, %conv107
|
||||
%conv110 = select i1 %cmp88, i64 -72057594037927936, i64 0
|
||||
%or112 = or i64 %or109, %conv110
|
||||
ret i64 %or112
|
||||
|
||||
; CHECK-LABEL: @test64
|
||||
; CHECK: cmpb 3, 3, 4
|
||||
; CHECK-NOT: rldicl
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
@ -499,6 +499,9 @@
|
||||
# CHECK: popcntd 2, 3
|
||||
0x7c 0x62 0x03 0xf4
|
||||
|
||||
# CHECK: cmpb 7, 21, 4
|
||||
0x7e 0xa7 0x23 0xf8
|
||||
|
||||
# CHECK: rlwinm 2, 3, 4, 5, 6
|
||||
0x54 0x62 0x21 0x4c
|
||||
|
||||
|
@ -612,7 +612,9 @@
|
||||
# CHECK-BE: cntlzw. 2, 3 # encoding: [0x7c,0x62,0x00,0x35]
|
||||
# CHECK-LE: cntlzw. 2, 3 # encoding: [0x35,0x00,0x62,0x7c]
|
||||
cntlzw. 2, 3
|
||||
# FIXME: cmpb 2, 3, 4
|
||||
cmpb 7, 21, 4
|
||||
# CHECK-BE: cmpb 7, 21, 4 # encoding: [0x7e,0xa7,0x23,0xf8]
|
||||
# CHECK-LE: cmpb 7, 21, 4 # encoding: [0xf8,0x23,0xa7,0x7e]
|
||||
# FIXME: popcntb 2, 3
|
||||
# CHECK-BE: popcntw 2, 3 # encoding: [0x7c,0x62,0x02,0xf4]
|
||||
# CHECK-LE: popcntw 2, 3 # encoding: [0xf4,0x02,0x62,0x7c]
|
||||
|
Loading…
Reference in New Issue
Block a user