mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-19 02:42:58 +00:00
CellSPU:
- Rename fcmp.ll test to fcmp32.ll, start adding new double tests to fcmp64.ll - Fix select_bits.ll test - Capitulate to the DAGCombiner and move i64 constant loads to instruction selection (SPUISelDAGtoDAG.cpp). <rant>DAGCombiner will insert all kinds of 64-bit optimizations after operation legalization occurs and now we have to do most of the work that instruction selection should be doing twice (once to determine if v2i64 build_vector can be handled by SelectCode(), which then runs all of the predicates a second time to select the necessary instructions.) But, CellSPU is a good citizen.</rant> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62990 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5bf4b7556f
commit
c9c8b2a804
@ -30,8 +30,8 @@
|
||||
// selb instruction definition for i64. Note that the selection mask is
|
||||
// a vector, produced by various forms of FSM:
|
||||
// Declared with an empty pattern list, so no selection pattern is attached to
// this record here.
def SELBr64_cond:
  SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
           [/* no pattern */]>;
|
||||
|
||||
// The generic i64 select pattern, which assumes that the comparison result
|
||||
// is in a 32-bit register that contains a select mask pattern (i.e., gather
|
||||
|
@ -254,26 +254,56 @@ public:
|
||||
/// getSmallIPtrImm - Return a target constant of pointer type.
|
||||
inline SDValue getSmallIPtrImm(unsigned Imm) {
  // Build the target constant using the pointer type reported by the SPU
  // target lowering info.
  return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
}
|
||||
|
||||
/// emitBuildVector - Select the given build_vector, either directly or via
/// a constant pool load.
///
/// If one of the SPU immediate-form helpers (get_vec_i16imm, get_ILHUvec_imm,
/// get_vec_u18imm, get_v4i32_imm) accepts the vector for its element type,
/// the node is handed to Select() unchanged. Otherwise the operands are
/// collected into a ConstantVector, placed in the constant pool, and a load
/// of that pool entry is passed to SelectCode().
///
/// \param build_vec the build_vector value; on the constant pool path every
///        operand must be a ConstantSDNode.
/// \return whatever Select() / SelectCode() returns for the chosen node.
SDNode *emitBuildVector(SDValue build_vec) {
  MVT vecVT = build_vec.getValueType();
  SDNode *bvNode = build_vec.getNode();
  bool canBeSelected = false;

  // Check to see if this vector can be represented as a CellSPU immediate
  // constant.
  if (vecVT == MVT::v8i16) {
    if (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0) {
      canBeSelected = true;
    }
  } else if (vecVT == MVT::v4i32) {
    if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
        || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
        || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
        || (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0)) {
      canBeSelected = true;
    }
  } else if (vecVT == MVT::v2i64) {
    if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
        || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
        || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)) {
      canBeSelected = true;
    }
  }

  if (canBeSelected) {
    return Select(build_vec);
  }

  // No, need to emit a constant pool spill:
  std::vector<Constant*> CV;

  for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
    // cast<> (rather than dyn_cast<>) because the result is dereferenced
    // unconditionally: a non-constant operand is a caller error and should
    // trip the cast assertion instead of dereferencing a null pointer.
    ConstantSDNode *V = cast<ConstantSDNode>(build_vec.getOperand(i));
    CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
  }

  Constant *CP = ConstantVector::get(CV);
  SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
  // The pool node's getAlignment() value is used as a shift count here.
  unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  SDValue CGPoolOffset =
    SPU::LowerConstantPool(CPIdx, *CurDAG,
                           SPUtli.getSPUTargetMachine());
  return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
                                    CurDAG->getEntryNode(), CGPoolOffset,
                                    PseudoSourceValue::getConstantPool(), 0,
                                    false, Alignment));
}
|
||||
|
||||
/// Select - Convert the specified operand from a target-independent to a
|
||||
@ -289,6 +319,9 @@ public:
|
||||
//! Emit the instruction sequence for i64 sra
|
||||
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
|
||||
|
||||
//! Emit the necessary sequence for loading i64 constants:
|
||||
SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
|
||||
|
||||
//! Returns true if the address N is an A-form (local store) address
|
||||
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
|
||||
SDValue &Index);
|
||||
@ -652,7 +685,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
|
||||
if (N->isMachineOpcode()) {
|
||||
return NULL; // Already selected.
|
||||
} else if (Opc == ISD::FrameIndex) {
|
||||
}
|
||||
|
||||
if (Opc == ISD::FrameIndex) {
|
||||
int FI = cast<FrameIndexSDNode>(N)->getIndex();
|
||||
SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
|
||||
SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
|
||||
@ -669,6 +704,11 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
TFI, Imm0), 0);
|
||||
n_ops = 2;
|
||||
}
|
||||
} else if (Opc == ISD::Constant && OpVT == MVT::i64) {
|
||||
// Catch the i64 constants that end up here. Note: The backend doesn't
|
||||
// attempt to legalize the constant (it's useless because DAGCombiner
|
||||
// will insert 64-bit constants and we can't stop it).
|
||||
return SelectI64Constant(Op, OpVT);
|
||||
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
|
||||
&& OpVT == MVT::i64) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
@ -745,27 +785,38 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||
} else if (Opc == ISD::TRUNCATE) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
|
||||
&& OpVT == MVT::i32
|
||||
&& Op0.getValueType() == MVT::i64) {
|
||||
// Catch the (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32 to
|
||||
// take advantage of the fact that the upper 32 bits are in the
|
||||
// i32 preferred slot and avoid all kinds of other shuffle gymnastics:
|
||||
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
|
||||
if (CN != 0) {
|
||||
unsigned shift_amt = unsigned(CN->getZExtValue());
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
|
||||
if (shift_amt >= 32) {
|
||||
SDNode *hi32 =
|
||||
CurDAG->getTargetNode(SPU::ORr32_r64, OpVT, Op0.getOperand(0));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||
shift_amt -= 32;
|
||||
if (shift_amt > 0) {
|
||||
// Take care of the additional shift, if present:
|
||||
SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
|
||||
unsigned Opc = SPU::ROTMAIr32_i32;
|
||||
|
||||
if (Op0.getOpcode() == ISD::SRL)
|
||||
Opc = SPU::ROTMr32;
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
hi32 = CurDAG->getTargetNode(Opc, OpVT, SDValue(hi32, 0), shift);
|
||||
}
|
||||
|
||||
return hi32;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (Opc == ISD::SHL) {
|
||||
if (OpVT == MVT::i64) {
|
||||
return SelectSHLi64(Op, OpVT);
|
||||
@ -1046,6 +1097,70 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
|
||||
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
|
||||
}
|
||||
|
||||
/*!
|
||||
Do the necessary magic necessary to load a i64 constant
|
||||
*/
|
||||
SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
|
||||
MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
|
||||
SDValue i64vec =
|
||||
SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue());
|
||||
|
||||
// Here's where it gets interesting, because we have to parse out the
|
||||
// subtree handed back in i64vec:
|
||||
|
||||
if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
|
||||
// The degenerate case where the upper and lower bits in the splat are
|
||||
// identical:
|
||||
SDValue Op0 = i64vec.getOperand(0);
|
||||
ReplaceUses(i64vec, Op0);
|
||||
|
||||
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT,
|
||||
SDValue(emitBuildVector(Op0), 0));
|
||||
} else if (i64vec.getOpcode() == SPUISD::SHUFB) {
|
||||
SDValue lhs = i64vec.getOperand(0);
|
||||
SDValue rhs = i64vec.getOperand(1);
|
||||
SDValue shufmask = i64vec.getOperand(2);
|
||||
|
||||
if (lhs.getOpcode() == ISD::BIT_CONVERT) {
|
||||
ReplaceUses(lhs, lhs.getOperand(0));
|
||||
lhs = lhs.getOperand(0);
|
||||
}
|
||||
|
||||
SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
|
||||
? lhs.getNode()
|
||||
: emitBuildVector(lhs));
|
||||
|
||||
if (rhs.getOpcode() == ISD::BIT_CONVERT) {
|
||||
ReplaceUses(rhs, rhs.getOperand(0));
|
||||
rhs = rhs.getOperand(0);
|
||||
}
|
||||
|
||||
SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
|
||||
? rhs.getNode()
|
||||
: emitBuildVector(rhs));
|
||||
|
||||
if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
|
||||
ReplaceUses(shufmask, shufmask.getOperand(0));
|
||||
shufmask = shufmask.getOperand(0);
|
||||
}
|
||||
|
||||
SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
|
||||
? shufmask.getNode()
|
||||
: emitBuildVector(shufmask));
|
||||
|
||||
SDNode *shufNode =
|
||||
Select(CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
|
||||
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
|
||||
SDValue(shufMaskNode, 0)));
|
||||
|
||||
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(shufNode, 0));
|
||||
} else {
|
||||
cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
/// createSPUISelDag - This pass converts a legalized DAG into a
|
||||
/// SPU-specific DAG, ready for instruction scheduling.
|
||||
///
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "SPUFrameInfo.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/VectorExtras.h"
|
||||
#include "llvm/CallingConv.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
@ -79,6 +80,43 @@ namespace {
|
||||
return retval;
|
||||
}
|
||||
|
||||
//! Expand a library call into an actual call DAG node
|
||||
/*!
|
||||
\note
|
||||
This code is taken from SelectionDAGLegalize, since it is not exposed as
|
||||
part of the LLVM SelectionDAG API.
|
||||
*/
|
||||
|
||||
SDValue
|
||||
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
|
||||
bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
|
||||
// The input chain to this libcall is the entry node of the function.
|
||||
// Legalizing the call will automatically add the previous call to the
|
||||
// dependence.
|
||||
SDValue InChain = DAG.getEntryNode();
|
||||
|
||||
TargetLowering::ArgListTy Args;
|
||||
TargetLowering::ArgListEntry Entry;
|
||||
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
|
||||
MVT ArgVT = Op.getOperand(i).getValueType();
|
||||
const Type *ArgTy = ArgVT.getTypeForMVT();
|
||||
Entry.Node = Op.getOperand(i);
|
||||
Entry.Ty = ArgTy;
|
||||
Entry.isSExt = isSigned;
|
||||
Entry.isZExt = !isSigned;
|
||||
Args.push_back(Entry);
|
||||
}
|
||||
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
|
||||
TLI.getPointerTy());
|
||||
|
||||
// Splice the libcall in wherever FindInputOutputChains tells us to.
|
||||
const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
|
||||
std::pair<SDValue, SDValue> CallInfo =
|
||||
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
|
||||
CallingConv::C, false, Callee, Args, DAG);
|
||||
|
||||
return CallInfo.first;
|
||||
}
|
||||
}
|
||||
|
||||
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
@ -113,7 +151,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
|
||||
|
||||
// SPU constant load actions are custom lowered:
|
||||
setOperationAction(ISD::Constant, MVT::i64, Custom);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
|
||||
|
||||
@ -128,10 +165,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
|
||||
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
|
||||
|
||||
// SMUL_LOHI, UMUL_LOHI are not legal for Cell:
|
||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
||||
|
||||
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
|
||||
MVT StoreVT = (MVT::SimpleValueType) stype;
|
||||
setTruncStoreAction(VT, StoreVT, Expand);
|
||||
@ -179,16 +212,14 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
||||
|
||||
// If we're enabling GP optimizations, use hardware square root
|
||||
// Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
|
||||
// for f32!)
|
||||
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
||||
|
||||
// Make sure that DAGCombine doesn't insert illegal 64-bit constants
|
||||
setOperationAction(ISD::FABS, MVT::f64, Custom);
|
||||
|
||||
// SPU can do rotate right and left, so legalize it... but customize for i8
|
||||
// because instructions don't exist.
|
||||
|
||||
@ -254,22 +285,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
// Custom lower i128 -> i64 truncates
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
|
||||
|
||||
// SPU has a legal FP -> signed INT instruction
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
|
||||
// SPU has a legal FP -> signed INT instruction for f32, but for f64, need
|
||||
// to expand to a libcall, hence the custom lowering:
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
|
||||
|
||||
// FDIV on SPU requires custom lowering
|
||||
setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
|
||||
setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
|
||||
|
||||
// SPU has [U|S]INT_TO_FP
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
|
||||
|
||||
@ -338,24 +368,23 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
MVT VT = (MVT::SimpleValueType)i;
|
||||
|
||||
// add/sub are legal for all supported vector VT's.
|
||||
setOperationAction(ISD::ADD , VT, Legal);
|
||||
setOperationAction(ISD::SUB , VT, Legal);
|
||||
setOperationAction(ISD::ADD, VT, Legal);
|
||||
setOperationAction(ISD::SUB, VT, Legal);
|
||||
// mul has to be custom lowered.
|
||||
// TODO: v2i64 vector multiply
|
||||
setOperationAction(ISD::MUL , VT, Legal);
|
||||
setOperationAction(ISD::MUL, VT, Legal);
|
||||
|
||||
setOperationAction(ISD::AND , VT, Legal);
|
||||
setOperationAction(ISD::OR , VT, Legal);
|
||||
setOperationAction(ISD::XOR , VT, Legal);
|
||||
setOperationAction(ISD::LOAD , VT, Legal);
|
||||
setOperationAction(ISD::SELECT, VT, Legal);
|
||||
setOperationAction(ISD::STORE, VT, Legal);
|
||||
setOperationAction(ISD::AND, VT, Legal);
|
||||
setOperationAction(ISD::OR, VT, Legal);
|
||||
setOperationAction(ISD::XOR, VT, Legal);
|
||||
setOperationAction(ISD::LOAD, VT, Legal);
|
||||
setOperationAction(ISD::SELECT, VT, Legal);
|
||||
setOperationAction(ISD::STORE, VT, Legal);
|
||||
|
||||
// These operations need to be expanded:
|
||||
setOperationAction(ISD::SDIV, VT, Expand);
|
||||
setOperationAction(ISD::SREM, VT, Expand);
|
||||
setOperationAction(ISD::UDIV, VT, Expand);
|
||||
setOperationAction(ISD::UREM, VT, Expand);
|
||||
setOperationAction(ISD::SDIV, VT, Expand);
|
||||
setOperationAction(ISD::SREM, VT, Expand);
|
||||
setOperationAction(ISD::UDIV, VT, Expand);
|
||||
setOperationAction(ISD::UREM, VT, Expand);
|
||||
|
||||
// Custom lower build_vector, constant pool spills, insert and
|
||||
// extract vector elements:
|
||||
@ -866,31 +895,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Custom lower i64 integer constants
|
||||
/*!
|
||||
This code inserts all of the necessary juggling that needs to occur to load
|
||||
a 64-bit constant into a register.
|
||||
*/
|
||||
static SDValue
|
||||
LowerConstant(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT VT = Op.getValueType();
|
||||
|
||||
if (VT == MVT::i64) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
|
||||
SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
|
||||
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
|
||||
} else {
|
||||
cerr << "LowerConstant: unhandled constant type "
|
||||
<< VT.getMVTString()
|
||||
<< "\n";
|
||||
abort();
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Custom lower double precision floating point constants
|
||||
static SDValue
|
||||
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
|
||||
@ -1564,7 +1568,7 @@ static bool isConstantSplat(const uint64_t Bits128[2],
|
||||
|
||||
//! Lower a BUILD_VECTOR instruction creatively:
|
||||
SDValue
|
||||
SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT VT = Op.getValueType();
|
||||
// If this is a vector of constants or undefs, get the bits. A bit in
|
||||
// UndefBits is set if the corresponding element of the vector is an
|
||||
@ -1588,7 +1592,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
abort();
|
||||
/*NOTREACHED*/
|
||||
case MVT::v4f32: {
|
||||
uint32_t Value32 = SplatBits;
|
||||
uint32_t Value32 = uint32_t(SplatBits);
|
||||
assert(SplatSize == 4
|
||||
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
|
||||
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
|
||||
@ -1598,7 +1602,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
break;
|
||||
}
|
||||
case MVT::v2f64: {
|
||||
uint64_t f64val = SplatBits;
|
||||
uint64_t f64val = uint64_t(SplatBits);
|
||||
assert(SplatSize == 8
|
||||
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
|
||||
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
|
||||
@ -1638,95 +1642,101 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
|
||||
}
|
||||
case MVT::v2i64: {
|
||||
uint64_t val = SplatBits;
|
||||
uint32_t upper = uint32_t(val >> 32);
|
||||
uint32_t lower = uint32_t(val);
|
||||
|
||||
if (upper == lower) {
|
||||
// Magic constant that can be matched by IL, ILA, et. al.
|
||||
SDValue Val = DAG.getTargetConstant(val, MVT::i64);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
|
||||
} else {
|
||||
SDValue LO32;
|
||||
SDValue HI32;
|
||||
SmallVector<SDValue, 16> ShufBytes;
|
||||
SDValue Result;
|
||||
bool upper_special, lower_special;
|
||||
|
||||
// NOTE: This code creates common-case shuffle masks that can be easily
|
||||
// detected as common expressions. It is not attempting to create highly
|
||||
// specialized masks to replace any and all 0's, 0xff's and 0x80's.
|
||||
|
||||
// Detect if the upper or lower half is a special shuffle mask pattern:
|
||||
upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
|
||||
lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
|
||||
|
||||
// Create lower vector if not a special pattern
|
||||
if (!lower_special) {
|
||||
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
|
||||
LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
LO32C, LO32C, LO32C, LO32C));
|
||||
}
|
||||
|
||||
// Create upper vector if not a special pattern
|
||||
if (!upper_special) {
|
||||
SDValue HI32C = DAG.getConstant(upper, MVT::i32);
|
||||
HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
HI32C, HI32C, HI32C, HI32C));
|
||||
}
|
||||
|
||||
// If either upper or lower are special, then the two input operands are
|
||||
// the same (basically, one of them is a "don't care")
|
||||
if (lower_special)
|
||||
LO32 = HI32;
|
||||
if (upper_special)
|
||||
HI32 = LO32;
|
||||
if (lower_special && upper_special) {
|
||||
// Unhappy situation... both upper and lower are special, so punt with
|
||||
// a target constant:
|
||||
SDValue Zero = DAG.getConstant(0, MVT::i32);
|
||||
HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
|
||||
Zero, Zero);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
uint64_t val = 0;
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
SDValue V;
|
||||
bool process_upper, process_lower;
|
||||
val <<= 8;
|
||||
process_upper = (upper_special && (i & 1) == 0);
|
||||
process_lower = (lower_special && (i & 1) == 1);
|
||||
|
||||
if (process_upper || process_lower) {
|
||||
if ((process_upper && upper == 0)
|
||||
|| (process_lower && lower == 0))
|
||||
val |= 0x80;
|
||||
else if ((process_upper && upper == 0xffffffff)
|
||||
|| (process_lower && lower == 0xffffffff))
|
||||
val |= 0xc0;
|
||||
else if ((process_upper && upper == 0x80000000)
|
||||
|| (process_lower && lower == 0x80000000))
|
||||
val |= (j == 0 ? 0xe0 : 0x80);
|
||||
} else
|
||||
val |= i * 4 + j + ((i & 1) * 16);
|
||||
}
|
||||
|
||||
ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
|
||||
}
|
||||
|
||||
return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size()));
|
||||
}
|
||||
return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Build the DAG for a v2i64 vector whose elements all equal SplatVal
/*!
 \param OpVT     result vector type (callers in this file pass a
                 two-element i64 vector type)
 \param DAG      selection DAG the nodes are created in
 \param SplatVal 64-bit value to replicate

 \return If the upper and lower 32-bit halves of SplatVal are equal, a
 BIT_CONVERT to OpVT of a v4i32 BUILD_VECTOR holding that half four times.
 Otherwise a SPUISD::SHUFB node of type OpVT whose first two operands are
 v4i32 splats of the upper and lower halves and whose third operand is a
 v4i32 BUILD_VECTOR holding the shuffle control bytes.
 */
SDValue
SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
  const uint32_t upper = uint32_t(SplatVal >> 32);
  const uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   Val, Val, Val, Val));
  }

  SDValue LO32;
  SDValue HI32;
  SmallVector<SDValue, 16> ShufBytes;

  // NOTE: This code creates common-case shuffle masks that can be easily
  // detected as common expressions. It is not attempting to create highly
  // specialized masks to replace any and all 0's, 0xff's and 0x80's.

  // Detect if the upper or lower half is a special shuffle mask pattern:
  const bool upper_special =
    (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
  const bool lower_special =
    (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

  // Create lower vector if not a special pattern
  if (!lower_special) {
    SDValue LO32C = DAG.getConstant(lower, MVT::i32);
    LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   LO32C, LO32C, LO32C, LO32C));
  }

  // Create upper vector if not a special pattern
  if (!upper_special) {
    SDValue HI32C = DAG.getConstant(upper, MVT::i32);
    HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   HI32C, HI32C, HI32C, HI32C));
  }

  // If either upper or lower are special, then the two input operands are
  // the same (basically, one of them is a "don't care")
  if (lower_special)
    LO32 = HI32;
  if (upper_special)
    HI32 = LO32;
  if (lower_special && upper_special) {
    // Both halves are special, so every control byte below is one of the
    // special codes and neither operand's contents are referenced by index.
    // Use an all-zero v4i32 vector for both operands.
    SDValue Zero = DAG.getConstant(0, MVT::i32);
    HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                              Zero, Zero);
  }

  // Build the four 32-bit words of the shuffle control vector. Even words
  // describe the upper half of each i64 element, odd words the lower half.
  for (int i = 0; i < 4; ++i) {
    const bool is_upper_word = (i & 1) == 0;
    const uint32_t half = is_upper_word ? upper : lower;
    const bool half_special = is_upper_word ? upper_special : lower_special;
    uint64_t val = 0;

    for (int j = 0; j < 4; ++j) {
      val <<= 8;

      if (!half_special) {
        // Plain byte index: bytes for odd words are offset by 16 so that
        // they index into the second SHUFB operand (LO32).
        val |= i * 4 + j + ((i & 1) * 16);
      } else if (half == 0) {
        // Control byte that produces 0x00 (per the SPU shufb definition).
        val |= 0x80;
      } else if (half == 0xffffffff) {
        // Control byte that produces 0xff.
        val |= 0xc0;
      } else if (half == 0x80000000) {
        // 0xe0 produces 0x80 for the most significant byte; the remaining
        // three bytes are zero.
        val |= (j == 0 ? 0xe0 : 0x80);
      }
    }

    ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
  }

  return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                 &ShufBytes[0], ShufBytes.size()));
}
|
||||
|
||||
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
|
||||
/// which the Cell can operate. The code inspects V3 to ascertain whether the
|
||||
/// permutation vector, V3, is monotonically increasing with one "exception"
|
||||
@ -2384,81 +2394,180 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Lower ISD::FABS
|
||||
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
|
||||
/*!
|
||||
DAGCombine does the same basic reduction: convert the double to i64 and mask
|
||||
off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
|
||||
CellSPU has to legalize. Hence, the custom lowering.
|
||||
f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
|
||||
All conversions to i64 are expanded to a libcall.
|
||||
*/
|
||||
|
||||
static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
|
||||
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
|
||||
SPUTargetLowering &TLI) {
|
||||
MVT OpVT = Op.getValueType();
|
||||
MVT IntVT(MVT::i64);
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
MVT Op0VT = Op0.getValueType();
|
||||
|
||||
assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
|
||||
if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
|
||||
|| OpVT == MVT::i64) {
|
||||
// Convert f32 / f64 to i32 / i64 via libcall.
|
||||
RTLIB::Libcall LC =
|
||||
(Op.getOpcode() == ISD::FP_TO_SINT)
|
||||
? RTLIB::getFPTOSINT(Op0VT, OpVT)
|
||||
: RTLIB::getFPTOUINT(Op0VT, OpVT);
|
||||
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
|
||||
SDValue Dummy;
|
||||
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
|
||||
}
|
||||
|
||||
SDValue iABS =
|
||||
DAG.getNode(ISD::AND, IntVT,
|
||||
DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
|
||||
DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
|
||||
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
|
||||
/*!
|
||||
i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
|
||||
All conversions from i64 are expanded to a libcall.
|
||||
*/
|
||||
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
|
||||
SPUTargetLowering &TLI) {
|
||||
MVT OpVT = Op.getValueType();
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
MVT Op0VT = Op0.getValueType();
|
||||
|
||||
if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
|
||||
|| Op0VT == MVT::i64) {
|
||||
// Convert i32, i64 to f64 via libcall:
|
||||
RTLIB::Libcall LC =
|
||||
(Op.getOpcode() == ISD::SINT_TO_FP)
|
||||
? RTLIB::getSINTTOFP(Op0VT, OpVT)
|
||||
: RTLIB::getUINTTOFP(Op0VT, OpVT);
|
||||
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
|
||||
SDValue Dummy;
|
||||
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Lower ISD::SETCC
|
||||
/*!
|
||||
This handles MVT::f64 (double floating point) condition lowering
|
||||
*/
|
||||
|
||||
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
|
||||
const TargetLowering &TLI) {
|
||||
CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
|
||||
assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
|
||||
|
||||
SDValue lhs = Op.getOperand(0);
|
||||
SDValue rhs = Op.getOperand(1);
|
||||
CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2));
|
||||
MVT lhsVT = lhs.getValueType();
|
||||
SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
|
||||
|
||||
assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
|
||||
assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
|
||||
|
||||
MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
|
||||
APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
|
||||
MVT IntVT(MVT::i64);
|
||||
|
||||
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
|
||||
// selected to a NOP:
|
||||
SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs);
|
||||
SDValue lhsHi32 =
|
||||
DAG.getNode(ISD::TRUNCATE, MVT::i32,
|
||||
DAG.getNode(ISD::SRL, IntVT,
|
||||
i64lhs, DAG.getConstant(32, MVT::i32)));
|
||||
SDValue lhsHi32abs =
|
||||
DAG.getNode(ISD::AND, MVT::i32,
|
||||
lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
|
||||
SDValue lhsLo32 =
|
||||
DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs);
|
||||
|
||||
// SETO and SETUO only use the lhs operand:
|
||||
if (CC->get() == ISD::SETO) {
|
||||
// Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
|
||||
// SETUO
|
||||
APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
|
||||
return DAG.getNode(ISD::XOR, ccResultVT,
|
||||
DAG.getSetCC(ccResultVT,
|
||||
lhs, DAG.getConstantFP(0.0, lhsVT),
|
||||
ISD::SETUO),
|
||||
DAG.getConstant(ccResultAllOnes, ccResultVT));
|
||||
} else if (CC->get() == ISD::SETUO) {
|
||||
// Evaluates to true if Op0 is [SQ]NaN
|
||||
return DAG.getNode(ISD::AND, ccResultVT,
|
||||
DAG.getSetCC(ccResultVT,
|
||||
lhsHi32abs,
|
||||
DAG.getConstant(0x7ff00000, MVT::i32),
|
||||
ISD::SETGE),
|
||||
DAG.getSetCC(ccResultVT,
|
||||
lhsLo32,
|
||||
DAG.getConstant(0, MVT::i32),
|
||||
ISD::SETGT));
|
||||
}
|
||||
|
||||
SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
|
||||
SDValue rhsHi32 =
|
||||
DAG.getNode(ISD::TRUNCATE, MVT::i32,
|
||||
DAG.getNode(ISD::SRL, IntVT,
|
||||
i64rhs, DAG.getConstant(32, MVT::i32)));
|
||||
|
||||
// If a value is negative, subtract from the sign magnitude constant:
|
||||
SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
|
||||
|
||||
// Convert the sign-magnitude representation into 2's complement:
|
||||
SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
|
||||
lhsHi32, DAG.getConstant(31, MVT::i32));
|
||||
SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs);
|
||||
SDValue lhsSelect =
|
||||
DAG.getNode(ISD::SELECT, IntVT,
|
||||
lhsSelectMask, lhsSignMag2TC, i64lhs);
|
||||
|
||||
SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
|
||||
rhsHi32, DAG.getConstant(31, MVT::i32));
|
||||
SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs);
|
||||
SDValue rhsSelect =
|
||||
DAG.getNode(ISD::SELECT, IntVT,
|
||||
rhsSelectMask, rhsSignMag2TC, i64rhs);
|
||||
|
||||
unsigned compareOp;
|
||||
|
||||
switch (CC->get()) {
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETONE:
|
||||
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
|
||||
abort();
|
||||
break;
|
||||
case ISD::SETO: {
|
||||
SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
|
||||
SDValue i64lhs =
|
||||
DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
|
||||
|
||||
return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
|
||||
}
|
||||
case ISD::SETUO: {
|
||||
SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
|
||||
SDValue i64lhs =
|
||||
DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
|
||||
|
||||
return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
|
||||
}
|
||||
case ISD::SETUEQ:
|
||||
compareOp = ISD::SETEQ; break;
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETUGT:
|
||||
compareOp = ISD::SETGT; break;
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETUGE:
|
||||
compareOp = ISD::SETGE; break;
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETULT:
|
||||
compareOp = ISD::SETLT; break;
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETULE:
|
||||
compareOp = ISD::SETLE; break;
|
||||
case ISD::SETUNE:
|
||||
case ISD::SETONE:
|
||||
compareOp = ISD::SETNE; break;
|
||||
default:
|
||||
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
SDValue result =
|
||||
DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp);
|
||||
|
||||
if ((CC->get() & 0x8) == 0) {
|
||||
// Ordered comparison:
|
||||
SDValue lhsNaN = DAG.getSetCC(ccResultVT,
|
||||
lhs, DAG.getConstantFP(0.0, MVT::f64),
|
||||
ISD::SETO);
|
||||
SDValue rhsNaN = DAG.getSetCC(ccResultVT,
|
||||
rhs, DAG.getConstantFP(0.0, MVT::f64),
|
||||
ISD::SETO);
|
||||
SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN);
|
||||
|
||||
result = DAG.getNode(ISD::AND, ccResultVT, ordered, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
//! Lower ISD::SELECT_CC
|
||||
@ -2566,8 +2675,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
|
||||
case ISD::JumpTable:
|
||||
return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
|
||||
case ISD::Constant:
|
||||
return LowerConstant(Op, DAG);
|
||||
case ISD::ConstantFP:
|
||||
return LowerConstantFP(Op, DAG);
|
||||
case ISD::FORMAL_ARGUMENTS:
|
||||
@ -2590,12 +2697,17 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
break;
|
||||
}
|
||||
|
||||
case ISD::FABS:
|
||||
return LowerFABS(Op, DAG);
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
return LowerFP_TO_INT(Op, DAG, *this);
|
||||
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
return LowerINT_TO_FP(Op, DAG, *this);
|
||||
|
||||
// Vector-related lowering.
|
||||
case ISD::BUILD_VECTOR:
|
||||
return SPU::LowerBUILD_VECTOR(Op, DAG);
|
||||
return LowerBUILD_VECTOR(Op, DAG);
|
||||
case ISD::SCALAR_TO_VECTOR:
|
||||
return LowerSCALAR_TO_VECTOR(Op, DAG);
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
|
@ -61,7 +61,7 @@ namespace llvm {
|
||||
};
|
||||
}
|
||||
|
||||
//! Utility functions specific to CellSPU-only:
|
||||
//! Utility functions specific to CellSPU:
|
||||
namespace SPU {
|
||||
SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
|
||||
MVT ValueType);
|
||||
@ -78,7 +78,7 @@ namespace llvm {
|
||||
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
|
||||
const SPUTargetMachine &TM);
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat);
|
||||
|
||||
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
|
||||
SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
|
||||
|
@ -155,13 +155,13 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
case SPU::ORr8_r32:
|
||||
case SPU::ORr32_r16:
|
||||
case SPU::ORr32_r8:
|
||||
case SPU::ORr32_r64:
|
||||
case SPU::ORr16_r64:
|
||||
case SPU::ORr8_r64:
|
||||
case SPU::ORr64_r32:
|
||||
case SPU::ORr64_r16:
|
||||
case SPU::ORr64_r8:
|
||||
*/
|
||||
case SPU::ORr64_r32:
|
||||
case SPU::ORr32_r64:
|
||||
case SPU::ORf32_r32:
|
||||
case SPU::ORr32_f32:
|
||||
case SPU::ORf64_r64:
|
||||
|
@ -1259,6 +1259,9 @@ multiclass BitwiseAnd
|
||||
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
// Could use v4i32, but won't for clarity
|
||||
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
@ -1525,17 +1528,17 @@ multiclass BitwiseOr
|
||||
// Conversion from R32C to register
|
||||
def r32_r16: ORCvtFormR32Reg<R16C>;
|
||||
def r32_r8: ORCvtFormR32Reg<R8C>;
|
||||
|
||||
// Conversion from register to R64C:
|
||||
def r32_r64: ORCvtFormR64Reg<R32C>;
|
||||
def r16_r64: ORCvtFormR64Reg<R16C>;
|
||||
def r8_r64: ORCvtFormR64Reg<R8C>;
|
||||
|
||||
// Conversion from R64C to register
|
||||
def r64_r32: ORCvtFormRegR64<R32C>;
|
||||
def r64_r16: ORCvtFormRegR64<R16C>;
|
||||
def r64_r8: ORCvtFormRegR64<R8C>;
|
||||
*/
|
||||
|
||||
// Conversion to register from R64C:
|
||||
def r32_r64: ORCvtFormR64Reg<R32C>;
|
||||
// def r16_r64: ORCvtFormR64Reg<R16C>;
|
||||
// def r8_r64: ORCvtFormR64Reg<R8C>;
|
||||
|
||||
// Conversion to R64C from register
|
||||
def r64_r32: ORCvtFormRegR64<R32C>;
|
||||
// def r64_r16: ORCvtFormRegR64<R16C>;
|
||||
// def r64_r8: ORCvtFormRegR64<R8C>;
|
||||
|
||||
// bitconvert patterns:
|
||||
def r32_f32: ORCvtFormR32Reg<R32FP,
|
||||
@ -1910,11 +1913,11 @@ class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
|
||||
IntegerOp, pattern>;
|
||||
|
||||
class SELBVecInst<ValueType vectype>:
|
||||
class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
|
||||
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
|
||||
(and (vnot (vectype VECREG:$rC)),
|
||||
(and (vnot_frag (vectype VECREG:$rC)),
|
||||
(vectype VECREG:$rA))))]>;
|
||||
|
||||
class SELBVecVCondInst<ValueType vectype>:
|
||||
@ -1947,7 +1950,7 @@ multiclass SelectBits
|
||||
def v16i8: SELBVecInst<v16i8>;
|
||||
def v8i16: SELBVecInst<v8i16>;
|
||||
def v4i32: SELBVecInst<v4i32>;
|
||||
def v2i64: SELBVecInst<v2i64>;
|
||||
def v2i64: SELBVecInst<v2i64, vnot_conv>;
|
||||
|
||||
def r128: SELBRegInst<GPRC>;
|
||||
def r64: SELBRegInst<R64C>;
|
||||
@ -4321,6 +4324,13 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
|
||||
(ANDfabsvec (v4f32 VECREG:$rA),
|
||||
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
|
||||
|
||||
def : Pat<(fabs R64FP:$rA),
|
||||
(ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
|
||||
|
||||
def : Pat<(fabs (v2f64 VECREG:$rA)),
|
||||
(ANDfabsvec (v2f64 VECREG:$rA),
|
||||
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Hint for branch instructions:
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1,22 +1,23 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep fceq %t1.s | count 1
|
||||
; RUN: grep fcmeq %t1.s | count 1
|
||||
;
|
||||
; This file includes standard floating point arithmetic instructions
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
; Exercise the floating point comparison operators for f32:
|
||||
|
||||
declare double @fabs(double)
|
||||
declare float @fabsf(float)
|
||||
|
||||
define i1 @fcmp_eq(float %arg1, float %arg2) {
|
||||
%A = fcmp oeq float %arg1, %arg2 ; <float> [#uses=1]
|
||||
%A = fcmp oeq float %arg1, %arg2
|
||||
ret i1 %A
|
||||
}
|
||||
|
||||
define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
|
||||
%A = call float @fabsf(float %arg1) ; <float> [#uses=1]
|
||||
%B = call float @fabsf(float %arg2) ; <float> [#uses=1]
|
||||
%C = fcmp oeq float %A, %B ; <float> [#uses=1]
|
||||
ret i1 %C
|
||||
%1 = call float @fabsf(float %arg1)
|
||||
%2 = call float @fabsf(float %arg2)
|
||||
%3 = fcmp oeq float %1, %2
|
||||
ret i1 %3
|
||||
}
|
7
test/CodeGen/CellSPU/fcmp64.ll
Normal file
7
test/CodeGen/CellSPU/fcmp64.ll
Normal file
@ -0,0 +1,7 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
|
||||
define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
|
||||
entry:
|
||||
%A = fcmp oeq double %arg1, %arg2
|
||||
ret i1 %A
|
||||
}
|
@ -1,9 +1,10 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep fsmbi %t1.s | count 2
|
||||
; RUN: grep fsmbi %t1.s | count 3
|
||||
; RUN: grep 32768 %t1.s | count 2
|
||||
; RUN: grep xor %t1.s | count 4
|
||||
; RUN: grep and %t1.s | count 4
|
||||
; RUN: grep andbi %t1.s | count 2
|
||||
; RUN: grep and %t1.s | count 5
|
||||
; RUN: grep andbi %t1.s | count 3
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
@ -33,11 +34,11 @@ declare double @fabs(double)
|
||||
declare float @fabsf(float)
|
||||
|
||||
define double @fabs_dp(double %X) {
|
||||
%Y = call double @fabs( double %X ) ; <double> [#uses=1]
|
||||
%Y = call double @fabs( double %X )
|
||||
ret double %Y
|
||||
}
|
||||
|
||||
define float @fabs_sp(float %X) {
|
||||
%Y = call float @fabsf( float %X ) ; <float> [#uses=1]
|
||||
%Y = call float @fabsf( float %X )
|
||||
ret float %Y
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep selb %t1.s | count 280
|
||||
; RUN: grep selb %t1.s | count 56
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
@ -9,7 +9,7 @@ target triple = "spu"
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%C = and <2 x i64> %rC, %rB
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %A, %rA
|
||||
@ -18,7 +18,7 @@ define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%C = and <2 x i64> %rB, %rC
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %A, %rA
|
||||
@ -27,7 +27,7 @@ define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %A, %rA
|
||||
%C = and <2 x i64> %rB, %rC
|
||||
@ -36,7 +36,7 @@ define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %A, %rA
|
||||
%C = and <2 x i64> %rC, %rB
|
||||
@ -45,7 +45,7 @@ define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%C = and <2 x i64> %rC, %rB
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %rA, %A
|
||||
@ -54,7 +54,7 @@ define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%C = and <2 x i64> %rB, %rC
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %rA, %A
|
||||
@ -63,7 +63,7 @@ define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %rA, %A
|
||||
%C = and <2 x i64> %rB, %rC
|
||||
@ -72,7 +72,7 @@ define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
|
||||
%B = and <2 x i64> %rA, %A
|
||||
%C = and <2 x i64> %rC, %rB
|
||||
@ -85,7 +85,7 @@ define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%C = and <4 x i32> %rC, %rB
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
|
||||
%B = and <4 x i32> %A, %rA
|
||||
@ -94,7 +94,7 @@ define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%C = and <4 x i32> %rB, %rC
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
|
||||
%B = and <4 x i32> %A, %rA
|
||||
@ -103,7 +103,7 @@ define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
|
||||
%B = and <4 x i32> %A, %rA
|
||||
%C = and <4 x i32> %rB, %rC
|
||||
@ -112,7 +112,7 @@ define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%B = and <4 x i32> %A, %rA
|
||||
%C = and <4 x i32> %rC, %rB
|
||||
@ -121,7 +121,7 @@ define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%C = and <4 x i32> %rC, %rB
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%B = and <4 x i32> %rA, %A
|
||||
@ -130,7 +130,7 @@ define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%C = and <4 x i32> %rB, %rC
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%B = and <4 x i32> %rA, %A
|
||||
@ -139,7 +139,7 @@ define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%B = and <4 x i32> %rA, %A
|
||||
%C = and <4 x i32> %rB, %rC
|
||||
@ -148,7 +148,7 @@ define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%B = and <4 x i32> %rA, %A
|
||||
%C = and <4 x i32> %rC, %rB
|
||||
@ -161,7 +161,7 @@ define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%C = and <8 x i16> %rC, %rB
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
@ -171,7 +171,7 @@ define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%C = and <8 x i16> %rB, %rC
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
@ -181,7 +181,7 @@ define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
%B = and <8 x i16> %A, %rA
|
||||
@ -191,7 +191,7 @@ define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
%B = and <8 x i16> %A, %rA
|
||||
@ -201,7 +201,7 @@ define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%C = and <8 x i16> %rC, %rB
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
@ -211,7 +211,7 @@ define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%C = and <8 x i16> %rB, %rC
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
@ -221,7 +221,7 @@ define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
%B = and <8 x i16> %rA, %A
|
||||
@ -231,7 +231,7 @@ define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
|
||||
i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
%B = and <8 x i16> %rA, %A
|
||||
@ -245,7 +245,7 @@ define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%C = and <16 x i8> %rC, %rB
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -257,7 +257,7 @@ define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%C = and <16 x i8> %rB, %rC
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -269,7 +269,7 @@ define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -281,7 +281,7 @@ define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -293,7 +293,7 @@ define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%C = and <16 x i8> %rC, %rB
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -305,7 +305,7 @@ define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%C = and <16 x i8> %rB, %rC
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -317,7 +317,7 @@ define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -329,7 +329,7 @@ define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
i8 -1, i8 -1, i8 -1, i8 -1,
|
||||
@ -345,7 +345,7 @@ define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%C = and i32 %rC, %rB
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %A, %rA
|
||||
@ -354,7 +354,7 @@ define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%C = and i32 %rB, %rC
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %A, %rA
|
||||
@ -363,7 +363,7 @@ define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %A, %rA
|
||||
%C = and i32 %rB, %rC
|
||||
@ -372,7 +372,7 @@ define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %A, %rA
|
||||
%C = and i32 %rC, %rB
|
||||
@ -381,7 +381,7 @@ define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%C = and i32 %rC, %rB
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %rA, %A
|
||||
@ -390,7 +390,7 @@ define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%C = and i32 %rB, %rC
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %rA, %A
|
||||
@ -399,7 +399,7 @@ define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %rA, %A
|
||||
%C = and i32 %rB, %rC
|
||||
@ -408,7 +408,7 @@ define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) {
|
||||
define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) {
|
||||
%A = xor i32 %rC, -1
|
||||
%B = and i32 %rA, %A
|
||||
%C = and i32 %rC, %rB
|
||||
@ -421,7 +421,7 @@ define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) {
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%C = and i16 %rC, %rB
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %A, %rA
|
||||
@ -430,7 +430,7 @@ define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%C = and i16 %rB, %rC
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %A, %rA
|
||||
@ -439,7 +439,7 @@ define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %A, %rA
|
||||
%C = and i16 %rB, %rC
|
||||
@ -448,7 +448,7 @@ define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %A, %rA
|
||||
%C = and i16 %rC, %rB
|
||||
@ -457,7 +457,7 @@ define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%C = and i16 %rC, %rB
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %rA, %A
|
||||
@ -466,7 +466,7 @@ define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%C = and i16 %rB, %rC
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %rA, %A
|
||||
@ -475,7 +475,7 @@ define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %rA, %A
|
||||
%C = and i16 %rB, %rC
|
||||
@ -484,7 +484,7 @@ define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) {
|
||||
define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) {
|
||||
%A = xor i16 %rC, -1
|
||||
%B = and i16 %rA, %A
|
||||
%C = and i16 %rC, %rB
|
||||
@ -497,7 +497,7 @@ define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) {
|
||||
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
; (or (and rC, rB), (and (not rC), rA))
|
||||
define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%C = and i8 %rC, %rB
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %A, %rA
|
||||
@ -506,7 +506,7 @@ define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and (not rC), rA))
|
||||
define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%C = and i8 %rB, %rC
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %A, %rA
|
||||
@ -515,7 +515,7 @@ define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rB, rC))
|
||||
define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %A, %rA
|
||||
%C = and i8 %rB, %rC
|
||||
@ -524,7 +524,7 @@ define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and (not rC), rA), (and rC, rB))
|
||||
define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %A, %rA
|
||||
%C = and i8 %rC, %rB
|
||||
@ -533,7 +533,7 @@ define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rC, rB), (and rA, (not rC)))
|
||||
define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%C = and i8 %rC, %rB
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %rA, %A
|
||||
@ -542,7 +542,7 @@ define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rB, rC), (and rA, (not rC)))
|
||||
define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%C = and i8 %rB, %rC
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %rA, %A
|
||||
@ -551,7 +551,7 @@ define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rB, rC))
|
||||
define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %rA, %A
|
||||
%C = and i8 %rB, %rC
|
||||
@ -560,7 +560,7 @@ define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) {
|
||||
}
|
||||
|
||||
; (or (and rA, (not rC)), (and rC, rB))
|
||||
define i8 @selb_i8_08(i8 %rA, i8 %rB, i8 %rC) {
|
||||
define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) {
|
||||
%A = xor i8 %rC, -1
|
||||
%B = and i8 %rA, %A
|
||||
%C = and i8 %rC, %rB
|
||||
|
@ -275,3 +275,9 @@ define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
|
||||
%2 = ashr i64 %arg1, %1
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define i32 @hi32_i64(i64 %arg) {
|
||||
%1 = lshr i64 %arg, 32
|
||||
%2 = trunc i64 %1 to i32
|
||||
ret i32 %2
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user